From 5249c263d2897920a8c2a211201e1f0fd5e44d43 Mon Sep 17 00:00:00 2001 From: atauzki Date: Fri, 13 Oct 2023 10:04:38 +0800 Subject: [PATCH 1/4] Port boost::regex as regex engine. This is a rebased commit. --- build/VS2017/Notepad2.vcxproj | 16 +- scintilla/include/boost/regex.hpp | 41 + scintilla/include/boost/regex/concepts.hpp | 1134 ++++++ scintilla/include/boost/regex/config.hpp | 480 +++ .../include/boost/regex/config/borland.hpp | 72 + .../include/boost/regex/config/cwchar.hpp | 207 ++ scintilla/include/boost/regex/icu.hpp | 30 + scintilla/include/boost/regex/mfc.hpp | 186 + .../include/boost/regex/pattern_except.hpp | 32 + .../boost/regex/pending/object_cache.hpp | 29 + .../boost/regex/pending/static_mutex.hpp | 182 + .../boost/regex/pending/unicode_iterator.hpp | 32 + .../include/boost/regex/regex_traits.hpp | 39 + scintilla/include/boost/regex/user.hpp | 95 + .../include/boost/regex/v5/basic_regex.hpp | 734 ++++ .../boost/regex/v5/basic_regex_creator.hpp | 1576 +++++++++ .../boost/regex/v5/basic_regex_parser.hpp | 3130 +++++++++++++++++ .../include/boost/regex/v5/c_regex_traits.hpp | 474 +++ .../boost/regex/v5/char_regex_traits.hpp | 59 + .../boost/regex/v5/cpp_regex_traits.hpp | 1040 ++++++ scintilla/include/boost/regex/v5/cregex.hpp | 195 + .../include/boost/regex/v5/error_type.hpp | 59 + scintilla/include/boost/regex/v5/icu.hpp | 1402 ++++++++ .../boost/regex/v5/iterator_category.hpp | 84 + .../boost/regex/v5/iterator_traits.hpp | 32 + .../include/boost/regex/v5/match_flags.hpp | 156 + .../include/boost/regex/v5/match_results.hpp | 667 ++++ .../boost/regex/v5/mem_block_cache.hpp | 173 + .../include/boost/regex/v5/object_cache.hpp | 160 + .../include/boost/regex/v5/pattern_except.hpp | 106 + .../include/boost/regex/v5/perl_matcher.hpp | 576 +++ .../boost/regex/v5/perl_matcher_common.hpp | 921 +++++ .../regex/v5/perl_matcher_non_recursive.hpp | 1874 ++++++++++ .../boost/regex/v5/primary_transform.hpp | 120 + scintilla/include/boost/regex/v5/regbase.hpp | 158 + scintilla/include/boost/regex/v5/regex.hpp | 106 + .../include/boost/regex/v5/regex_format.hpp | 1124 ++++++ .../include/boost/regex/v5/regex_fwd.hpp | 73 + .../include/boost/regex/v5/regex_grep.hpp | 98 + .../include/boost/regex/v5/regex_iterator.hpp | 173 + .../include/boost/regex/v5/regex_match.hpp | 92 + .../include/boost/regex/v5/regex_merge.hpp | 71 + .../boost/regex/v5/regex_raw_buffer.hpp | 213 ++ .../include/boost/regex/v5/regex_replace.hpp | 77 + .../include/boost/regex/v5/regex_search.hpp | 103 + .../include/boost/regex/v5/regex_split.hpp | 152 + .../boost/regex/v5/regex_token_iterator.hpp | 255 ++ .../include/boost/regex/v5/regex_traits.hpp | 130 + .../boost/regex/v5/regex_traits_defaults.hpp | 996 ++++++ .../boost/regex/v5/regex_workaround.hpp | 159 + scintilla/include/boost/regex/v5/states.hpp | 299 ++ .../include/boost/regex/v5/sub_match.hpp | 382 ++ .../include/boost/regex/v5/syntax_type.hpp | 105 + .../boost/regex/v5/u32regex_iterator.hpp | 177 + .../regex/v5/u32regex_token_iterator.hpp | 312 ++ .../boost/regex/v5/unicode_iterator.hpp | 862 +++++ .../boost/regex/v5/w32_regex_traits.hpp | 1311 +++++++ scintilla/include/boost/regex_fwd.hpp | 37 + scintilla/src/Document.cxx | 94 +- 59 files changed, 23660 insertions(+), 12 deletions(-) create mode 100644 scintilla/include/boost/regex.hpp create mode 100644 scintilla/include/boost/regex/concepts.hpp create mode 100644 scintilla/include/boost/regex/config.hpp create mode 100644 scintilla/include/boost/regex/config/borland.hpp create mode 100644 scintilla/include/boost/regex/config/cwchar.hpp create mode 100644 scintilla/include/boost/regex/icu.hpp create mode 100644 scintilla/include/boost/regex/mfc.hpp create mode 100644 scintilla/include/boost/regex/pattern_except.hpp create mode 100644 scintilla/include/boost/regex/pending/object_cache.hpp create mode 100644 scintilla/include/boost/regex/pending/static_mutex.hpp create mode 100644 scintilla/include/boost/regex/pending/unicode_iterator.hpp create mode 100644 scintilla/include/boost/regex/regex_traits.hpp create mode 100644 scintilla/include/boost/regex/user.hpp create mode 100644 scintilla/include/boost/regex/v5/basic_regex.hpp create mode 100644 scintilla/include/boost/regex/v5/basic_regex_creator.hpp create mode 100644 scintilla/include/boost/regex/v5/basic_regex_parser.hpp create mode 100644 scintilla/include/boost/regex/v5/c_regex_traits.hpp create mode 100644 scintilla/include/boost/regex/v5/char_regex_traits.hpp create mode 100644 scintilla/include/boost/regex/v5/cpp_regex_traits.hpp create mode 100644 scintilla/include/boost/regex/v5/cregex.hpp create mode 100644 scintilla/include/boost/regex/v5/error_type.hpp create mode 100644 scintilla/include/boost/regex/v5/icu.hpp create mode 100644 scintilla/include/boost/regex/v5/iterator_category.hpp create mode 100644 scintilla/include/boost/regex/v5/iterator_traits.hpp create mode 100644 scintilla/include/boost/regex/v5/match_flags.hpp create mode 100644 scintilla/include/boost/regex/v5/match_results.hpp create mode 100644 scintilla/include/boost/regex/v5/mem_block_cache.hpp create mode 100644 scintilla/include/boost/regex/v5/object_cache.hpp create mode 100644 scintilla/include/boost/regex/v5/pattern_except.hpp create mode 100644 scintilla/include/boost/regex/v5/perl_matcher.hpp create mode 100644 scintilla/include/boost/regex/v5/perl_matcher_common.hpp create mode 100644 scintilla/include/boost/regex/v5/perl_matcher_non_recursive.hpp create mode 100644 scintilla/include/boost/regex/v5/primary_transform.hpp create mode 100644 scintilla/include/boost/regex/v5/regbase.hpp create mode 100644 scintilla/include/boost/regex/v5/regex.hpp create mode 100644 scintilla/include/boost/regex/v5/regex_format.hpp create mode 100644 scintilla/include/boost/regex/v5/regex_fwd.hpp create mode 100644 scintilla/include/boost/regex/v5/regex_grep.hpp create mode 100644 scintilla/include/boost/regex/v5/regex_iterator.hpp create mode 100644 scintilla/include/boost/regex/v5/regex_match.hpp create mode 100644 scintilla/include/boost/regex/v5/regex_merge.hpp create mode 100644 scintilla/include/boost/regex/v5/regex_raw_buffer.hpp create mode 100644 scintilla/include/boost/regex/v5/regex_replace.hpp create mode 100644 scintilla/include/boost/regex/v5/regex_search.hpp create mode 100644 scintilla/include/boost/regex/v5/regex_split.hpp create mode 100644 scintilla/include/boost/regex/v5/regex_token_iterator.hpp create mode 100644 scintilla/include/boost/regex/v5/regex_traits.hpp create mode 100644 scintilla/include/boost/regex/v5/regex_traits_defaults.hpp create mode 100644 scintilla/include/boost/regex/v5/regex_workaround.hpp create mode 100644 scintilla/include/boost/regex/v5/states.hpp create mode 100644 scintilla/include/boost/regex/v5/sub_match.hpp create mode 100644 scintilla/include/boost/regex/v5/syntax_type.hpp create mode 100644 scintilla/include/boost/regex/v5/u32regex_iterator.hpp create mode 100644 scintilla/include/boost/regex/v5/u32regex_token_iterator.hpp create mode 100644 scintilla/include/boost/regex/v5/unicode_iterator.hpp create mode 100644 scintilla/include/boost/regex/v5/w32_regex_traits.hpp create mode 100644 scintilla/include/boost/regex_fwd.hpp diff --git a/build/VS2017/Notepad2.vcxproj b/build/VS2017/Notepad2.vcxproj index 1e07e56e8f..5c21f6e0ed 100644 --- a/build/VS2017/Notepad2.vcxproj +++ b/build/VS2017/Notepad2.vcxproj @@ -242,7 +242,7 @@ - _DEBUG;_WIN64;_WIN32_WINNT=0x0600;WINVER=0x0600;%(PreprocessorDefinitions) + _DEBUG;_WIN64;_WIN32_WINNT=0x0600;WINVER=0x0600;%(PreprocessorDefinitions);SCI_OWNREGEX;BOOST_REGEX_STANDALONE /FS %(AdditionalOptions) @@ -254,7 +254,7 @@ - _DEBUG;_WIN64;_WIN32_WINNT=0x0601;WINVER=0x0601;%(PreprocessorDefinitions) + _DEBUG;_WIN64;_WIN32_WINNT=0x0601;WINVER=0x0601;%(PreprocessorDefinitions);SCI_OWNREGEX;BOOST_REGEX_STANDALONE AdvancedVectorExtensions2 @@ -290,7 +290,7 @@ - _DEBUG;_WIN64;_WIN32_WINNT=0x0600;WINVER=0x0600;%(PreprocessorDefinitions) + _DEBUG;_WIN64;_WIN32_WINNT=0x0600;WINVER=0x0600;%(PreprocessorDefinitions);SCI_OWNREGEX;BOOST_REGEX_STANDALONE -Wextra -Wshadow -Wimplicit-fallthrough -Wformat=2 -Wundef -Wcomma %(AdditionalOptions) @@ -302,7 +302,7 @@ - _DEBUG;_WIN64;_WIN32_WINNT=0x0601;WINVER=0x0601;%(PreprocessorDefinitions) + _DEBUG;_WIN64;_WIN32_WINNT=0x0601;WINVER=0x0601;%(PreprocessorDefinitions);SCI_OWNREGEX;BOOST_REGEX_STANDALONE -march=x86-64-v3 -Wextra -Wshadow -Wimplicit-fallthrough -Wformat=2 -Wundef -Wcomma %(AdditionalOptions) @@ -369,7 +369,7 @@ - NDEBUG;_WIN64;_WIN32_WINNT=0x0600;WINVER=0x0600;%(PreprocessorDefinitions) + NDEBUG;_WIN64;_WIN32_WINNT=0x0600;WINVER=0x0600;%(PreprocessorDefinitions);SCI_OWNREGEX;BOOST_REGEX_STANDALONE true /GA /wd26429 /wd26446 /wd26472 /wd26481 /wd26482 /wd26485 /wd26486 /wd26489 %(AdditionalOptions) VectorCall @@ -383,7 +383,7 @@ - NDEBUG;_WIN64;_WIN32_WINNT=0x0601;WINVER=0x0601;%(PreprocessorDefinitions) + NDEBUG;_WIN64;_WIN32_WINNT=0x0601;WINVER=0x0601;%(PreprocessorDefinitions);SCI_OWNREGEX;BOOST_REGEX_STANDALONE true /GA /wd26429 /wd26446 /wd26472 /wd26481 /wd26482 /wd26485 /wd26486 /wd26489 %(AdditionalOptions) VectorCall @@ -426,7 +426,7 @@ - NDEBUG;_WIN64;_WIN32_WINNT=0x0600;WINVER=0x0600;%(PreprocessorDefinitions) + NDEBUG;_WIN64;_WIN32_WINNT=0x0600;WINVER=0x0600;%(PreprocessorDefinitions);SCI_OWNREGEX;BOOST_REGEX_STANDALONE /GA -Wextra -Wshadow -Wimplicit-fallthrough -Wformat=2 -Wundef -Wcomma %(AdditionalOptions) @@ -438,7 +438,7 @@ - NDEBUG;_WIN64;_WIN32_WINNT=0x0601;WINVER=0x0601;%(PreprocessorDefinitions) + NDEBUG;_WIN64;_WIN32_WINNT=0x0601;WINVER=0x0601;%(PreprocessorDefinitions);SCI_OWNREGEX;BOOST_REGEX_STANDALONE /GA -march=x86-64-v3 -Wextra -Wshadow -Wimplicit-fallthrough -Wformat=2 -Wundef -Wcomma %(AdditionalOptions) diff --git a/scintilla/include/boost/regex.hpp b/scintilla/include/boost/regex.hpp new file mode 100644 index 0000000000..b0c8bf13c7 --- /dev/null +++ b/scintilla/include/boost/regex.hpp @@ -0,0 +1,41 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org/libs/regex for documentation. + * FILE regex.cpp + * VERSION see + * DESCRIPTION: Declares boost::basic_regex<> and associated + * functions and classes. This header is the main + * entry point for the template regex code. + */ + + +/* start with C compatibility API */ + +#ifndef BOOST_RE_REGEX_HPP +#define BOOST_RE_REGEX_HPP + +#ifndef BOOST_REGEX_CONFIG_HPP +#include +#endif + +#ifdef BOOST_REGEX_CXX03 +#include +#else +#include +#endif + +#endif // include + + + + diff --git a/scintilla/include/boost/regex/concepts.hpp b/scintilla/include/boost/regex/concepts.hpp new file mode 100644 index 0000000000..2eafac1b57 --- /dev/null +++ b/scintilla/include/boost/regex/concepts.hpp @@ -0,0 +1,1134 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE concepts.hpp + * VERSION see + * DESCRIPTION: Declares regular expression concepts. + */ + +#ifndef BOOST_REGEX_CONCEPTS_HPP_INCLUDED +#define BOOST_REGEX_CONCEPTS_HPP_INCLUDED + +#include +#include +#include +#include +#include +#ifndef BOOST_TEST_TR1_REGEX +#include +#endif +#include +#include +#include + +#ifdef BOOST_REGEX_CXX03 +#define RW_NS boost +#else +#define RW_NS std +#endif + +namespace boost{ + +// +// bitmask_archetype: +// this can be either an integer type, an enum, or a std::bitset, +// we use the latter as the architype as it offers the "strictest" +// of the possible interfaces: +// +typedef std::bitset<512> bitmask_archetype; +// +// char_architype: +// A strict model for the character type interface. +// +struct char_architype +{ + // default constructable: + char_architype(); + // copy constructable / assignable: + char_architype(const char_architype&); + char_architype& operator=(const char_architype&); + // constructable from an integral value: + char_architype(unsigned long val); + // comparable: + bool operator==(const char_architype&)const; + bool operator!=(const char_architype&)const; + bool operator<(const char_architype&)const; + bool operator<=(const char_architype&)const; + bool operator>=(const char_architype&)const; + bool operator>(const char_architype&)const; + // conversion to integral type: + operator long()const; +}; +inline long hash_value(char_architype val) +{ return val; } +// +// char_architype can not be used with basic_string: +// +} // namespace boost +namespace std{ + template<> struct char_traits + { + // The intent is that this template is not instantiated, + // but this typedef gives us a chance of compilation in + // case it is: + typedef boost::char_architype char_type; + }; +} +// +// Allocator architype: +// +template +class allocator_architype +{ +public: + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + typedef T value_type; + typedef unsigned size_type; + typedef int difference_type; + + template + struct rebind + { + typedef allocator_architype other; + }; + + pointer address(reference r){ return &r; } + const_pointer address(const_reference r) { return &r; } + pointer allocate(size_type n) { return static_cast(std::malloc(n)); } + pointer allocate(size_type n, pointer) { return static_cast(std::malloc(n)); } + void deallocate(pointer p, size_type) { std::free(p); } + size_type max_size()const { return UINT_MAX; } + + allocator_architype(){} + allocator_architype(const allocator_architype&){} + + template + allocator_architype(const allocator_architype&){} + + void construct(pointer p, const_reference r) { new (p)T(r); } + void destroy(pointer p) { p->~T(); } +}; + +template +bool operator == (const allocator_architype&, const allocator_architype&) {return true; } +template +bool operator != (const allocator_architype&, const allocator_architype&) { return false; } + +namespace boost{ +// +// regex_traits_architype: +// A strict interpretation of the regular expression traits class requirements. +// +template +struct regex_traits_architype +{ +public: + regex_traits_architype(){} + typedef charT char_type; + // typedef std::size_t size_type; + typedef std::vector string_type; + typedef copy_constructible_archetype > locale_type; + typedef bitmask_archetype char_class_type; + + static std::size_t length(const char_type* ) { return 0; } + + charT translate(charT ) const { return charT(); } + charT translate_nocase(charT ) const { return static_object::get(); } + + template + string_type transform(ForwardIterator , ForwardIterator ) const + { return static_object::get(); } + template + string_type transform_primary(ForwardIterator , ForwardIterator ) const + { return static_object::get(); } + + template + char_class_type lookup_classname(ForwardIterator , ForwardIterator ) const + { return static_object::get(); } + template + string_type lookup_collatename(ForwardIterator , ForwardIterator ) const + { return static_object::get(); } + + bool isctype(charT, char_class_type) const + { return false; } + int value(charT, int) const + { return 0; } + + locale_type imbue(locale_type l) + { return l; } + locale_type getloc()const + { return static_object::get(); } + +private: + // this type is not copyable: + regex_traits_architype(const regex_traits_architype&){} + regex_traits_architype& operator=(const regex_traits_architype&){ return *this; } +}; + +// +// alter this to std::tr1, to test a std implementation: +// +#ifndef BOOST_TEST_TR1_REGEX +namespace global_regex_namespace = ::boost; +#else +namespace global_regex_namespace = ::std::tr1; +#endif + +template +struct BitmaskConcept +{ + void constraints() + { + function_requires >(); + function_requires >(); + + m_mask1 = m_mask2 | m_mask3; + m_mask1 = m_mask2 & m_mask3; + m_mask1 = m_mask2 ^ m_mask3; + + m_mask1 = ~m_mask2; + + m_mask1 |= m_mask2; + m_mask1 &= m_mask2; + m_mask1 ^= m_mask2; + } + Bitmask m_mask1, m_mask2, m_mask3; +}; + +template +struct RegexTraitsConcept +{ + RegexTraitsConcept(); + // required typedefs: + typedef typename traits::char_type char_type; + // typedef typename traits::size_type size_type; + typedef typename traits::string_type string_type; + typedef typename traits::locale_type locale_type; + typedef typename traits::char_class_type char_class_type; + + void constraints() + { + //function_requires >(); + function_requires >(); + function_requires >(); + function_requires >(); + function_requires >(); + function_requires >(); + + std::size_t n = traits::length(m_pointer); + ignore_unused_variable_warning(n); + + char_type c = m_ctraits.translate(m_char); + ignore_unused_variable_warning(c); + c = m_ctraits.translate_nocase(m_char); + + //string_type::foobar bar; + string_type s1 = m_ctraits.transform(m_pointer, m_pointer); + ignore_unused_variable_warning(s1); + + string_type s2 = m_ctraits.transform_primary(m_pointer, m_pointer); + ignore_unused_variable_warning(s2); + + char_class_type cc = m_ctraits.lookup_classname(m_pointer, m_pointer); + ignore_unused_variable_warning(cc); + + string_type s3 = m_ctraits.lookup_collatename(m_pointer, m_pointer); + ignore_unused_variable_warning(s3); + + bool b = m_ctraits.isctype(m_char, cc); + ignore_unused_variable_warning(b); + + int v = m_ctraits.value(m_char, 16); + ignore_unused_variable_warning(v); + + locale_type l(m_ctraits.getloc()); + m_traits.imbue(l); + ignore_unused_variable_warning(l); + } + traits m_traits; + const traits m_ctraits; + const char_type* m_pointer; + char_type m_char; +private: + RegexTraitsConcept& operator=(RegexTraitsConcept&); +}; + +// +// helper class to compute what traits class a regular expression type is using: +// +template +struct regex_traits_computer; + +template +struct regex_traits_computer< global_regex_namespace::basic_regex > +{ + typedef traits type; +}; + +// +// BaseRegexConcept does not test anything dependent on basic_string, +// in case our charT does not have an associated char_traits: +// +template +struct BaseRegexConcept +{ + typedef typename Regex::value_type value_type; + //typedef typename Regex::size_type size_type; + typedef typename Regex::flag_type flag_type; + typedef typename Regex::locale_type locale_type; + typedef input_iterator_archetype input_iterator_type; + + // derived test types: + typedef const value_type* pointer_type; + typedef bidirectional_iterator_archetype BidiIterator; + typedef global_regex_namespace::sub_match sub_match_type; + typedef global_regex_namespace::match_results > match_results_type; + typedef global_regex_namespace::match_results match_results_default_type; + typedef output_iterator_archetype OutIterator; + typedef typename regex_traits_computer::type traits_type; + typedef global_regex_namespace::regex_iterator regex_iterator_type; + typedef global_regex_namespace::regex_token_iterator regex_token_iterator_type; + + void global_constraints() + { + // + // test non-template components: + // + function_requires >(); + global_regex_namespace::regex_constants::syntax_option_type opts + = global_regex_namespace::regex_constants::icase + | global_regex_namespace::regex_constants::nosubs + | global_regex_namespace::regex_constants::optimize + | global_regex_namespace::regex_constants::collate + | global_regex_namespace::regex_constants::ECMAScript + | global_regex_namespace::regex_constants::basic + | global_regex_namespace::regex_constants::extended + | global_regex_namespace::regex_constants::awk + | global_regex_namespace::regex_constants::grep + | global_regex_namespace::regex_constants::egrep; + ignore_unused_variable_warning(opts); + + function_requires >(); + global_regex_namespace::regex_constants::match_flag_type mopts + = global_regex_namespace::regex_constants::match_default + | global_regex_namespace::regex_constants::match_not_bol + | global_regex_namespace::regex_constants::match_not_eol + | global_regex_namespace::regex_constants::match_not_bow + | global_regex_namespace::regex_constants::match_not_eow + | global_regex_namespace::regex_constants::match_any + | global_regex_namespace::regex_constants::match_not_null + | global_regex_namespace::regex_constants::match_continuous + | global_regex_namespace::regex_constants::match_prev_avail + | global_regex_namespace::regex_constants::format_default + | global_regex_namespace::regex_constants::format_sed + | global_regex_namespace::regex_constants::format_no_copy + | global_regex_namespace::regex_constants::format_first_only; + ignore_unused_variable_warning(mopts); + + BOOST_STATIC_ASSERT((::boost::is_enum::value)); + global_regex_namespace::regex_constants::error_type e1 = global_regex_namespace::regex_constants::error_collate; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_ctype; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_escape; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_backref; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_brack; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_paren; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_brace; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_badbrace; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_range; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_space; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_badrepeat; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_complexity; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_stack; + ignore_unused_variable_warning(e1); + + BOOST_STATIC_ASSERT((::boost::is_base_and_derived::value )); + const global_regex_namespace::regex_error except(e1); + e1 = except.code(); + + typedef typename Regex::value_type regex_value_type; + function_requires< RegexTraitsConcept > >(); + function_requires< BaseRegexConcept > >(); + } + void constraints() + { + global_constraints(); + + BOOST_STATIC_ASSERT((::boost::is_same< flag_type, global_regex_namespace::regex_constants::syntax_option_type>::value)); + flag_type opts + = Regex::icase + | Regex::nosubs + | Regex::optimize + | Regex::collate + | Regex::ECMAScript + | Regex::basic + | Regex::extended + | Regex::awk + | Regex::grep + | Regex::egrep; + ignore_unused_variable_warning(opts); + + function_requires >(); + function_requires >(); + + // Regex constructors: + Regex e1(m_pointer); + ignore_unused_variable_warning(e1); + Regex e2(m_pointer, m_flags); + ignore_unused_variable_warning(e2); + Regex e3(m_pointer, m_size, m_flags); + ignore_unused_variable_warning(e3); + Regex e4(in1, in2); + ignore_unused_variable_warning(e4); + Regex e5(in1, in2, m_flags); + ignore_unused_variable_warning(e5); + + // assign etc: + Regex e; + e = m_pointer; + e = e1; + e.assign(e1); + e.assign(m_pointer); + e.assign(m_pointer, m_flags); + e.assign(m_pointer, m_size, m_flags); + e.assign(in1, in2); + e.assign(in1, in2, m_flags); + + // access: + const Regex ce; + typename Regex::size_type i = ce.mark_count(); + ignore_unused_variable_warning(i); + m_flags = ce.flags(); + e.imbue(ce.getloc()); + e.swap(e1); + + global_regex_namespace::swap(e, e1); + + // sub_match: + BOOST_STATIC_ASSERT((::boost::is_base_and_derived, sub_match_type>::value)); + typedef typename sub_match_type::value_type sub_value_type; + typedef typename sub_match_type::difference_type sub_diff_type; + typedef typename sub_match_type::iterator sub_iter_type; + BOOST_STATIC_ASSERT((::boost::is_same::value)); + BOOST_STATIC_ASSERT((::boost::is_same::value)); + bool b = m_sub.matched; + ignore_unused_variable_warning(b); + BidiIterator bi = m_sub.first; + ignore_unused_variable_warning(bi); + bi = m_sub.second; + ignore_unused_variable_warning(bi); + sub_diff_type diff = m_sub.length(); + ignore_unused_variable_warning(diff); + // match_results tests - some typedefs are not used, however these + // guarante that they exist (some compilers may warn on non-usage) + typedef typename match_results_type::value_type mr_value_type; + typedef typename match_results_type::const_reference mr_const_reference; + typedef typename match_results_type::reference mr_reference; + typedef typename match_results_type::const_iterator mr_const_iterator; + typedef typename match_results_type::iterator mr_iterator; + typedef typename match_results_type::difference_type mr_difference_type; + typedef typename match_results_type::size_type mr_size_type; + typedef typename match_results_type::allocator_type mr_allocator_type; + typedef typename match_results_type::char_type mr_char_type; + typedef typename match_results_type::string_type mr_string_type; + + match_results_type m1; + mr_allocator_type at; + match_results_type m2(at); + match_results_type m3(m1); + m1 = m2; + + int ival = 0; + + mr_size_type mrs = m_cresults.size(); + ignore_unused_variable_warning(mrs); + mrs = m_cresults.max_size(); + ignore_unused_variable_warning(mrs); + b = m_cresults.empty(); + ignore_unused_variable_warning(b); + mr_difference_type mrd = m_cresults.length(); + ignore_unused_variable_warning(mrd); + mrd = m_cresults.length(ival); + ignore_unused_variable_warning(mrd); + mrd = m_cresults.position(); + ignore_unused_variable_warning(mrd); + mrd = m_cresults.position(mrs); + ignore_unused_variable_warning(mrd); + + mr_const_reference mrcr = m_cresults[ival]; + ignore_unused_variable_warning(mrcr); + mr_const_reference mrcr2 = m_cresults.prefix(); + ignore_unused_variable_warning(mrcr2); + mr_const_reference mrcr3 = m_cresults.suffix(); + ignore_unused_variable_warning(mrcr3); + mr_const_iterator mrci = m_cresults.begin(); + ignore_unused_variable_warning(mrci); + mrci = m_cresults.end(); + ignore_unused_variable_warning(mrci); + + (void) m_cresults.get_allocator(); + m_results.swap(m_results); + global_regex_namespace::swap(m_results, m_results); + + // regex_match: + b = global_regex_namespace::regex_match(m_in, m_in, m_results, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_match(m_in, m_in, m_results, e, m_mft); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_match(m_in, m_in, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_match(m_in, m_in, e, m_mft); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_match(m_pointer, m_pmatch, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_match(m_pointer, m_pmatch, e, m_mft); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_match(m_pointer, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_match(m_pointer, e, m_mft); + ignore_unused_variable_warning(b); + // regex_search: + b = global_regex_namespace::regex_search(m_in, m_in, m_results, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_search(m_in, m_in, m_results, e, m_mft); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_search(m_in, m_in, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_search(m_in, m_in, e, m_mft); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_search(m_pointer, m_pmatch, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_search(m_pointer, m_pmatch, e, m_mft); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_search(m_pointer, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_search(m_pointer, e, m_mft); + ignore_unused_variable_warning(b); + + // regex_iterator: + typedef typename regex_iterator_type::regex_type rit_regex_type; + typedef typename regex_iterator_type::value_type rit_value_type; + typedef typename regex_iterator_type::difference_type rit_difference_type; + typedef typename regex_iterator_type::pointer rit_pointer; + typedef typename regex_iterator_type::reference rit_reference; + typedef typename regex_iterator_type::iterator_category rit_iterator_category; + BOOST_STATIC_ASSERT((::boost::is_same::value)); + BOOST_STATIC_ASSERT((::boost::is_same::value)); + BOOST_STATIC_ASSERT((::boost::is_same::value)); + BOOST_STATIC_ASSERT((::boost::is_same::value)); + BOOST_STATIC_ASSERT((::boost::is_same::value)); + BOOST_STATIC_ASSERT((::boost::is_convertible::value)); + // this takes care of most of the checks needed: + function_requires >(); + regex_iterator_type iter1(m_in, m_in, e); + ignore_unused_variable_warning(iter1); + regex_iterator_type iter2(m_in, m_in, e, m_mft); + ignore_unused_variable_warning(iter2); + + // regex_token_iterator: + typedef typename regex_token_iterator_type::regex_type rtit_regex_type; + typedef typename regex_token_iterator_type::value_type rtit_value_type; + typedef typename regex_token_iterator_type::difference_type rtit_difference_type; + typedef typename regex_token_iterator_type::pointer rtit_pointer; + typedef typename regex_token_iterator_type::reference rtit_reference; + typedef typename regex_token_iterator_type::iterator_category rtit_iterator_category; + BOOST_STATIC_ASSERT((::boost::is_same::value)); + BOOST_STATIC_ASSERT((::boost::is_same::value)); + BOOST_STATIC_ASSERT((::boost::is_same::value)); + BOOST_STATIC_ASSERT((::boost::is_same::value)); + BOOST_STATIC_ASSERT((::boost::is_same::value)); + BOOST_STATIC_ASSERT((::boost::is_convertible::value)); + // this takes care of most of the checks needed: + function_requires >(); + regex_token_iterator_type ti1(m_in, m_in, e); + ignore_unused_variable_warning(ti1); + regex_token_iterator_type ti2(m_in, m_in, e, 0); + ignore_unused_variable_warning(ti2); + regex_token_iterator_type ti3(m_in, m_in, e, 0, m_mft); + ignore_unused_variable_warning(ti3); + std::vector subs; + regex_token_iterator_type ti4(m_in, m_in, e, subs); + ignore_unused_variable_warning(ti4); + regex_token_iterator_type ti5(m_in, m_in, e, subs, m_mft); + ignore_unused_variable_warning(ti5); + static const int i_array[3] = { 1, 2, 3, }; + regex_token_iterator_type ti6(m_in, m_in, e, i_array); + ignore_unused_variable_warning(ti6); + regex_token_iterator_type ti7(m_in, m_in, e, i_array, m_mft); + ignore_unused_variable_warning(ti7); + } + + pointer_type m_pointer; + flag_type m_flags; + std::size_t m_size; + input_iterator_type in1, in2; + const sub_match_type m_sub; + const value_type m_char; + match_results_type m_results; + const match_results_type m_cresults; + OutIterator m_out; + BidiIterator m_in; + global_regex_namespace::regex_constants::match_flag_type m_mft; + global_regex_namespace::match_results< + pointer_type, + allocator_architype > > + m_pmatch; + + BaseRegexConcept(); + BaseRegexConcept(const BaseRegexConcept&); + BaseRegexConcept& operator=(const BaseRegexConcept&); +}; + +// +// RegexConcept: +// Test every interface in the std: +// +template +struct RegexConcept +{ + typedef typename Regex::value_type value_type; + //typedef typename Regex::size_type size_type; + typedef typename Regex::flag_type flag_type; + typedef typename Regex::locale_type locale_type; + + // derived test types: + typedef const value_type* pointer_type; + typedef std::basic_string string_type; + typedef boost::bidirectional_iterator_archetype BidiIterator; + typedef global_regex_namespace::sub_match sub_match_type; + typedef global_regex_namespace::match_results > match_results_type; + typedef output_iterator_archetype OutIterator; + + + void constraints() + { + function_requires >(); + // string based construct: + Regex e1(m_string); + ignore_unused_variable_warning(e1); + Regex e2(m_string, m_flags); + ignore_unused_variable_warning(e2); + + // assign etc: + Regex e; + e = m_string; + e.assign(m_string); + e.assign(m_string, m_flags); + + // sub_match: + string_type s(m_sub); + ignore_unused_variable_warning(s); + s = m_sub.str(); + ignore_unused_variable_warning(s); + int i = m_sub.compare(m_string); + ignore_unused_variable_warning(i); + + int i2 = m_sub.compare(m_sub); + ignore_unused_variable_warning(i2); + i2 = m_sub.compare(m_pointer); + ignore_unused_variable_warning(i2); + + bool b = m_sub == m_sub; + ignore_unused_variable_warning(b); + b = m_sub != m_sub; + ignore_unused_variable_warning(b); + b = m_sub <= m_sub; + ignore_unused_variable_warning(b); + b = m_sub <= m_sub; + ignore_unused_variable_warning(b); + b = m_sub > m_sub; + ignore_unused_variable_warning(b); + b = m_sub >= m_sub; + ignore_unused_variable_warning(b); + + b = m_sub == m_pointer; + ignore_unused_variable_warning(b); + b = m_sub != m_pointer; + ignore_unused_variable_warning(b); + b = m_sub <= m_pointer; + ignore_unused_variable_warning(b); + b = m_sub <= m_pointer; + ignore_unused_variable_warning(b); + b = m_sub > m_pointer; + ignore_unused_variable_warning(b); + b = m_sub >= m_pointer; + ignore_unused_variable_warning(b); + + b = m_pointer == m_sub; + ignore_unused_variable_warning(b); + b = m_pointer != m_sub; + ignore_unused_variable_warning(b); + b = m_pointer <= m_sub; + ignore_unused_variable_warning(b); + b = m_pointer <= m_sub; + ignore_unused_variable_warning(b); + b = m_pointer > m_sub; + ignore_unused_variable_warning(b); + b = m_pointer >= m_sub; + ignore_unused_variable_warning(b); + + b = m_sub == m_char; + ignore_unused_variable_warning(b); + b = m_sub != m_char; + ignore_unused_variable_warning(b); + b = m_sub <= m_char; + ignore_unused_variable_warning(b); + b = m_sub <= m_char; + ignore_unused_variable_warning(b); + b = m_sub > m_char; + ignore_unused_variable_warning(b); + b = m_sub >= m_char; + ignore_unused_variable_warning(b); + + b = m_char == m_sub; + ignore_unused_variable_warning(b); + b = m_char != m_sub; + ignore_unused_variable_warning(b); + b = m_char <= m_sub; + ignore_unused_variable_warning(b); + b = m_char <= m_sub; + ignore_unused_variable_warning(b); + b = m_char > m_sub; + ignore_unused_variable_warning(b); + b = m_char >= m_sub; + ignore_unused_variable_warning(b); + + b = m_sub == m_string; + ignore_unused_variable_warning(b); + b = m_sub != m_string; + ignore_unused_variable_warning(b); + b = m_sub <= m_string; + ignore_unused_variable_warning(b); + b = m_sub <= m_string; + ignore_unused_variable_warning(b); + b = m_sub > m_string; + ignore_unused_variable_warning(b); + b = m_sub >= m_string; + ignore_unused_variable_warning(b); + + b = m_string == m_sub; + ignore_unused_variable_warning(b); + b = m_string != m_sub; + ignore_unused_variable_warning(b); + b = m_string <= m_sub; + ignore_unused_variable_warning(b); + b = m_string <= m_sub; + ignore_unused_variable_warning(b); + b = m_string > m_sub; + ignore_unused_variable_warning(b); + b = m_string >= m_sub; + ignore_unused_variable_warning(b); + + // match results: + m_string = m_results.str(); + ignore_unused_variable_warning(m_string); + m_string = m_results.str(0); + ignore_unused_variable_warning(m_string); + m_out = m_cresults.format(m_out, m_string); + m_out = m_cresults.format(m_out, m_string, m_mft); + m_string = m_cresults.format(m_string); + ignore_unused_variable_warning(m_string); + m_string = m_cresults.format(m_string, m_mft); + ignore_unused_variable_warning(m_string); + + // regex_match: + b = global_regex_namespace::regex_match(m_string, m_smatch, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_match(m_string, m_smatch, e, m_mft); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_match(m_string, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_match(m_string, e, m_mft); + ignore_unused_variable_warning(b); + + // regex_search: + b = global_regex_namespace::regex_search(m_string, m_smatch, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_search(m_string, m_smatch, e, m_mft); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_search(m_string, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_search(m_string, e, m_mft); + ignore_unused_variable_warning(b); + + // regex_replace: + m_out = global_regex_namespace::regex_replace(m_out, m_in, m_in, e, m_string, m_mft); + m_out = global_regex_namespace::regex_replace(m_out, m_in, m_in, e, m_string); + m_string = global_regex_namespace::regex_replace(m_string, e, m_string, m_mft); + ignore_unused_variable_warning(m_string); + m_string = global_regex_namespace::regex_replace(m_string, e, m_string); + ignore_unused_variable_warning(m_string); + + } + + flag_type m_flags; + string_type m_string; + const sub_match_type m_sub; + match_results_type m_results; + pointer_type m_pointer; + value_type m_char; + const match_results_type m_cresults; + OutIterator m_out; + BidiIterator m_in; + global_regex_namespace::regex_constants::match_flag_type m_mft; + global_regex_namespace::match_results > > m_smatch; + + RegexConcept(); + RegexConcept(const RegexConcept&); + RegexConcept& operator=(const RegexConcept&); +}; + +#ifndef BOOST_REGEX_TEST_STD + +template +struct functor1 +{ + typedef typename M::char_type char_type; + const char_type* operator()(const M&)const + { + static const char_type c = static_cast(0); + return &c; + } +}; +template +struct functor1b +{ + typedef typename M::char_type char_type; + std::vector operator()(const M&)const + { + static const std::vector c; + return c; + } +}; +template +struct functor2 +{ + template + O operator()(const M& /*m*/, O i)const + { + return i; + } +}; +template +struct functor3 +{ + template + O operator()(const M& /*m*/, O i, regex_constants::match_flag_type)const + { + return i; + } +}; + +// +// BoostRegexConcept: +// Test every interface in the Boost implementation: +// +template +struct BoostRegexConcept +{ + typedef typename Regex::value_type value_type; + typedef typename Regex::size_type size_type; + typedef typename Regex::flag_type flag_type; + typedef typename Regex::locale_type locale_type; + + // derived test types: + typedef const value_type* pointer_type; + typedef std::basic_string string_type; + typedef typename Regex::const_iterator const_iterator; + typedef bidirectional_iterator_archetype BidiIterator; + typedef output_iterator_archetype OutputIterator; + typedef global_regex_namespace::sub_match sub_match_type; + typedef global_regex_namespace::match_results > match_results_type; + typedef global_regex_namespace::match_results match_results_default_type; + + void constraints() + { + global_regex_namespace::regex_constants::match_flag_type mopts + = global_regex_namespace::regex_constants::match_default + | global_regex_namespace::regex_constants::match_not_bol + | global_regex_namespace::regex_constants::match_not_eol + | global_regex_namespace::regex_constants::match_not_bow + | global_regex_namespace::regex_constants::match_not_eow + | global_regex_namespace::regex_constants::match_any + | global_regex_namespace::regex_constants::match_not_null + | global_regex_namespace::regex_constants::match_continuous + | global_regex_namespace::regex_constants::match_partial + | global_regex_namespace::regex_constants::match_prev_avail + | global_regex_namespace::regex_constants::format_default + | global_regex_namespace::regex_constants::format_sed + | global_regex_namespace::regex_constants::format_perl + | global_regex_namespace::regex_constants::format_no_copy + | global_regex_namespace::regex_constants::format_first_only; + + (void)mopts; + + function_requires >(); + const global_regex_namespace::regex_error except(global_regex_namespace::regex_constants::error_collate); + std::ptrdiff_t pt = except.position(); + ignore_unused_variable_warning(pt); + const Regex ce, ce2; +#ifndef BOOST_NO_STD_LOCALE + m_stream << ce; +#endif + unsigned i = ce.error_code(); + ignore_unused_variable_warning(i); + pointer_type p = ce.expression(); + ignore_unused_variable_warning(p); + int i2 = ce.compare(ce2); + ignore_unused_variable_warning(i2); + bool b = ce == ce2; + ignore_unused_variable_warning(b); + b = ce.empty(); + ignore_unused_variable_warning(b); + b = ce != ce2; + ignore_unused_variable_warning(b); + b = ce < ce2; + ignore_unused_variable_warning(b); + b = ce > ce2; + ignore_unused_variable_warning(b); + b = ce <= ce2; + ignore_unused_variable_warning(b); + b = ce >= ce2; + ignore_unused_variable_warning(b); + i = ce.status(); + ignore_unused_variable_warning(i); + size_type s = ce.max_size(); + ignore_unused_variable_warning(s); + s = ce.size(); + ignore_unused_variable_warning(s); + const_iterator pi = ce.begin(); + ignore_unused_variable_warning(pi); + pi = ce.end(); + ignore_unused_variable_warning(pi); + string_type s2 = ce.str(); + ignore_unused_variable_warning(s2); + + m_string = m_sub + m_sub; + ignore_unused_variable_warning(m_string); + m_string = m_sub + m_pointer; + ignore_unused_variable_warning(m_string); + m_string = m_pointer + m_sub; + ignore_unused_variable_warning(m_string); + m_string = m_sub + m_string; + ignore_unused_variable_warning(m_string); + m_string = m_string + m_sub; + ignore_unused_variable_warning(m_string); + m_string = m_sub + m_char; + ignore_unused_variable_warning(m_string); + m_string = m_char + m_sub; + ignore_unused_variable_warning(m_string); + + // Named sub-expressions: + m_sub = m_cresults[&m_char]; + ignore_unused_variable_warning(m_sub); + m_sub = m_cresults[m_string]; + ignore_unused_variable_warning(m_sub); + m_sub = m_cresults[""]; + ignore_unused_variable_warning(m_sub); + m_sub = m_cresults[std::string("")]; + ignore_unused_variable_warning(m_sub); + m_string = m_cresults.str(&m_char); + ignore_unused_variable_warning(m_string); + m_string = m_cresults.str(m_string); + ignore_unused_variable_warning(m_string); + m_string = m_cresults.str(""); + ignore_unused_variable_warning(m_string); + m_string = m_cresults.str(std::string("")); + ignore_unused_variable_warning(m_string); + + typename match_results_type::difference_type diff; + diff = m_cresults.length(&m_char); + ignore_unused_variable_warning(diff); + diff = m_cresults.length(m_string); + ignore_unused_variable_warning(diff); + diff = m_cresults.length(""); + ignore_unused_variable_warning(diff); + diff = m_cresults.length(std::string("")); + ignore_unused_variable_warning(diff); + diff = m_cresults.position(&m_char); + ignore_unused_variable_warning(diff); + diff = m_cresults.position(m_string); + ignore_unused_variable_warning(diff); + diff = m_cresults.position(""); + ignore_unused_variable_warning(diff); + diff = m_cresults.position(std::string("")); + ignore_unused_variable_warning(diff); + +#ifndef BOOST_NO_STD_LOCALE + m_stream << m_sub; + m_stream << m_cresults; +#endif + // + // Extended formatting with a functor: + // + regex_constants::match_flag_type f = regex_constants::match_default; + OutputIterator out = static_object::get(); + + functor3 func3; + functor2 func2; + functor1 func1; + + functor3 func3b; + functor2 func2b; + functor1 func1b; + + out = regex_format(out, m_cresults, func3b, f); + out = regex_format(out, m_cresults, func3b); + out = regex_format(out, m_cresults, func2b, f); + out = regex_format(out, m_cresults, func2b); + out = regex_format(out, m_cresults, func1b, f); + out = regex_format(out, m_cresults, func1b); + out = regex_format(out, m_cresults, RW_NS::ref(func3b), f); + out = regex_format(out, m_cresults, RW_NS::ref(func3b)); + out = regex_format(out, m_cresults, RW_NS::ref(func2b), f); + out = regex_format(out, m_cresults, RW_NS::ref(func2b)); + out = regex_format(out, m_cresults, RW_NS::ref(func1b), f); + out = regex_format(out, m_cresults, RW_NS::ref(func1b)); + out = regex_format(out, m_cresults, RW_NS::cref(func3b), f); + out = regex_format(out, m_cresults, RW_NS::cref(func3b)); + out = regex_format(out, m_cresults, RW_NS::cref(func2b), f); + out = regex_format(out, m_cresults, RW_NS::cref(func2b)); + out = regex_format(out, m_cresults, RW_NS::cref(func1b), f); + out = regex_format(out, m_cresults, RW_NS::cref(func1b)); + m_string += regex_format(m_cresults, func3b, f); + m_string += regex_format(m_cresults, func3b); + m_string += regex_format(m_cresults, func2b, f); + m_string += regex_format(m_cresults, func2b); + m_string += regex_format(m_cresults, func1b, f); + m_string += regex_format(m_cresults, func1b); + m_string += regex_format(m_cresults, RW_NS::ref(func3b), f); + m_string += regex_format(m_cresults, RW_NS::ref(func3b)); + m_string += regex_format(m_cresults, RW_NS::ref(func2b), f); + m_string += regex_format(m_cresults, RW_NS::ref(func2b)); + m_string += regex_format(m_cresults, RW_NS::ref(func1b), f); + m_string += regex_format(m_cresults, RW_NS::ref(func1b)); + m_string += regex_format(m_cresults, RW_NS::cref(func3b), f); + m_string += regex_format(m_cresults, RW_NS::cref(func3b)); + m_string += regex_format(m_cresults, RW_NS::cref(func2b), f); + m_string += regex_format(m_cresults, RW_NS::cref(func2b)); + m_string += regex_format(m_cresults, RW_NS::cref(func1b), f); + m_string += regex_format(m_cresults, RW_NS::cref(func1b)); + + out = m_cresults.format(out, func3b, f); + out = m_cresults.format(out, func3b); + out = m_cresults.format(out, func2b, f); + out = m_cresults.format(out, func2b); + out = m_cresults.format(out, func1b, f); + out = m_cresults.format(out, func1b); + out = m_cresults.format(out, RW_NS::ref(func3b), f); + out = m_cresults.format(out, RW_NS::ref(func3b)); + out = m_cresults.format(out, RW_NS::ref(func2b), f); + out = m_cresults.format(out, RW_NS::ref(func2b)); + out = m_cresults.format(out, RW_NS::ref(func1b), f); + out = m_cresults.format(out, RW_NS::ref(func1b)); + out = m_cresults.format(out, RW_NS::cref(func3b), f); + out = m_cresults.format(out, RW_NS::cref(func3b)); + out = m_cresults.format(out, RW_NS::cref(func2b), f); + out = m_cresults.format(out, RW_NS::cref(func2b)); + out = m_cresults.format(out, RW_NS::cref(func1b), f); + out = m_cresults.format(out, RW_NS::cref(func1b)); + + m_string += m_cresults.format(func3b, f); + m_string += m_cresults.format(func3b); + m_string += m_cresults.format(func2b, f); + m_string += m_cresults.format(func2b); + m_string += m_cresults.format(func1b, f); + m_string += m_cresults.format(func1b); + m_string += m_cresults.format(RW_NS::ref(func3b), f); + m_string += m_cresults.format(RW_NS::ref(func3b)); + m_string += m_cresults.format(RW_NS::ref(func2b), f); + m_string += m_cresults.format(RW_NS::ref(func2b)); + m_string += m_cresults.format(RW_NS::ref(func1b), f); + m_string += m_cresults.format(RW_NS::ref(func1b)); + m_string += m_cresults.format(RW_NS::cref(func3b), f); + m_string += m_cresults.format(RW_NS::cref(func3b)); + m_string += m_cresults.format(RW_NS::cref(func2b), f); + m_string += m_cresults.format(RW_NS::cref(func2b)); + m_string += m_cresults.format(RW_NS::cref(func1b), f); + m_string += m_cresults.format(RW_NS::cref(func1b)); + + out = regex_replace(out, m_in, m_in, ce, func3, f); + out = regex_replace(out, m_in, m_in, ce, func3); + out = regex_replace(out, m_in, m_in, ce, func2, f); + out = regex_replace(out, m_in, m_in, ce, func2); + out = regex_replace(out, m_in, m_in, ce, func1, f); + out = regex_replace(out, m_in, m_in, ce, func1); + out = regex_replace(out, m_in, m_in, ce, RW_NS::ref(func3), f); + out = regex_replace(out, m_in, m_in, ce, RW_NS::ref(func3)); + out = regex_replace(out, m_in, m_in, ce, RW_NS::ref(func2), f); + out = regex_replace(out, m_in, m_in, ce, RW_NS::ref(func2)); + out = regex_replace(out, m_in, m_in, ce, RW_NS::ref(func1), f); + out = regex_replace(out, m_in, m_in, ce, RW_NS::ref(func1)); + out = regex_replace(out, m_in, m_in, ce, RW_NS::cref(func3), f); + out = regex_replace(out, m_in, m_in, ce, RW_NS::cref(func3)); + out = regex_replace(out, m_in, m_in, ce, RW_NS::cref(func2), f); + out = regex_replace(out, m_in, m_in, ce, RW_NS::cref(func2)); + out = regex_replace(out, m_in, m_in, ce, RW_NS::cref(func1), f); + out = regex_replace(out, m_in, m_in, ce, RW_NS::cref(func1)); + + functor3 > func3s; + functor2 > func2s; + functor1 > func1s; + m_string += regex_replace(m_string, ce, func3s, f); + m_string += regex_replace(m_string, ce, func3s); + m_string += regex_replace(m_string, ce, func2s, f); + m_string += regex_replace(m_string, ce, func2s); + m_string += regex_replace(m_string, ce, func1s, f); + m_string += regex_replace(m_string, ce, func1s); + m_string += regex_replace(m_string, ce, RW_NS::ref(func3s), f); + m_string += regex_replace(m_string, ce, RW_NS::ref(func3s)); + m_string += regex_replace(m_string, ce, RW_NS::ref(func2s), f); + m_string += regex_replace(m_string, ce, RW_NS::ref(func2s)); + m_string += regex_replace(m_string, ce, RW_NS::ref(func1s), f); + m_string += regex_replace(m_string, ce, RW_NS::ref(func1s)); + m_string += regex_replace(m_string, ce, RW_NS::cref(func3s), f); + m_string += regex_replace(m_string, ce, RW_NS::cref(func3s)); + m_string += regex_replace(m_string, ce, RW_NS::cref(func2s), f); + m_string += regex_replace(m_string, ce, RW_NS::cref(func2s)); + m_string += regex_replace(m_string, ce, RW_NS::cref(func1s), f); + m_string += regex_replace(m_string, ce, RW_NS::cref(func1s)); + } + + std::basic_ostream m_stream; + sub_match_type m_sub; + pointer_type m_pointer; + string_type m_string; + const value_type m_char; + match_results_type m_results; + const match_results_type m_cresults; + BidiIterator m_in; + + BoostRegexConcept(); + BoostRegexConcept(const BoostRegexConcept&); + BoostRegexConcept& operator=(const BoostRegexConcept&); +}; + +#endif // BOOST_REGEX_TEST_STD + +} + +#endif diff --git a/scintilla/include/boost/regex/config.hpp b/scintilla/include/boost/regex/config.hpp new file mode 100644 index 0000000000..bed485fa12 --- /dev/null +++ b/scintilla/include/boost/regex/config.hpp @@ -0,0 +1,480 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE config.hpp + * VERSION see + * DESCRIPTION: regex extended config setup. + */ + +#ifndef BOOST_REGEX_CONFIG_HPP +#define BOOST_REGEX_CONFIG_HPP + +#if !((__cplusplus >= 201103L) || (defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(BOOST_REGEX_CXX03)) +# define BOOST_REGEX_CXX03 +#endif + +#if defined(BOOST_REGEX_RECURSIVE) && !defined(BOOST_REGEX_CXX03) +# define BOOST_REGEX_CXX03 +#endif + +#if defined(__has_include) +#if !defined(BOOST_REGEX_STANDALONE) && !__has_include() +#define BOOST_REGEX_STANDALONE +#endif +#endif + +/* + * Borland C++ Fix/error check + * this has to go *before* we include any std lib headers: + */ +#if defined(__BORLANDC__) && !defined(__clang__) +# include +#endif +#ifndef BOOST_REGEX_STANDALONE +#include +#endif + +/************************************************************************* +* +* Asserts: +* +*************************************************************************/ + +#ifdef BOOST_REGEX_STANDALONE +#include +# define BOOST_REGEX_ASSERT(x) assert(x) +#else +#include +# define BOOST_REGEX_ASSERT(x) BOOST_ASSERT(x) +#endif + +/***************************************************************************** + * + * Include all the headers we need here: + * + ****************************************************************************/ + +#ifdef __cplusplus + +# ifndef BOOST_REGEX_USER_CONFIG +# define BOOST_REGEX_USER_CONFIG +# endif + +# include BOOST_REGEX_USER_CONFIG + +#ifndef BOOST_REGEX_STANDALONE +# include +# include +#endif + +#else + /* + * C build, + * don't include because that may + * do C++ specific things in future... + */ +# include +# include +# ifdef _MSC_VER +# define BOOST_MSVC _MSC_VER +# endif +#endif + + +/**************************************************************************** +* +* Legacy support: +* +*******************************************************************************/ + +#if defined(BOOST_NO_STD_LOCALE) || defined(BOOST_NO_CXX11_HDR_MUTEX) || defined(BOOST_NO_CXX11_HDR_TYPE_TRAITS) \ + || defined(BOOST_NO_CXX11_HDR_ATOMIC) || defined(BOOST_NO_CXX11_ALLOCATOR) || defined(BOOST_NO_CXX11_SMART_PTR) \ + || defined(BOOST_NO_CXX11_STATIC_ASSERT) || defined(BOOST_NO_NOEXCEPT) +#ifndef BOOST_REGEX_CXX03 +# define BOOST_REGEX_CXX03 +#endif +#endif + +/***************************************************************************** + * + * Boilerplate regex config options: + * + ****************************************************************************/ + +/* Obsolete macro, use BOOST_VERSION instead: */ +#define BOOST_RE_VERSION 500 + +/* fix: */ +#if defined(_UNICODE) && !defined(UNICODE) +#define UNICODE +#endif + +#define BOOST_REGEX_JOIN(X, Y) BOOST_REGEX_DO_JOIN(X, Y) +#define BOOST_REGEX_DO_JOIN(X, Y) BOOST_REGEX_DO_JOIN2(X,Y) +#define BOOST_REGEX_DO_JOIN2(X, Y) X##Y + +#ifdef BOOST_FALLTHROUGH +# define BOOST_REGEX_FALLTHROUGH BOOST_FALLTHROUGH +#else + +#if defined(__clang__) && (__cplusplus >= 201103L) && defined(__has_warning) +# if __has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough") +# define BOOST_REGEX_FALLTHROUGH [[clang::fallthrough]] +# endif +#endif +#if !defined(BOOST_REGEX_FALLTHROUGH) && defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1800) && (__cplusplus >= 201703) +# define BOOST_REGEX_FALLTHROUGH [[fallthrough]] +#endif +#if !defined(BOOST_REGEX_FALLTHROUGH) && defined(__GNUC__) && (__GNUC__ >= 7) +# define BOOST_REGEX_FALLTHROUGH __attribute__((fallthrough)) +#endif + +#if !defined(BOOST_REGEX_FALLTHROUGH) +# define BOOST_REGEX_FALLTHROUGH +#endif +#endif + +#ifdef BOOST_NORETURN +# define BOOST_REGEX_NORETURN BOOST_NORETURN +#else +# define BOOST_REGEX_NORETURN +#endif + + +/* +* Define a macro for the namespace that details are placed in, this includes the Boost +* version number to avoid mismatched header and library versions: +*/ +#define BOOST_REGEX_DETAIL_NS BOOST_REGEX_JOIN(re_detail_, BOOST_RE_VERSION) + +/* + * Fix for gcc prior to 3.4: std::ctype doesn't allow + * masks to be combined, for example: + * std::use_facet >.is(std::ctype_base::lower|std::ctype_base::upper, L'a'); + * returns *false*. + */ +#if defined(__GLIBCPP__) && defined(BOOST_REGEX_CXX03) +# define BOOST_REGEX_BUGGY_CTYPE_FACET +#endif + +/* + * If there isn't good enough wide character support then there will + * be no wide character regular expressions: + */ +#if (defined(BOOST_NO_CWCHAR) || defined(BOOST_NO_CWCTYPE) || defined(BOOST_NO_STD_WSTRING)) +# if !defined(BOOST_NO_WREGEX) +# define BOOST_NO_WREGEX +# endif +#else +# if defined(__sgi) && (defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)) + /* STLPort on IRIX is misconfigured: does not compile + * as a temporary fix include instead and prevent inclusion + * of STLPort version of */ +# include +# define __STLPORT_CWCTYPE +# define _STLP_CWCTYPE +# endif + +#if defined(__cplusplus) && defined(BOOST_REGEX_CXX03) +# include +#endif + +#endif + +/* + * If Win32 support has been disabled for boost in general, then + * it is for regex in particular: + */ +#if defined(BOOST_DISABLE_WIN32) && !defined(BOOST_REGEX_NO_W32) +# define BOOST_REGEX_NO_W32 +#endif + +/* disable our own file-iterators and mapfiles if we can't + * support them: */ +#if defined(_WIN32) +# if defined(BOOST_REGEX_NO_W32) || BOOST_PLAT_WINDOWS_RUNTIME +# define BOOST_REGEX_NO_FILEITER +# endif +#else /* defined(_WIN32) */ +# if !defined(BOOST_HAS_DIRENT_H) +# define BOOST_REGEX_NO_FILEITER +# endif +#endif + +/* backwards compatibitity: */ +#if defined(BOOST_RE_NO_LIB) +# define BOOST_REGEX_NO_LIB +#endif + +#if defined(__GNUC__) && !defined(_MSC_VER) && (defined(_WIN32) || defined(__CYGWIN__)) +/* gcc on win32 has problems if you include + (sporadically generates bad code). */ +# define BOOST_REGEX_NO_W32 +#endif +#if defined(__COMO__) && !defined(BOOST_REGEX_NO_W32) && !defined(_MSC_EXTENSIONS) +# define BOOST_REGEX_NO_W32 +#endif + +#ifdef BOOST_REGEX_STANDALONE +# if defined(_MSC_VER) && !defined(__clang__) && !defined(__GNUC__) +# define BOOST_REGEX_MSVC _MSC_VER +#endif +#elif defined(BOOST_MSVC) +# define BOOST_REGEX_MSVC BOOST_MSVC +#endif + + +/***************************************************************************** + * + * Set up dll import/export options: + * + ****************************************************************************/ + +#if (defined(BOOST_REGEX_DYN_LINK) || defined(BOOST_ALL_DYN_LINK)) && !defined(BOOST_REGEX_STATIC_LINK) && defined(BOOST_SYMBOL_IMPORT) +# if defined(BOOST_REGEX_SOURCE) +# define BOOST_REGEX_BUILD_DLL +# define BOOST_REGEX_DECL BOOST_SYMBOL_EXPORT +# else +# define BOOST_REGEX_DECL BOOST_SYMBOL_IMPORT +# endif +#else +# define BOOST_REGEX_DECL +#endif + +#ifdef BOOST_REGEX_CXX03 +#if !defined(BOOST_REGEX_NO_LIB) && !defined(BOOST_REGEX_SOURCE) && !defined(BOOST_ALL_NO_LIB) && defined(__cplusplus) +# define BOOST_LIB_NAME boost_regex +# if defined(BOOST_REGEX_DYN_LINK) || defined(BOOST_ALL_DYN_LINK) +# define BOOST_DYN_LINK +# endif +# ifdef BOOST_REGEX_DIAG +# define BOOST_LIB_DIAGNOSTIC +# endif +# include +#endif +#endif + +/***************************************************************************** + * + * Set up function call type: + * + ****************************************************************************/ + +#if defined(_MSC_VER) && defined(_MSC_EXTENSIONS) +#if defined(_DEBUG) || defined(__MSVC_RUNTIME_CHECKS) || defined(_MANAGED) || defined(BOOST_REGEX_NO_FASTCALL) +# define BOOST_REGEX_CALL __cdecl +#else +# define BOOST_REGEX_CALL __fastcall +#endif +# define BOOST_REGEX_CCALL __cdecl +#endif + +#if defined(__BORLANDC__) && !defined(BOOST_DISABLE_WIN32) +#if defined(__clang__) +# define BOOST_REGEX_CALL __cdecl +# define BOOST_REGEX_CCALL __cdecl +#else +# define BOOST_REGEX_CALL __fastcall +# define BOOST_REGEX_CCALL __stdcall +#endif +#endif + +#ifndef BOOST_REGEX_CALL +# define BOOST_REGEX_CALL +#endif +#ifndef BOOST_REGEX_CCALL +#define BOOST_REGEX_CCALL +#endif + +/***************************************************************************** + * + * Set up localisation model: + * + ****************************************************************************/ + +/* backwards compatibility: */ +#ifdef BOOST_RE_LOCALE_C +# define BOOST_REGEX_USE_C_LOCALE +#endif + +#ifdef BOOST_RE_LOCALE_CPP +# define BOOST_REGEX_USE_CPP_LOCALE +#endif + +#if defined(__CYGWIN__) +# define BOOST_REGEX_USE_C_LOCALE +#endif + +/* use C++ locale when targeting windows store */ +#if BOOST_PLAT_WINDOWS_RUNTIME +# define BOOST_REGEX_USE_CPP_LOCALE +# define BOOST_REGEX_NO_WIN32_LOCALE +#endif + +/* Win32 defaults to native Win32 locale: */ +#if defined(_WIN32) && \ + !defined(BOOST_REGEX_USE_WIN32_LOCALE) && \ + !defined(BOOST_REGEX_USE_C_LOCALE) && \ + !defined(BOOST_REGEX_USE_CPP_LOCALE) && \ + !defined(BOOST_REGEX_NO_W32) && \ + !defined(BOOST_REGEX_NO_WIN32_LOCALE) +# define BOOST_REGEX_USE_WIN32_LOCALE +#endif +/* otherwise use C++ locale if supported: */ +#if !defined(BOOST_REGEX_USE_WIN32_LOCALE) && !defined(BOOST_REGEX_USE_C_LOCALE) && !defined(BOOST_REGEX_USE_CPP_LOCALE) && !defined(BOOST_NO_STD_LOCALE) +# define BOOST_REGEX_USE_CPP_LOCALE +#endif +/* otherwise use C locale: */ +#if !defined(BOOST_REGEX_USE_WIN32_LOCALE) && !defined(BOOST_REGEX_USE_C_LOCALE) && !defined(BOOST_REGEX_USE_CPP_LOCALE) +# define BOOST_REGEX_USE_C_LOCALE +#endif + +#ifndef BOOST_REGEX_MAX_STATE_COUNT +# define BOOST_REGEX_MAX_STATE_COUNT 100000000 +#endif + + +/***************************************************************************** + * + * Error Handling for exception free compilers: + * + ****************************************************************************/ + +#ifdef BOOST_NO_EXCEPTIONS +/* + * If there are no exceptions then we must report critical-errors + * the only way we know how; by terminating. + */ +#include +#include +#include + +# define BOOST_REGEX_NOEH_ASSERT(x)\ +if(0 == (x))\ +{\ + std::string s("Error: critical regex++ failure in: ");\ + s.append(#x);\ + std::runtime_error e(s);\ + boost::throw_exception(e);\ +} +#else +/* + * With exceptions then error handling is taken care of and + * there is no need for these checks: + */ +# define BOOST_REGEX_NOEH_ASSERT(x) +#endif + + +/***************************************************************************** + * + * Stack protection under MS Windows: + * + ****************************************************************************/ + +#if !defined(BOOST_REGEX_NO_W32) && !defined(BOOST_REGEX_V3) +# if(defined(_WIN32) || defined(_WIN64) || defined(_WINCE)) \ + && !(defined(__GNUC__) || defined(__BORLANDC__) && defined(__clang__)) \ + && !(defined(__BORLANDC__) && (__BORLANDC__ >= 0x600)) \ + && !(defined(__MWERKS__) && (__MWERKS__ <= 0x3003)) +# define BOOST_REGEX_HAS_MS_STACK_GUARD +# endif +#elif defined(BOOST_REGEX_HAS_MS_STACK_GUARD) +# undef BOOST_REGEX_HAS_MS_STACK_GUARD +#endif + +#if defined(__cplusplus) && defined(BOOST_REGEX_HAS_MS_STACK_GUARD) + +namespace boost{ +namespace BOOST_REGEX_DETAIL_NS{ + +BOOST_REGEX_DECL void BOOST_REGEX_CALL reset_stack_guard_page(); + +} +} + +#endif + + +/***************************************************************************** + * + * Algorithm selection and configuration. + * These options are now obsolete for C++11 and later (regex v5). + * + ****************************************************************************/ + +#if !defined(BOOST_REGEX_RECURSIVE) && !defined(BOOST_REGEX_NON_RECURSIVE) +# if defined(BOOST_REGEX_HAS_MS_STACK_GUARD) && !defined(_STLP_DEBUG) && !defined(__STL_DEBUG) && !(defined(_MSC_VER) && (_MSC_VER >= 1400)) && defined(BOOST_REGEX_CXX03) +# define BOOST_REGEX_RECURSIVE +# else +# define BOOST_REGEX_NON_RECURSIVE +# endif +#endif + +#ifdef BOOST_REGEX_NON_RECURSIVE +# ifdef BOOST_REGEX_RECURSIVE +# error "Can't set both BOOST_REGEX_RECURSIVE and BOOST_REGEX_NON_RECURSIVE" +# endif +# ifndef BOOST_REGEX_BLOCKSIZE +# define BOOST_REGEX_BLOCKSIZE 4096 +# endif +# if BOOST_REGEX_BLOCKSIZE < 512 +# error "BOOST_REGEX_BLOCKSIZE must be at least 512" +# endif +# ifndef BOOST_REGEX_MAX_BLOCKS +# define BOOST_REGEX_MAX_BLOCKS 1024 +# endif +# ifdef BOOST_REGEX_HAS_MS_STACK_GUARD +# undef BOOST_REGEX_HAS_MS_STACK_GUARD +# endif +# ifndef BOOST_REGEX_MAX_CACHE_BLOCKS +# define BOOST_REGEX_MAX_CACHE_BLOCKS 16 +# endif +#endif + + +/***************************************************************************** + * + * Diagnostics: + * + ****************************************************************************/ + +#ifdef BOOST_REGEX_CONFIG_INFO +BOOST_REGEX_DECL void BOOST_REGEX_CALL print_regex_library_info(); +#endif + +#if defined(BOOST_REGEX_DIAG) +# pragma message ("BOOST_REGEX_DECL" BOOST_STRINGIZE(=BOOST_REGEX_DECL)) +# pragma message ("BOOST_REGEX_CALL" BOOST_STRINGIZE(=BOOST_REGEX_CALL)) +# pragma message ("BOOST_REGEX_CCALL" BOOST_STRINGIZE(=BOOST_REGEX_CCALL)) +#ifdef BOOST_REGEX_USE_C_LOCALE +# pragma message ("Using C locale in regex traits class") +#elif BOOST_REGEX_USE_CPP_LOCALE +# pragma message ("Using C++ locale in regex traits class") +#else +# pragma message ("Using Win32 locale in regex traits class") +#endif +#if defined(BOOST_REGEX_DYN_LINK) || defined(BOOST_ALL_DYN_LINK) +# pragma message ("Dynamic linking enabled") +#endif +#if defined(BOOST_REGEX_NO_LIB) || defined(BOOST_ALL_NO_LIB) +# pragma message ("Auto-linking disabled") +#endif +#ifdef BOOST_REGEX_NO_EXTERNAL_TEMPLATES +# pragma message ("Extern templates disabled") +#endif + +#endif + +#endif + diff --git a/scintilla/include/boost/regex/config/borland.hpp b/scintilla/include/boost/regex/config/borland.hpp new file mode 100644 index 0000000000..981113e5cf --- /dev/null +++ b/scintilla/include/boost/regex/config/borland.hpp @@ -0,0 +1,72 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE boost/regex/config/borland.hpp + * VERSION see + * DESCRIPTION: regex borland-specific config setup. + */ + + +#if defined(__BORLANDC__) && !defined(__clang__) +# if (__BORLANDC__ == 0x550) || (__BORLANDC__ == 0x551) + // problems with std::basic_string and dll RTL: +# if defined(_RTLDLL) && defined(_RWSTD_COMPILE_INSTANTIATE) +# ifdef BOOST_REGEX_BUILD_DLL +# error _RWSTD_COMPILE_INSTANTIATE must not be defined when building regex++ as a DLL +# else +# pragma message("Defining _RWSTD_COMPILE_INSTANTIATE when linking to the DLL version of the RTL may produce memory corruption problems in std::basic_string, as a result of separate versions of basic_string's static data in the RTL and you're exe/dll: be warned!!") +# endif +# endif +# ifndef _RTLDLL + // this is harmless for a staic link: +# define _RWSTD_COMPILE_INSTANTIATE +# endif + // external templates cause problems for some reason: +# define BOOST_REGEX_NO_EXTERNAL_TEMPLATES +# endif +# if (__BORLANDC__ <= 0x540) && !defined(BOOST_REGEX_NO_LIB) && !defined(_NO_VCL) + // C++ Builder 4 and earlier, we can't tell whether we should be using + // the VCL runtime or not, do a static link instead: +# define BOOST_REGEX_STATIC_LINK +# endif + // + // VCL support: + // if we're building a console app then there can't be any VCL (can there?) +# if !defined(__CONSOLE__) && !defined(_NO_VCL) +# define BOOST_REGEX_USE_VCL +# endif + // + // if this isn't Win32 then don't automatically select link + // libraries: + // +# ifndef _Windows +# ifndef BOOST_REGEX_NO_LIB +# define BOOST_REGEX_NO_LIB +# endif +# ifndef BOOST_REGEX_STATIC_LINK +# define BOOST_REGEX_STATIC_LINK +# endif +# endif + +#if __BORLANDC__ < 0x600 +// +// string workarounds: +// +#include +#undef strcmp +#undef strcpy +#endif + +#endif + + diff --git a/scintilla/include/boost/regex/config/cwchar.hpp b/scintilla/include/boost/regex/config/cwchar.hpp new file mode 100644 index 0000000000..a55089d0ab --- /dev/null +++ b/scintilla/include/boost/regex/config/cwchar.hpp @@ -0,0 +1,207 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE boost/regex/config/cwchar.hpp + * VERSION see + * DESCRIPTION: regex wide character string fixes. + */ + +#ifndef BOOST_REGEX_CONFIG_CWCHAR_HPP +#define BOOST_REGEX_CONFIG_CWCHAR_HPP + +#include +#include +#include + +#if defined(__STD_RWCOMPILER_H__) || defined(_RWSTD_VER) +// apparently this is required for the RW STL on Linux: +#undef iswalnum +#undef iswalpha +#undef iswblank +#undef iswcntrl +#undef iswdigit +#undef iswgraph +#undef iswlower +#undef iswprint +#undef iswprint +#undef iswpunct +#undef iswspace +#undef iswupper +#undef iswxdigit +#undef iswctype +#undef towlower +#undef towupper +#undef towctrans +#undef wctrans +#undef wctype +#endif + +namespace std{ + +#ifndef BOOST_NO_STDC_NAMESPACE +extern "C"{ +#endif + +#ifdef iswalnum +inline int (iswalnum)(wint_t i) +{ return iswalnum(i); } +#undef iswalnum +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswalnum; +#endif + +#ifdef iswalpha +inline int (iswalpha)(wint_t i) +{ return iswalpha(i); } +#undef iswalpha +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswalpha; +#endif + +#ifdef iswcntrl +inline int (iswcntrl)(wint_t i) +{ return iswcntrl(i); } +#undef iswcntrl +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswcntrl; +#endif + +#ifdef iswdigit +inline int (iswdigit)(wint_t i) +{ return iswdigit(i); } +#undef iswdigit +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswdigit; +#endif + +#ifdef iswgraph +inline int (iswgraph)(wint_t i) +{ return iswgraph(i); } +#undef iswgraph +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswgraph; +#endif + +#ifdef iswlower +inline int (iswlower)(wint_t i) +{ return iswlower(i); } +#undef iswlower +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswlower; +#endif + +#ifdef iswprint +inline int (iswprint)(wint_t i) +{ return iswprint(i); } +#undef iswprint +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswprint; +#endif + +#ifdef iswpunct +inline int (iswpunct)(wint_t i) +{ return iswpunct(i); } +#undef iswpunct +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswpunct; +#endif + +#ifdef iswspace +inline int (iswspace)(wint_t i) +{ return iswspace(i); } +#undef iswspace +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswspace; +#endif + +#ifdef iswupper +inline int (iswupper)(wint_t i) +{ return iswupper(i); } +#undef iswupper +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswupper; +#endif + +#ifdef iswxdigit +inline int (iswxdigit)(wint_t i) +{ return iswxdigit(i); } +#undef iswxdigit +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswxdigit; +#endif + +#ifdef towlower +inline wint_t (towlower)(wint_t i) +{ return towlower(i); } +#undef towlower +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::towlower; +#endif + +#ifdef towupper +inline wint_t (towupper)(wint_t i) +{ return towupper(i); } +#undef towupper +#elif defined(BOOST_NO_STDC_NAMESPACE) +using :: towupper; +#endif + +#ifdef wcscmp +inline int (wcscmp)(const wchar_t *p1, const wchar_t *p2) +{ return wcscmp(p1,p2); } +#undef wcscmp +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::wcscmp; +#endif + +#ifdef wcscoll +inline int (wcscoll)(const wchar_t *p1, const wchar_t *p2) +{ return wcscoll(p1,p2); } +#undef wcscoll +#elif defined(BOOST_NO_STDC_NAMESPACE) && !defined(UNDER_CE) +using ::wcscoll; +#endif + +#ifdef wcscpy +inline wchar_t *(wcscpy)(wchar_t *p1, const wchar_t *p2) +{ return wcscpy(p1,p2); } +#undef wcscpy +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::wcscpy; +#endif + +#ifdef wcslen +inline size_t (wcslen)(const wchar_t *p) +{ return wcslen(p); } +#undef wcslen +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::wcslen; +#endif + +#ifdef wcsxfrm +size_t wcsxfrm(wchar_t *p1, const wchar_t *p2, size_t s) +{ return wcsxfrm(p1,p2,s); } +#undef wcsxfrm +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::wcsxfrm; +#endif + + +#ifndef BOOST_NO_STDC_NAMESPACE +} // extern "C" +#endif + +} // namespace std + +#endif + diff --git a/scintilla/include/boost/regex/icu.hpp b/scintilla/include/boost/regex/icu.hpp new file mode 100644 index 0000000000..b312612d19 --- /dev/null +++ b/scintilla/include/boost/regex/icu.hpp @@ -0,0 +1,30 @@ +/* + * + * Copyright (c) 2020 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE icu.hpp + * VERSION see + * DESCRIPTION: Unicode regular expressions on top of the ICU Library. + */ + +#ifndef BOOST_REGEX_ICU_HPP +#define BOOST_REGEX_ICU_HPP + +#include + +#ifdef BOOST_REGEX_CXX03 +#include +#else +#include +#endif + +#endif diff --git a/scintilla/include/boost/regex/mfc.hpp b/scintilla/include/boost/regex/mfc.hpp new file mode 100644 index 0000000000..d780673931 --- /dev/null +++ b/scintilla/include/boost/regex/mfc.hpp @@ -0,0 +1,186 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE mfc.hpp + * VERSION see + * DESCRIPTION: Overloads and helpers for using MFC/ATL string types with Boost.Regex. + */ + +#ifndef BOOST_REGEX_MFC_HPP +#define BOOST_REGEX_MFC_HPP + +#include +#include + +namespace boost{ + +// +// define the types used for TCHAR's: +typedef basic_regex tregex; +typedef match_results tmatch; +typedef regex_iterator tregex_iterator; +typedef regex_token_iterator tregex_token_iterator; + +// Obsolete. Remove +#define SIMPLE_STRING_PARAM class B, bool b +#define SIMPLE_STRING_ARG_LIST B, b + +// +// define regex creation functions: +// +template +inline basic_regex +make_regex(const ATL::CSimpleStringT& s, ::boost::regex_constants::syntax_option_type f = boost::regex_constants::normal) +{ + basic_regex result(s.GetString(), s.GetString() + s.GetLength(), f); + return result; +} +// +// regex_match overloads: +// +template +inline bool regex_match(const ATL::CSimpleStringT& s, + match_results& what, + const basic_regex& e, + boost::regex_constants::match_flag_type f = boost::regex_constants::match_default) +{ + return ::boost::regex_match(s.GetString(), + s.GetString() + s.GetLength(), + what, + e, + f); +} + +template +inline bool regex_match(const ATL::CSimpleStringT& s, + const basic_regex& e, + boost::regex_constants::match_flag_type f = boost::regex_constants::match_default) +{ + return ::boost::regex_match(s.GetString(), + s.GetString() + s.GetLength(), + e, + f); +} +// +// regex_search overloads: +// +template +inline bool regex_search(const ATL::CSimpleStringT& s, + match_results& what, + const basic_regex& e, + boost::regex_constants::match_flag_type f = boost::regex_constants::match_default) +{ + return ::boost::regex_search(s.GetString(), + s.GetString() + s.GetLength(), + what, + e, + f); +} + +template +inline bool regex_search(const ATL::CSimpleStringT& s, + const basic_regex& e, + boost::regex_constants::match_flag_type f = boost::regex_constants::match_default) +{ + return ::boost::regex_search(s.GetString(), + s.GetString() + s.GetLength(), + e, + f); +} +// +// regex_iterator creation: +// +template +inline regex_iterator +make_regex_iterator(const ATL::CSimpleStringT& s, const basic_regex& e, ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default) +{ + regex_iterator result(s.GetString(), s.GetString() + s.GetLength(), e, f); + return result; +} + +template +inline regex_token_iterator + make_regex_token_iterator(const ATL::CSimpleStringT& s, const basic_regex& e, int sub = 0, ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default) +{ + regex_token_iterator result(s.GetString(), s.GetString() + s.GetLength(), e, sub, f); + return result; +} + +template +inline regex_token_iterator +make_regex_token_iterator(const ATL::CSimpleStringT& s, const basic_regex& e, const std::vector& subs, ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default) +{ + regex_token_iterator result(s.GetString(), s.GetString() + s.GetLength(), e, subs, f); + return result; +} + +template +inline regex_token_iterator +make_regex_token_iterator(const ATL::CSimpleStringT& s, const basic_regex& e, const int (& subs)[N], ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default) +{ + regex_token_iterator result(s.GetString(), s.GetString() + s.GetLength(), e, subs, f); + return result; +} + +template +OutputIterator regex_replace(OutputIterator out, + BidirectionalIterator first, + BidirectionalIterator last, + const basic_regex& e, + const ATL::CSimpleStringT& fmt, + match_flag_type flags = match_default) +{ + return ::boost::regex_replace(out, first, last, e, fmt.GetString(), flags); +} + +namespace BOOST_REGEX_DETAIL_NS{ + +template +class mfc_string_out_iterator +{ + ATL::CSimpleStringT* out; +public: + mfc_string_out_iterator(ATL::CSimpleStringT& s) : out(&s) {} + mfc_string_out_iterator& operator++() { return *this; } + mfc_string_out_iterator& operator++(int) { return *this; } + mfc_string_out_iterator& operator*() { return *this; } + mfc_string_out_iterator& operator=(B v) + { + out->AppendChar(v); + return *this; + } + typedef std::ptrdiff_t difference_type; + typedef B value_type; + typedef value_type* pointer; + typedef value_type& reference; + typedef std::output_iterator_tag iterator_category; +}; + +} + +template +ATL::CSimpleStringT regex_replace(const ATL::CSimpleStringT& s, + const basic_regex& e, + const ATL::CSimpleStringT& fmt, + match_flag_type flags = match_default) +{ + ATL::CSimpleStringT result(s.GetManager()); + BOOST_REGEX_DETAIL_NS::mfc_string_out_iterator i(result); + regex_replace(i, s.GetString(), s.GetString() + s.GetLength(), e, fmt.GetString(), flags); + return result; +} + +} // namespace boost. + +#endif diff --git a/scintilla/include/boost/regex/pattern_except.hpp b/scintilla/include/boost/regex/pattern_except.hpp new file mode 100644 index 0000000000..f2af1afe64 --- /dev/null +++ b/scintilla/include/boost/regex/pattern_except.hpp @@ -0,0 +1,32 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE pattern_except.hpp + * VERSION see + * DESCRIPTION: Declares pattern-matching exception classes. + */ + +#ifndef BOOST_RE_PAT_EXCEPT_HPP +#define BOOST_RE_PAT_EXCEPT_HPP + +#ifndef BOOST_REGEX_CONFIG_HPP +#include +#endif + +#ifdef BOOST_REGEX_CXX03 +#include +#else +#include +#endif + +#endif diff --git a/scintilla/include/boost/regex/pending/object_cache.hpp b/scintilla/include/boost/regex/pending/object_cache.hpp new file mode 100644 index 0000000000..0ddbdadfa3 --- /dev/null +++ b/scintilla/include/boost/regex/pending/object_cache.hpp @@ -0,0 +1,29 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE object_cache.hpp + * VERSION see + * DESCRIPTION: Implements a generic object cache. + */ + +#ifndef BOOST_REGEX_OBJECT_CACHE_HPP +#define BOOST_REGEX_OBJECT_CACHE_HPP + +#include +#ifdef BOOST_REGEX_CXX03 +#include +#else +#include +#endif + +#endif diff --git a/scintilla/include/boost/regex/pending/static_mutex.hpp b/scintilla/include/boost/regex/pending/static_mutex.hpp new file mode 100644 index 0000000000..344926f6ab --- /dev/null +++ b/scintilla/include/boost/regex/pending/static_mutex.hpp @@ -0,0 +1,182 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE static_mutex.hpp + * VERSION see + * DESCRIPTION: Declares static_mutex lock type, there are three different + * implementations: POSIX pthreads, WIN32 threads, and portable, + * these are described in more detail below. + */ + +#ifndef BOOST_REGEX_STATIC_MUTEX_HPP +#define BOOST_REGEX_STATIC_MUTEX_HPP + +#include +#include // dll import/export options. + +#ifdef BOOST_HAS_PTHREADS +#include +#endif + +#if defined(BOOST_HAS_PTHREADS) && defined(PTHREAD_MUTEX_INITIALIZER) +// +// pthreads version: +// simple wrap around a pthread_mutex_t initialized with +// PTHREAD_MUTEX_INITIALIZER. +// +namespace boost{ + +class static_mutex; + +#define BOOST_STATIC_MUTEX_INIT { PTHREAD_MUTEX_INITIALIZER, } + +class BOOST_REGEX_DECL scoped_static_mutex_lock +{ +public: + scoped_static_mutex_lock(static_mutex& mut, bool lk = true); + ~scoped_static_mutex_lock(); + inline bool locked()const + { + return m_have_lock; + } + inline operator void const*()const + { + return locked() ? this : 0; + } + void lock(); + void unlock(); +private: + static_mutex& m_mutex; + bool m_have_lock; +}; + +class static_mutex +{ +public: + typedef scoped_static_mutex_lock scoped_lock; + pthread_mutex_t m_mutex; +}; + +} // namespace boost +#elif defined(BOOST_HAS_WINTHREADS) +// +// Win32 version: +// Use a 32-bit int as a lock, along with a test-and-set +// implementation using InterlockedCompareExchange. +// + +#include + +namespace boost{ + +class BOOST_REGEX_DECL scoped_static_mutex_lock; + +class static_mutex +{ +public: + typedef scoped_static_mutex_lock scoped_lock; + boost::int32_t m_mutex; +}; + +#define BOOST_STATIC_MUTEX_INIT { 0, } + +class BOOST_REGEX_DECL scoped_static_mutex_lock +{ +public: + scoped_static_mutex_lock(static_mutex& mut, bool lk = true); + ~scoped_static_mutex_lock(); + operator void const*()const + { + return locked() ? this : 0; + } + bool locked()const + { + return m_have_lock; + } + void lock(); + void unlock(); +private: + static_mutex& m_mutex; + bool m_have_lock; + scoped_static_mutex_lock(const scoped_static_mutex_lock&); + scoped_static_mutex_lock& operator=(const scoped_static_mutex_lock&); +}; + +} // namespace + +#else +// +// Portable version of a static mutex based on Boost.Thread library: +// This has to use a single mutex shared by all instances of static_mutex +// because boost::call_once doesn't alow us to pass instance information +// down to the initialisation proceedure. In fact the initialisation routine +// may need to be called more than once - but only once per instance. +// +// Since this preprocessor path is almost never taken, we hide these header +// dependencies so that build tools don't find them. +// +#define BOOST_REGEX_H1 +#define BOOST_REGEX_H2 +#define BOOST_REGEX_H3 +#include BOOST_REGEX_H1 +#include BOOST_REGEX_H2 +#include BOOST_REGEX_H3 +#undef BOOST_REGEX_H1 +#undef BOOST_REGEX_H2 +#undef BOOST_REGEX_H3 + +namespace boost{ + +class BOOST_REGEX_DECL scoped_static_mutex_lock; +extern "C" BOOST_REGEX_DECL void boost_regex_free_static_mutex(); + +class BOOST_REGEX_DECL static_mutex +{ +public: + typedef scoped_static_mutex_lock scoped_lock; + static void init(); + static boost::recursive_mutex* m_pmutex; + static boost::once_flag m_once; +}; + +#define BOOST_STATIC_MUTEX_INIT { } + +class BOOST_REGEX_DECL scoped_static_mutex_lock +{ +public: + scoped_static_mutex_lock(static_mutex& mut, bool lk = true); + ~scoped_static_mutex_lock(); + operator void const*()const; + bool locked()const; + void lock(); + void unlock(); +private: + boost::unique_lock* m_plock; + bool m_have_lock; +}; + +inline scoped_static_mutex_lock::operator void const*()const +{ + return locked() ? this : 0; +} + +inline bool scoped_static_mutex_lock::locked()const +{ + return m_have_lock; +} + +} // namespace + +#endif + +#endif diff --git a/scintilla/include/boost/regex/pending/unicode_iterator.hpp b/scintilla/include/boost/regex/pending/unicode_iterator.hpp new file mode 100644 index 0000000000..a565570a22 --- /dev/null +++ b/scintilla/include/boost/regex/pending/unicode_iterator.hpp @@ -0,0 +1,32 @@ +/* + * + * Copyright (c) 2020 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE unicode_iterator.hpp + * VERSION see + * DESCRIPTION: Iterator adapters for converting between different Unicode encodings. + */ + +#ifndef BOOST_REGEX_PENDING_UNICODE_ITERATOR_HPP +#define BOOST_REGEX_PENDING_UNICODE_ITERATOR_HPP + +#include + +#if defined(BOOST_REGEX_CXX03) +#include +#else +#include +#endif + + +#endif // BOOST_REGEX_PENDING_UNICODE_ITERATOR_HPP + diff --git a/scintilla/include/boost/regex/regex_traits.hpp b/scintilla/include/boost/regex/regex_traits.hpp new file mode 100644 index 0000000000..2b383160c1 --- /dev/null +++ b/scintilla/include/boost/regex/regex_traits.hpp @@ -0,0 +1,39 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_traits.hpp + * VERSION see + * DESCRIPTION: Declares regular expression traits classes. + */ + +#ifndef BOOST_REGEX_TRAITS_HPP +#define BOOST_REGEX_TRAITS_HPP + +#ifndef BOOST_REGEX_CONFIG_HPP +# include +#endif + +# ifndef BOOST_REGEX_TRAITS_HPP_INCLUDED +#ifdef BOOST_REGEX_CXX03 +# include +#else +# include +#endif +# endif + +#endif // include + + + + + diff --git a/scintilla/include/boost/regex/user.hpp b/scintilla/include/boost/regex/user.hpp new file mode 100644 index 0000000000..4b159bc581 --- /dev/null +++ b/scintilla/include/boost/regex/user.hpp @@ -0,0 +1,95 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE user.hpp + * VERSION see + * DESCRIPTION: User settable options. + */ + +// define if you want the regex library to use the C locale +// even on Win32: +// #define BOOST_REGEX_USE_C_LOCALE + +// define this is you want the regex library to use the C++ +// locale: +// #define BOOST_REGEX_USE_CPP_LOCALE + +// define this if the runtime library is a dll, and you +// want BOOST_REGEX_DYN_LINK to set up dll exports/imports +// with __declspec(dllexport)/__declspec(dllimport.) +// #define BOOST_REGEX_HAS_DLL_RUNTIME + +// define this if you want to dynamically link to regex, +// if the runtime library is also a dll (Probably Win32 specific, +// and has no effect unless BOOST_REGEX_HAS_DLL_RUNTIME is set): +// #define BOOST_REGEX_DYN_LINK + +// define this if you don't want the lib to automatically +// select its link libraries: +// #define BOOST_REGEX_NO_LIB + +// define this if templates with switch statements cause problems: +// #define BOOST_REGEX_NO_TEMPLATE_SWITCH_MERGE + +// define this to disable Win32 support when available: +// #define BOOST_REGEX_NO_W32 + +// define this if bool is not a real type: +// #define BOOST_REGEX_NO_BOOL + +// define this if no template instances are to be placed in +// the library rather than users object files: +// #define BOOST_REGEX_NO_EXTERNAL_TEMPLATES + +// define this if the forward declarations in regex_fwd.hpp +// cause more problems than they are worth: +// #define BOOST_REGEX_NO_FWD + +// define this if your compiler supports MS Windows structured +// exception handling. +// #define BOOST_REGEX_HAS_MS_STACK_GUARD + +// define this if you want to use the recursive algorithm +// even if BOOST_REGEX_HAS_MS_STACK_GUARD is not defined. +// NOTE: OBSOLETE!! +// #define BOOST_REGEX_RECURSIVE + +// define this if you want to use the non-recursive +// algorithm, even if the recursive version would be the default. +// NOTE: OBSOLETE!! +// #define BOOST_REGEX_NON_RECURSIVE + +// define this if you want to set the size of the memory blocks +// used by the non-recursive algorithm. +// #define BOOST_REGEX_BLOCKSIZE 4096 + +// define this if you want to set the maximum number of memory blocks +// used by the non-recursive algorithm. +// #define BOOST_REGEX_MAX_BLOCKS 1024 + +// define this if you want to set the maximum number of memory blocks +// cached by the non-recursive algorithm: Normally this is 16, but can be +// higher if you have multiple threads all using boost.regex, or lower +// if you don't want boost.regex to cache memory. +// #define BOOST_REGEX_MAX_CACHE_BLOCKS 16 + +// define this if you want to be able to access extended capture +// information in your sub_match's (caution this will slow things +// down quite a bit). +// #define BOOST_REGEX_MATCH_EXTRA + +// define this if you want to enable support for Unicode via ICU. +// #define BOOST_HAS_ICU + +// define this if you want regex to use __cdecl calling convensions, even when __fastcall is available: +// #define BOOST_REGEX_NO_FASTCALL diff --git a/scintilla/include/boost/regex/v5/basic_regex.hpp b/scintilla/include/boost/regex/v5/basic_regex.hpp new file mode 100644 index 0000000000..5c73775f06 --- /dev/null +++ b/scintilla/include/boost/regex/v5/basic_regex.hpp @@ -0,0 +1,734 @@ +/* + * + * Copyright (c) 1998-2004 John Maddock + * Copyright 2011 Garmin Ltd. or its subsidiaries + * + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org/ for most recent version. + * FILE basic_regex.cpp + * VERSION see + * DESCRIPTION: Declares template class basic_regex. + */ + +#ifndef BOOST_REGEX_V5_BASIC_REGEX_HPP +#define BOOST_REGEX_V5_BASIC_REGEX_HPP + +#include + +namespace boost{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable : 4251) +#if BOOST_REGEX_MSVC < 1700 +# pragma warning(disable : 4231) +#endif +#if BOOST_REGEX_MSVC < 1600 +#pragma warning(disable : 4660) +#endif +#if BOOST_REGEX_MSVC < 1910 +#pragma warning(disable:4800) +#endif +#endif + +namespace BOOST_REGEX_DETAIL_NS{ + +// +// forward declaration, we will need this one later: +// +template +class basic_regex_parser; + +template +void bubble_down_one(I first, I last) +{ + if(first != last) + { + I next = last - 1; + while((next != first) && (*next < *(next-1))) + { + (next-1)->swap(*next); + --next; + } + } +} + +static const int hash_value_mask = 1 << (std::numeric_limits::digits - 1); + +template +inline int hash_value_from_capture_name(Iterator i, Iterator j) +{ + std::size_t r = 0; + while (i != j) + { + r ^= *i + 0x9e3779b9 + (r << 6) + (r >> 2); + ++i; + } + r %= ((std::numeric_limits::max)()); + return static_cast(r) | hash_value_mask; +} + +class named_subexpressions +{ +public: + struct name + { + template + name(const charT* i, const charT* j, int idx) + : index(idx) + { + hash = hash_value_from_capture_name(i, j); + } + name(int h, int idx) + : index(idx), hash(h) + { + } + int index; + int hash; + bool operator < (const name& other)const + { + return hash < other.hash; + } + bool operator == (const name& other)const + { + return hash == other.hash; + } + void swap(name& other) + { + std::swap(index, other.index); + std::swap(hash, other.hash); + } + }; + + typedef std::vector::const_iterator const_iterator; + typedef std::pair range_type; + + named_subexpressions(){} + + template + void set_name(const charT* i, const charT* j, int index) + { + m_sub_names.push_back(name(i, j, index)); + bubble_down_one(m_sub_names.begin(), m_sub_names.end()); + } + template + int get_id(const charT* i, const charT* j)const + { + name t(i, j, 0); + typename std::vector::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t); + if((pos != m_sub_names.end()) && (*pos == t)) + { + return pos->index; + } + return -1; + } + template + range_type equal_range(const charT* i, const charT* j)const + { + name t(i, j, 0); + return std::equal_range(m_sub_names.begin(), m_sub_names.end(), t); + } + int get_id(int h)const + { + name t(h, 0); + std::vector::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t); + if((pos != m_sub_names.end()) && (*pos == t)) + { + return pos->index; + } + return -1; + } + range_type equal_range(int h)const + { + name t(h, 0); + return std::equal_range(m_sub_names.begin(), m_sub_names.end(), t); + } +private: + std::vector m_sub_names; +}; + +// +// class regex_data: +// represents the data we wish to expose to the matching algorithms. +// +template +struct regex_data : public named_subexpressions +{ + typedef regex_constants::syntax_option_type flag_type; + typedef std::size_t size_type; + + regex_data(const ::std::shared_ptr< + ::boost::regex_traits_wrapper >& t) + : m_ptraits(t), m_flags(0), m_status(0), m_expression(0), m_expression_len(0), + m_mark_count(0), m_first_state(0), m_restart_type(0), + m_startmap{ 0 }, + m_can_be_null(0), m_word_mask(0), m_has_recursions(false), m_disable_match_any(false) {} + regex_data() + : m_ptraits(new ::boost::regex_traits_wrapper()), m_flags(0), m_status(0), m_expression(0), m_expression_len(0), + m_mark_count(0), m_first_state(0), m_restart_type(0), + m_startmap{ 0 }, + m_can_be_null(0), m_word_mask(0), m_has_recursions(false), m_disable_match_any(false) {} + + ::std::shared_ptr< + ::boost::regex_traits_wrapper + > m_ptraits; // traits class instance + flag_type m_flags; // flags with which we were compiled + int m_status; // error code (0 implies OK). + const charT* m_expression; // the original expression + std::ptrdiff_t m_expression_len; // the length of the original expression + size_type m_mark_count; // the number of marked sub-expressions + BOOST_REGEX_DETAIL_NS::re_syntax_base* m_first_state; // the first state of the machine + unsigned m_restart_type; // search optimisation type + unsigned char m_startmap[1 << CHAR_BIT]; // which characters can start a match + unsigned int m_can_be_null; // whether we can match a null string + BOOST_REGEX_DETAIL_NS::raw_storage m_data; // the buffer in which our states are constructed + typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character + std::vector< + std::pair< + std::size_t, std::size_t> > m_subs; // Position of sub-expressions within the *string*. + bool m_has_recursions; // whether we have recursive expressions; + bool m_disable_match_any; // when set we need to disable the match_any flag as it causes different/buggy behaviour. +}; +// +// class basic_regex_implementation +// pimpl implementation class for basic_regex. +// +template +class basic_regex_implementation + : public regex_data +{ +public: + typedef regex_constants::syntax_option_type flag_type; + typedef std::ptrdiff_t difference_type; + typedef std::size_t size_type; + typedef typename traits::locale_type locale_type; + typedef const charT* const_iterator; + + basic_regex_implementation(){} + basic_regex_implementation(const ::std::shared_ptr< + ::boost::regex_traits_wrapper >& t) + : regex_data(t) {} + void assign(const charT* arg_first, + const charT* arg_last, + flag_type f) + { + regex_data* pdat = this; + basic_regex_parser parser(pdat); + parser.parse(arg_first, arg_last, f); + } + + locale_type imbue(locale_type l) + { + return this->m_ptraits->imbue(l); + } + locale_type getloc()const + { + return this->m_ptraits->getloc(); + } + std::basic_string str()const + { + std::basic_string result; + if(this->m_status == 0) + result = std::basic_string(this->m_expression, this->m_expression_len); + return result; + } + const_iterator expression()const + { + return this->m_expression; + } + std::pair subexpression(std::size_t n)const + { + const std::pair& pi = this->m_subs.at(n); + std::pair p(expression() + pi.first, expression() + pi.second); + return p; + } + // + // begin, end: + const_iterator begin()const + { + return (this->m_status ? 0 : this->m_expression); + } + const_iterator end()const + { + return (this->m_status ? 0 : this->m_expression + this->m_expression_len); + } + flag_type flags()const + { + return this->m_flags; + } + size_type size()const + { + return this->m_expression_len; + } + int status()const + { + return this->m_status; + } + size_type mark_count()const + { + return this->m_mark_count - 1; + } + const BOOST_REGEX_DETAIL_NS::re_syntax_base* get_first_state()const + { + return this->m_first_state; + } + unsigned get_restart_type()const + { + return this->m_restart_type; + } + const unsigned char* get_map()const + { + return this->m_startmap; + } + const ::boost::regex_traits_wrapper& get_traits()const + { + return *(this->m_ptraits); + } + bool can_be_null()const + { + return this->m_can_be_null; + } + const regex_data& get_data()const + { + basic_regex_implementation const* p = this; + return *static_cast*>(p); + } +}; + +} // namespace BOOST_REGEX_DETAIL_NS +// +// class basic_regex: +// represents the compiled +// regular expression: +// + +#ifdef BOOST_REGEX_NO_FWD +template > +#else +template +#endif +class basic_regex : public regbase +{ +public: + // typedefs: + typedef std::size_t traits_size_type; + typedef typename traits::string_type traits_string_type; + typedef charT char_type; + typedef traits traits_type; + + typedef charT value_type; + typedef charT& reference; + typedef const charT& const_reference; + typedef const charT* const_iterator; + typedef const_iterator iterator; + typedef std::ptrdiff_t difference_type; + typedef std::size_t size_type; + typedef regex_constants::syntax_option_type flag_type; + // locale_type + // placeholder for actual locale type used by the + // traits class to localise *this. + typedef typename traits::locale_type locale_type; + +public: + explicit basic_regex(){} + explicit basic_regex(const charT* p, flag_type f = regex_constants::normal) + { + assign(p, f); + } + basic_regex(const charT* p1, const charT* p2, flag_type f = regex_constants::normal) + { + assign(p1, p2, f); + } + basic_regex(const charT* p, size_type len, flag_type f) + { + assign(p, len, f); + } + basic_regex(const basic_regex& that) + : m_pimpl(that.m_pimpl) {} + ~basic_regex(){} + basic_regex& operator=(const basic_regex& that) + { + return assign(that); + } + basic_regex& operator=(const charT* ptr) + { + return assign(ptr); + } + + // + // assign: + basic_regex& assign(const basic_regex& that) + { + m_pimpl = that.m_pimpl; + return *this; + } + basic_regex& assign(const charT* p, flag_type f = regex_constants::normal) + { + return assign(p, p + traits::length(p), f); + } + basic_regex& assign(const charT* p, size_type len, flag_type f) + { + return assign(p, p + len, f); + } +private: + basic_regex& do_assign(const charT* p1, + const charT* p2, + flag_type f); +public: + basic_regex& assign(const charT* p1, + const charT* p2, + flag_type f = regex_constants::normal) + { + return do_assign(p1, p2, f); + } + + template + unsigned int set_expression(const std::basic_string& p, flag_type f = regex_constants::normal) + { + return set_expression(p.data(), p.data() + p.size(), f); + } + + template + explicit basic_regex(const std::basic_string& p, flag_type f = regex_constants::normal) + { + assign(p, f); + } + + template + basic_regex(InputIterator arg_first, InputIterator arg_last, flag_type f = regex_constants::normal) + { + typedef typename traits::string_type seq_type; + seq_type a(arg_first, arg_last); + if(!a.empty()) + assign(static_cast(&*a.begin()), static_cast(&*a.begin() + a.size()), f); + else + assign(static_cast(0), static_cast(0), f); + } + + template + basic_regex& operator=(const std::basic_string& p) + { + return assign(p.data(), p.data() + p.size(), regex_constants::normal); + } + + template + basic_regex& assign( + const std::basic_string& s, + flag_type f = regex_constants::normal) + { + return assign(s.data(), s.data() + s.size(), f); + } + + template + basic_regex& assign(InputIterator arg_first, + InputIterator arg_last, + flag_type f = regex_constants::normal) + { + typedef typename traits::string_type seq_type; + seq_type a(arg_first, arg_last); + if(a.size()) + { + const charT* p1 = &*a.begin(); + const charT* p2 = &*a.begin() + a.size(); + return assign(p1, p2, f); + } + return assign(static_cast(0), static_cast(0), f); + } + + // + // locale: + locale_type imbue(locale_type l); + locale_type getloc()const + { + return m_pimpl.get() ? m_pimpl->getloc() : locale_type(); + } + // + // getflags: + // retained for backwards compatibility only, "flags" + // is now the preferred name: + flag_type getflags()const + { + return flags(); + } + flag_type flags()const + { + return m_pimpl.get() ? m_pimpl->flags() : 0; + } + // + // str: + std::basic_string str()const + { + return m_pimpl.get() ? m_pimpl->str() : std::basic_string(); + } + // + // begin, end, subexpression: + std::pair subexpression(std::size_t n)const + { +#ifdef BOOST_REGEX_STANDALONE + if (!m_pimpl.get()) + throw std::logic_error("Can't access subexpressions in an invalid regex."); +#else + if(!m_pimpl.get()) + boost::throw_exception(std::logic_error("Can't access subexpressions in an invalid regex.")); +#endif + return m_pimpl->subexpression(n); + } + const_iterator begin()const + { + return (m_pimpl.get() ? m_pimpl->begin() : 0); + } + const_iterator end()const + { + return (m_pimpl.get() ? m_pimpl->end() : 0); + } + // + // swap: + void swap(basic_regex& that)throw() + { + m_pimpl.swap(that.m_pimpl); + } + // + // size: + size_type size()const + { + return (m_pimpl.get() ? m_pimpl->size() : 0); + } + // + // max_size: + size_type max_size()const + { + return UINT_MAX; + } + // + // empty: + bool empty()const + { + return (m_pimpl.get() ? 0 != m_pimpl->status() : true); + } + + size_type mark_count()const + { + return (m_pimpl.get() ? m_pimpl->mark_count() : 0); + } + + int status()const + { + return (m_pimpl.get() ? m_pimpl->status() : regex_constants::error_empty); + } + + int compare(const basic_regex& that) const + { + if(m_pimpl.get() == that.m_pimpl.get()) + return 0; + if(!m_pimpl.get()) + return -1; + if(!that.m_pimpl.get()) + return 1; + if(status() != that.status()) + return status() - that.status(); + if(flags() != that.flags()) + return flags() - that.flags(); + return str().compare(that.str()); + } + bool operator==(const basic_regex& e)const + { + return compare(e) == 0; + } + bool operator != (const basic_regex& e)const + { + return compare(e) != 0; + } + bool operator<(const basic_regex& e)const + { + return compare(e) < 0; + } + bool operator>(const basic_regex& e)const + { + return compare(e) > 0; + } + bool operator<=(const basic_regex& e)const + { + return compare(e) <= 0; + } + bool operator>=(const basic_regex& e)const + { + return compare(e) >= 0; + } + + // + // The following are deprecated as public interfaces + // but are available for compatibility with earlier versions. + const charT* expression()const + { + return (m_pimpl.get() && !m_pimpl->status() ? m_pimpl->expression() : 0); + } + unsigned int set_expression(const charT* p1, const charT* p2, flag_type f = regex_constants::normal) + { + assign(p1, p2, f | regex_constants::no_except); + return status(); + } + unsigned int set_expression(const charT* p, flag_type f = regex_constants::normal) + { + assign(p, f | regex_constants::no_except); + return status(); + } + unsigned int error_code()const + { + return status(); + } + // + // private access methods: + // + const BOOST_REGEX_DETAIL_NS::re_syntax_base* get_first_state()const + { + BOOST_REGEX_ASSERT(0 != m_pimpl.get()); + return m_pimpl->get_first_state(); + } + unsigned get_restart_type()const + { + BOOST_REGEX_ASSERT(0 != m_pimpl.get()); + return m_pimpl->get_restart_type(); + } + const unsigned char* get_map()const + { + BOOST_REGEX_ASSERT(0 != m_pimpl.get()); + return m_pimpl->get_map(); + } + const ::boost::regex_traits_wrapper& get_traits()const + { + BOOST_REGEX_ASSERT(0 != m_pimpl.get()); + return m_pimpl->get_traits(); + } + bool can_be_null()const + { + BOOST_REGEX_ASSERT(0 != m_pimpl.get()); + return m_pimpl->can_be_null(); + } + const BOOST_REGEX_DETAIL_NS::regex_data& get_data()const + { + BOOST_REGEX_ASSERT(0 != m_pimpl.get()); + return m_pimpl->get_data(); + } + std::shared_ptr get_named_subs()const + { + return m_pimpl; + } + +private: + std::shared_ptr > m_pimpl; +}; + +// +// out of line members; +// these are the only members that mutate the basic_regex object, +// and are designed to provide the strong exception guarantee +// (in the event of a throw, the state of the object remains unchanged). +// +template +basic_regex& basic_regex::do_assign(const charT* p1, + const charT* p2, + flag_type f) +{ + std::shared_ptr > temp; + if(!m_pimpl.get()) + { + temp = std::shared_ptr >(new BOOST_REGEX_DETAIL_NS::basic_regex_implementation()); + } + else + { + temp = std::shared_ptr >(new BOOST_REGEX_DETAIL_NS::basic_regex_implementation(m_pimpl->m_ptraits)); + } + temp->assign(p1, p2, f); + temp.swap(m_pimpl); + return *this; +} + +template +typename basic_regex::locale_type basic_regex::imbue(locale_type l) +{ + std::shared_ptr > temp(new BOOST_REGEX_DETAIL_NS::basic_regex_implementation()); + locale_type result = temp->imbue(l); + temp.swap(m_pimpl); + return result; +} + +// +// non-members: +// +template +void swap(basic_regex& e1, basic_regex& e2) +{ + e1.swap(e2); +} + +template +std::basic_ostream& + operator << (std::basic_ostream& os, + const basic_regex& e) +{ + return (os << e.str()); +} + +// +// class reg_expression: +// this is provided for backwards compatibility only, +// it is deprecated, no not use! +// +#ifdef BOOST_REGEX_NO_FWD +template > +#else +template +#endif +class reg_expression : public basic_regex +{ +public: + typedef typename basic_regex::flag_type flag_type; + typedef typename basic_regex::size_type size_type; + explicit reg_expression(){} + explicit reg_expression(const charT* p, flag_type f = regex_constants::normal) + : basic_regex(p, f){} + reg_expression(const charT* p1, const charT* p2, flag_type f = regex_constants::normal) + : basic_regex(p1, p2, f){} + reg_expression(const charT* p, size_type len, flag_type f) + : basic_regex(p, len, f){} + reg_expression(const reg_expression& that) + : basic_regex(that) {} + ~reg_expression(){} + reg_expression& operator=(const reg_expression& that) + { + return this->assign(that); + } + + template + explicit reg_expression(const std::basic_string& p, flag_type f = regex_constants::normal) + : basic_regex(p, f) + { + } + + template + reg_expression(InputIterator arg_first, InputIterator arg_last, flag_type f = regex_constants::normal) + : basic_regex(arg_first, arg_last, f) + { + } + + template + reg_expression& operator=(const std::basic_string& p) + { + this->assign(p); + return *this; + } + +}; + +#ifdef BOOST_REGEX_MSVC +#pragma warning (pop) +#endif + +} // namespace boost + +#endif diff --git a/scintilla/include/boost/regex/v5/basic_regex_creator.hpp b/scintilla/include/boost/regex/v5/basic_regex_creator.hpp new file mode 100644 index 0000000000..bb76c7c1f2 --- /dev/null +++ b/scintilla/include/boost/regex/v5/basic_regex_creator.hpp @@ -0,0 +1,1576 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE basic_regex_creator.cpp + * VERSION see + * DESCRIPTION: Declares template class basic_regex_creator which fills in + * the data members of a regex_data object. + */ + +#ifndef BOOST_REGEX_V5_BASIC_REGEX_CREATOR_HPP +#define BOOST_REGEX_V5_BASIC_REGEX_CREATOR_HPP + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#pragma warning(disable:4459) +#if BOOST_REGEX_MSVC < 1910 +#pragma warning(disable:4800) +#endif +#endif + +#include + +namespace boost{ + +namespace BOOST_REGEX_DETAIL_NS{ + +template +struct digraph : public std::pair +{ + digraph() : std::pair(charT(0), charT(0)){} + digraph(charT c1) : std::pair(c1, charT(0)){} + digraph(charT c1, charT c2) : std::pair(c1, c2) + {} + digraph(const digraph& d) : std::pair(d.first, d.second){} + digraph& operator=(const digraph&) = default; + template + digraph(const Seq& s) : std::pair() + { + BOOST_REGEX_ASSERT(s.size() <= 2); + BOOST_REGEX_ASSERT(s.size()); + this->first = s[0]; + this->second = (s.size() > 1) ? s[1] : 0; + } +}; + +template +class basic_char_set +{ +public: + typedef digraph digraph_type; + typedef typename traits::string_type string_type; + typedef typename traits::char_class_type m_type; + + basic_char_set() + { + m_negate = false; + m_has_digraphs = false; + m_classes = 0; + m_negated_classes = 0; + m_empty = true; + } + + void add_single(const digraph_type& s) + { + m_singles.insert(s); + if(s.second) + m_has_digraphs = true; + m_empty = false; + } + void add_range(const digraph_type& first, const digraph_type& end) + { + m_ranges.push_back(first); + m_ranges.push_back(end); + if(first.second) + { + m_has_digraphs = true; + add_single(first); + } + if(end.second) + { + m_has_digraphs = true; + add_single(end); + } + m_empty = false; + } + void add_class(m_type m) + { + m_classes |= m; + m_empty = false; + } + void add_negated_class(m_type m) + { + m_negated_classes |= m; + m_empty = false; + } + void add_equivalent(const digraph_type& s) + { + m_equivalents.insert(s); + if(s.second) + { + m_has_digraphs = true; + add_single(s); + } + m_empty = false; + } + void negate() + { + m_negate = true; + //m_empty = false; + } + + // + // accessor functions: + // + bool has_digraphs()const + { + return m_has_digraphs; + } + bool is_negated()const + { + return m_negate; + } + typedef typename std::vector::const_iterator list_iterator; + typedef typename std::set::const_iterator set_iterator; + set_iterator singles_begin()const + { + return m_singles.begin(); + } + set_iterator singles_end()const + { + return m_singles.end(); + } + list_iterator ranges_begin()const + { + return m_ranges.begin(); + } + list_iterator ranges_end()const + { + return m_ranges.end(); + } + set_iterator equivalents_begin()const + { + return m_equivalents.begin(); + } + set_iterator equivalents_end()const + { + return m_equivalents.end(); + } + m_type classes()const + { + return m_classes; + } + m_type negated_classes()const + { + return m_negated_classes; + } + bool empty()const + { + return m_empty; + } +private: + std::set m_singles; // a list of single characters to match + std::vector m_ranges; // a list of end points of our ranges + bool m_negate; // true if the set is to be negated + bool m_has_digraphs; // true if we have digraphs present + m_type m_classes; // character classes to match + m_type m_negated_classes; // negated character classes to match + bool m_empty; // whether we've added anything yet + std::set m_equivalents; // a list of equivalence classes +}; + +template +class basic_regex_creator +{ +public: + basic_regex_creator(regex_data* data); + std::ptrdiff_t getoffset(void* addr) + { + return getoffset(addr, m_pdata->m_data.data()); + } + std::ptrdiff_t getoffset(const void* addr, const void* base) + { + return static_cast(addr) - static_cast(base); + } + re_syntax_base* getaddress(std::ptrdiff_t off) + { + return getaddress(off, m_pdata->m_data.data()); + } + re_syntax_base* getaddress(std::ptrdiff_t off, void* base) + { + return static_cast(static_cast(static_cast(base) + off)); + } + void init(unsigned l_flags) + { + m_pdata->m_flags = l_flags; + m_icase = l_flags & regex_constants::icase; + } + regbase::flag_type flags() + { + return m_pdata->m_flags; + } + void flags(regbase::flag_type f) + { + m_pdata->m_flags = f; + if(m_icase != static_cast(f & regbase::icase)) + { + m_icase = static_cast(f & regbase::icase); + } + } + re_syntax_base* append_state(syntax_element_type t, std::size_t s = sizeof(re_syntax_base)); + re_syntax_base* insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s = sizeof(re_syntax_base)); + re_literal* append_literal(charT c); + re_syntax_base* append_set(const basic_char_set& char_set); + re_syntax_base* append_set(const basic_char_set& char_set, std::integral_constant*); + re_syntax_base* append_set(const basic_char_set& char_set, std::integral_constant*); + void finalize(const charT* p1, const charT* p2); +protected: + regex_data* m_pdata; // pointer to the basic_regex_data struct we are filling in + const ::boost::regex_traits_wrapper& + m_traits; // convenience reference to traits class + re_syntax_base* m_last_state; // the last state we added + bool m_icase; // true for case insensitive matches + unsigned m_repeater_id; // the state_id of the next repeater + bool m_has_backrefs; // true if there are actually any backrefs + std::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for; + bool m_has_recursions; // set when we have recursive expressions to fixup + std::vector m_recursion_checks; // notes which recursions we've followed while analysing this expression + typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character + typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character + typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character + typename traits::char_class_type m_upper_mask; // mask used to determine if a character is an uppercase character + typename traits::char_class_type m_alpha_mask; // mask used to determine if a character is an alphabetic character +private: + basic_regex_creator& operator=(const basic_regex_creator&); + basic_regex_creator(const basic_regex_creator&); + + void fixup_pointers(re_syntax_base* state); + void fixup_recursions(re_syntax_base* state); + void create_startmaps(re_syntax_base* state); + int calculate_backstep(re_syntax_base* state); + void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask); + unsigned get_restart_type(re_syntax_base* state); + void set_all_masks(unsigned char* bits, unsigned char); + bool is_bad_repeat(re_syntax_base* pt); + void set_bad_repeat(re_syntax_base* pt); + syntax_element_type get_repeat_type(re_syntax_base* state); + void probe_leading_repeat(re_syntax_base* state); +}; + +template +basic_regex_creator::basic_regex_creator(regex_data* data) + : m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_icase(false), m_repeater_id(0), + m_has_backrefs(false), m_bad_repeats(0), m_has_recursions(false), m_word_mask(0), m_mask_space(0), m_lower_mask(0), m_upper_mask(0), m_alpha_mask(0) +{ + m_pdata->m_data.clear(); + m_pdata->m_status = ::boost::regex_constants::error_ok; + static const charT w = 'w'; + static const charT s = 's'; + static const charT l[5] = { 'l', 'o', 'w', 'e', 'r', }; + static const charT u[5] = { 'u', 'p', 'p', 'e', 'r', }; + static const charT a[5] = { 'a', 'l', 'p', 'h', 'a', }; + m_word_mask = m_traits.lookup_classname(&w, &w +1); + m_mask_space = m_traits.lookup_classname(&s, &s +1); + m_lower_mask = m_traits.lookup_classname(l, l + 5); + m_upper_mask = m_traits.lookup_classname(u, u + 5); + m_alpha_mask = m_traits.lookup_classname(a, a + 5); + m_pdata->m_word_mask = m_word_mask; + BOOST_REGEX_ASSERT(m_word_mask != 0); + BOOST_REGEX_ASSERT(m_mask_space != 0); + BOOST_REGEX_ASSERT(m_lower_mask != 0); + BOOST_REGEX_ASSERT(m_upper_mask != 0); + BOOST_REGEX_ASSERT(m_alpha_mask != 0); +} + +template +re_syntax_base* basic_regex_creator::append_state(syntax_element_type t, std::size_t s) +{ + // if the state is a backref then make a note of it: + if(t == syntax_element_backref) + this->m_has_backrefs = true; + // append a new state, start by aligning our last one: + m_pdata->m_data.align(); + // set the offset to the next state in our last one: + if(m_last_state) + m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state); + // now actually extend our data: + m_last_state = static_cast(m_pdata->m_data.extend(s)); + // fill in boilerplate options in the new state: + m_last_state->next.i = 0; + m_last_state->type = t; + return m_last_state; +} + +template +re_syntax_base* basic_regex_creator::insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s) +{ + // append a new state, start by aligning our last one: + m_pdata->m_data.align(); + // set the offset to the next state in our last one: + if(m_last_state) + m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state); + // remember the last state position: + std::ptrdiff_t off = getoffset(m_last_state) + s; + // now actually insert our data: + re_syntax_base* new_state = static_cast(m_pdata->m_data.insert(pos, s)); + // fill in boilerplate options in the new state: + new_state->next.i = s; + new_state->type = t; + m_last_state = getaddress(off); + return new_state; +} + +template +re_literal* basic_regex_creator::append_literal(charT c) +{ + re_literal* result; + // start by seeing if we have an existing re_literal we can extend: + if((0 == m_last_state) || (m_last_state->type != syntax_element_literal)) + { + // no existing re_literal, create a new one: + result = static_cast(append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT))); + result->length = 1; + *static_cast(static_cast(result+1)) = m_traits.translate(c, m_icase); + } + else + { + // we have an existing re_literal, extend it: + std::ptrdiff_t off = getoffset(m_last_state); + m_pdata->m_data.extend(sizeof(charT)); + m_last_state = result = static_cast(getaddress(off)); + charT* characters = static_cast(static_cast(result+1)); + characters[result->length] = m_traits.translate(c, m_icase); + result->length += 1; + } + return result; +} + +template +inline re_syntax_base* basic_regex_creator::append_set( + const basic_char_set& char_set) +{ + typedef std::integral_constant truth_type; + return char_set.has_digraphs() + ? append_set(char_set, static_cast*>(0)) + : append_set(char_set, static_cast(0)); +} + +template +re_syntax_base* basic_regex_creator::append_set( + const basic_char_set& char_set, std::integral_constant*) +{ + typedef typename traits::string_type string_type; + typedef typename basic_char_set::list_iterator item_iterator; + typedef typename basic_char_set::set_iterator set_iterator; + typedef typename traits::char_class_type m_type; + + re_set_long* result = static_cast*>(append_state(syntax_element_long_set, sizeof(re_set_long))); + // + // fill in the basics: + // + result->csingles = static_cast(std::distance(char_set.singles_begin(), char_set.singles_end())); + result->cranges = static_cast(std::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2; + result->cequivalents = static_cast(std::distance(char_set.equivalents_begin(), char_set.equivalents_end())); + result->cclasses = char_set.classes(); + result->cnclasses = char_set.negated_classes(); + if(flags() & regbase::icase) + { + // adjust classes as needed: + if(((result->cclasses & m_lower_mask) == m_lower_mask) || ((result->cclasses & m_upper_mask) == m_upper_mask)) + result->cclasses |= m_alpha_mask; + if(((result->cnclasses & m_lower_mask) == m_lower_mask) || ((result->cnclasses & m_upper_mask) == m_upper_mask)) + result->cnclasses |= m_alpha_mask; + } + + result->isnot = char_set.is_negated(); + result->singleton = !char_set.has_digraphs(); + // + // remember where the state is for later: + // + std::ptrdiff_t offset = getoffset(result); + // + // now extend with all the singles: + // + item_iterator first, last; + set_iterator sfirst, slast; + sfirst = char_set.singles_begin(); + slast = char_set.singles_end(); + while(sfirst != slast) + { + charT* p = static_cast(this->m_pdata->m_data.extend(sizeof(charT) * (sfirst->first == static_cast(0) ? 1 : sfirst->second ? 3 : 2))); + p[0] = m_traits.translate(sfirst->first, m_icase); + if(sfirst->first == static_cast(0)) + { + p[0] = 0; + } + else if(sfirst->second) + { + p[1] = m_traits.translate(sfirst->second, m_icase); + p[2] = 0; + } + else + p[1] = 0; + ++sfirst; + } + // + // now extend with all the ranges: + // + first = char_set.ranges_begin(); + last = char_set.ranges_end(); + while(first != last) + { + // first grab the endpoints of the range: + digraph c1 = *first; + c1.first = this->m_traits.translate(c1.first, this->m_icase); + c1.second = this->m_traits.translate(c1.second, this->m_icase); + ++first; + digraph c2 = *first; + c2.first = this->m_traits.translate(c2.first, this->m_icase); + c2.second = this->m_traits.translate(c2.second, this->m_icase); + ++first; + string_type s1, s2; + // different actions now depending upon whether collation is turned on: + if(flags() & regex_constants::collate) + { + // we need to transform our range into sort keys: + charT a1[3] = { c1.first, c1.second, charT(0), }; + charT a2[3] = { c2.first, c2.second, charT(0), }; + s1 = this->m_traits.transform(a1, (a1[1] ? a1+2 : a1+1)); + s2 = this->m_traits.transform(a2, (a2[1] ? a2+2 : a2+1)); + if(s1.empty()) + s1 = string_type(1, charT(0)); + if(s2.empty()) + s2 = string_type(1, charT(0)); + } + else + { + if(c1.second) + { + s1.insert(s1.end(), c1.first); + s1.insert(s1.end(), c1.second); + } + else + s1 = string_type(1, c1.first); + if(c2.second) + { + s2.insert(s2.end(), c2.first); + s2.insert(s2.end(), c2.second); + } + else + s2.insert(s2.end(), c2.first); + } + if(s1 > s2) + { + // Oops error: + return 0; + } + charT* p = static_cast(this->m_pdata->m_data.extend(sizeof(charT) * (s1.size() + s2.size() + 2) ) ); + BOOST_REGEX_DETAIL_NS::copy(s1.begin(), s1.end(), p); + p[s1.size()] = charT(0); + p += s1.size() + 1; + BOOST_REGEX_DETAIL_NS::copy(s2.begin(), s2.end(), p); + p[s2.size()] = charT(0); + } + // + // now process the equivalence classes: + // + sfirst = char_set.equivalents_begin(); + slast = char_set.equivalents_end(); + while(sfirst != slast) + { + string_type s; + if(sfirst->second) + { + charT cs[3] = { sfirst->first, sfirst->second, charT(0), }; + s = m_traits.transform_primary(cs, cs+2); + } + else + s = m_traits.transform_primary(&sfirst->first, &sfirst->first+1); + if(s.empty()) + return 0; // invalid or unsupported equivalence class + charT* p = static_cast(this->m_pdata->m_data.extend(sizeof(charT) * (s.size()+1) ) ); + BOOST_REGEX_DETAIL_NS::copy(s.begin(), s.end(), p); + p[s.size()] = charT(0); + ++sfirst; + } + // + // finally reset the address of our last state: + // + m_last_state = result = static_cast*>(getaddress(offset)); + return result; +} + +template +inline bool char_less(T t1, T t2) +{ + return t1 < t2; +} +inline bool char_less(char t1, char t2) +{ + return static_cast(t1) < static_cast(t2); +} +inline bool char_less(signed char t1, signed char t2) +{ + return static_cast(t1) < static_cast(t2); +} + +template +re_syntax_base* basic_regex_creator::append_set( + const basic_char_set& char_set, std::integral_constant*) +{ + typedef typename traits::string_type string_type; + typedef typename basic_char_set::list_iterator item_iterator; + typedef typename basic_char_set::set_iterator set_iterator; + + re_set* result = static_cast(append_state(syntax_element_set, sizeof(re_set))); + bool negate = char_set.is_negated(); + std::memset(result->_map, 0, sizeof(result->_map)); + // + // handle singles first: + // + item_iterator first, last; + set_iterator sfirst, slast; + sfirst = char_set.singles_begin(); + slast = char_set.singles_end(); + while(sfirst != slast) + { + for(unsigned int i = 0; i < (1 << CHAR_BIT); ++i) + { + if(this->m_traits.translate(static_cast(i), this->m_icase) + == this->m_traits.translate(sfirst->first, this->m_icase)) + result->_map[i] = true; + } + ++sfirst; + } + // + // OK now handle ranges: + // + first = char_set.ranges_begin(); + last = char_set.ranges_end(); + while(first != last) + { + // first grab the endpoints of the range: + charT c1 = this->m_traits.translate(first->first, this->m_icase); + ++first; + charT c2 = this->m_traits.translate(first->first, this->m_icase); + ++first; + // different actions now depending upon whether collation is turned on: + if(flags() & regex_constants::collate) + { + // we need to transform our range into sort keys: + charT c3[2] = { c1, charT(0), }; + string_type s1 = this->m_traits.transform(c3, c3+1); + c3[0] = c2; + string_type s2 = this->m_traits.transform(c3, c3+1); + if(s1 > s2) + { + // Oops error: + return 0; + } + BOOST_REGEX_ASSERT(c3[1] == charT(0)); + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + { + c3[0] = static_cast(i); + string_type s3 = this->m_traits.transform(c3, c3 +1); + if((s1 <= s3) && (s3 <= s2)) + result->_map[i] = true; + } + } + else + { + if(char_less(c2, c1)) + { + // Oops error: + return 0; + } + // everything in range matches: + std::memset(result->_map + static_cast(c1), true, static_cast(1u) + static_cast(static_cast(c2) - static_cast(c1))); + } + } + // + // and now the classes: + // + typedef typename traits::char_class_type m_type; + m_type m = char_set.classes(); + if(flags() & regbase::icase) + { + // adjust m as needed: + if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask)) + m |= m_alpha_mask; + } + if(m != 0) + { + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + { + if(this->m_traits.isctype(static_cast(i), m)) + result->_map[i] = true; + } + } + // + // and now the negated classes: + // + m = char_set.negated_classes(); + if(flags() & regbase::icase) + { + // adjust m as needed: + if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask)) + m |= m_alpha_mask; + } + if(m != 0) + { + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + { + if(0 == this->m_traits.isctype(static_cast(i), m)) + result->_map[i] = true; + } + } + // + // now process the equivalence classes: + // + sfirst = char_set.equivalents_begin(); + slast = char_set.equivalents_end(); + while(sfirst != slast) + { + string_type s; + BOOST_REGEX_ASSERT(static_cast(0) == sfirst->second); + s = m_traits.transform_primary(&sfirst->first, &sfirst->first+1); + if(s.empty()) + return 0; // invalid or unsupported equivalence class + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + { + charT c[2] = { (static_cast(i)), charT(0), }; + string_type s2 = this->m_traits.transform_primary(c, c+1); + if(s == s2) + result->_map[i] = true; + } + ++sfirst; + } + if(negate) + { + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + { + result->_map[i] = !(result->_map[i]); + } + } + return result; +} + +template +void basic_regex_creator::finalize(const charT* p1, const charT* p2) +{ + if(this->m_pdata->m_status) + return; + // we've added all the states we need, now finish things off. + // start by adding a terminating state: + append_state(syntax_element_match); + // extend storage to store original expression: + std::ptrdiff_t len = p2 - p1; + m_pdata->m_expression_len = len; + charT* ps = static_cast(m_pdata->m_data.extend(sizeof(charT) * (1 + (p2 - p1)))); + m_pdata->m_expression = ps; + BOOST_REGEX_DETAIL_NS::copy(p1, p2, ps); + ps[p2 - p1] = 0; + // fill in our other data... + // successful parsing implies a zero status: + m_pdata->m_status = 0; + // get the first state of the machine: + m_pdata->m_first_state = static_cast(m_pdata->m_data.data()); + // fixup pointers in the machine: + fixup_pointers(m_pdata->m_first_state); + if(m_has_recursions) + { + m_pdata->m_has_recursions = true; + fixup_recursions(m_pdata->m_first_state); + if(this->m_pdata->m_status) + return; + } + else + m_pdata->m_has_recursions = false; + // create nested startmaps: + create_startmaps(m_pdata->m_first_state); + // create main startmap: + std::memset(m_pdata->m_startmap, 0, sizeof(m_pdata->m_startmap)); + m_pdata->m_can_be_null = 0; + + m_bad_repeats = 0; + if(m_has_recursions) + m_recursion_checks.assign(1 + m_pdata->m_mark_count, 0u); + create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all); + // get the restart type: + m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state); + // optimise a leading repeat if there is one: + probe_leading_repeat(m_pdata->m_first_state); +} + +template +void basic_regex_creator::fixup_pointers(re_syntax_base* state) +{ + while(state) + { + switch(state->type) + { + case syntax_element_recurse: + m_has_recursions = true; + if(state->next.i) + state->next.p = getaddress(state->next.i, state); + else + state->next.p = 0; + break; + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + // set the state_id of this repeat: + static_cast(state)->state_id = m_repeater_id++; + BOOST_REGEX_FALLTHROUGH; + case syntax_element_alt: + std::memset(static_cast(state)->_map, 0, sizeof(static_cast(state)->_map)); + static_cast(state)->can_be_null = 0; + BOOST_REGEX_FALLTHROUGH; + case syntax_element_jump: + static_cast(state)->alt.p = getaddress(static_cast(state)->alt.i, state); + BOOST_REGEX_FALLTHROUGH; + default: + if(state->next.i) + state->next.p = getaddress(state->next.i, state); + else + state->next.p = 0; + } + state = state->next.p; + } +} + +template +void basic_regex_creator::fixup_recursions(re_syntax_base* state) +{ + re_syntax_base* base = state; + while(state) + { + switch(state->type) + { + case syntax_element_assert_backref: + { + // just check that the index is valid: + int idx = static_cast(state)->index; + if(idx < 0) + { + idx = -idx-1; + if(idx >= hash_value_mask) + { + idx = m_pdata->get_id(idx); + if(idx <= 0) + { + // check of sub-expression that doesn't exist: + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = "Encountered a forward reference to a marked sub-expression that does not exist."; + boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + } + } + } + break; + case syntax_element_recurse: + { + bool ok = false; + re_syntax_base* p = base; + std::ptrdiff_t idx = static_cast(state)->alt.i; + if(idx >= hash_value_mask) + { + // + // There may be more than one capture group with this hash, just do what Perl + // does and recurse to the leftmost: + // + idx = m_pdata->get_id(static_cast(idx)); + } + if(idx < 0) + { + ok = false; + } + else + { + while(p) + { + if((p->type == syntax_element_startmark) && (static_cast(p)->index == idx)) + { + // + // We've found the target of the recursion, set the jump target: + // + static_cast(state)->alt.p = p; + ok = true; + // + // Now scan the target for nested repeats: + // + p = p->next.p; + int next_rep_id = 0; + while(p) + { + switch(p->type) + { + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + next_rep_id = static_cast(p)->state_id; + break; + case syntax_element_endmark: + if(static_cast(p)->index == idx) + next_rep_id = -1; + break; + default: + break; + } + if(next_rep_id) + break; + p = p->next.p; + } + if(next_rep_id > 0) + { + static_cast(state)->state_id = next_rep_id - 1; + } + + break; + } + p = p->next.p; + } + } + if(!ok) + { + // recursion to sub-expression that doesn't exist: + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = "Encountered a forward reference to a recursive sub-expression that does not exist."; + boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + } + break; + default: + break; + } + state = state->next.p; + } +} + +template +void basic_regex_creator::create_startmaps(re_syntax_base* state) +{ + // non-recursive implementation: + // create the last map in the machine first, so that earlier maps + // can make use of the result... + // + // This was originally a recursive implementation, but that caused stack + // overflows with complex expressions on small stacks (think COM+). + + // start by saving the case setting: + bool l_icase = m_icase; + std::vector > v; + + while(state) + { + switch(state->type) + { + case syntax_element_toggle_case: + // we need to track case changes here: + m_icase = static_cast(state)->icase; + state = state->next.p; + continue; + case syntax_element_alt: + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + // just push the state onto our stack for now: + v.push_back(std::pair(m_icase, state)); + state = state->next.p; + break; + case syntax_element_backstep: + // we need to calculate how big the backstep is: + static_cast(state)->index + = this->calculate_backstep(state->next.p); + if(static_cast(state)->index < 0) + { + // Oops error: + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = "Invalid lookbehind assertion encountered in the regular expression."; + boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + BOOST_REGEX_FALLTHROUGH; + default: + state = state->next.p; + } + } + + // now work through our list, building all the maps as we go: + while(!v.empty()) + { + // Initialize m_recursion_checks if we need it: + if(m_has_recursions) + m_recursion_checks.assign(1 + m_pdata->m_mark_count, 0u); + + const std::pair& p = v.back(); + m_icase = p.first; + state = p.second; + v.pop_back(); + + // Build maps: + m_bad_repeats = 0; + create_startmap(state->next.p, static_cast(state)->_map, &static_cast(state)->can_be_null, mask_take); + m_bad_repeats = 0; + + if(m_has_recursions) + m_recursion_checks.assign(1 + m_pdata->m_mark_count, 0u); + create_startmap(static_cast(state)->alt.p, static_cast(state)->_map, &static_cast(state)->can_be_null, mask_skip); + // adjust the type of the state to allow for faster matching: + state->type = this->get_repeat_type(state); + } + // restore case sensitivity: + m_icase = l_icase; +} + +template +int basic_regex_creator::calculate_backstep(re_syntax_base* state) +{ + typedef typename traits::char_class_type m_type; + int result = 0; + while(state) + { + switch(state->type) + { + case syntax_element_startmark: + if((static_cast(state)->index == -1) + || (static_cast(state)->index == -2)) + { + state = static_cast(state->next.p)->alt.p->next.p; + continue; + } + else if(static_cast(state)->index == -3) + { + state = state->next.p->next.p; + continue; + } + break; + case syntax_element_endmark: + if((static_cast(state)->index == -1) + || (static_cast(state)->index == -2)) + return result; + break; + case syntax_element_literal: + result += static_cast(state)->length; + break; + case syntax_element_wild: + case syntax_element_set: + result += 1; + break; + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_backref: + case syntax_element_rep: + case syntax_element_combining: + case syntax_element_long_set_rep: + case syntax_element_backstep: + { + re_repeat* rep = static_cast(state); + // adjust the type of the state to allow for faster matching: + state->type = this->get_repeat_type(state); + if((state->type == syntax_element_dot_rep) + || (state->type == syntax_element_char_rep) + || (state->type == syntax_element_short_set_rep)) + { + if(rep->max != rep->min) + return -1; + if (static_cast((std::numeric_limits::max)() - result) < rep->min) + return -1; // protection against overflow, we can't calculate a backstep in this case and the expression is probably ill-formed. + result += static_cast(rep->min); + state = rep->alt.p; + continue; + } + else if(state->type == syntax_element_long_set_rep) + { + BOOST_REGEX_ASSERT(rep->next.p->type == syntax_element_long_set); + if(static_cast*>(rep->next.p)->singleton == 0) + return -1; + if(rep->max != rep->min) + return -1; + result += static_cast(rep->min); + state = rep->alt.p; + continue; + } + } + return -1; + case syntax_element_long_set: + if(static_cast*>(state)->singleton == 0) + return -1; + result += 1; + break; + case syntax_element_jump: + state = static_cast(state)->alt.p; + continue; + case syntax_element_alt: + { + int r1 = calculate_backstep(state->next.p); + int r2 = calculate_backstep(static_cast(state)->alt.p); + if((r1 < 0) || (r1 != r2)) + return -1; + return result + r1; + } + default: + break; + } + state = state->next.p; + } + return -1; +} + +struct recursion_saver +{ + std::vector saved_state; + std::vector* state; + recursion_saver(std::vector* p) : saved_state(*p), state(p) {} + ~recursion_saver() + { + state->swap(saved_state); + } +}; + +template +void basic_regex_creator::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask) +{ + recursion_saver saved_recursions(&m_recursion_checks); + int not_last_jump = 1; + re_syntax_base* recursion_start = 0; + int recursion_sub = 0; + re_syntax_base* recursion_restart = 0; + + // track case sensitivity: + bool l_icase = m_icase; + + while(state) + { + switch(state->type) + { + case syntax_element_toggle_case: + l_icase = static_cast(state)->icase; + state = state->next.p; + break; + case syntax_element_literal: + { + // don't set anything in *pnull, set each element in l_map + // that could match the first character in the literal: + if(l_map) + { + l_map[0] |= mask_init; + charT first_char = *static_cast(static_cast(static_cast(state) + 1)); + for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) + { + if(m_traits.translate(static_cast(i), l_icase) == first_char) + l_map[i] |= mask; + } + } + return; + } + case syntax_element_end_line: + { + // next character must be a line separator (if there is one): + if(l_map) + { + l_map[0] |= mask_init; + l_map[static_cast('\n')] |= mask; + l_map[static_cast('\r')] |= mask; + l_map[static_cast('\f')] |= mask; + l_map[0x85] |= mask; + } + // now figure out if we can match a NULL string at this point: + if(pnull) + create_startmap(state->next.p, 0, pnull, mask); + return; + } + case syntax_element_recurse: + { + BOOST_REGEX_ASSERT(static_cast(state)->alt.p->type == syntax_element_startmark); + recursion_sub = static_cast(static_cast(state)->alt.p)->index; + if(m_recursion_checks[recursion_sub] & 1u) + { + // Infinite recursion!! + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = "Encountered an infinite recursion."; + boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + else if(recursion_start == 0) + { + recursion_start = state; + recursion_restart = state->next.p; + state = static_cast(state)->alt.p; + m_recursion_checks[recursion_sub] |= 1u; + break; + } + m_recursion_checks[recursion_sub] |= 1u; + // can't handle nested recursion here... + BOOST_REGEX_FALLTHROUGH; + } + case syntax_element_backref: + // can be null, and any character can match: + if(pnull) + *pnull |= mask; + BOOST_REGEX_FALLTHROUGH; + case syntax_element_wild: + { + // can't be null, any character can match: + set_all_masks(l_map, mask); + return; + } + case syntax_element_accept: + case syntax_element_match: + { + // must be null, any character can match: + set_all_masks(l_map, mask); + if(pnull) + *pnull |= mask; + return; + } + case syntax_element_word_start: + { + // recurse, then AND with all the word characters: + create_startmap(state->next.p, l_map, pnull, mask); + if(l_map) + { + l_map[0] |= mask_init; + for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) + { + if(!m_traits.isctype(static_cast(i), m_word_mask)) + l_map[i] &= static_cast(~mask); + } + } + return; + } + case syntax_element_word_end: + { + // recurse, then AND with all the word characters: + create_startmap(state->next.p, l_map, pnull, mask); + if(l_map) + { + l_map[0] |= mask_init; + for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) + { + if(m_traits.isctype(static_cast(i), m_word_mask)) + l_map[i] &= static_cast(~mask); + } + } + return; + } + case syntax_element_buffer_end: + { + // we *must be null* : + if(pnull) + *pnull |= mask; + return; + } + case syntax_element_long_set: + if(l_map) + { + typedef typename traits::char_class_type m_type; + if(static_cast*>(state)->singleton) + { + l_map[0] |= mask_init; + for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) + { + charT c = static_cast(i); + if(&c != re_is_set_member(&c, &c + 1, static_cast*>(state), *m_pdata, l_icase)) + l_map[i] |= mask; + } + } + else + set_all_masks(l_map, mask); + } + return; + case syntax_element_set: + if(l_map) + { + l_map[0] |= mask_init; + for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) + { + if(static_cast(state)->_map[ + static_cast(m_traits.translate(static_cast(i), l_icase))]) + l_map[i] |= mask; + } + } + return; + case syntax_element_jump: + // take the jump: + state = static_cast(state)->alt.p; + not_last_jump = -1; + break; + case syntax_element_alt: + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + { + re_alt* rep = static_cast(state); + if(rep->_map[0] & mask_init) + { + if(l_map) + { + // copy previous results: + l_map[0] |= mask_init; + for(unsigned int i = 0; i <= UCHAR_MAX; ++i) + { + if(rep->_map[i] & mask_any) + l_map[i] |= mask; + } + } + if(pnull) + { + if(rep->can_be_null & mask_any) + *pnull |= mask; + } + } + else + { + // we haven't created a startmap for this alternative yet + // so take the union of the two options: + if(is_bad_repeat(state)) + { + set_all_masks(l_map, mask); + if(pnull) + *pnull |= mask; + return; + } + set_bad_repeat(state); + create_startmap(state->next.p, l_map, pnull, mask); + if((state->type == syntax_element_alt) + || (static_cast(state)->min == 0) + || (not_last_jump == 0)) + create_startmap(rep->alt.p, l_map, pnull, mask); + } + } + return; + case syntax_element_soft_buffer_end: + // match newline or null: + if(l_map) + { + l_map[0] |= mask_init; + l_map[static_cast('\n')] |= mask; + l_map[static_cast('\r')] |= mask; + } + if(pnull) + *pnull |= mask; + return; + case syntax_element_endmark: + // need to handle independent subs as a special case: + if(static_cast(state)->index < 0) + { + // can be null, any character can match: + set_all_masks(l_map, mask); + if(pnull) + *pnull |= mask; + return; + } + else if(recursion_start && (recursion_sub != 0) && (recursion_sub == static_cast(state)->index)) + { + // recursion termination: + recursion_start = 0; + state = recursion_restart; + break; + } + + // + // Normally we just go to the next state... but if this sub-expression is + // the target of a recursion, then we might be ending a recursion, in which + // case we should check whatever follows that recursion, as well as whatever + // follows this state: + // + if(m_pdata->m_has_recursions && static_cast(state)->index) + { + bool ok = false; + re_syntax_base* p = m_pdata->m_first_state; + while(p) + { + if(p->type == syntax_element_recurse) + { + re_brace* p2 = static_cast(static_cast(p)->alt.p); + if((p2->type == syntax_element_startmark) && (p2->index == static_cast(state)->index)) + { + ok = true; + break; + } + } + p = p->next.p; + } + if(ok && ((m_recursion_checks[static_cast(state)->index] & 2u) == 0)) + { + m_recursion_checks[static_cast(state)->index] |= 2u; + create_startmap(p->next.p, l_map, pnull, mask); + } + } + state = state->next.p; + break; + + case syntax_element_commit: + set_all_masks(l_map, mask); + // Continue scanning so we can figure out whether we can be null: + state = state->next.p; + break; + case syntax_element_startmark: + // need to handle independent subs as a special case: + if(static_cast(state)->index == -3) + { + state = state->next.p->next.p; + break; + } + BOOST_REGEX_FALLTHROUGH; + default: + state = state->next.p; + } + ++not_last_jump; + } +} + +template +unsigned basic_regex_creator::get_restart_type(re_syntax_base* state) +{ + // + // find out how the machine starts, so we can optimise the search: + // + while(state) + { + switch(state->type) + { + case syntax_element_startmark: + case syntax_element_endmark: + state = state->next.p; + continue; + case syntax_element_start_line: + return regbase::restart_line; + case syntax_element_word_start: + return regbase::restart_word; + case syntax_element_buffer_start: + return regbase::restart_buf; + case syntax_element_restart_continue: + return regbase::restart_continue; + default: + state = 0; + continue; + } + } + return regbase::restart_any; +} + +template +void basic_regex_creator::set_all_masks(unsigned char* bits, unsigned char mask) +{ + // + // set mask in all of bits elements, + // if bits[0] has mask_init not set then we can + // optimise this to a call to memset: + // + if(bits) + { + if(bits[0] == 0) + (std::memset)(bits, mask, 1u << CHAR_BIT); + else + { + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + bits[i] |= mask; + } + bits[0] |= mask_init; + } +} + +template +bool basic_regex_creator::is_bad_repeat(re_syntax_base* pt) +{ + switch(pt->type) + { + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + { + unsigned state_id = static_cast(pt)->state_id; + if(state_id >= sizeof(m_bad_repeats) * CHAR_BIT) + return true; // run out of bits, assume we can't traverse this one. + static const std::uintmax_t one = 1uL; + return m_bad_repeats & (one << state_id); + } + default: + return false; + } +} + +template +void basic_regex_creator::set_bad_repeat(re_syntax_base* pt) +{ + switch(pt->type) + { + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + { + unsigned state_id = static_cast(pt)->state_id; + static const std::uintmax_t one = 1uL; + if(state_id <= sizeof(m_bad_repeats) * CHAR_BIT) + m_bad_repeats |= (one << state_id); + } + break; + default: + break; + } +} + +template +syntax_element_type basic_regex_creator::get_repeat_type(re_syntax_base* state) +{ + typedef typename traits::char_class_type m_type; + if(state->type == syntax_element_rep) + { + // check to see if we are repeating a single state: + if(state->next.p->next.p->next.p == static_cast(state)->alt.p) + { + switch(state->next.p->type) + { + case BOOST_REGEX_DETAIL_NS::syntax_element_wild: + return BOOST_REGEX_DETAIL_NS::syntax_element_dot_rep; + case BOOST_REGEX_DETAIL_NS::syntax_element_literal: + return BOOST_REGEX_DETAIL_NS::syntax_element_char_rep; + case BOOST_REGEX_DETAIL_NS::syntax_element_set: + return BOOST_REGEX_DETAIL_NS::syntax_element_short_set_rep; + case BOOST_REGEX_DETAIL_NS::syntax_element_long_set: + if(static_cast*>(state->next.p)->singleton) + return BOOST_REGEX_DETAIL_NS::syntax_element_long_set_rep; + break; + default: + break; + } + } + } + return state->type; +} + +template +void basic_regex_creator::probe_leading_repeat(re_syntax_base* state) +{ + // enumerate our states, and see if we have a leading repeat + // for which failed search restarts can be optimized; + do + { + switch(state->type) + { + case syntax_element_startmark: + if(static_cast(state)->index >= 0) + { + state = state->next.p; + continue; + } +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#pragma warning(disable:6011) +#endif + if((static_cast(state)->index == -1) + || (static_cast(state)->index == -2)) + { + // skip past the zero width assertion: + state = static_cast(state->next.p)->alt.p->next.p; + continue; + } +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + if(static_cast(state)->index == -3) + { + // Have to skip the leading jump state: + state = state->next.p->next.p; + continue; + } + return; + case syntax_element_endmark: + case syntax_element_start_line: + case syntax_element_end_line: + case syntax_element_word_boundary: + case syntax_element_within_word: + case syntax_element_word_start: + case syntax_element_word_end: + case syntax_element_buffer_start: + case syntax_element_buffer_end: + case syntax_element_restart_continue: + state = state->next.p; + break; + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + if(this->m_has_backrefs == 0) + static_cast(state)->leading = true; + BOOST_REGEX_FALLTHROUGH; + default: + return; + } + }while(state); +} + +} // namespace BOOST_REGEX_DETAIL_NS + +} // namespace boost + +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + +#endif diff --git a/scintilla/include/boost/regex/v5/basic_regex_parser.hpp b/scintilla/include/boost/regex/v5/basic_regex_parser.hpp new file mode 100644 index 0000000000..34d775ef84 --- /dev/null +++ b/scintilla/include/boost/regex/v5/basic_regex_parser.hpp @@ -0,0 +1,3130 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE basic_regex_parser.cpp + * VERSION see + * DESCRIPTION: Declares template class basic_regex_parser. + */ + +#ifndef BOOST_REGEX_V5_BASIC_REGEX_PARSER_HPP +#define BOOST_REGEX_V5_BASIC_REGEX_PARSER_HPP + +namespace boost{ +namespace BOOST_REGEX_DETAIL_NS{ + +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4244 4459) +#if BOOST_REGEX_MSVC < 1910 +#pragma warning(disable:4800) +#endif +#endif + +inline std::intmax_t umax(std::integral_constant const&) +{ + // Get out clause here, just in case numeric_limits is unspecialized: + return std::numeric_limits::is_specialized ? (std::numeric_limits::max)() : INT_MAX; +} +inline std::intmax_t umax(std::integral_constant const&) +{ + return (std::numeric_limits::max)(); +} + +inline std::intmax_t umax() +{ + return umax(std::integral_constant::digits >= std::numeric_limits::digits>()); +} + +template +class basic_regex_parser : public basic_regex_creator +{ +public: + basic_regex_parser(regex_data* data); + void parse(const charT* p1, const charT* p2, unsigned flags); + void fail(regex_constants::error_type error_code, std::ptrdiff_t position); + void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos); + void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message) + { + fail(error_code, position, message, position); + } + + bool parse_all(); + bool parse_basic(); + bool parse_extended(); + bool parse_literal(); + bool parse_open_paren(); + bool parse_basic_escape(); + bool parse_extended_escape(); + bool parse_match_any(); + bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits::max)()); + bool parse_repeat_range(bool isbasic); + bool parse_alt(); + bool parse_set(); + bool parse_backref(); + void parse_set_literal(basic_char_set& char_set); + bool parse_inner_set(basic_char_set& char_set); + bool parse_QE(); + bool parse_perl_extension(); + bool parse_perl_verb(); + bool match_verb(const char*); + bool add_emacs_code(bool negate); + bool unwind_alts(std::ptrdiff_t last_paren_start); + digraph get_next_set_literal(basic_char_set& char_set); + charT unescape_character(); + regex_constants::syntax_option_type parse_options(); + +private: + typedef bool (basic_regex_parser::*parser_proc_type)(); + typedef typename traits::string_type string_type; + typedef typename traits::char_class_type char_class_type; + parser_proc_type m_parser_proc; // the main parser to use + const charT* m_base; // the start of the string being parsed + const charT* m_end; // the end of the string being parsed + const charT* m_position; // our current parser position + unsigned m_mark_count; // how many sub-expressions we have + int m_mark_reset; // used to indicate that we're inside a (?|...) block. + unsigned m_max_mark; // largest mark count seen inside a (?|...) block. + std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted). + std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative + bool m_has_case_change; // true if somewhere in the current block the case has changed + unsigned m_recursion_count; // How many times we've called parse_all. + unsigned m_max_backref; // Largest index of any backref. +#if defined(BOOST_REGEX_MSVC) && defined(_M_IX86) + // This is an ugly warning suppression workaround (for warnings *inside* std::vector + // that can not otherwise be suppressed)... + static_assert(sizeof(long) >= sizeof(void*), "Long isn't long enough!"); + std::vector m_alt_jumps; // list of alternative in the current scope. +#else + std::vector m_alt_jumps; // list of alternative in the current scope. +#endif + + basic_regex_parser& operator=(const basic_regex_parser&); + basic_regex_parser(const basic_regex_parser&); +}; + +template +basic_regex_parser::basic_regex_parser(regex_data* data) + : basic_regex_creator(data), m_parser_proc(), m_base(0), m_end(0), m_position(0), + m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false), m_recursion_count(0), m_max_backref(0) +{ +} + +template +void basic_regex_parser::parse(const charT* p1, const charT* p2, unsigned l_flags) +{ + // pass l_flags on to base class: + this->init(l_flags); + // set up pointers: + m_position = m_base = p1; + m_end = p2; + // empty strings are errors: + if((p1 == p2) && + ( + ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group) + || (l_flags & regbase::no_empty_expressions) + ) + ) + { + fail(regex_constants::error_empty, 0); + return; + } + // select which parser to use: + switch(l_flags & regbase::main_option_type) + { + case regbase::perl_syntax_group: + { + m_parser_proc = &basic_regex_parser::parse_extended; + // + // Add a leading paren with index zero to give recursions a target: + // + re_brace* br = static_cast(this->append_state(syntax_element_startmark, sizeof(re_brace))); + br->index = 0; + br->icase = this->flags() & regbase::icase; + break; + } + case regbase::basic_syntax_group: + m_parser_proc = &basic_regex_parser::parse_basic; + break; + case regbase::literal: + m_parser_proc = &basic_regex_parser::parse_literal; + break; + default: + // Oops, someone has managed to set more than one of the main option flags, + // so this must be an error: + fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used."); + return; + } + + // parse all our characters: + bool result = parse_all(); + // + // Unwind our alternatives: + // + unwind_alts(-1); + // reset l_flags as a global scope (?imsx) may have altered them: + this->flags(l_flags); + // if we haven't gobbled up all the characters then we must + // have had an unexpected ')' : + if(!result) + { + fail(regex_constants::error_paren, std::distance(m_base, m_position), "Found a closing ) with no corresponding opening parenthesis."); + return; + } + // if an error has been set then give up now: + if(this->m_pdata->m_status) + return; + // fill in our sub-expression count: + this->m_pdata->m_mark_count = 1u + (std::size_t)m_mark_count; + // + // Check we don't have backreferences to sub-expressions which don't exist: + // + if (m_max_backref > m_mark_count) + { + fail(regex_constants::error_backref, std::distance(m_base, m_position), "Found a backreference to a non-existant sub-expression."); + } + this->finalize(p1, p2); +} + +template +void basic_regex_parser::fail(regex_constants::error_type error_code, std::ptrdiff_t position) +{ + // get the error message: + std::string message = this->m_pdata->m_ptraits->error_string(error_code); + fail(error_code, position, message); +} + +template +void basic_regex_parser::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos) +{ + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = error_code; + m_position = m_end; // don't bother parsing anything else + + // + // Augment error message with the regular expression text: + // + if(start_pos == position) + start_pos = (std::max)(static_cast(0), position - static_cast(10)); + std::ptrdiff_t end_pos = (std::min)(position + static_cast(10), static_cast(m_end - m_base)); + if(error_code != regex_constants::error_empty) + { + if((start_pos != 0) || (end_pos != (m_end - m_base))) + message += " The error occurred while parsing the regular expression fragment: '"; + else + message += " The error occurred while parsing the regular expression: '"; + if(start_pos != end_pos) + { + message += std::string(m_base + start_pos, m_base + position); + message += ">>>HERE>>>"; + message += std::string(m_base + position, m_base + end_pos); + } + message += "'."; + } + +#ifndef BOOST_NO_EXCEPTIONS + if(0 == (this->flags() & regex_constants::no_except)) + { + boost::regex_error e(message, error_code, position); + e.raise(); + } +#else + (void)position; // suppress warnings. +#endif +} + +template +bool basic_regex_parser::parse_all() +{ + if (++m_recursion_count > 400) + { + // exceeded internal limits + fail(boost::regex_constants::error_complexity, m_position - m_base, "Exceeded nested brace limit."); + } + bool result = true; + while(result && (m_position != m_end)) + { + result = (this->*m_parser_proc)(); + } + --m_recursion_count; + return result; +} + +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4702) +#endif +template +bool basic_regex_parser::parse_basic() +{ + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_escape: + return parse_basic_escape(); + case regex_constants::syntax_dot: + return parse_match_any(); + case regex_constants::syntax_caret: + ++m_position; + this->append_state(syntax_element_start_line); + break; + case regex_constants::syntax_dollar: + ++m_position; + this->append_state(syntax_element_end_line); + break; + case regex_constants::syntax_star: + if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line)) + return parse_literal(); + else + { + ++m_position; + return parse_repeat(); + } + case regex_constants::syntax_plus: + if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex)) + return parse_literal(); + else + { + ++m_position; + return parse_repeat(1); + } + case regex_constants::syntax_question: + if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex)) + return parse_literal(); + else + { + ++m_position; + return parse_repeat(0, 1); + } + case regex_constants::syntax_open_set: + return parse_set(); + case regex_constants::syntax_newline: + if(this->flags() & regbase::newline_alt) + return parse_alt(); + else + return parse_literal(); + default: + return parse_literal(); + } + return true; +} + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#if BOOST_REGEX_MSVC >= 1800 +#pragma warning(disable:26812) +#endif +#endif +template +bool basic_regex_parser::parse_extended() +{ + bool result = true; + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_open_mark: + return parse_open_paren(); + case regex_constants::syntax_close_mark: + return false; + case regex_constants::syntax_escape: + return parse_extended_escape(); + case regex_constants::syntax_dot: + return parse_match_any(); + case regex_constants::syntax_caret: + ++m_position; + this->append_state( + (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line)); + break; + case regex_constants::syntax_dollar: + ++m_position; + this->append_state( + (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line)); + break; + case regex_constants::syntax_star: + if(m_position == this->m_base) + { + fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression."); + return false; + } + ++m_position; + return parse_repeat(); + case regex_constants::syntax_question: + if(m_position == this->m_base) + { + fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression."); + return false; + } + ++m_position; + return parse_repeat(0,1); + case regex_constants::syntax_plus: + if(m_position == this->m_base) + { + fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression."); + return false; + } + ++m_position; + return parse_repeat(1); + case regex_constants::syntax_open_brace: + ++m_position; + return parse_repeat_range(false); + case regex_constants::syntax_close_brace: + if((this->flags() & regbase::no_perl_ex) == regbase::no_perl_ex) + { + fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {."); + return false; + } + result = parse_literal(); + break; + case regex_constants::syntax_or: + return parse_alt(); + case regex_constants::syntax_open_set: + return parse_set(); + case regex_constants::syntax_newline: + if(this->flags() & regbase::newline_alt) + return parse_alt(); + else + return parse_literal(); + case regex_constants::syntax_hash: + // + // If we have a mod_x flag set, then skip until + // we get to a newline character: + // + if((this->flags() + & (regbase::no_perl_ex|regbase::mod_x)) + == regbase::mod_x) + { + while((m_position != m_end) && !is_separator(*m_position++)){} + return true; + } + BOOST_REGEX_FALLTHROUGH; + default: + result = parse_literal(); + break; + } + return result; +} +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif + +template +bool basic_regex_parser::parse_literal() +{ + // append this as a literal provided it's not a space character + // or the perl option regbase::mod_x is not set: + if( + ((this->flags() + & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex)) + != regbase::mod_x) + || !this->m_traits.isctype(*m_position, this->m_mask_space)) + this->append_literal(*m_position); + ++m_position; + return true; +} + +template +bool basic_regex_parser::parse_open_paren() +{ + // + // skip the '(' and error check: + // + if(++m_position == m_end) + { + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + // + // begin by checking for a perl-style (?...) extension: + // + if( + ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0) + || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex)) + ) + { + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question) + return parse_perl_extension(); + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_star) + return parse_perl_verb(); + } + // + // update our mark count, and append the required state: + // + unsigned markid = 0; + if(0 == (this->flags() & regbase::nosubs)) + { + markid = ++m_mark_count; + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.push_back(std::pair(std::distance(m_base, m_position) - 1, 0)); + } + re_brace* pb = static_cast(this->append_state(syntax_element_startmark, sizeof(re_brace))); + pb->index = markid; + pb->icase = this->flags() & regbase::icase; + std::ptrdiff_t last_paren_start = this->getoffset(pb); + // back up insertion point for alternations, and set new point: + std::ptrdiff_t last_alt_point = m_alt_insert_point; + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + // + // back up the current flags in case we have a nested (?imsx) group: + // + regex_constants::syntax_option_type opts = this->flags(); + bool old_case_change = m_has_case_change; + m_has_case_change = false; // no changes to this scope as yet... + // + // Back up branch reset data in case we have a nested (?|...) + // + int mark_reset = m_mark_reset; + m_mark_reset = -1; + // + // now recursively add more states, this will terminate when we get to a + // matching ')' : + // + parse_all(); + // + // Unwind pushed alternatives: + // + if(0 == unwind_alts(last_paren_start)) + return false; + // + // restore flags: + // + if(m_has_case_change) + { + // the case has changed in one or more of the alternatives + // within the scoped (...) block: we have to add a state + // to reset the case sensitivity: + static_cast( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = opts & regbase::icase; + } + this->flags(opts); + m_has_case_change = old_case_change; + // + // restore branch reset: + // + m_mark_reset = mark_reset; + // + // we either have a ')' or we have run out of characters prematurely: + // + if(m_position == m_end) + { + this->fail(regex_constants::error_paren, std::distance(m_base, m_end)); + return false; + } + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + return false; + if(markid && (this->flags() & regbase::save_subexpression_location)) + this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position); + ++m_position; + // + // append closing parenthesis state: + // + pb = static_cast(this->append_state(syntax_element_endmark, sizeof(re_brace))); + pb->index = markid; + pb->icase = this->flags() & regbase::icase; + this->m_paren_start = last_paren_start; + // + // restore the alternate insertion point: + // + this->m_alt_insert_point = last_alt_point; + + return true; +} + +template +bool basic_regex_parser::parse_basic_escape() +{ + if(++m_position == m_end) + { + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + bool result = true; + switch(this->m_traits.escape_syntax_type(*m_position)) + { + case regex_constants::syntax_open_mark: + return parse_open_paren(); + case regex_constants::syntax_close_mark: + return false; + case regex_constants::syntax_plus: + if(this->flags() & regex_constants::bk_plus_qm) + { + ++m_position; + return parse_repeat(1); + } + else + return parse_literal(); + case regex_constants::syntax_question: + if(this->flags() & regex_constants::bk_plus_qm) + { + ++m_position; + return parse_repeat(0, 1); + } + else + return parse_literal(); + case regex_constants::syntax_open_brace: + if(this->flags() & regbase::no_intervals) + return parse_literal(); + ++m_position; + return parse_repeat_range(true); + case regex_constants::syntax_close_brace: + if(this->flags() & regbase::no_intervals) + return parse_literal(); + fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {."); + return false; + case regex_constants::syntax_or: + if(this->flags() & regbase::bk_vbar) + return parse_alt(); + else + result = parse_literal(); + break; + case regex_constants::syntax_digit: + return parse_backref(); + case regex_constants::escape_type_start_buffer: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_buffer_start); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_end_buffer: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_buffer_end); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_word_assert: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_word_boundary); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_not_word_assert: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_within_word); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_left_word: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_word_start); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_right_word: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_word_end); + } + else + result = parse_literal(); + break; + default: + if(this->flags() & regbase::emacs_ex) + { + bool negate = true; + switch(*m_position) + { + case 'w': + negate = false; + BOOST_REGEX_FALLTHROUGH; + case 'W': + { + basic_char_set char_set; + if(negate) + char_set.negate(); + char_set.add_class(this->m_word_mask); + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + ++m_position; + return true; + } + case 's': + negate = false; + BOOST_REGEX_FALLTHROUGH; + case 'S': + return add_emacs_code(negate); + case 'c': + case 'C': + // not supported yet: + fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead."); + return false; + default: + break; + } + } + result = parse_literal(); + break; + } + return result; +} + +template +bool basic_regex_parser::parse_extended_escape() +{ + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found."); + return false; + } + bool negate = false; // in case this is a character class escape: \w \d etc + switch(this->m_traits.escape_syntax_type(*m_position)) + { + case regex_constants::escape_type_not_class: + negate = true; + BOOST_REGEX_FALLTHROUGH; + case regex_constants::escape_type_class: + { +escape_type_class_jump: + typedef typename traits::char_class_type m_type; + m_type m = this->m_traits.lookup_classname(m_position, m_position+1); + if(m != 0) + { + basic_char_set char_set; + if(negate) + char_set.negate(); + char_set.add_class(m); + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + ++m_position; + return true; + } + // + // not a class, just a regular unknown escape: + // + this->append_literal(unescape_character()); + break; + } + case regex_constants::syntax_digit: + return parse_backref(); + case regex_constants::escape_type_left_word: + ++m_position; + this->append_state(syntax_element_word_start); + break; + case regex_constants::escape_type_right_word: + ++m_position; + this->append_state(syntax_element_word_end); + break; + case regex_constants::escape_type_start_buffer: + ++m_position; + this->append_state(syntax_element_buffer_start); + break; + case regex_constants::escape_type_end_buffer: + ++m_position; + this->append_state(syntax_element_buffer_end); + break; + case regex_constants::escape_type_word_assert: + ++m_position; + this->append_state(syntax_element_word_boundary); + break; + case regex_constants::escape_type_not_word_assert: + ++m_position; + this->append_state(syntax_element_within_word); + break; + case regex_constants::escape_type_Z: + ++m_position; + this->append_state(syntax_element_soft_buffer_end); + break; + case regex_constants::escape_type_Q: + return parse_QE(); + case regex_constants::escape_type_C: + return parse_match_any(); + case regex_constants::escape_type_X: + ++m_position; + this->append_state(syntax_element_combining); + break; + case regex_constants::escape_type_G: + ++m_position; + this->append_state(syntax_element_restart_continue); + break; + case regex_constants::escape_type_not_property: + negate = true; + BOOST_REGEX_FALLTHROUGH; + case regex_constants::escape_type_property: + { + ++m_position; + char_class_type m; + if(m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found."); + return false; + } + // maybe have \p{ddd} + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace) + { + const charT* base = m_position; + // skip forward until we find enclosing brace: + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence."); + return false; + } + m = this->m_traits.lookup_classname(++base, m_position++); + } + else + { + m = this->m_traits.lookup_classname(m_position, m_position+1); + ++m_position; + } + if(m != 0) + { + basic_char_set char_set; + if(negate) + char_set.negate(); + char_set.add_class(m); + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + return true; + } + fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name."); + return false; + } + case regex_constants::escape_type_reset_start_mark: + if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + { + re_brace* pb = static_cast(this->append_state(syntax_element_startmark, sizeof(re_brace))); + pb->index = -5; + pb->icase = this->flags() & regbase::icase; + this->m_pdata->m_data.align(); + ++m_position; + return true; + } + goto escape_type_class_jump; + case regex_constants::escape_type_line_ending: + if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + { + const charT* e = get_escape_R_string(); + const charT* old_position = m_position; + const charT* old_end = m_end; + const charT* old_base = m_base; + m_position = e; + m_base = e; + m_end = e + traits::length(e); + bool r = parse_all(); + m_position = ++old_position; + m_end = old_end; + m_base = old_base; + return r; + } + goto escape_type_class_jump; + case regex_constants::escape_type_extended_backref: + if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + { + bool have_brace = false; + bool negative = false; + static const char incomplete_message[] = "Incomplete \\g escape found."; + if(++m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base, incomplete_message); + return false; + } + // maybe have \g{ddd} + regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position); + regex_constants::syntax_type syn_end = 0; + if((syn == regex_constants::syntax_open_brace) + || (syn == regex_constants::escape_type_left_word) + || (syn == regex_constants::escape_type_end_buffer)) + { + if(++m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base, incomplete_message); + return false; + } + have_brace = true; + switch(syn) + { + case regex_constants::syntax_open_brace: + syn_end = regex_constants::syntax_close_brace; + break; + case regex_constants::escape_type_left_word: + syn_end = regex_constants::escape_type_right_word; + break; + default: + syn_end = regex_constants::escape_type_end_buffer; + break; + } + } + negative = (*m_position == static_cast('-')); + if((negative) && (++m_position == m_end)) + { + fail(regex_constants::error_escape, m_position - m_base, incomplete_message); + return false; + } + const charT* pc = m_position; + std::intmax_t i = this->m_traits.toi(pc, m_end, 10); + if((i < 0) && syn_end) + { + // Check for a named capture, get the leftmost one if there is more than one: + const charT* base = m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end)) + { + ++m_position; + } + i = hash_value_from_capture_name(base, m_position); + pc = m_position; + } + if(negative) + i = 1 + (static_cast(m_mark_count) - i); + if(((i < hash_value_mask) && (i > 0)) || ((i >= hash_value_mask) && (this->m_pdata->get_id((int)i) > 0))) + { + m_position = pc; + re_brace* pb = static_cast(this->append_state(syntax_element_backref, sizeof(re_brace))); + pb->index = (int)i; + pb->icase = this->flags() & regbase::icase; + if ((i > m_max_backref) && (i < hash_value_mask)) + m_max_backref = i; + } + else + { + fail(regex_constants::error_backref, m_position - m_base); + return false; + } + m_position = pc; + if(have_brace) + { + if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end)) + { + fail(regex_constants::error_escape, m_position - m_base, incomplete_message); + return false; + } + ++m_position; + } + return true; + } + goto escape_type_class_jump; + case regex_constants::escape_type_control_v: + if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + goto escape_type_class_jump; + BOOST_REGEX_FALLTHROUGH; + default: + this->append_literal(unescape_character()); + break; + } + return true; +} + +template +bool basic_regex_parser::parse_match_any() +{ + // + // we have a '.' that can match any character: + // + ++m_position; + static_cast( + this->append_state(syntax_element_wild, sizeof(re_dot)) + )->mask = static_cast(this->flags() & regbase::no_mod_s + ? BOOST_REGEX_DETAIL_NS::force_not_newline + : this->flags() & regbase::mod_s ? + BOOST_REGEX_DETAIL_NS::force_newline : BOOST_REGEX_DETAIL_NS::dont_care); + return true; +} + +template +bool basic_regex_parser::parse_repeat(std::size_t low, std::size_t high) +{ + bool greedy = true; + bool possessive = false; + std::size_t insert_point; + // + // when we get to here we may have a non-greedy ? mark still to come: + // + if((m_position != m_end) + && ( + (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex))) + ) + ) + { + // OK we have a perl or emacs regex, check for a '?': + if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x) + { + // whitespace skip: + while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) + ++m_position; + } + if((m_position != m_end) && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)) + { + greedy = false; + ++m_position; + } + // for perl regexes only check for possessive ++ repeats. + if((m_position != m_end) + && (0 == (this->flags() & regbase::main_option_type)) + && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus)) + { + possessive = true; + ++m_position; + } + } + if(0 == this->m_last_state) + { + fail(regex_constants::error_badrepeat, std::distance(m_base, m_position), "Nothing to repeat."); + return false; + } + if(this->m_last_state->type == syntax_element_endmark) + { + // insert a repeat before the '(' matching the last ')': + insert_point = this->m_paren_start; + } + else if((this->m_last_state->type == syntax_element_literal) && (static_cast(this->m_last_state)->length > 1)) + { + // the last state was a literal with more than one character, split it in two: + re_literal* lit = static_cast(this->m_last_state); + charT c = (static_cast(static_cast(lit+1)))[lit->length - 1]; + lit->length -= 1; + // now append new state: + lit = static_cast(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT))); + lit->length = 1; + (static_cast(static_cast(lit+1)))[0] = c; + insert_point = this->getoffset(this->m_last_state); + } + else + { + // repeat the last state whatever it was, need to add some error checking here: + switch(this->m_last_state->type) + { + case syntax_element_start_line: + case syntax_element_end_line: + case syntax_element_word_boundary: + case syntax_element_within_word: + case syntax_element_word_start: + case syntax_element_word_end: + case syntax_element_buffer_start: + case syntax_element_buffer_end: + case syntax_element_alt: + case syntax_element_soft_buffer_end: + case syntax_element_restart_continue: + case syntax_element_jump: + case syntax_element_startmark: + case syntax_element_backstep: + case syntax_element_toggle_case: + // can't legally repeat any of the above: + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + default: + // do nothing... + break; + } + insert_point = this->getoffset(this->m_last_state); + } + // + // OK we now know what to repeat, so insert the repeat around it: + // + re_repeat* rep = static_cast(this->insert_state(insert_point, syntax_element_rep, re_repeater_size)); + rep->min = low; + rep->max = high; + rep->greedy = greedy; + rep->leading = false; + // store our repeater position for later: + std::ptrdiff_t rep_off = this->getoffset(rep); + // and append a back jump to the repeat: + re_jump* jmp = static_cast(this->append_state(syntax_element_jump, sizeof(re_jump))); + jmp->alt.i = rep_off - this->getoffset(jmp); + this->m_pdata->m_data.align(); + // now fill in the alt jump for the repeat: + rep = static_cast(this->getaddress(rep_off)); + rep->alt.i = this->m_pdata->m_data.size() - rep_off; + // + // If the repeat is possessive then bracket the repeat with a (?>...) + // independent sub-expression construct: + // + if(possessive) + { + if(m_position != m_end) + { + // + // Check for illegal following quantifier, we have to do this here, because + // the extra states we insert below circumvents our usual error checking :-( + // + bool contin = false; + do + { + if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x) + { + // whitespace skip: + while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) + ++m_position; + } + if (m_position != m_end) + { + switch (this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_star: + case regex_constants::syntax_plus: + case regex_constants::syntax_question: + case regex_constants::syntax_open_brace: + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + case regex_constants::syntax_open_mark: + // Do we have a comment? If so we need to skip it here... + if ((m_position + 2 < m_end) && this->m_traits.syntax_type(*(m_position + 1)) == regex_constants::syntax_question + && this->m_traits.syntax_type(*(m_position + 2)) == regex_constants::syntax_hash) + { + while ((m_position != m_end) + && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark)) { + } + contin = true; + } + else + contin = false; + break; + default: + contin = false; + } + } + else + contin = false; + } while (contin); + } + re_brace* pb = static_cast(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace))); + pb->index = -3; + pb->icase = this->flags() & regbase::icase; + jmp = static_cast(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump))); + this->m_pdata->m_data.align(); + jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp); + pb = static_cast(this->append_state(syntax_element_endmark, sizeof(re_brace))); + pb->index = -3; + pb->icase = this->flags() & regbase::icase; + } + return true; +} + +template +bool basic_regex_parser::parse_repeat_range(bool isbasic) +{ + static const char incomplete_message[] = "Missing } in quantified repetition."; + // + // parse a repeat-range: + // + std::size_t min, max; + std::intmax_t v; + // skip whitespace: + while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) + ++m_position; + if(this->m_position == this->m_end) + { + if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + // Treat the opening '{' as a literal character, rewind to start of error: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position; + return parse_literal(); + } + // get min: + v = this->m_traits.toi(m_position, m_end, 10); + // skip whitespace: + if((v < 0) || (v > umax())) + { + if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + // Treat the opening '{' as a literal character, rewind to start of error: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position; + return parse_literal(); + } + while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) + ++m_position; + if(this->m_position == this->m_end) + { + if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + // Treat the opening '{' as a literal character, rewind to start of error: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position; + return parse_literal(); + } + min = static_cast(v); + // see if we have a comma: + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma) + { + // move on and error check: + ++m_position; + // skip whitespace: + while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) + ++m_position; + if(this->m_position == this->m_end) + { + if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + // Treat the opening '{' as a literal character, rewind to start of error: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position; + return parse_literal(); + } + // get the value if any: + v = this->m_traits.toi(m_position, m_end, 10); + max = ((v >= 0) && (v < umax())) ? (std::size_t)v : (std::numeric_limits::max)(); + } + else + { + // no comma, max = min: + max = min; + } + // skip whitespace: + while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) + ++m_position; + // OK now check trailing }: + if(this->m_position == this->m_end) + { + if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + // Treat the opening '{' as a literal character, rewind to start of error: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position; + return parse_literal(); + } + if(isbasic) + { + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape) + { + ++m_position; + if(this->m_position == this->m_end) + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + } + else + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + } + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace) + ++m_position; + else + { + // Treat the opening '{' as a literal character, rewind to start of error: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position; + return parse_literal(); + } + // + // finally go and add the repeat, unless error: + // + if(min > max) + { + // Backtrack to error location: + m_position -= 2; + while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position; + ++m_position; + fail(regex_constants::error_badbrace, m_position - m_base); + return false; + } + return parse_repeat(min, max); +} + +template +bool basic_regex_parser::parse_alt() +{ + // + // error check: if there have been no previous states, + // or if the last state was a '(' then error: + // + if( + ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark)) + && + !( + ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) + && + ((this->flags() & regbase::no_empty_expressions) == 0) + ) + ) + { + fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression cannot start with the alternation operator |."); + return false; + } + // + // Reset mark count if required: + // + if(m_max_mark < m_mark_count) + m_max_mark = m_mark_count; + if(m_mark_reset >= 0) + m_mark_count = m_mark_reset; + + ++m_position; + // + // we need to append a trailing jump: + // + re_syntax_base* pj = this->append_state(BOOST_REGEX_DETAIL_NS::syntax_element_jump, sizeof(re_jump)); + std::ptrdiff_t jump_offset = this->getoffset(pj); + // + // now insert the alternative: + // + re_alt* palt = static_cast(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size)); + jump_offset += re_alt_size; + this->m_pdata->m_data.align(); + palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt); + // + // update m_alt_insert_point so that the next alternate gets + // inserted at the start of the second of the two we've just created: + // + this->m_alt_insert_point = this->m_pdata->m_data.size(); + // + // the start of this alternative must have a case changes state + // if the current block has messed around with case changes: + // + if(m_has_case_change) + { + static_cast( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = this->m_icase; + } + // + // push the alternative onto our stack, a recursive + // implementation here is easier to understand (and faster + // as it happens), but causes all kinds of stack overflow problems + // on programs with small stacks (COM+). + // + m_alt_jumps.push_back(jump_offset); + return true; +} + +template +bool basic_regex_parser::parse_set() +{ + static const char incomplete_message[] = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content."; + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + basic_char_set char_set; + + const charT* base = m_position; // where the '[' was + const charT* item_base = m_position; // where the '[' or '^' was + + while(m_position != m_end) + { + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_caret: + if(m_position == base) + { + char_set.negate(); + ++m_position; + item_base = m_position; + } + else + parse_set_literal(char_set); + break; + case regex_constants::syntax_close_set: + if(m_position == item_base) + { + parse_set_literal(char_set); + break; + } + else + { + ++m_position; + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + } + return true; + case regex_constants::syntax_open_set: + if(parse_inner_set(char_set)) + break; + return true; + case regex_constants::syntax_escape: + { + // + // look ahead and see if this is a character class shortcut + // \d \w \s etc... + // + ++m_position; + if(this->m_traits.escape_syntax_type(*m_position) + == regex_constants::escape_type_class) + { + char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1); + if(m != 0) + { + char_set.add_class(m); + ++m_position; + break; + } + } + else if(this->m_traits.escape_syntax_type(*m_position) + == regex_constants::escape_type_not_class) + { + // negated character class: + char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1); + if(m != 0) + { + char_set.add_negated_class(m); + ++m_position; + break; + } + } + // not a character class, just a regular escape: + --m_position; + parse_set_literal(char_set); + break; + } + default: + parse_set_literal(char_set); + break; + } + } + return m_position != m_end; +} + +template +bool basic_regex_parser::parse_inner_set(basic_char_set& char_set) +{ + static const char incomplete_message[] = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content."; + // + // we have either a character class [:name:] + // a collating element [.name.] + // or an equivalence class [=name=] + // + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_dot: + // + // a collating element is treated as a literal: + // + --m_position; + parse_set_literal(char_set); + return true; + case regex_constants::syntax_colon: + { + // check that character classes are actually enabled: + if((this->flags() & (regbase::main_option_type | regbase::no_char_classes)) + == (regbase::basic_syntax_group | regbase::no_char_classes)) + { + --m_position; + parse_set_literal(char_set); + return true; + } + // skip the ':' + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + const charT* name_first = m_position; + // skip at least one character, then find the matching ':]' + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + while((m_position != m_end) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon)) + ++m_position; + const charT* name_last = m_position; + if(m_end == m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + if((m_end == ++m_position) + || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + // + // check for negated class: + // + bool negated = false; + if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret) + { + ++name_first; + negated = true; + } + typedef typename traits::char_class_type m_type; + m_type m = this->m_traits.lookup_classname(name_first, name_last); + if(m == 0) + { + if(char_set.empty() && (name_last - name_first == 1)) + { + // maybe a special case: + ++m_position; + if( (m_position != m_end) + && (this->m_traits.syntax_type(*m_position) + == regex_constants::syntax_close_set)) + { + if(this->m_traits.escape_syntax_type(*name_first) + == regex_constants::escape_type_left_word) + { + ++m_position; + this->append_state(syntax_element_word_start); + return false; + } + if(this->m_traits.escape_syntax_type(*name_first) + == regex_constants::escape_type_right_word) + { + ++m_position; + this->append_state(syntax_element_word_end); + return false; + } + } + } + fail(regex_constants::error_ctype, name_first - m_base); + return false; + } + if(!negated) + char_set.add_class(m); + else + char_set.add_negated_class(m); + ++m_position; + break; + } + case regex_constants::syntax_equal: + { + // skip the '=' + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + const charT* name_first = m_position; + // skip at least one character, then find the matching '=]' + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + while((m_position != m_end) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)) + ++m_position; + const charT* name_last = m_position; + if(m_end == m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + if((m_end == ++m_position) + || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + string_type m = this->m_traits.lookup_collatename(name_first, name_last); + if(m.empty() || (m.size() > 2)) + { + fail(regex_constants::error_collate, name_first - m_base); + return false; + } + digraph d; + d.first = m[0]; + if(m.size() > 1) + d.second = m[1]; + else + d.second = 0; + char_set.add_equivalent(d); + ++m_position; + break; + } + default: + --m_position; + parse_set_literal(char_set); + break; + } + return true; +} + +template +void basic_regex_parser::parse_set_literal(basic_char_set& char_set) +{ + digraph start_range(get_next_set_literal(char_set)); + if(m_end == m_position) + { + fail(regex_constants::error_brack, m_position - m_base); + return; + } + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash) + { + // we have a range: + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base); + return; + } + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set) + { + digraph end_range = get_next_set_literal(char_set); + char_set.add_range(start_range, end_range); + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash) + { + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base); + return; + } + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set) + { + // trailing - : + --m_position; + return; + } + fail(regex_constants::error_range, m_position - m_base); + return; + } + return; + } + --m_position; + } + char_set.add_single(start_range); +} + +template +digraph basic_regex_parser::get_next_set_literal(basic_char_set& char_set) +{ + digraph result; + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_dash: + if(!char_set.empty()) + { + // see if we are at the end of the set: + if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) + { + fail(regex_constants::error_range, m_position - m_base); + return result; + } + --m_position; + } + result.first = *m_position++; + return result; + case regex_constants::syntax_escape: + // check to see if escapes are supported first: + if(this->flags() & regex_constants::no_escape_in_lists) + { + result = *m_position++; + break; + } + ++m_position; + result = unescape_character(); + break; + case regex_constants::syntax_open_set: + { + if(m_end == ++m_position) + { + fail(regex_constants::error_collate, m_position - m_base); + return result; + } + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot) + { + --m_position; + result.first = *m_position; + ++m_position; + return result; + } + if(m_end == ++m_position) + { + fail(regex_constants::error_collate, m_position - m_base); + return result; + } + const charT* name_first = m_position; + // skip at least one character, then find the matching ':]' + if(m_end == ++m_position) + { + fail(regex_constants::error_collate, name_first - m_base); + return result; + } + while((m_position != m_end) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)) + ++m_position; + const charT* name_last = m_position; + if(m_end == m_position) + { + fail(regex_constants::error_collate, name_first - m_base); + return result; + } + if((m_end == ++m_position) + || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) + { + fail(regex_constants::error_collate, name_first - m_base); + return result; + } + ++m_position; + string_type s = this->m_traits.lookup_collatename(name_first, name_last); + if(s.empty() || (s.size() > 2)) + { + fail(regex_constants::error_collate, name_first - m_base); + return result; + } + result.first = s[0]; + if(s.size() > 1) + result.second = s[1]; + else + result.second = 0; + return result; + } + default: + result = *m_position++; + } + return result; +} + +// +// does a value fit in the specified charT type? +// +template +bool valid_value(charT, std::intmax_t v, const std::integral_constant&) +{ + return (v >> (sizeof(charT) * CHAR_BIT)) == 0; +} +template +bool valid_value(charT, std::intmax_t, const std::integral_constant&) +{ + return true; // v will alsways fit in a charT +} +template +bool valid_value(charT c, std::intmax_t v) +{ + return valid_value(c, v, std::integral_constant()); +} + +template +charT basic_regex_parser::unescape_character() +{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + charT result(0); + if(m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely."); + return false; + } + switch(this->m_traits.escape_syntax_type(*m_position)) + { + case regex_constants::escape_type_control_a: + result = charT('\a'); + break; + case regex_constants::escape_type_e: + result = charT(27); + break; + case regex_constants::escape_type_control_f: + result = charT('\f'); + break; + case regex_constants::escape_type_control_n: + result = charT('\n'); + break; + case regex_constants::escape_type_control_r: + result = charT('\r'); + break; + case regex_constants::escape_type_control_t: + result = charT('\t'); + break; + case regex_constants::escape_type_control_v: + result = charT('\v'); + break; + case regex_constants::escape_type_word_assert: + result = charT('\b'); + break; + case regex_constants::escape_type_ascii_control: + ++m_position; + if(m_position == m_end) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely."); + return result; + } + result = static_cast(*m_position % 32); + break; + case regex_constants::escape_type_hex: + ++m_position; + if(m_position == m_end) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely."); + return result; + } + // maybe have \x{ddd} + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace) + { + ++m_position; + if(m_position == m_end) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence."); + return result; + } + std::intmax_t i = this->m_traits.toi(m_position, m_end, 16); + if((m_position == m_end) + || (i < 0) + || ((std::numeric_limits::is_specialized) && (i > (std::intmax_t)(std::numeric_limits::max)())) + || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid."); + return result; + } + ++m_position; + result = charT(i); + } + else + { + std::ptrdiff_t len = (std::min)(static_cast(2), static_cast(m_end - m_position)); + std::intmax_t i = this->m_traits.toi(m_position, m_position + len, 16); + if((i < 0) + || !valid_value(charT(0), i)) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character."); + return result; + } + result = charT(i); + } + return result; + case regex_constants::syntax_digit: + { + // an octal escape sequence, the first character must be a zero + // followed by up to 3 octal digits: + std::ptrdiff_t len = (std::min)(std::distance(m_position, m_end), static_cast(4)); + const charT* bp = m_position; + std::intmax_t val = this->m_traits.toi(bp, bp + 1, 8); + if(val != 0) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + // Oops not an octal escape after all: + fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence."); + return result; + } + val = this->m_traits.toi(m_position, m_position + len, 8); + if((val < 0) || (val > (std::intmax_t)(std::numeric_limits::max)())) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid."); + return result; + } + return static_cast(val); + } + case regex_constants::escape_type_named_char: + { + ++m_position; + if(m_position == m_end) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + // maybe have \N{name} + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace) + { + const charT* base = m_position; + // skip forward until we find enclosing brace: + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + string_type s = this->m_traits.lookup_collatename(++base, m_position++); + if(s.empty()) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_collate, m_position - m_base); + return false; + } + if(s.size() == 1) + { + return s[0]; + } + } + // fall through is a failure: + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + default: + result = *m_position; + break; + } + ++m_position; + return result; +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} + +template +bool basic_regex_parser::parse_backref() +{ + BOOST_REGEX_ASSERT(m_position != m_end); + const charT* pc = m_position; + std::intmax_t i = this->m_traits.toi(pc, pc + 1, 10); + if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs))) + { + // not a backref at all but an octal escape sequence: + charT c = unescape_character(); + this->append_literal(c); + } + else if((i > 0)) + { + m_position = pc; + re_brace* pb = static_cast(this->append_state(syntax_element_backref, sizeof(re_brace))); + pb->index = (int)i; + pb->icase = this->flags() & regbase::icase; + if(i > m_max_backref) + m_max_backref = i; + } + else + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_backref, m_position - m_base); + return false; + } + return true; +} + +template +bool basic_regex_parser::parse_QE() +{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + // + // parse a \Q...\E sequence: + // + ++m_position; // skip the Q + const charT* start = m_position; + const charT* end; + do + { + while((m_position != m_end) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape)) + ++m_position; + if(m_position == m_end) + { + // a \Q...\E sequence may terminate with the end of the expression: + end = m_position; + break; + } + if(++m_position == m_end) // skip the escape + { + fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence."); + return false; + } + // check to see if it's a \E: + if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E) + { + ++m_position; + end = m_position - 2; + break; + } + // otherwise go round again: + }while(true); + // + // now add all the character between the two escapes as literals: + // + while(start != end) + { + this->append_literal(*start); + ++start; + } + return true; +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} + +template +bool basic_regex_parser::parse_perl_extension() +{ + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + // + // treat comments as a special case, as these + // are the only ones that don't start with a leading + // startmark state: + // + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash) + { + while((m_position != m_end) + && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark)) + {} + return true; + } + // + // backup some state, and prepare the way: + // + int markid = 0; + std::ptrdiff_t jump_offset = 0; + re_brace* pb = static_cast(this->append_state(syntax_element_startmark, sizeof(re_brace))); + pb->icase = this->flags() & regbase::icase; + std::ptrdiff_t last_paren_start = this->getoffset(pb); + // back up insertion point for alternations, and set new point: + std::ptrdiff_t last_alt_point = m_alt_insert_point; + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + std::ptrdiff_t expected_alt_point = m_alt_insert_point; + bool restore_flags = true; + regex_constants::syntax_option_type old_flags = this->flags(); + bool old_case_change = m_has_case_change; + m_has_case_change = false; + charT name_delim; + int mark_reset = m_mark_reset; + int max_mark = m_max_mark; + m_mark_reset = -1; + m_max_mark = m_mark_count; + std::intmax_t v; + // + // select the actual extension used: + // + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_or: + m_mark_reset = m_mark_count; + BOOST_REGEX_FALLTHROUGH; + case regex_constants::syntax_colon: + // + // a non-capturing mark: + // + pb->index = markid = 0; + ++m_position; + break; + case regex_constants::syntax_digit: + { + // + // a recursive subexpression: + // + v = this->m_traits.toi(m_position, m_end, 10); + if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated."); + return false; + } +insert_recursion: + pb->index = markid = 0; + re_recurse* pr = static_cast(this->append_state(syntax_element_recurse, sizeof(re_recurse))); + pr->alt.i = (std::ptrdiff_t)v; + pr->state_id = 0; + static_cast( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = this->flags() & regbase::icase; + break; + } + case regex_constants::syntax_plus: + // + // A forward-relative recursive subexpression: + // + ++m_position; + v = this->m_traits.toi(m_position, m_end, 10); + if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression."); + return false; + } + if ((std::numeric_limits::max)() - m_mark_count < v) + { + fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression."); + return false; + } + v += m_mark_count; + goto insert_recursion; + case regex_constants::syntax_dash: + // + // Possibly a backward-relative recursive subexpression: + // + ++m_position; + v = this->m_traits.toi(m_position, m_end, 10); + if(v <= 0) + { + --m_position; + // Oops not a relative recursion at all, but a (?-imsx) group: + goto option_group_jump; + } + v = static_cast(m_mark_count) + 1 - v; + if(v <= 0) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression."); + return false; + } + goto insert_recursion; + case regex_constants::syntax_equal: + pb->index = markid = -1; + ++m_position; + jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + break; + case regex_constants::syntax_not: + pb->index = markid = -2; + ++m_position; + jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + break; + case regex_constants::escape_type_left_word: + { + // a lookbehind assertion: + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position); + if(t == regex_constants::syntax_not) + pb->index = markid = -2; + else if(t == regex_constants::syntax_equal) + pb->index = markid = -1; + else + { + // Probably a named capture which also starts (?< : + name_delim = '>'; + --m_position; + goto named_capture_jump; + } + ++m_position; + jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); + this->append_state(syntax_element_backstep, sizeof(re_brace)); + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + break; + } + case regex_constants::escape_type_right_word: + // + // an independent sub-expression: + // + pb->index = markid = -3; + ++m_position; + jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + break; + case regex_constants::syntax_open_mark: + { + // a conditional expression: + pb->index = markid = -4; + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + v = this->m_traits.toi(m_position, m_end, 10); + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(*m_position == charT('R')) + { + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(*m_position == charT('&')) + { + const charT* base = ++m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + v = -static_cast(hash_value_from_capture_name(base, m_position)); + } + else + { + v = -this->m_traits.toi(m_position, m_end, 10); + } + re_brace* br = static_cast(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = v < 0 ? (int)(v - 1) : 0; + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + } + else if((*m_position == charT('\'')) || (*m_position == charT('<'))) + { + const charT* base = ++m_position; + while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\''))) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + v = static_cast(hash_value_from_capture_name(base, m_position)); + re_brace* br = static_cast(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = (int)v; + if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture."); + return false; + } + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + } + else if(*m_position == charT('D')) + { + const char* def = "DEFINE"; + while(*def && (m_position != m_end) && (*m_position == charT(*def))) + ++m_position, ++def; + if((m_position == m_end) || *def) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + re_brace* br = static_cast(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = 9999; // special magic value! + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + } + else if(v > 0) + { + re_brace* br = static_cast(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = (int)v; + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + } + else + { + // verify that we have a lookahead or lookbehind assert: + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word) + { + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + m_position -= 3; + } + else + { + if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + m_position -= 2; + } + } + break; + } + case regex_constants::syntax_close_mark: + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + case regex_constants::escape_type_end_buffer: + { + name_delim = *m_position; +named_capture_jump: + markid = 0; + if(0 == (this->flags() & regbase::nosubs)) + { + markid = ++m_mark_count; + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.push_back(std::pair(std::distance(m_base, m_position) - 2, 0)); + } + pb->index = markid; + const charT* base = ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + while((m_position != m_end) && (*m_position != name_delim)) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + this->m_pdata->set_name(base, m_position, markid); + ++m_position; + break; + } + default: + if(*m_position == charT('R')) + { + ++m_position; + v = 0; + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + goto insert_recursion; + } + if(*m_position == charT('&')) + { + ++m_position; + const charT* base = m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + v = static_cast(hash_value_from_capture_name(base, m_position)); + goto insert_recursion; + } + if(*m_position == charT('P')) + { + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(*m_position == charT('>')) + { + ++m_position; + const charT* base = m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + v = static_cast(hash_value_from_capture_name(base, m_position)); + goto insert_recursion; + } + } + // + // lets assume that we have a (?imsx) group and try and parse it: + // +option_group_jump: + regex_constants::syntax_option_type opts = parse_options(); + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + // make a note of whether we have a case change: + m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase)); + pb->index = markid = 0; + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) + { + // update flags and carry on as normal: + this->flags(opts); + restore_flags = false; + old_case_change |= m_has_case_change; // defer end of scope by one ')' + } + else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon) + { + // update flags and carry on until the matching ')' is found: + this->flags(opts); + ++m_position; + } + else + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + + // finally append a case change state if we need it: + if(m_has_case_change) + { + static_cast( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = opts & regbase::icase; + } + + } + // + // now recursively add more states, this will terminate when we get to a + // matching ')' : + // + parse_all(); + // + // Unwind alternatives: + // + if(0 == unwind_alts(last_paren_start)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block."); + return false; + } + // + // we either have a ')' or we have run out of characters prematurely: + // + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + this->fail(regex_constants::error_paren, std::distance(m_base, m_end)); + return false; + } + BOOST_REGEX_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark); + ++m_position; + // + // restore the flags: + // + if(restore_flags) + { + // append a case change state if we need it: + if(m_has_case_change) + { + static_cast( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = old_flags & regbase::icase; + } + this->flags(old_flags); + } + // + // set up the jump pointer if we have one: + // + if(jump_offset) + { + this->m_pdata->m_data.align(); + re_jump* jmp = static_cast(this->getaddress(jump_offset)); + jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp); + if((this->m_last_state == jmp) && (markid != -2)) + { + // Oops... we didn't have anything inside the assertion. + // Note we don't get here for negated forward lookahead as (?!) + // does have some uses. + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion."); + return false; + } + } + // + // verify that if this is conditional expression, that we do have + // an alternative, if not add one: + // + if(markid == -4) + { + re_syntax_base* b = this->getaddress(expected_alt_point); + // Make sure we have exactly one alternative following this state: + if(b->type != syntax_element_alt) + { + re_alt* alt = static_cast(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt))); + alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt); + } + else if(((std::ptrdiff_t)this->m_pdata->m_data.size() > (static_cast(b)->alt.i + this->getoffset(b))) && (static_cast(b)->alt.i > 0) && this->getaddress(static_cast(b)->alt.i, b)->type == syntax_element_alt) + { + // Can't have seen more than one alternative: + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression."); + return false; + } + else + { + // We must *not* have seen an alternative inside a (DEFINE) block: + b = this->getaddress(b->next.i, b); + if((b->type == syntax_element_assert_backref) && (static_cast(b)->index == 9999)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block."); + return false; + } + } + // check for invalid repetition of next state: + b = this->getaddress(expected_alt_point); + b = this->getaddress(static_cast(b)->next.i, b); + if((b->type != syntax_element_assert_backref) + && (b->type != syntax_element_startmark)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion."); + return false; + } + } + // + // append closing parenthesis state: + // + pb = static_cast(this->append_state(syntax_element_endmark, sizeof(re_brace))); + pb->index = markid; + pb->icase = this->flags() & regbase::icase; + this->m_paren_start = last_paren_start; + // + // restore the alternate insertion point: + // + this->m_alt_insert_point = last_alt_point; + // + // and the case change data: + // + m_has_case_change = old_case_change; + // + // And the mark_reset data: + // + if(m_max_mark > m_mark_count) + { + m_mark_count = m_max_mark; + } + m_mark_reset = mark_reset; + m_max_mark = max_mark; + + + if(markid > 0) + { + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.at((std::size_t)markid - 1).second = std::distance(m_base, m_position) - 1; + } + return true; +} + +template +bool basic_regex_parser::match_verb(const char* verb) +{ + while(*verb) + { + if(static_cast(*verb) != *m_position) + { + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + ++verb; + } + return true; +} + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#if BOOST_REGEX_MSVC >= 1800 +#pragma warning(disable:26812) +#endif +#endif +template +bool basic_regex_parser::parse_perl_verb() +{ + if(++m_position == m_end) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + switch(*m_position) + { + case 'F': + if(++m_position == m_end) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if((this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) || match_verb("AIL")) + { + if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + ++m_position; + this->append_state(syntax_element_fail); + return true; + } + break; + case 'A': + if(++m_position == m_end) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(match_verb("CCEPT")) + { + if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + ++m_position; + this->append_state(syntax_element_accept); + return true; + } + break; + case 'C': + if(++m_position == m_end) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(match_verb("OMMIT")) + { + if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + ++m_position; + static_cast(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_commit; + this->m_pdata->m_disable_match_any = true; + return true; + } + break; + case 'P': + if(++m_position == m_end) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(match_verb("RUNE")) + { + if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + ++m_position; + static_cast(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_prune; + this->m_pdata->m_disable_match_any = true; + return true; + } + break; + case 'S': + if(++m_position == m_end) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(match_verb("KIP")) + { + if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + ++m_position; + static_cast(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_skip; + this->m_pdata->m_disable_match_any = true; + return true; + } + break; + case 'T': + if(++m_position == m_end) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(match_verb("HEN")) + { + if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + ++m_position; + this->append_state(syntax_element_then); + this->m_pdata->m_disable_match_any = true; + return true; + } + break; + } + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; +} +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + +template +bool basic_regex_parser::add_emacs_code(bool negate) +{ + // + // parses an emacs style \sx or \Sx construct. + // + if(++m_position == m_end) + { + // Rewind to start of sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + basic_char_set char_set; + if(negate) + char_set.negate(); + + static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', }; + + switch(*m_position) + { + case 's': + case ' ': + char_set.add_class(this->m_mask_space); + break; + case 'w': + char_set.add_class(this->m_word_mask); + break; + case '_': + char_set.add_single(digraph(charT('$'))); + char_set.add_single(digraph(charT('&'))); + char_set.add_single(digraph(charT('*'))); + char_set.add_single(digraph(charT('+'))); + char_set.add_single(digraph(charT('-'))); + char_set.add_single(digraph(charT('_'))); + char_set.add_single(digraph(charT('<'))); + char_set.add_single(digraph(charT('>'))); + break; + case '.': + char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5)); + break; + case '(': + char_set.add_single(digraph(charT('('))); + char_set.add_single(digraph(charT('['))); + char_set.add_single(digraph(charT('{'))); + break; + case ')': + char_set.add_single(digraph(charT(')'))); + char_set.add_single(digraph(charT(']'))); + char_set.add_single(digraph(charT('}'))); + break; + case '"': + char_set.add_single(digraph(charT('"'))); + char_set.add_single(digraph(charT('\''))); + char_set.add_single(digraph(charT('`'))); + break; + case '\'': + char_set.add_single(digraph(charT('\''))); + char_set.add_single(digraph(charT(','))); + char_set.add_single(digraph(charT('#'))); + break; + case '<': + char_set.add_single(digraph(charT(';'))); + break; + case '>': + char_set.add_single(digraph(charT('\n'))); + char_set.add_single(digraph(charT('\f'))); + break; + default: + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + ++m_position; + return true; +} + +template +regex_constants::syntax_option_type basic_regex_parser::parse_options() +{ + // we have a (?imsx-imsx) group, convert it into a set of flags: + regex_constants::syntax_option_type f = this->flags(); + bool breakout = false; + do + { + switch(*m_position) + { + case 's': + f |= regex_constants::mod_s; + f &= ~regex_constants::no_mod_s; + break; + case 'm': + f &= ~regex_constants::no_mod_m; + break; + case 'i': + f |= regex_constants::icase; + break; + case 'x': + f |= regex_constants::mod_x; + break; + default: + breakout = true; + continue; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + } + while(!breakout); + + breakout = false; + + if(*m_position == static_cast('-')) + { + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + do + { + switch(*m_position) + { + case 's': + f &= ~regex_constants::mod_s; + f |= regex_constants::no_mod_s; + break; + case 'm': + f |= regex_constants::no_mod_m; + break; + case 'i': + f &= ~regex_constants::icase; + break; + case 'x': + f &= ~regex_constants::mod_x; + break; + default: + breakout = true; + continue; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + } + while(!breakout); + } + return f; +} + +template +bool basic_regex_parser::unwind_alts(std::ptrdiff_t last_paren_start) +{ + // + // If we didn't actually add any states after the last + // alternative then that's an error: + // + if((this->m_alt_insert_point == static_cast(this->m_pdata->m_data.size())) + && (!m_alt_jumps.empty()) && (m_alt_jumps.back() > last_paren_start) + && + !( + ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) + && + ((this->flags() & regbase::no_empty_expressions) == 0) + ) + ) + { + fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |."); + return false; + } + // + // Fix up our alternatives: + // + while((!m_alt_jumps.empty()) && (m_alt_jumps.back() > last_paren_start)) + { + // + // fix up the jump to point to the end of the states + // that we've just added: + // + std::ptrdiff_t jump_offset = m_alt_jumps.back(); + m_alt_jumps.pop_back(); + this->m_pdata->m_data.align(); + re_jump* jmp = static_cast(this->getaddress(jump_offset)); + if (jmp->type != syntax_element_jump) + { + // Something really bad happened, this used to be an assert, + // but we'll make it an error just in case we should ever get here. + fail(regex_constants::error_unknown, this->m_position - this->m_base, "Internal logic failed while compiling the expression, probably you added a repeat to something non-repeatable!"); + return false; + } + jmp->alt.i = this->m_pdata->m_data.size() - jump_offset; + } + return true; +} + +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif + +} // namespace BOOST_REGEX_DETAIL_NS +} // namespace boost + +#endif diff --git a/scintilla/include/boost/regex/v5/c_regex_traits.hpp b/scintilla/include/boost/regex/v5/c_regex_traits.hpp new file mode 100644 index 0000000000..9de3463256 --- /dev/null +++ b/scintilla/include/boost/regex/v5/c_regex_traits.hpp @@ -0,0 +1,474 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE c_regex_traits.hpp + * VERSION see + * DESCRIPTION: Declares regular expression traits class that wraps the global C locale. + */ + +#ifndef BOOST_C_REGEX_TRAITS_HPP_INCLUDED +#define BOOST_C_REGEX_TRAITS_HPP_INCLUDED + +#include +#include +#include + +namespace boost{ + + namespace BOOST_REGEX_DETAIL_NS { + + enum + { + char_class_space = 1 << 0, + char_class_print = 1 << 1, + char_class_cntrl = 1 << 2, + char_class_upper = 1 << 3, + char_class_lower = 1 << 4, + char_class_alpha = 1 << 5, + char_class_digit = 1 << 6, + char_class_punct = 1 << 7, + char_class_xdigit = 1 << 8, + char_class_alnum = char_class_alpha | char_class_digit, + char_class_graph = char_class_alnum | char_class_punct, + char_class_blank = 1 << 9, + char_class_word = 1 << 10, + char_class_unicode = 1 << 11, + char_class_horizontal = 1 << 12, + char_class_vertical = 1 << 13 + }; + + } + +template +struct c_regex_traits; + +template<> +struct c_regex_traits +{ + c_regex_traits(){} + typedef char char_type; + typedef std::size_t size_type; + typedef std::string string_type; + struct locale_type{}; + typedef std::uint32_t char_class_type; + + static size_type length(const char_type* p) + { + return (std::strlen)(p); + } + + char translate(char c) const + { + return c; + } + char translate_nocase(char c) const + { + return static_cast((std::tolower)(static_cast(c))); + } + + static string_type transform(const char* p1, const char* p2); + static string_type transform_primary(const char* p1, const char* p2); + + static char_class_type lookup_classname(const char* p1, const char* p2); + static string_type lookup_collatename(const char* p1, const char* p2); + + static bool isctype(char, char_class_type); + static int value(char, int); + + locale_type imbue(locale_type l) + { return l; } + locale_type getloc()const + { return locale_type(); } + +private: + // this type is not copyable: + c_regex_traits(const c_regex_traits&); + c_regex_traits& operator=(const c_regex_traits&); +}; + +#ifndef BOOST_NO_WREGEX +template<> +struct c_regex_traits +{ + c_regex_traits(){} + typedef wchar_t char_type; + typedef std::size_t size_type; + typedef std::wstring string_type; + struct locale_type{}; + typedef std::uint32_t char_class_type; + + static size_type length(const char_type* p) + { + return (std::wcslen)(p); + } + + wchar_t translate(wchar_t c) const + { + return c; + } + wchar_t translate_nocase(wchar_t c) const + { + return (std::towlower)(c); + } + + static string_type transform(const wchar_t* p1, const wchar_t* p2); + static string_type transform_primary(const wchar_t* p1, const wchar_t* p2); + + static char_class_type lookup_classname(const wchar_t* p1, const wchar_t* p2); + static string_type lookup_collatename(const wchar_t* p1, const wchar_t* p2); + + static bool isctype(wchar_t, char_class_type); + static int value(wchar_t, int); + + locale_type imbue(locale_type l) + { return l; } + locale_type getloc()const + { return locale_type(); } + +private: + // this type is not copyable: + c_regex_traits(const c_regex_traits&); + c_regex_traits& operator=(const c_regex_traits&); +}; + +#endif // BOOST_NO_WREGEX + +inline c_regex_traits::string_type c_regex_traits::transform(const char* p1, const char* p2) +{ + std::string result(10, ' '); + std::size_t s = result.size(); + std::size_t r; + std::string src(p1, p2); + while (s < (r = std::strxfrm(&*result.begin(), src.c_str(), s))) + { +#if defined(_CPPLIB_VER) + // + // A bug in VC11 and 12 causes the program to hang if we pass a null-string + // to std::strxfrm, but only for certain locales :-( + // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware). + // + if (r == INT_MAX) + { + result.erase(); + result.insert(result.begin(), static_cast(0)); + return result; + } +#endif + result.append(r - s + 3, ' '); + s = result.size(); + } + result.erase(r); + return result; +} + +inline c_regex_traits::string_type c_regex_traits::transform_primary(const char* p1, const char* p2) +{ + static char s_delim; + static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast*>(0), &s_delim); + std::string result; + // + // What we do here depends upon the format of the sort key returned by + // sort key returned by this->transform: + // + switch (s_collate_type) + { + case ::boost::BOOST_REGEX_DETAIL_NS::sort_C: + case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown: + // the best we can do is translate to lower case, then get a regular sort key: + { + result.assign(p1, p2); + for (std::string::size_type i = 0; i < result.size(); ++i) + result[i] = static_cast((std::tolower)(static_cast(result[i]))); + result = transform(&*result.begin(), &*result.begin() + result.size()); + break; + } + case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed: + { + // get a regular sort key, and then truncate it: + result = transform(p1, p2); + result.erase(s_delim); + break; + } + case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim: + // get a regular sort key, and then truncate everything after the delim: + result = transform(p1, p2); + if ((!result.empty()) && (result[0] == s_delim)) + break; + std::size_t i; + for (i = 0; i < result.size(); ++i) + { + if (result[i] == s_delim) + break; + } + result.erase(i); + break; + } + if (result.empty()) + result = std::string(1, char(0)); + return result; +} + +inline c_regex_traits::char_class_type c_regex_traits::lookup_classname(const char* p1, const char* p2) +{ + using namespace BOOST_REGEX_DETAIL_NS; + static const char_class_type masks[] = + { + 0, + char_class_alnum, + char_class_alpha, + char_class_blank, + char_class_cntrl, + char_class_digit, + char_class_digit, + char_class_graph, + char_class_horizontal, + char_class_lower, + char_class_lower, + char_class_print, + char_class_punct, + char_class_space, + char_class_space, + char_class_upper, + char_class_unicode, + char_class_upper, + char_class_vertical, + char_class_alnum | char_class_word, + char_class_alnum | char_class_word, + char_class_xdigit, + }; + + int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2); + if (idx < 0) + { + std::string s(p1, p2); + for (std::string::size_type i = 0; i < s.size(); ++i) + s[i] = static_cast((std::tolower)(static_cast(s[i]))); + idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size()); + } + BOOST_REGEX_ASSERT(std::size_t(idx) + 1u < sizeof(masks) / sizeof(masks[0])); + return masks[idx + 1]; +} + +inline bool c_regex_traits::isctype(char c, char_class_type mask) +{ + using namespace BOOST_REGEX_DETAIL_NS; + return + ((mask & char_class_space) && (std::isspace)(static_cast(c))) + || ((mask & char_class_print) && (std::isprint)(static_cast(c))) + || ((mask & char_class_cntrl) && (std::iscntrl)(static_cast(c))) + || ((mask & char_class_upper) && (std::isupper)(static_cast(c))) + || ((mask & char_class_lower) && (std::islower)(static_cast(c))) + || ((mask & char_class_alpha) && (std::isalpha)(static_cast(c))) + || ((mask & char_class_digit) && (std::isdigit)(static_cast(c))) + || ((mask & char_class_punct) && (std::ispunct)(static_cast(c))) + || ((mask & char_class_xdigit) && (std::isxdigit)(static_cast(c))) + || ((mask & char_class_blank) && (std::isspace)(static_cast(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c)) + || ((mask & char_class_word) && (c == '_')) + || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == '\v'))) + || ((mask & char_class_horizontal) && (std::isspace)(static_cast(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != '\v')); +} + +inline c_regex_traits::string_type c_regex_traits::lookup_collatename(const char* p1, const char* p2) +{ + std::string s(p1, p2); + s = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(s); + if (s.empty() && (p2 - p1 == 1)) + s.append(1, *p1); + return s; +} + +inline int c_regex_traits::value(char c, int radix) +{ + char b[2] = { c, '\0', }; + char* ep; + int result = std::strtol(b, &ep, radix); + if (ep == b) + return -1; + return result; +} + +#ifndef BOOST_NO_WREGEX + +inline c_regex_traits::string_type c_regex_traits::transform(const wchar_t* p1, const wchar_t* p2) +{ + std::size_t r; + std::size_t s = 10; + std::wstring src(p1, p2); + std::wstring result(s, L' '); + while (s < (r = std::wcsxfrm(&*result.begin(), src.c_str(), s))) + { +#if defined(_CPPLIB_VER) + // + // A bug in VC11 and 12 causes the program to hang if we pass a null-string + // to std::strxfrm, but only for certain locales :-( + // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware). + // + if (r == INT_MAX) + { + result.erase(); + result.insert(result.begin(), static_cast(0)); + return result; + } +#endif + result.append(r - s + 3, L' '); + s = result.size(); + } + result.erase(r); + return result; +} + +inline c_regex_traits::string_type c_regex_traits::transform_primary(const wchar_t* p1, const wchar_t* p2) +{ + static wchar_t s_delim; + static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast*>(0), &s_delim); + std::wstring result; + // + // What we do here depends upon the format of the sort key returned by + // sort key returned by this->transform: + // + switch (s_collate_type) + { + case ::boost::BOOST_REGEX_DETAIL_NS::sort_C: + case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown: + // the best we can do is translate to lower case, then get a regular sort key: + { + result.assign(p1, p2); + for (std::wstring::size_type i = 0; i < result.size(); ++i) + result[i] = (std::towlower)(result[i]); + result = c_regex_traits::transform(&*result.begin(), &*result.begin() + result.size()); + break; + } + case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed: + { + // get a regular sort key, and then truncate it: + result = c_regex_traits::transform(&*result.begin(), &*result.begin() + result.size()); + result.erase(s_delim); + break; + } + case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim: + // get a regular sort key, and then truncate everything after the delim: + result = c_regex_traits::transform(&*result.begin(), &*result.begin() + result.size()); + if ((!result.empty()) && (result[0] == s_delim)) + break; + std::size_t i; + for (i = 0; i < result.size(); ++i) + { + if (result[i] == s_delim) + break; + } + result.erase(i); + break; + } + if (result.empty()) + result = std::wstring(1, char(0)); + return result; +} + +inline c_regex_traits::char_class_type c_regex_traits::lookup_classname(const wchar_t* p1, const wchar_t* p2) +{ + using namespace BOOST_REGEX_DETAIL_NS; + static const char_class_type masks[] = + { + 0, + char_class_alnum, + char_class_alpha, + char_class_blank, + char_class_cntrl, + char_class_digit, + char_class_digit, + char_class_graph, + char_class_horizontal, + char_class_lower, + char_class_lower, + char_class_print, + char_class_punct, + char_class_space, + char_class_space, + char_class_upper, + char_class_unicode, + char_class_upper, + char_class_vertical, + char_class_alnum | char_class_word, + char_class_alnum | char_class_word, + char_class_xdigit, + }; + + int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2); + if (idx < 0) + { + std::wstring s(p1, p2); + for (std::wstring::size_type i = 0; i < s.size(); ++i) + s[i] = (std::towlower)(s[i]); + idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size()); + } + BOOST_REGEX_ASSERT(idx + 1 < static_cast(sizeof(masks) / sizeof(masks[0]))); + return masks[idx + 1]; +} + +inline bool c_regex_traits::isctype(wchar_t c, char_class_type mask) +{ + using namespace BOOST_REGEX_DETAIL_NS; + return + ((mask & char_class_space) && (std::iswspace)(c)) + || ((mask & char_class_print) && (std::iswprint)(c)) + || ((mask & char_class_cntrl) && (std::iswcntrl)(c)) + || ((mask & char_class_upper) && (std::iswupper)(c)) + || ((mask & char_class_lower) && (std::iswlower)(c)) + || ((mask & char_class_alpha) && (std::iswalpha)(c)) + || ((mask & char_class_digit) && (std::iswdigit)(c)) + || ((mask & char_class_punct) && (std::iswpunct)(c)) + || ((mask & char_class_xdigit) && (std::iswxdigit)(c)) + || ((mask & char_class_blank) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c)) + || ((mask & char_class_word) && (c == '_')) + || ((mask & char_class_unicode) && (c & ~static_cast(0xff))) + || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == L'\v'))) + || ((mask & char_class_horizontal) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != L'\v')); +} + +inline c_regex_traits::string_type c_regex_traits::lookup_collatename(const wchar_t* p1, const wchar_t* p2) +{ + std::string name; + // Usual msvc warning suppression does not work here with std::string template constructor.... use a workaround instead: + for (const wchar_t* pos = p1; pos != p2; ++pos) + name.push_back((char)*pos); + name = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(name); + if (!name.empty()) + return string_type(name.begin(), name.end()); + if (p2 - p1 == 1) + return string_type(1, *p1); + return string_type(); +} + +inline int c_regex_traits::value(wchar_t c, int radix) +{ +#ifdef BOOST_BORLANDC + // workaround for broken wcstol: + if ((std::iswxdigit)(c) == 0) + return -1; +#endif + wchar_t b[2] = { c, '\0', }; + wchar_t* ep; + int result = std::wcstol(b, &ep, radix); + if (ep == b) + return -1; + return result; +} + +#endif + +} + +#endif + + + diff --git a/scintilla/include/boost/regex/v5/char_regex_traits.hpp b/scintilla/include/boost/regex/v5/char_regex_traits.hpp new file mode 100644 index 0000000000..aeed86f834 --- /dev/null +++ b/scintilla/include/boost/regex/v5/char_regex_traits.hpp @@ -0,0 +1,59 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE char_regex_traits.cpp + * VERSION see + * DESCRIPTION: Declares deprecated traits classes char_regex_traits<>. + */ + + +#ifndef BOOST_REGEX_V5_CHAR_REGEX_TRAITS_HPP +#define BOOST_REGEX_V5_CHAR_REGEX_TRAITS_HPP + +namespace boost{ + +namespace deprecated{ +// +// class char_regex_traits_i +// provides case insensitive traits classes (deprecated): +template +class char_regex_traits_i : public regex_traits {}; + +template<> +class char_regex_traits_i : public regex_traits +{ +public: + typedef char char_type; + typedef unsigned char uchar_type; + typedef unsigned int size_type; + typedef regex_traits base_type; + +}; + +#ifndef BOOST_NO_WREGEX +template<> +class char_regex_traits_i : public regex_traits +{ +public: + typedef wchar_t char_type; + typedef unsigned short uchar_type; + typedef unsigned int size_type; + typedef regex_traits base_type; + +}; +#endif +} // namespace deprecated +} // namespace boost + +#endif // include + diff --git a/scintilla/include/boost/regex/v5/cpp_regex_traits.hpp b/scintilla/include/boost/regex/v5/cpp_regex_traits.hpp new file mode 100644 index 0000000000..26b6f503d6 --- /dev/null +++ b/scintilla/include/boost/regex/v5/cpp_regex_traits.hpp @@ -0,0 +1,1040 @@ +/* + * + * Copyright (c) 2004 John Maddock + * Copyright 2011 Garmin Ltd. or its subsidiaries + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE cpp_regex_traits.hpp + * VERSION see + * DESCRIPTION: Declares regular expression traits class cpp_regex_traits. + */ + +#ifndef BOOST_CPP_REGEX_TRAITS_HPP_INCLUDED +#define BOOST_CPP_REGEX_TRAITS_HPP_INCLUDED + +#include +#include +#include +#include + +#include +#include + +#ifdef BOOST_HAS_THREADS +#include +#endif +#include +#include + +#include +#include +#include + +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4786 4251) +#endif + +namespace boost{ + +// +// forward declaration is needed by some compilers: +// +template +class cpp_regex_traits; + +namespace BOOST_REGEX_DETAIL_NS{ + +// +// class parser_buf: +// acts as a stream buffer which wraps around a pair of pointers: +// +template > +class parser_buf : public ::std::basic_streambuf +{ + typedef ::std::basic_streambuf base_type; + typedef typename base_type::int_type int_type; + typedef typename base_type::char_type char_type; + typedef typename base_type::pos_type pos_type; + typedef ::std::streamsize streamsize; + typedef typename base_type::off_type off_type; +public: + parser_buf() : base_type() { setbuf(0, 0); } + const charT* getnext() { return this->gptr(); } +protected: + std::basic_streambuf* setbuf(char_type* s, streamsize n) override; + typename parser_buf::pos_type seekpos(pos_type sp, ::std::ios_base::openmode which) override; + typename parser_buf::pos_type seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which) override; +private: + parser_buf& operator=(const parser_buf&); + parser_buf(const parser_buf&); +}; + +template +std::basic_streambuf* +parser_buf::setbuf(char_type* s, streamsize n) +{ + this->setg(s, s, s + n); + return this; +} + +template +typename parser_buf::pos_type +parser_buf::seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which) +{ + if(which & ::std::ios_base::out) + return pos_type(off_type(-1)); + std::ptrdiff_t size = this->egptr() - this->eback(); + std::ptrdiff_t pos = this->gptr() - this->eback(); + charT* g = this->eback(); + switch(static_cast(way)) + { + case ::std::ios_base::beg: + if((off < 0) || (off > size)) + return pos_type(off_type(-1)); + else + this->setg(g, g + off, g + size); + break; + case ::std::ios_base::end: + if((off < 0) || (off > size)) + return pos_type(off_type(-1)); + else + this->setg(g, g + size - off, g + size); + break; + case ::std::ios_base::cur: + { + std::ptrdiff_t newpos = static_cast(pos + off); + if((newpos < 0) || (newpos > size)) + return pos_type(off_type(-1)); + else + this->setg(g, g + newpos, g + size); + break; + } + default: ; + } +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4244) +#endif + return static_cast(this->gptr() - this->eback()); +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} + +template +typename parser_buf::pos_type +parser_buf::seekpos(pos_type sp, ::std::ios_base::openmode which) +{ + if(which & ::std::ios_base::out) + return pos_type(off_type(-1)); + off_type size = static_cast(this->egptr() - this->eback()); + charT* g = this->eback(); + if(off_type(sp) <= size) + { + this->setg(g, g + off_type(sp), g + size); + } + return pos_type(off_type(-1)); +} + +// +// class cpp_regex_traits_base: +// acts as a container for locale and the facets we are using. +// +template +struct cpp_regex_traits_base +{ + cpp_regex_traits_base(const std::locale& l) + { (void)imbue(l); } + std::locale imbue(const std::locale& l); + + std::locale m_locale; + std::ctype const* m_pctype; + std::messages const* m_pmessages; + std::collate const* m_pcollate; + + bool operator<(const cpp_regex_traits_base& b)const + { + if(m_pctype == b.m_pctype) + { + if(m_pmessages == b.m_pmessages) + { + return m_pcollate < b.m_pcollate; + } + return m_pmessages < b.m_pmessages; + } + return m_pctype < b.m_pctype; + } + bool operator==(const cpp_regex_traits_base& b)const + { + return (m_pctype == b.m_pctype) + && (m_pmessages == b.m_pmessages) + && (m_pcollate == b.m_pcollate); + } +}; + +template +std::locale cpp_regex_traits_base::imbue(const std::locale& l) +{ + std::locale result(m_locale); + m_locale = l; + m_pctype = &std::use_facet>(l); + m_pmessages = std::has_facet >(l) ? &std::use_facet >(l) : 0; + m_pcollate = &std::use_facet >(l); + return result; +} + +// +// class cpp_regex_traits_char_layer: +// implements methods that require specialization for narrow characters: +// +template +class cpp_regex_traits_char_layer : public cpp_regex_traits_base +{ + typedef std::basic_string string_type; + typedef std::map map_type; + typedef typename map_type::const_iterator map_iterator_type; +public: + cpp_regex_traits_char_layer(const std::locale& l) + : cpp_regex_traits_base(l) + { + init(); + } + cpp_regex_traits_char_layer(const cpp_regex_traits_base& b) + : cpp_regex_traits_base(b) + { + init(); + } + void init(); + + regex_constants::syntax_type syntax_type(charT c)const + { + map_iterator_type i = m_char_map.find(c); + return ((i == m_char_map.end()) ? 0 : i->second); + } + regex_constants::escape_syntax_type escape_syntax_type(charT c) const + { + map_iterator_type i = m_char_map.find(c); + if(i == m_char_map.end()) + { + if(this->m_pctype->is(std::ctype_base::lower, c)) return regex_constants::escape_type_class; + if(this->m_pctype->is(std::ctype_base::upper, c)) return regex_constants::escape_type_not_class; + return 0; + } + return i->second; + } + +private: + string_type get_default_message(regex_constants::syntax_type); + // TODO: use a hash table when available! + map_type m_char_map; +}; + +template +void cpp_regex_traits_char_layer::init() +{ + // we need to start by initialising our syntax map so we know which + // character is used for which purpose: +#ifndef __IBMCPP__ + typename std::messages::catalog cat = static_cast::catalog>(-1); +#else + typename std::messages::catalog cat = reinterpret_cast::catalog>(-1); +#endif + std::string cat_name(cpp_regex_traits::get_catalog_name()); + if((!cat_name.empty()) && (this->m_pmessages != 0)) + { + cat = this->m_pmessages->open( + cat_name, + this->m_locale); + if((int)cat < 0) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if((int)cat >= 0) + { +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + string_type mss = this->m_pmessages->get(cat, 0, i, get_default_message(i)); + for(typename string_type::size_type j = 0; j < mss.size(); ++j) + { + m_char_map[mss[j]] = i; + } + } + this->m_pmessages->close(cat); +#ifndef BOOST_NO_EXCEPTIONS + } + catch(...) + { + if(this->m_pmessages) + this->m_pmessages->close(cat); + throw; + } +#endif + } + else + { + for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + const char* ptr = get_default_syntax(i); + while(ptr && *ptr) + { + m_char_map[this->m_pctype->widen(*ptr)] = i; + ++ptr; + } + } + } +} + +template +typename cpp_regex_traits_char_layer::string_type + cpp_regex_traits_char_layer::get_default_message(regex_constants::syntax_type i) +{ + const char* ptr = get_default_syntax(i); + string_type result; + while(ptr && *ptr) + { + result.append(1, this->m_pctype->widen(*ptr)); + ++ptr; + } + return result; +} + +// +// specialized version for narrow characters: +// +template <> +class cpp_regex_traits_char_layer : public cpp_regex_traits_base +{ + typedef std::string string_type; +public: + cpp_regex_traits_char_layer(const std::locale& l) + : cpp_regex_traits_base(l) + { + init(); + } + cpp_regex_traits_char_layer(const cpp_regex_traits_base& l) + : cpp_regex_traits_base(l) + { + init(); + } + + regex_constants::syntax_type syntax_type(char c)const + { + return m_char_map[static_cast(c)]; + } + regex_constants::escape_syntax_type escape_syntax_type(char c) const + { + return m_char_map[static_cast(c)]; + } + +private: + regex_constants::syntax_type m_char_map[1u << CHAR_BIT]; + void init(); +}; + +// +// class cpp_regex_traits_implementation: +// provides pimpl implementation for cpp_regex_traits. +// +template +class cpp_regex_traits_implementation : public cpp_regex_traits_char_layer +{ +public: + typedef typename cpp_regex_traits::char_class_type char_class_type; + typedef typename std::ctype::mask native_mask_type; + typedef typename std::make_unsigned::type unsigned_native_mask_type; + static const char_class_type mask_blank = 1u << 24; + static const char_class_type mask_word = 1u << 25; + static const char_class_type mask_unicode = 1u << 26; + static const char_class_type mask_horizontal = 1u << 27; + static const char_class_type mask_vertical = 1u << 28; + + typedef std::basic_string string_type; + typedef charT char_type; + //cpp_regex_traits_implementation(); + cpp_regex_traits_implementation(const std::locale& l) + : cpp_regex_traits_char_layer(l) + { + init(); + } + cpp_regex_traits_implementation(const cpp_regex_traits_base& l) + : cpp_regex_traits_char_layer(l) + { + init(); + } + std::string error_string(regex_constants::error_type n) const + { + if(!m_error_strings.empty()) + { + std::map::const_iterator p = m_error_strings.find(n); + return (p == m_error_strings.end()) ? std::string(get_default_error_string(n)) : p->second; + } + return get_default_error_string(n); + } + char_class_type lookup_classname(const charT* p1, const charT* p2) const + { + char_class_type result = lookup_classname_imp(p1, p2); + if(result == 0) + { + string_type temp(p1, p2); + this->m_pctype->tolower(&*temp.begin(), &*temp.begin() + temp.size()); + result = lookup_classname_imp(&*temp.begin(), &*temp.begin() + temp.size()); + } + return result; + } + string_type lookup_collatename(const charT* p1, const charT* p2) const; + string_type transform_primary(const charT* p1, const charT* p2) const; + string_type transform(const charT* p1, const charT* p2) const; +private: + std::map m_error_strings; // error messages indexed by numberic ID + std::map m_custom_class_names; // character class names + std::map m_custom_collate_names; // collating element names + unsigned m_collate_type; // the form of the collation string + charT m_collate_delim; // the collation group delimiter + // + // helpers: + // + char_class_type lookup_classname_imp(const charT* p1, const charT* p2) const; + void init(); +}; + +template +typename cpp_regex_traits_implementation::char_class_type const cpp_regex_traits_implementation::mask_blank; +template +typename cpp_regex_traits_implementation::char_class_type const cpp_regex_traits_implementation::mask_word; +template +typename cpp_regex_traits_implementation::char_class_type const cpp_regex_traits_implementation::mask_unicode; +template +typename cpp_regex_traits_implementation::char_class_type const cpp_regex_traits_implementation::mask_vertical; +template +typename cpp_regex_traits_implementation::char_class_type const cpp_regex_traits_implementation::mask_horizontal; + +template +typename cpp_regex_traits_implementation::string_type + cpp_regex_traits_implementation::transform_primary(const charT* p1, const charT* p2) const +{ + // + // PRECONDITIONS: + // + // A bug in gcc 3.2 (and maybe other versions as well) treats + // p1 as a null terminated string, for efficiency reasons + // we work around this elsewhere, but just assert here that + // we adhere to gcc's (buggy) preconditions... + // + BOOST_REGEX_ASSERT(*p2 == 0); + string_type result; +#if defined(_CPPLIB_VER) + // + // A bug in VC11 and 12 causes the program to hang if we pass a null-string + // to std::collate::transform, but only for certain locales :-( + // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware). + // + if(*p1 == 0) + { + return string_type(1, charT(0)); + } +#endif + // + // swallowing all exceptions here is a bad idea + // however at least one std lib will always throw + // std::bad_alloc for certain arguments... + // +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + // + // What we do here depends upon the format of the sort key returned by + // sort key returned by this->transform: + // + switch(m_collate_type) + { + case sort_C: + case sort_unknown: + // the best we can do is translate to lower case, then get a regular sort key: + { + result.assign(p1, p2); + this->m_pctype->tolower(&*result.begin(), &*result.begin() + result.size()); + result = this->m_pcollate->transform(&*result.begin(), &*result.begin() + result.size()); + break; + } + case sort_fixed: + { + // get a regular sort key, and then truncate it: + result.assign(this->m_pcollate->transform(p1, p2)); + result.erase(this->m_collate_delim); + break; + } + case sort_delim: + // get a regular sort key, and then truncate everything after the delim: + result.assign(this->m_pcollate->transform(p1, p2)); + std::size_t i; + for(i = 0; i < result.size(); ++i) + { + if(result[i] == m_collate_delim) + break; + } + result.erase(i); + break; + } +#ifndef BOOST_NO_EXCEPTIONS + }catch(...){} +#endif + while((!result.empty()) && (charT(0) == *result.rbegin())) + result.erase(result.size() - 1); + if(result.empty()) + { + // character is ignorable at the primary level: + result = string_type(1, charT(0)); + } + return result; +} + +template +typename cpp_regex_traits_implementation::string_type + cpp_regex_traits_implementation::transform(const charT* p1, const charT* p2) const +{ + // + // PRECONDITIONS: + // + // A bug in gcc 3.2 (and maybe other versions as well) treats + // p1 as a null terminated string, for efficiency reasons + // we work around this elsewhere, but just assert here that + // we adhere to gcc's (buggy) preconditions... + // + BOOST_REGEX_ASSERT(*p2 == 0); + // + // swallowing all exceptions here is a bad idea + // however at least one std lib will always throw + // std::bad_alloc for certain arguments... + // + string_type result, result2; +#if defined(_CPPLIB_VER) + // + // A bug in VC11 and 12 causes the program to hang if we pass a null-string + // to std::collate::transform, but only for certain locales :-( + // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware). + // + if(*p1 == 0) + { + return result; + } +#endif +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + result = this->m_pcollate->transform(p1, p2); + // + // some implementations (Dinkumware) append unnecessary trailing \0's: + while((!result.empty()) && (charT(0) == *result.rbegin())) + result.erase(result.size() - 1); + // + // We may have NULL's used as separators between sections of the collate string, + // an example would be Boost.Locale. We have no way to detect this case via + // #defines since this can be used with any compiler/platform combination. + // Unfortunately our state machine (which was devised when all implementations + // used underlying C language API's) can't cope with that case. One workaround + // is to replace each character with 2, fortunately this code isn't used that + // much as this is now slower than before :-( + // + typedef typename std::make_unsigned::type uchar_type; + result2.reserve(result.size() * 2 + 2); + for(unsigned i = 0; i < result.size(); ++i) + { + if(static_cast(result[i]) == (std::numeric_limits::max)()) + { + result2.append(1, charT((std::numeric_limits::max)())).append(1, charT('b')); + } + else + { + result2.append(1, static_cast(1 + static_cast(result[i]))).append(1, charT('b') - 1); + } + } + BOOST_REGEX_ASSERT(std::find(result2.begin(), result2.end(), charT(0)) == result2.end()); +#ifndef BOOST_NO_EXCEPTIONS + } + catch(...) + { + } +#endif + return result2; +} + + +template +typename cpp_regex_traits_implementation::string_type + cpp_regex_traits_implementation::lookup_collatename(const charT* p1, const charT* p2) const +{ + typedef typename std::map::const_iterator iter_type; + if(!m_custom_collate_names.empty()) + { + iter_type pos = m_custom_collate_names.find(string_type(p1, p2)); + if(pos != m_custom_collate_names.end()) + return pos->second; + } + std::string name(p1, p2); + name = lookup_default_collate_name(name); + if(!name.empty()) + return string_type(name.begin(), name.end()); + if(p2 - p1 == 1) + return string_type(1, *p1); + return string_type(); +} + +template +void cpp_regex_traits_implementation::init() +{ +#ifndef __IBMCPP__ + typename std::messages::catalog cat = static_cast::catalog>(-1); +#else + typename std::messages::catalog cat = reinterpret_cast::catalog>(-1); +#endif + std::string cat_name(cpp_regex_traits::get_catalog_name()); + if((!cat_name.empty()) && (this->m_pmessages != 0)) + { + cat = this->m_pmessages->open( + cat_name, + this->m_locale); + if((int)cat < 0) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if((int)cat >= 0) + { + // + // Error messages: + // + for(boost::regex_constants::error_type i = static_cast(0); + i <= boost::regex_constants::error_unknown; + i = static_cast(i + 1)) + { + const char* p = get_default_error_string(i); + string_type default_message; + while(*p) + { + default_message.append(1, this->m_pctype->widen(*p)); + ++p; + } + string_type s = this->m_pmessages->get(cat, 0, i+200, default_message); + std::string result; + for(std::string::size_type j = 0; j < s.size(); ++j) + { + result.append(1, this->m_pctype->narrow(s[j], 0)); + } + m_error_strings[i] = result; + } + // + // Custom class names: + // + static const char_class_type masks[16] = + { + static_cast(std::ctype::alnum), + static_cast(std::ctype::alpha), + static_cast(std::ctype::cntrl), + static_cast(std::ctype::digit), + static_cast(std::ctype::graph), + cpp_regex_traits_implementation::mask_horizontal, + static_cast(std::ctype::lower), + static_cast(std::ctype::print), + static_cast(std::ctype::punct), + static_cast(std::ctype::space), + static_cast(std::ctype::upper), + cpp_regex_traits_implementation::mask_vertical, + static_cast(std::ctype::xdigit), + cpp_regex_traits_implementation::mask_blank, + cpp_regex_traits_implementation::mask_word, + cpp_regex_traits_implementation::mask_unicode, + }; + static const string_type null_string; + for(unsigned int j = 0; j <= 13; ++j) + { + string_type s(this->m_pmessages->get(cat, 0, j+300, null_string)); + if(!s.empty()) + this->m_custom_class_names[s] = masks[j]; + } + } + // + // get the collation format used by m_pcollate: + // + m_collate_type = BOOST_REGEX_DETAIL_NS::find_sort_syntax(this, &m_collate_delim); +} + +template +typename cpp_regex_traits_implementation::char_class_type + cpp_regex_traits_implementation::lookup_classname_imp(const charT* p1, const charT* p2) const +{ + static const char_class_type masks[22] = + { + 0, + static_cast(std::ctype::alnum), + static_cast(std::ctype::alpha), + cpp_regex_traits_implementation::mask_blank, + static_cast(std::ctype::cntrl), + static_cast(std::ctype::digit), + static_cast(std::ctype::digit), + static_cast(std::ctype::graph), + cpp_regex_traits_implementation::mask_horizontal, + static_cast(std::ctype::lower), + static_cast(std::ctype::lower), + static_cast(std::ctype::print), + static_cast(std::ctype::punct), + static_cast(std::ctype::space), + static_cast(std::ctype::space), + static_cast(std::ctype::upper), + cpp_regex_traits_implementation::mask_unicode, + static_cast(std::ctype::upper), + cpp_regex_traits_implementation::mask_vertical, + static_cast(std::ctype::alnum) | cpp_regex_traits_implementation::mask_word, + static_cast(std::ctype::alnum) | cpp_regex_traits_implementation::mask_word, + static_cast(std::ctype::xdigit), + }; + if(!m_custom_class_names.empty()) + { + typedef typename std::map, char_class_type>::const_iterator map_iter; + map_iter pos = m_custom_class_names.find(string_type(p1, p2)); + if(pos != m_custom_class_names.end()) + return pos->second; + } + std::size_t state_id = 1 + BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2); + BOOST_REGEX_ASSERT(state_id < sizeof(masks) / sizeof(masks[0])); + return masks[state_id]; +} + +template +inline std::shared_ptr > create_cpp_regex_traits(const std::locale& l) +{ + cpp_regex_traits_base key(l); + return ::boost::object_cache, cpp_regex_traits_implementation >::get(key, 5); +} + +} // BOOST_REGEX_DETAIL_NS + +template +class cpp_regex_traits +{ +private: + typedef std::ctype ctype_type; +public: + typedef charT char_type; + typedef std::size_t size_type; + typedef std::basic_string string_type; + typedef std::locale locale_type; + typedef std::uint_least32_t char_class_type; + + struct boost_extensions_tag{}; + + cpp_regex_traits() + : m_pimpl(BOOST_REGEX_DETAIL_NS::create_cpp_regex_traits(std::locale())) + { } + static size_type length(const char_type* p) + { + return std::char_traits::length(p); + } + regex_constants::syntax_type syntax_type(charT c)const + { + return m_pimpl->syntax_type(c); + } + regex_constants::escape_syntax_type escape_syntax_type(charT c) const + { + return m_pimpl->escape_syntax_type(c); + } + charT translate(charT c) const + { + return c; + } + charT translate_nocase(charT c) const + { + return m_pimpl->m_pctype->tolower(c); + } + charT translate(charT c, bool icase) const + { + return icase ? m_pimpl->m_pctype->tolower(c) : c; + } + charT tolower(charT c) const + { + return m_pimpl->m_pctype->tolower(c); + } + charT toupper(charT c) const + { + return m_pimpl->m_pctype->toupper(c); + } + string_type transform(const charT* p1, const charT* p2) const + { + return m_pimpl->transform(p1, p2); + } + string_type transform_primary(const charT* p1, const charT* p2) const + { + return m_pimpl->transform_primary(p1, p2); + } + char_class_type lookup_classname(const charT* p1, const charT* p2) const + { + return m_pimpl->lookup_classname(p1, p2); + } + string_type lookup_collatename(const charT* p1, const charT* p2) const + { + return m_pimpl->lookup_collatename(p1, p2); + } + bool isctype(charT c, char_class_type f) const + { + typedef typename std::ctype::mask ctype_mask; + + static const ctype_mask mask_base = + static_cast( + std::ctype::alnum + | std::ctype::alpha + | std::ctype::cntrl + | std::ctype::digit + | std::ctype::graph + | std::ctype::lower + | std::ctype::print + | std::ctype::punct + | std::ctype::space + | std::ctype::upper + | std::ctype::xdigit); + + if((f & mask_base) + && (m_pimpl->m_pctype->is( + static_cast(f & mask_base), c))) + return true; + else if((f & BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation::mask_unicode) && BOOST_REGEX_DETAIL_NS::is_extended(c)) + return true; + else if((f & BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation::mask_word) && (c == '_')) + return true; + else if((f & BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation::mask_blank) + && m_pimpl->m_pctype->is(std::ctype::space, c) + && !BOOST_REGEX_DETAIL_NS::is_separator(c)) + return true; + else if((f & BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation::mask_vertical) + && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == '\v'))) + return true; + else if((f & BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation::mask_horizontal) + && this->isctype(c, std::ctype::space) && !this->isctype(c, BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation::mask_vertical)) + return true; +#ifdef __CYGWIN__ + // + // Cygwin has a buggy ctype facet, see https://www.cygwin.com/ml/cygwin/2012-08/msg00178.html: + // + else if((f & std::ctype::xdigit) == std::ctype::xdigit) + { + if((c >= 'a') && (c <= 'f')) + return true; + if((c >= 'A') && (c <= 'F')) + return true; + } +#endif + return false; + } + std::intmax_t toi(const charT*& p1, const charT* p2, int radix)const; + int value(charT c, int radix)const + { + const charT* pc = &c; + return (int)toi(pc, pc + 1, radix); + } + locale_type imbue(locale_type l) + { + std::locale result(getloc()); + m_pimpl = BOOST_REGEX_DETAIL_NS::create_cpp_regex_traits(l); + return result; + } + locale_type getloc()const + { + return m_pimpl->m_locale; + } + std::string error_string(regex_constants::error_type n) const + { + return m_pimpl->error_string(n); + } + + // + // extension: + // set the name of the message catalog in use (defaults to "boost_regex"). + // + static std::string catalog_name(const std::string& name); + static std::string get_catalog_name(); + +private: + std::shared_ptr > m_pimpl; + // + // catalog name handler: + // + static std::string& get_catalog_name_inst(); + +#ifdef BOOST_HAS_THREADS + static std::mutex& get_mutex_inst(); +#endif +}; + + +template +std::intmax_t cpp_regex_traits::toi(const charT*& first, const charT* last, int radix)const +{ + BOOST_REGEX_DETAIL_NS::parser_buf sbuf; // buffer for parsing numbers. + std::basic_istream is(&sbuf); // stream for parsing numbers. + + // we do NOT want to parse any thousands separators inside the stream: + last = std::find(first, last, std::use_facet>(is.getloc()).thousands_sep()); + + sbuf.pubsetbuf(const_cast(static_cast(first)), static_cast(last-first)); + is.clear(); + if(std::abs(radix) == 16) is >> std::hex; + else if(std::abs(radix) == 8) is >> std::oct; + else is >> std::dec; + std::intmax_t val; + if(is >> val) + { + first = first + ((last - first) - sbuf.in_avail()); + return val; + } + else + return -1; +} + +template +std::string cpp_regex_traits::catalog_name(const std::string& name) +{ +#ifdef BOOST_HAS_THREADS + std::lock_guard lk(get_mutex_inst()); +#endif + std::string result(get_catalog_name_inst()); + get_catalog_name_inst() = name; + return result; +} + +template +std::string& cpp_regex_traits::get_catalog_name_inst() +{ + static std::string s_name; + return s_name; +} + +template +std::string cpp_regex_traits::get_catalog_name() +{ +#ifdef BOOST_HAS_THREADS + std::lock_guard lk(get_mutex_inst()); +#endif + std::string result(get_catalog_name_inst()); + return result; +} + +#ifdef BOOST_HAS_THREADS +template +std::mutex& cpp_regex_traits::get_mutex_inst() +{ + static std::mutex s_mutex; + return s_mutex; +} +#endif + +namespace BOOST_REGEX_DETAIL_NS { + + inline void cpp_regex_traits_char_layer::init() + { + // we need to start by initialising our syntax map so we know which + // character is used for which purpose: + std::memset(m_char_map, 0, sizeof(m_char_map)); +#ifndef __IBMCPP__ + std::messages::catalog cat = static_cast::catalog>(-1); +#else + std::messages::catalog cat = reinterpret_cast::catalog>(-1); +#endif + std::string cat_name(cpp_regex_traits::get_catalog_name()); + if ((!cat_name.empty()) && (m_pmessages != 0)) + { + cat = this->m_pmessages->open( + cat_name, + this->m_locale); + if ((int)cat < 0) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if ((int)cat >= 0) + { +#ifndef BOOST_NO_EXCEPTIONS + try { +#endif + for (regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + string_type mss = this->m_pmessages->get(cat, 0, i, get_default_syntax(i)); + for (string_type::size_type j = 0; j < mss.size(); ++j) + { + m_char_map[static_cast(mss[j])] = i; + } + } + this->m_pmessages->close(cat); +#ifndef BOOST_NO_EXCEPTIONS + } + catch (...) + { + this->m_pmessages->close(cat); + throw; + } +#endif + } + else + { + for (regex_constants::syntax_type j = 1; j < regex_constants::syntax_max; ++j) + { + const char* ptr = get_default_syntax(j); + while (ptr && *ptr) + { + m_char_map[static_cast(*ptr)] = j; + ++ptr; + } + } + } + // + // finish off by calculating our escape types: + // + unsigned char i = 'A'; + do + { + if (m_char_map[i] == 0) + { + if (this->m_pctype->is(std::ctype_base::lower, i)) + m_char_map[i] = regex_constants::escape_type_class; + else if (this->m_pctype->is(std::ctype_base::upper, i)) + m_char_map[i] = regex_constants::escape_type_not_class; + } + } while (0xFF != i++); + } + +} // namespace detail + + +} // boost + +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif + + +#endif diff --git a/scintilla/include/boost/regex/v5/cregex.hpp b/scintilla/include/boost/regex/v5/cregex.hpp new file mode 100644 index 0000000000..a657105215 --- /dev/null +++ b/scintilla/include/boost/regex/v5/cregex.hpp @@ -0,0 +1,195 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE cregex.cpp + * VERSION see + * DESCRIPTION: Declares POSIX API functions + * + boost::RegEx high level wrapper. + */ + +#ifndef BOOST_RE_CREGEX_HPP_INCLUDED +#define BOOST_RE_CREGEX_HPP_INCLUDED + +#ifndef BOOST_REGEX_CONFIG_HPP +#include +#endif +#include +#include + +#ifndef BOOST_REGEX_STANDALONE +#if !defined(BOOST_REGEX_NO_LIB) && !defined(BOOST_REGEX_SOURCE) && !defined(BOOST_ALL_NO_LIB) && defined(__cplusplus) +# define BOOST_LIB_NAME boost_regex +# if defined(BOOST_REGEX_DYN_LINK) || defined(BOOST_ALL_DYN_LINK) +# define BOOST_DYN_LINK +# endif +# ifdef BOOST_REGEX_DIAG +# define BOOST_LIB_DIAGNOSTIC +# endif +# include +#endif +#endif + +#ifdef __cplusplus +#include +#else +#include +#endif + +/* include these defs only for POSIX compatablity */ +#ifdef __cplusplus +namespace boost{ +extern "C" { +#endif + +#if defined(__cplusplus) +typedef std::ptrdiff_t regoff_t; +typedef std::size_t regsize_t; +#else +typedef ptrdiff_t regoff_t; +typedef size_t regsize_t; +#endif + +typedef struct +{ + unsigned int re_magic; +#ifdef __cplusplus + std::size_t re_nsub; /* number of parenthesized subexpressions */ +#else + size_t re_nsub; +#endif + const char* re_endp; /* end pointer for REG_PEND */ + void* guts; /* none of your business :-) */ + match_flag_type eflags; /* none of your business :-) */ +} regex_tA; + +#ifndef BOOST_NO_WREGEX +typedef struct +{ + unsigned int re_magic; +#ifdef __cplusplus + std::size_t re_nsub; /* number of parenthesized subexpressions */ +#else + size_t re_nsub; +#endif + const wchar_t* re_endp; /* end pointer for REG_PEND */ + void* guts; /* none of your business :-) */ + match_flag_type eflags; /* none of your business :-) */ +} regex_tW; +#endif + +typedef struct +{ + regoff_t rm_so; /* start of match */ + regoff_t rm_eo; /* end of match */ +} regmatch_t; + +/* regcomp() flags */ +typedef enum{ + REG_BASIC = 0000, + REG_EXTENDED = 0001, + REG_ICASE = 0002, + REG_NOSUB = 0004, + REG_NEWLINE = 0010, + REG_NOSPEC = 0020, + REG_PEND = 0040, + REG_DUMP = 0200, + REG_NOCOLLATE = 0400, + REG_ESCAPE_IN_LISTS = 01000, + REG_NEWLINE_ALT = 02000, + REG_PERLEX = 04000, + + REG_PERL = REG_EXTENDED | REG_NOCOLLATE | REG_ESCAPE_IN_LISTS | REG_PERLEX, + REG_AWK = REG_EXTENDED | REG_ESCAPE_IN_LISTS, + REG_GREP = REG_BASIC | REG_NEWLINE_ALT, + REG_EGREP = REG_EXTENDED | REG_NEWLINE_ALT, + + REG_ASSERT = 15, + REG_INVARG = 16, + REG_ATOI = 255, /* convert name to number (!) */ + REG_ITOA = 0400 /* convert number to name (!) */ +} reg_comp_flags; + +/* regexec() flags */ +typedef enum{ + REG_NOTBOL = 00001, + REG_NOTEOL = 00002, + REG_STARTEND = 00004 +} reg_exec_flags; + +/* + * POSIX error codes: + */ +typedef unsigned reg_error_t; +typedef reg_error_t reg_errcode_t; /* backwards compatibility */ + +static const reg_error_t REG_NOERROR = 0; /* Success. */ +static const reg_error_t REG_NOMATCH = 1; /* Didn't find a match (for regexec). */ + + /* POSIX regcomp return error codes. (In the order listed in the + standard.) */ +static const reg_error_t REG_BADPAT = 2; /* Invalid pattern. */ +static const reg_error_t REG_ECOLLATE = 3; /* Undefined collating element. */ +static const reg_error_t REG_ECTYPE = 4; /* Invalid character class name. */ +static const reg_error_t REG_EESCAPE = 5; /* Trailing backslash. */ +static const reg_error_t REG_ESUBREG = 6; /* Invalid back reference. */ +static const reg_error_t REG_EBRACK = 7; /* Unmatched left bracket. */ +static const reg_error_t REG_EPAREN = 8; /* Parenthesis imbalance. */ +static const reg_error_t REG_EBRACE = 9; /* Unmatched \{. */ +static const reg_error_t REG_BADBR = 10; /* Invalid contents of \{\}. */ +static const reg_error_t REG_ERANGE = 11; /* Invalid range end. */ +static const reg_error_t REG_ESPACE = 12; /* Ran out of memory. */ +static const reg_error_t REG_BADRPT = 13; /* No preceding re for repetition op. */ +static const reg_error_t REG_EEND = 14; /* unexpected end of expression */ +static const reg_error_t REG_ESIZE = 15; /* expression too big */ +static const reg_error_t REG_ERPAREN = 8; /* = REG_EPAREN : unmatched right parenthesis */ +static const reg_error_t REG_EMPTY = 17; /* empty expression */ +static const reg_error_t REG_E_MEMORY = 15; /* = REG_ESIZE : out of memory */ +static const reg_error_t REG_ECOMPLEXITY = 18; /* complexity too high */ +static const reg_error_t REG_ESTACK = 19; /* out of stack space */ +static const reg_error_t REG_E_PERL = 20; /* Perl (?...) error */ +static const reg_error_t REG_E_UNKNOWN = 21; /* unknown error */ +static const reg_error_t REG_ENOSYS = 21; /* = REG_E_UNKNOWN : Reserved. */ + +BOOST_REGEX_DECL int BOOST_REGEX_CCALL regcompA(regex_tA*, const char*, int); +BOOST_REGEX_DECL regsize_t BOOST_REGEX_CCALL regerrorA(int, const regex_tA*, char*, regsize_t); +BOOST_REGEX_DECL int BOOST_REGEX_CCALL regexecA(const regex_tA*, const char*, regsize_t, regmatch_t*, int); +BOOST_REGEX_DECL void BOOST_REGEX_CCALL regfreeA(regex_tA*); + +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL int BOOST_REGEX_CCALL regcompW(regex_tW*, const wchar_t*, int); +BOOST_REGEX_DECL regsize_t BOOST_REGEX_CCALL regerrorW(int, const regex_tW*, wchar_t*, regsize_t); +BOOST_REGEX_DECL int BOOST_REGEX_CCALL regexecW(const regex_tW*, const wchar_t*, regsize_t, regmatch_t*, int); +BOOST_REGEX_DECL void BOOST_REGEX_CCALL regfreeW(regex_tW*); +#endif + +#ifdef UNICODE +#define regcomp regcompW +#define regerror regerrorW +#define regexec regexecW +#define regfree regfreeW +#define regex_t regex_tW +#else +#define regcomp regcompA +#define regerror regerrorA +#define regexec regexecA +#define regfree regfreeA +#define regex_t regex_tA +#endif + +#ifdef __cplusplus +} /* extern "C" */ +} /* namespace */ +#endif + +#endif /* include guard */ + diff --git a/scintilla/include/boost/regex/v5/error_type.hpp b/scintilla/include/boost/regex/v5/error_type.hpp new file mode 100644 index 0000000000..afcc71e3d8 --- /dev/null +++ b/scintilla/include/boost/regex/v5/error_type.hpp @@ -0,0 +1,59 @@ +/* + * + * Copyright (c) 2003-2005 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE error_type.hpp + * VERSION see + * DESCRIPTION: Declares regular expression error type enumerator. + */ + +#ifndef BOOST_REGEX_ERROR_TYPE_HPP +#define BOOST_REGEX_ERROR_TYPE_HPP + +#ifdef __cplusplus +namespace boost{ +#endif + +#ifdef __cplusplus +namespace regex_constants{ + +enum error_type{ + + error_ok = 0, /* not used */ + error_no_match = 1, /* not used */ + error_bad_pattern = 2, + error_collate = 3, + error_ctype = 4, + error_escape = 5, + error_backref = 6, + error_brack = 7, + error_paren = 8, + error_brace = 9, + error_badbrace = 10, + error_range = 11, + error_space = 12, + error_badrepeat = 13, + error_end = 14, /* not used */ + error_size = 15, + error_right_paren = 16, /* not used */ + error_empty = 17, + error_complexity = 18, + error_stack = 19, + error_perl_extension = 20, + error_unknown = 21 +}; + +} +} +#endif /* __cplusplus */ + +#endif diff --git a/scintilla/include/boost/regex/v5/icu.hpp b/scintilla/include/boost/regex/v5/icu.hpp new file mode 100644 index 0000000000..aefd97684a --- /dev/null +++ b/scintilla/include/boost/regex/v5/icu.hpp @@ -0,0 +1,1402 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE icu.hpp + * VERSION see + * DESCRIPTION: Unicode regular expressions on top of the ICU Library. + */ + +#ifndef BOOST_REGEX_ICU_V5_HPP +#define BOOST_REGEX_ICU_V5_HPP + +#include +#include +#include +#include +#include +#include +#include + +#ifdef BOOST_REGEX_MSVC +#pragma warning (push) +#pragma warning (disable: 4251) +#endif + +namespace boost{ + +namespace BOOST_REGEX_DETAIL_NS{ + +// +// Implementation details: +// +class icu_regex_traits_implementation +{ + typedef UChar32 char_type; + typedef std::size_t size_type; + typedef std::vector string_type; + typedef U_NAMESPACE_QUALIFIER Locale locale_type; + typedef std::uint_least32_t char_class_type; +public: + icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& l) + : m_locale(l) + { + UErrorCode success = U_ZERO_ERROR; + m_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success)); + if(U_SUCCESS(success) == 0) + init_error(); + m_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::IDENTICAL); + success = U_ZERO_ERROR; + m_primary_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success)); + if(U_SUCCESS(success) == 0) + init_error(); + m_primary_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::PRIMARY); + } + U_NAMESPACE_QUALIFIER Locale getloc()const + { + return m_locale; + } + string_type do_transform(const char_type* p1, const char_type* p2, const U_NAMESPACE_QUALIFIER Collator* pcoll) const + { + // TODO make thread safe!!!! : + typedef u32_to_u16_iterator itt; + itt i(p1), j(p2); + std::vector< ::UChar> t(i, j); + std::uint8_t result[100]; + std::int32_t len; + if (!t.empty()) + len = pcoll->getSortKey(&*t.begin(), static_cast(t.size()), result, sizeof(result)); + else + len = pcoll->getSortKey(static_cast(0), static_cast(0), result, sizeof(result)); + if (std::size_t(len) > sizeof(result)) + { + std::unique_ptr< std::uint8_t[]> presult(new ::uint8_t[len + 1]); + if (!t.empty()) + len = pcoll->getSortKey(&*t.begin(), static_cast(t.size()), presult.get(), len + 1); + else + len = pcoll->getSortKey(static_cast(0), static_cast(0), presult.get(), len + 1); + if ((0 == presult[len - 1]) && (len > 1)) + --len; + return string_type(presult.get(), presult.get() + len); + } + if ((0 == result[len - 1]) && (len > 1)) + --len; + return string_type(result, result + len); + } + string_type transform(const char_type* p1, const char_type* p2) const + { + return do_transform(p1, p2, m_collator.get()); + } + string_type transform_primary(const char_type* p1, const char_type* p2) const + { + return do_transform(p1, p2, m_primary_collator.get()); + } +private: + void init_error() + { + std::runtime_error e("Could not initialize ICU resources"); +#ifndef BOOST_REGEX_STANDALONE + boost::throw_exception(e); +#else + throw e; +#endif + } + U_NAMESPACE_QUALIFIER Locale m_locale; // The ICU locale that we're using + std::unique_ptr< U_NAMESPACE_QUALIFIER Collator> m_collator; // The full collation object + std::unique_ptr< U_NAMESPACE_QUALIFIER Collator> m_primary_collator; // The primary collation object +}; +inline std::shared_ptr get_icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& loc) +{ + return std::shared_ptr(new icu_regex_traits_implementation(loc)); +} + +} + +class icu_regex_traits +{ +public: + typedef UChar32 char_type; + typedef std::size_t size_type; + typedef std::vector string_type; + typedef U_NAMESPACE_QUALIFIER Locale locale_type; + typedef std::uint64_t char_class_type; + + struct boost_extensions_tag{}; + + icu_regex_traits() + : m_pimpl(BOOST_REGEX_DETAIL_NS::get_icu_regex_traits_implementation(U_NAMESPACE_QUALIFIER Locale())) + { + } + static size_type length(const char_type* p) + { + size_type result = 0; + while (*p) + { + ++p; + ++result; + } + return result; + } + + ::boost::regex_constants::syntax_type syntax_type(char_type c)const + { + return ((c < 0x7f) && (c > 0)) ? BOOST_REGEX_DETAIL_NS::get_default_syntax_type(static_cast(c)) : regex_constants::syntax_char; + } + ::boost::regex_constants::escape_syntax_type escape_syntax_type(char_type c) const + { + return ((c < 0x7f) && (c > 0)) ? BOOST_REGEX_DETAIL_NS::get_default_escape_syntax_type(static_cast(c)) : regex_constants::syntax_char; + } + char_type translate(char_type c) const + { + return c; + } + char_type translate_nocase(char_type c) const + { + return ::u_foldCase(c, U_FOLD_CASE_DEFAULT); + } + char_type translate(char_type c, bool icase) const + { + return icase ? translate_nocase(c) : translate(c); + } + char_type tolower(char_type c) const + { + return ::u_tolower(c); + } + char_type toupper(char_type c) const + { + return ::u_toupper(c); + } + string_type transform(const char_type* p1, const char_type* p2) const + { + return m_pimpl->transform(p1, p2); + } + string_type transform_primary(const char_type* p1, const char_type* p2) const + { + return m_pimpl->transform_primary(p1, p2); + } + char_class_type lookup_classname(const char_type* p1, const char_type* p2) const + { + constexpr char_class_type mask_blank = char_class_type(1) << offset_blank; + constexpr char_class_type mask_space = char_class_type(1) << offset_space; + constexpr char_class_type mask_xdigit = char_class_type(1) << offset_xdigit; + constexpr char_class_type mask_underscore = char_class_type(1) << offset_underscore; + constexpr char_class_type mask_unicode = char_class_type(1) << offset_unicode; + //constexpr char_class_type mask_any = char_class_type(1) << offset_any; + //constexpr char_class_type mask_ascii = char_class_type(1) << offset_ascii; + constexpr char_class_type mask_horizontal = char_class_type(1) << offset_horizontal; + constexpr char_class_type mask_vertical = char_class_type(1) << offset_vertical; + + static const char_class_type masks[] = + { + 0, + U_GC_L_MASK | U_GC_ND_MASK, + U_GC_L_MASK, + mask_blank, + U_GC_CC_MASK | U_GC_CF_MASK | U_GC_ZL_MASK | U_GC_ZP_MASK, + U_GC_ND_MASK, + U_GC_ND_MASK, + (0x3FFFFFFFu) & ~(U_GC_CC_MASK | U_GC_CF_MASK | U_GC_CS_MASK | U_GC_CN_MASK | U_GC_Z_MASK), + mask_horizontal, + U_GC_LL_MASK, + U_GC_LL_MASK, + ~(U_GC_C_MASK), + U_GC_P_MASK, + char_class_type(U_GC_Z_MASK) | mask_space, + char_class_type(U_GC_Z_MASK) | mask_space, + U_GC_LU_MASK, + mask_unicode, + U_GC_LU_MASK, + mask_vertical, + char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore, + char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore, + char_class_type(U_GC_ND_MASK) | mask_xdigit, + }; + + int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2); + if (idx >= 0) + return masks[idx + 1]; + char_class_type result = lookup_icu_mask(p1, p2); + if (result != 0) + return result; + + if (idx < 0) + { + string_type s(p1, p2); + string_type::size_type i = 0; + while (i < s.size()) + { + s[i] = static_cast((::u_tolower)(s[i])); + if (::u_isspace(s[i]) || (s[i] == '-') || (s[i] == '_')) + s.erase(s.begin() + i, s.begin() + i + 1); + else + { + s[i] = static_cast((::u_tolower)(s[i])); + ++i; + } + } + if (!s.empty()) + idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size()); + if (idx >= 0) + return masks[idx + 1]; + if (!s.empty()) + result = lookup_icu_mask(&*s.begin(), &*s.begin() + s.size()); + if (result != 0) + return result; + } + BOOST_REGEX_ASSERT(std::size_t(idx + 1) < sizeof(masks) / sizeof(masks[0])); + return masks[idx + 1]; + } + string_type lookup_collatename(const char_type* p1, const char_type* p2) const + { + string_type result; + if (std::find_if(p1, p2, std::bind(std::greater< ::UChar32>(), std::placeholders::_1, 0x7f)) == p2) + { + std::string s(p1, p2); + // Try Unicode name: + UErrorCode err = U_ZERO_ERROR; + UChar32 c = ::u_charFromName(U_UNICODE_CHAR_NAME, s.c_str(), &err); + if (U_SUCCESS(err)) + { + result.push_back(c); + return result; + } + // Try Unicode-extended name: + err = U_ZERO_ERROR; + c = ::u_charFromName(U_EXTENDED_CHAR_NAME, s.c_str(), &err); + if (U_SUCCESS(err)) + { + result.push_back(c); + return result; + } + // try POSIX name: + s = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(s); + result.assign(s.begin(), s.end()); + } + if (result.empty() && (p2 - p1 == 1)) + result.push_back(*p1); + return result; + } + bool isctype(char_type c, char_class_type f) const + { + constexpr char_class_type mask_blank = char_class_type(1) << offset_blank; + constexpr char_class_type mask_space = char_class_type(1) << offset_space; + constexpr char_class_type mask_xdigit = char_class_type(1) << offset_xdigit; + constexpr char_class_type mask_underscore = char_class_type(1) << offset_underscore; + constexpr char_class_type mask_unicode = char_class_type(1) << offset_unicode; + constexpr char_class_type mask_any = char_class_type(1) << offset_any; + constexpr char_class_type mask_ascii = char_class_type(1) << offset_ascii; + constexpr char_class_type mask_horizontal = char_class_type(1) << offset_horizontal; + constexpr char_class_type mask_vertical = char_class_type(1) << offset_vertical; + + // check for standard catagories first: + char_class_type m = char_class_type(static_cast(1) << u_charType(c)); + if ((m & f) != 0) + return true; + // now check for special cases: + if (((f & mask_blank) != 0) && u_isblank(c)) + return true; + if (((f & mask_space) != 0) && u_isspace(c)) + return true; + if (((f & mask_xdigit) != 0) && (u_digit(c, 16) >= 0)) + return true; + if (((f & mask_unicode) != 0) && (c >= 0x100)) + return true; + if (((f & mask_underscore) != 0) && (c == '_')) + return true; + if (((f & mask_any) != 0) && (c <= 0x10FFFF)) + return true; + if (((f & mask_ascii) != 0) && (c <= 0x7F)) + return true; + if (((f & mask_vertical) != 0) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == static_cast('\v')) || (m == U_GC_ZL_MASK) || (m == U_GC_ZP_MASK))) + return true; + if (((f & mask_horizontal) != 0) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && u_isspace(c) && (c != static_cast('\v'))) + return true; + return false; + } + std::intmax_t toi(const char_type*& p1, const char_type* p2, int radix)const + { + return BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this); + } + int value(char_type c, int radix)const + { + return u_digit(c, static_cast< std::int8_t>(radix)); + } + locale_type imbue(locale_type l) + { + locale_type result(m_pimpl->getloc()); + m_pimpl = BOOST_REGEX_DETAIL_NS::get_icu_regex_traits_implementation(l); + return result; + } + locale_type getloc()const + { + return locale_type(); + } + std::string error_string(::boost::regex_constants::error_type n) const + { + return BOOST_REGEX_DETAIL_NS::get_default_error_string(n); + } +private: + icu_regex_traits(const icu_regex_traits&); + icu_regex_traits& operator=(const icu_regex_traits&); + + // + // define the bitmasks offsets we need for additional character properties: + // + enum{ + offset_blank = U_CHAR_CATEGORY_COUNT, + offset_space = U_CHAR_CATEGORY_COUNT+1, + offset_xdigit = U_CHAR_CATEGORY_COUNT+2, + offset_underscore = U_CHAR_CATEGORY_COUNT+3, + offset_unicode = U_CHAR_CATEGORY_COUNT+4, + offset_any = U_CHAR_CATEGORY_COUNT+5, + offset_ascii = U_CHAR_CATEGORY_COUNT+6, + offset_horizontal = U_CHAR_CATEGORY_COUNT+7, + offset_vertical = U_CHAR_CATEGORY_COUNT+8 + }; + + static char_class_type lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2) + { + //constexpr char_class_type mask_blank = char_class_type(1) << offset_blank; + //constexpr char_class_type mask_space = char_class_type(1) << offset_space; + //constexpr char_class_type mask_xdigit = char_class_type(1) << offset_xdigit; + //constexpr char_class_type mask_underscore = char_class_type(1) << offset_underscore; + //constexpr char_class_type mask_unicode = char_class_type(1) << offset_unicode; + constexpr char_class_type mask_any = char_class_type(1) << offset_any; + constexpr char_class_type mask_ascii = char_class_type(1) << offset_ascii; + //constexpr char_class_type mask_horizontal = char_class_type(1) << offset_horizontal; + //constexpr char_class_type mask_vertical = char_class_type(1) << offset_vertical; + + static const ::UChar32 prop_name_table[] = { + /* any */ 'a', 'n', 'y', + /* ascii */ 'a', 's', 'c', 'i', 'i', + /* assigned */ 'a', 's', 's', 'i', 'g', 'n', 'e', 'd', + /* c* */ 'c', '*', + /* cc */ 'c', 'c', + /* cf */ 'c', 'f', + /* closepunctuation */ 'c', 'l', 'o', 's', 'e', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* cn */ 'c', 'n', + /* co */ 'c', 'o', + /* connectorpunctuation */ 'c', 'o', 'n', 'n', 'e', 'c', 't', 'o', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* control */ 'c', 'o', 'n', 't', 'r', 'o', 'l', + /* cs */ 'c', 's', + /* currencysymbol */ 'c', 'u', 'r', 'r', 'e', 'n', 'c', 'y', 's', 'y', 'm', 'b', 'o', 'l', + /* dashpunctuation */ 'd', 'a', 's', 'h', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* decimaldigitnumber */ 'd', 'e', 'c', 'i', 'm', 'a', 'l', 'd', 'i', 'g', 'i', 't', 'n', 'u', 'm', 'b', 'e', 'r', + /* enclosingmark */ 'e', 'n', 'c', 'l', 'o', 's', 'i', 'n', 'g', 'm', 'a', 'r', 'k', + /* finalpunctuation */ 'f', 'i', 'n', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* format */ 'f', 'o', 'r', 'm', 'a', 't', + /* initialpunctuation */ 'i', 'n', 'i', 't', 'i', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* l* */ 'l', '*', + /* letter */ 'l', 'e', 't', 't', 'e', 'r', + /* letternumber */ 'l', 'e', 't', 't', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r', + /* lineseparator */ 'l', 'i', 'n', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', + /* ll */ 'l', 'l', + /* lm */ 'l', 'm', + /* lo */ 'l', 'o', + /* lowercaseletter */ 'l', 'o', 'w', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', + /* lt */ 'l', 't', + /* lu */ 'l', 'u', + /* m* */ 'm', '*', + /* mark */ 'm', 'a', 'r', 'k', + /* mathsymbol */ 'm', 'a', 't', 'h', 's', 'y', 'm', 'b', 'o', 'l', + /* mc */ 'm', 'c', + /* me */ 'm', 'e', + /* mn */ 'm', 'n', + /* modifierletter */ 'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r', + /* modifiersymbol */ 'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l', + /* n* */ 'n', '*', + /* nd */ 'n', 'd', + /* nl */ 'n', 'l', + /* no */ 'n', 'o', + /* nonspacingmark */ 'n', 'o', 'n', 's', 'p', 'a', 'c', 'i', 'n', 'g', 'm', 'a', 'r', 'k', + /* notassigned */ 'n', 'o', 't', 'a', 's', 's', 'i', 'g', 'n', 'e', 'd', + /* number */ 'n', 'u', 'm', 'b', 'e', 'r', + /* openpunctuation */ 'o', 'p', 'e', 'n', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* other */ 'o', 't', 'h', 'e', 'r', + /* otherletter */ 'o', 't', 'h', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r', + /* othernumber */ 'o', 't', 'h', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r', + /* otherpunctuation */ 'o', 't', 'h', 'e', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* othersymbol */ 'o', 't', 'h', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l', + /* p* */ 'p', '*', + /* paragraphseparator */ 'p', 'a', 'r', 'a', 'g', 'r', 'a', 'p', 'h', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', + /* pc */ 'p', 'c', + /* pd */ 'p', 'd', + /* pe */ 'p', 'e', + /* pf */ 'p', 'f', + /* pi */ 'p', 'i', + /* po */ 'p', 'o', + /* privateuse */ 'p', 'r', 'i', 'v', 'a', 't', 'e', 'u', 's', 'e', + /* ps */ 'p', 's', + /* punctuation */ 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* s* */ 's', '*', + /* sc */ 's', 'c', + /* separator */ 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', + /* sk */ 's', 'k', + /* sm */ 's', 'm', + /* so */ 's', 'o', + /* spaceseparator */ 's', 'p', 'a', 'c', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', + /* spacingcombiningmark */ 's', 'p', 'a', 'c', 'i', 'n', 'g', 'c', 'o', 'm', 'b', 'i', 'n', 'i', 'n', 'g', 'm', 'a', 'r', 'k', + /* surrogate */ 's', 'u', 'r', 'r', 'o', 'g', 'a', 't', 'e', + /* symbol */ 's', 'y', 'm', 'b', 'o', 'l', + /* titlecase */ 't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e', + /* titlecaseletter */ 't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', + /* uppercaseletter */ 'u', 'p', 'p', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', + /* z* */ 'z', '*', + /* zl */ 'z', 'l', + /* zp */ 'z', 'p', + /* zs */ 'z', 's', + }; + + static const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32> range_data[] = { + { prop_name_table + 0, prop_name_table + 3, }, // any + { prop_name_table + 3, prop_name_table + 8, }, // ascii + { prop_name_table + 8, prop_name_table + 16, }, // assigned + { prop_name_table + 16, prop_name_table + 18, }, // c* + { prop_name_table + 18, prop_name_table + 20, }, // cc + { prop_name_table + 20, prop_name_table + 22, }, // cf + { prop_name_table + 22, prop_name_table + 38, }, // closepunctuation + { prop_name_table + 38, prop_name_table + 40, }, // cn + { prop_name_table + 40, prop_name_table + 42, }, // co + { prop_name_table + 42, prop_name_table + 62, }, // connectorpunctuation + { prop_name_table + 62, prop_name_table + 69, }, // control + { prop_name_table + 69, prop_name_table + 71, }, // cs + { prop_name_table + 71, prop_name_table + 85, }, // currencysymbol + { prop_name_table + 85, prop_name_table + 100, }, // dashpunctuation + { prop_name_table + 100, prop_name_table + 118, }, // decimaldigitnumber + { prop_name_table + 118, prop_name_table + 131, }, // enclosingmark + { prop_name_table + 131, prop_name_table + 147, }, // finalpunctuation + { prop_name_table + 147, prop_name_table + 153, }, // format + { prop_name_table + 153, prop_name_table + 171, }, // initialpunctuation + { prop_name_table + 171, prop_name_table + 173, }, // l* + { prop_name_table + 173, prop_name_table + 179, }, // letter + { prop_name_table + 179, prop_name_table + 191, }, // letternumber + { prop_name_table + 191, prop_name_table + 204, }, // lineseparator + { prop_name_table + 204, prop_name_table + 206, }, // ll + { prop_name_table + 206, prop_name_table + 208, }, // lm + { prop_name_table + 208, prop_name_table + 210, }, // lo + { prop_name_table + 210, prop_name_table + 225, }, // lowercaseletter + { prop_name_table + 225, prop_name_table + 227, }, // lt + { prop_name_table + 227, prop_name_table + 229, }, // lu + { prop_name_table + 229, prop_name_table + 231, }, // m* + { prop_name_table + 231, prop_name_table + 235, }, // mark + { prop_name_table + 235, prop_name_table + 245, }, // mathsymbol + { prop_name_table + 245, prop_name_table + 247, }, // mc + { prop_name_table + 247, prop_name_table + 249, }, // me + { prop_name_table + 249, prop_name_table + 251, }, // mn + { prop_name_table + 251, prop_name_table + 265, }, // modifierletter + { prop_name_table + 265, prop_name_table + 279, }, // modifiersymbol + { prop_name_table + 279, prop_name_table + 281, }, // n* + { prop_name_table + 281, prop_name_table + 283, }, // nd + { prop_name_table + 283, prop_name_table + 285, }, // nl + { prop_name_table + 285, prop_name_table + 287, }, // no + { prop_name_table + 287, prop_name_table + 301, }, // nonspacingmark + { prop_name_table + 301, prop_name_table + 312, }, // notassigned + { prop_name_table + 312, prop_name_table + 318, }, // number + { prop_name_table + 318, prop_name_table + 333, }, // openpunctuation + { prop_name_table + 333, prop_name_table + 338, }, // other + { prop_name_table + 338, prop_name_table + 349, }, // otherletter + { prop_name_table + 349, prop_name_table + 360, }, // othernumber + { prop_name_table + 360, prop_name_table + 376, }, // otherpunctuation + { prop_name_table + 376, prop_name_table + 387, }, // othersymbol + { prop_name_table + 387, prop_name_table + 389, }, // p* + { prop_name_table + 389, prop_name_table + 407, }, // paragraphseparator + { prop_name_table + 407, prop_name_table + 409, }, // pc + { prop_name_table + 409, prop_name_table + 411, }, // pd + { prop_name_table + 411, prop_name_table + 413, }, // pe + { prop_name_table + 413, prop_name_table + 415, }, // pf + { prop_name_table + 415, prop_name_table + 417, }, // pi + { prop_name_table + 417, prop_name_table + 419, }, // po + { prop_name_table + 419, prop_name_table + 429, }, // privateuse + { prop_name_table + 429, prop_name_table + 431, }, // ps + { prop_name_table + 431, prop_name_table + 442, }, // punctuation + { prop_name_table + 442, prop_name_table + 444, }, // s* + { prop_name_table + 444, prop_name_table + 446, }, // sc + { prop_name_table + 446, prop_name_table + 455, }, // separator + { prop_name_table + 455, prop_name_table + 457, }, // sk + { prop_name_table + 457, prop_name_table + 459, }, // sm + { prop_name_table + 459, prop_name_table + 461, }, // so + { prop_name_table + 461, prop_name_table + 475, }, // spaceseparator + { prop_name_table + 475, prop_name_table + 495, }, // spacingcombiningmark + { prop_name_table + 495, prop_name_table + 504, }, // surrogate + { prop_name_table + 504, prop_name_table + 510, }, // symbol + { prop_name_table + 510, prop_name_table + 519, }, // titlecase + { prop_name_table + 519, prop_name_table + 534, }, // titlecaseletter + { prop_name_table + 534, prop_name_table + 549, }, // uppercaseletter + { prop_name_table + 549, prop_name_table + 551, }, // z* + { prop_name_table + 551, prop_name_table + 553, }, // zl + { prop_name_table + 553, prop_name_table + 555, }, // zp + { prop_name_table + 555, prop_name_table + 557, }, // zs + }; + + static const icu_regex_traits::char_class_type icu_class_map[] = { + mask_any, // any + mask_ascii, // ascii + (0x3FFFFFFFu) & ~(U_GC_CN_MASK), // assigned + U_GC_C_MASK, // c* + U_GC_CC_MASK, // cc + U_GC_CF_MASK, // cf + U_GC_PE_MASK, // closepunctuation + U_GC_CN_MASK, // cn + U_GC_CO_MASK, // co + U_GC_PC_MASK, // connectorpunctuation + U_GC_CC_MASK, // control + U_GC_CS_MASK, // cs + U_GC_SC_MASK, // currencysymbol + U_GC_PD_MASK, // dashpunctuation + U_GC_ND_MASK, // decimaldigitnumber + U_GC_ME_MASK, // enclosingmark + U_GC_PF_MASK, // finalpunctuation + U_GC_CF_MASK, // format + U_GC_PI_MASK, // initialpunctuation + U_GC_L_MASK, // l* + U_GC_L_MASK, // letter + U_GC_NL_MASK, // letternumber + U_GC_ZL_MASK, // lineseparator + U_GC_LL_MASK, // ll + U_GC_LM_MASK, // lm + U_GC_LO_MASK, // lo + U_GC_LL_MASK, // lowercaseletter + U_GC_LT_MASK, // lt + U_GC_LU_MASK, // lu + U_GC_M_MASK, // m* + U_GC_M_MASK, // mark + U_GC_SM_MASK, // mathsymbol + U_GC_MC_MASK, // mc + U_GC_ME_MASK, // me + U_GC_MN_MASK, // mn + U_GC_LM_MASK, // modifierletter + U_GC_SK_MASK, // modifiersymbol + U_GC_N_MASK, // n* + U_GC_ND_MASK, // nd + U_GC_NL_MASK, // nl + U_GC_NO_MASK, // no + U_GC_MN_MASK, // nonspacingmark + U_GC_CN_MASK, // notassigned + U_GC_N_MASK, // number + U_GC_PS_MASK, // openpunctuation + U_GC_C_MASK, // other + U_GC_LO_MASK, // otherletter + U_GC_NO_MASK, // othernumber + U_GC_PO_MASK, // otherpunctuation + U_GC_SO_MASK, // othersymbol + U_GC_P_MASK, // p* + U_GC_ZP_MASK, // paragraphseparator + U_GC_PC_MASK, // pc + U_GC_PD_MASK, // pd + U_GC_PE_MASK, // pe + U_GC_PF_MASK, // pf + U_GC_PI_MASK, // pi + U_GC_PO_MASK, // po + U_GC_CO_MASK, // privateuse + U_GC_PS_MASK, // ps + U_GC_P_MASK, // punctuation + U_GC_S_MASK, // s* + U_GC_SC_MASK, // sc + U_GC_Z_MASK, // separator + U_GC_SK_MASK, // sk + U_GC_SM_MASK, // sm + U_GC_SO_MASK, // so + U_GC_ZS_MASK, // spaceseparator + U_GC_MC_MASK, // spacingcombiningmark + U_GC_CS_MASK, // surrogate + U_GC_S_MASK, // symbol + U_GC_LT_MASK, // titlecase + U_GC_LT_MASK, // titlecaseletter + U_GC_LU_MASK, // uppercaseletter + U_GC_Z_MASK, // z* + U_GC_ZL_MASK, // zl + U_GC_ZP_MASK, // zp + U_GC_ZS_MASK, // zs + }; + + + const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* ranges_begin = range_data; + const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* ranges_end = range_data + (sizeof(range_data) / sizeof(range_data[0])); + + BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32> t = { p1, p2, }; + const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* p = std::lower_bound(ranges_begin, ranges_end, t); + if ((p != ranges_end) && (t == *p)) + return icu_class_map[p - ranges_begin]; + return 0; + } + std::shared_ptr< ::boost::BOOST_REGEX_DETAIL_NS::icu_regex_traits_implementation> m_pimpl; +}; + +} // namespace boost + +namespace boost{ + +// types: +typedef basic_regex< ::UChar32, icu_regex_traits> u32regex; +typedef match_results u32match; +typedef match_results u16match; + +// +// Construction of 32-bit regex types from UTF-8 and UTF-16 primitives: +// +namespace BOOST_REGEX_DETAIL_NS{ + +template +inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt, + const std::integral_constant*) +{ + typedef boost::u8_to_u32_iterator conv_type; + return u32regex(conv_type(i, i, j), conv_type(j, i, j), opt); +} + +template +inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt, + const std::integral_constant*) +{ + typedef boost::u16_to_u32_iterator conv_type; + return u32regex(conv_type(i, i, j), conv_type(j, i, j), opt); +} + +template +inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt, + const std::integral_constant*) +{ + return u32regex(i, j, opt); +} +} + +// BOOST_REGEX_UCHAR_IS_WCHAR_T +// +// Source inspection of unicode/umachine.h in ICU version 59 indicates that: +// +// On version 59, UChar is always char16_t in C++ mode (and uint16_t in C mode) +// +// On earlier versions, the logic is +// +// #if U_SIZEOF_WCHAR_T==2 +// typedef wchar_t OldUChar; +// #elif defined(__CHAR16_TYPE__) +// typedef __CHAR16_TYPE__ OldUChar; +// #else +// typedef uint16_t OldUChar; +// #endif +// +// That is, UChar is wchar_t only on versions below 59, when U_SIZEOF_WCHAR_T==2 +// +// Hence, + +#define BOOST_REGEX_UCHAR_IS_WCHAR_T (U_ICU_VERSION_MAJOR_NUM < 59 && U_SIZEOF_WCHAR_T == 2) + +#if BOOST_REGEX_UCHAR_IS_WCHAR_T + static_assert((std::is_same::value), "Configuration logic has failed!"); +#else + static_assert(!(std::is_same::value), "Configuration logic has failed!"); +#endif + +// +// Construction from an iterator pair: +// +template +inline u32regex make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt) +{ + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(i, j, opt, static_cast const*>(0)); +} +// +// construction from UTF-8 nul-terminated strings: +// +inline u32regex make_u32regex(const char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) +{ + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(p, p + std::strlen(p), opt, static_cast const*>(0)); +} +inline u32regex make_u32regex(const unsigned char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) +{ + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(p, p + std::strlen(reinterpret_cast(p)), opt, static_cast const*>(0)); +} +// +// construction from UTF-16 nul-terminated strings: +// +#ifndef BOOST_NO_WREGEX +inline u32regex make_u32regex(const wchar_t* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) +{ + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(p, p + std::wcslen(p), opt, static_cast const*>(0)); +} +#endif +#if !BOOST_REGEX_UCHAR_IS_WCHAR_T +inline u32regex make_u32regex(const UChar* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) +{ + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(p, p + u_strlen(p), opt, static_cast const*>(0)); +} +#endif +// +// construction from basic_string class-template: +// +template +inline u32regex make_u32regex(const std::basic_string& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) +{ + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(s.begin(), s.end(), opt, static_cast const*>(0)); +} +// +// Construction from ICU string type: +// +inline u32regex make_u32regex(const U_NAMESPACE_QUALIFIER UnicodeString& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) +{ + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(s.getBuffer(), s.getBuffer() + s.length(), opt, static_cast const*>(0)); +} + +// +// regex_match overloads that widen the character type as appropriate: +// +namespace BOOST_REGEX_DETAIL_NS{ +template +void copy_results(MR1& out, MR2 const& in, NSubs named_subs) +{ + // copy results from an adapted MR2 match_results: + out.set_size(in.size(), in.prefix().first.base(), in.suffix().second.base()); + out.set_base(in.base().base()); + out.set_named_subs(named_subs); + for(int i = 0; i < (int)in.size(); ++i) + { + if(in[i].matched || !i) + { + out.set_first(in[i].first.base(), i); + out.set_second(in[i].second.base(), i, in[i].matched); + } + } +#ifdef BOOST_REGEX_MATCH_EXTRA + // Copy full capture info as well: + for(int i = 0; i < (int)in.size(); ++i) + { + if(in[i].captures().size()) + { + out[i].get_captures().assign(in[i].captures().size(), typename MR1::value_type()); + for(int j = 0; j < (int)out[i].captures().size(); ++j) + { + out[i].get_captures()[j].first = in[i].captures()[j].first.base(); + out[i].get_captures()[j].second = in[i].captures()[j].second.base(); + out[i].get_captures()[j].matched = in[i].captures()[j].matched; + } + } + } +#endif +} + +template +inline bool do_regex_match(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + std::integral_constant const*) +{ + return ::boost::regex_match(first, last, m, e, flags); +} +template +bool do_regex_match(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + std::integral_constant const*) +{ + typedef u16_to_u32_iterator conv_type; + typedef match_results match_type; + //typedef typename match_type::allocator_type alloc_type; + match_type what; + bool result = ::boost::regex_match(conv_type(first, first, last), conv_type(last, first, last), what, e, flags); + // copy results across to m: + if(result) copy_results(m, what, e.get_named_subs()); + return result; +} +template +bool do_regex_match(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + std::integral_constant const*) +{ + typedef u8_to_u32_iterator conv_type; + typedef match_results match_type; + //typedef typename match_type::allocator_type alloc_type; + match_type what; + bool result = ::boost::regex_match(conv_type(first, first, last), conv_type(last, first, last), what, e, flags); + // copy results across to m: + if(result) copy_results(m, what, e.get_named_subs()); + return result; +} +} // namespace BOOST_REGEX_DETAIL_NS + +template +inline bool u32regex_match(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_match(first, last, m, e, flags, static_cast const*>(0)); +} +inline bool u32regex_match(const UChar* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+u_strlen(p), m, e, flags, static_cast const*>(0)); +} +#if !BOOST_REGEX_UCHAR_IS_WCHAR_T && !defined(BOOST_NO_WREGEX) +inline bool u32regex_match(const wchar_t* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+std::wcslen(p), m, e, flags, static_cast const*>(0)); +} +#endif +inline bool u32regex_match(const char* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+std::strlen(p), m, e, flags, static_cast const*>(0)); +} +inline bool u32regex_match(const unsigned char* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+std::strlen((const char*)p), m, e, flags, static_cast const*>(0)); +} +inline bool u32regex_match(const std::string& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); +} +#ifndef BOOST_NO_STD_WSTRING +inline bool u32regex_match(const std::wstring& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); +} +#endif +inline bool u32regex_match(const U_NAMESPACE_QUALIFIER UnicodeString& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast const*>(0)); +} +// +// regex_match overloads that do not return what matched: +// +template +inline bool u32regex_match(BidiIterator first, BidiIterator last, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(first, last, m, e, flags, static_cast const*>(0)); +} +inline bool u32regex_match(const UChar* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+u_strlen(p), m, e, flags, static_cast const*>(0)); +} +#if !BOOST_REGEX_UCHAR_IS_WCHAR_T && !defined(BOOST_NO_WREGEX) +inline bool u32regex_match(const wchar_t* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+std::wcslen(p), m, e, flags, static_cast const*>(0)); +} +#endif +inline bool u32regex_match(const char* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+std::strlen(p), m, e, flags, static_cast const*>(0)); +} +inline bool u32regex_match(const unsigned char* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+std::strlen((const char*)p), m, e, flags, static_cast const*>(0)); +} +inline bool u32regex_match(const std::string& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); +} +#ifndef BOOST_NO_STD_WSTRING +inline bool u32regex_match(const std::wstring& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); +} +#endif +inline bool u32regex_match(const U_NAMESPACE_QUALIFIER UnicodeString& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast const*>(0)); +} + +// +// regex_search overloads that widen the character type as appropriate: +// +namespace BOOST_REGEX_DETAIL_NS{ +template +inline bool do_regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base, + std::integral_constant const*) +{ + return ::boost::regex_search(first, last, m, e, flags, base); +} +template +bool do_regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base, + std::integral_constant const*) +{ + typedef u16_to_u32_iterator conv_type; + typedef match_results match_type; + //typedef typename match_type::allocator_type alloc_type; + match_type what; + bool result = ::boost::regex_search(conv_type(first, first, last), conv_type(last, first, last), what, e, flags, conv_type(base)); + // copy results across to m: + if(result) copy_results(m, what, e.get_named_subs()); + return result; +} +template +bool do_regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base, + std::integral_constant const*) +{ + typedef u8_to_u32_iterator conv_type; + typedef match_results match_type; + //typedef typename match_type::allocator_type alloc_type; + match_type what; + bool result = ::boost::regex_search(conv_type(first, first, last), conv_type(last, first, last), what, e, flags, conv_type(base)); + // copy results across to m: + if(result) copy_results(m, what, e.get_named_subs()); + return result; +} +} + +template +inline bool u32regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_search(first, last, m, e, flags, first, static_cast const*>(0)); +} +template +inline bool u32regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_search(first, last, m, e, flags, base, static_cast const*>(0)); +} +inline bool u32regex_search(const UChar* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast const*>(0)); +} +#if !BOOST_REGEX_UCHAR_IS_WCHAR_T && !defined(BOOST_NO_WREGEX) +inline bool u32regex_search(const wchar_t* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast const*>(0)); +} +#endif +inline bool u32regex_search(const char* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast const*>(0)); +} +inline bool u32regex_search(const unsigned char* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast const*>(0)); +} +inline bool u32regex_search(const std::string& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); +} +#ifndef BOOST_NO_STD_WSTRING +inline bool u32regex_search(const std::wstring& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); +} +#endif +inline bool u32regex_search(const U_NAMESPACE_QUALIFIER UnicodeString& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast const*>(0)); +} +template +inline bool u32regex_search(BidiIterator first, BidiIterator last, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(first, last, m, e, flags, first, static_cast const*>(0)); +} +inline bool u32regex_search(const UChar* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast const*>(0)); +} +#if !BOOST_REGEX_UCHAR_IS_WCHAR_T && !defined(BOOST_NO_WREGEX) +inline bool u32regex_search(const wchar_t* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast const*>(0)); +} +#endif +inline bool u32regex_search(const char* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast const*>(0)); +} +inline bool u32regex_search(const unsigned char* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast const*>(0)); +} +inline bool u32regex_search(const std::string& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); +} +#ifndef BOOST_NO_STD_WSTRING +inline bool u32regex_search(const std::wstring& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); +} +#endif +inline bool u32regex_search(const U_NAMESPACE_QUALIFIER UnicodeString& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast const*>(0)); +} + +// +// overloads for regex_replace with utf-8 and utf-16 data types: +// +namespace BOOST_REGEX_DETAIL_NS{ +template +inline std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator > + make_utf32_seq(I i, I j, std::integral_constant const*) +{ + return std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator >(boost::u8_to_u32_iterator(i, i, j), boost::u8_to_u32_iterator(j, i, j)); +} +template +inline std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator > + make_utf32_seq(I i, I j, std::integral_constant const*) +{ + return std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator >(boost::u16_to_u32_iterator(i, i, j), boost::u16_to_u32_iterator(j, i, j)); +} +template +inline std::pair< I, I > + make_utf32_seq(I i, I j, std::integral_constant const*) +{ + return std::pair< I, I >(i, j); +} +template +inline std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator > + make_utf32_seq(const charT* p, std::integral_constant const*) +{ + std::size_t len = std::strlen((const char*)p); + return std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator >(boost::u8_to_u32_iterator(p, p, p+len), boost::u8_to_u32_iterator(p+len, p, p+len)); +} +template +inline std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator > + make_utf32_seq(const charT* p, std::integral_constant const*) +{ + std::size_t len = u_strlen((const UChar*)p); + return std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator >(boost::u16_to_u32_iterator(p, p, p + len), boost::u16_to_u32_iterator(p+len, p, p + len)); +} +template +inline std::pair< const charT*, const charT* > + make_utf32_seq(const charT* p, std::integral_constant const*) +{ + return std::pair< const charT*, const charT* >(p, p+icu_regex_traits::length((UChar32 const*)p)); +} +template +inline OutputIterator make_utf32_out(OutputIterator o, std::integral_constant const*) +{ + return o; +} +template +inline utf16_output_iterator make_utf32_out(OutputIterator o, std::integral_constant const*) +{ + return o; +} +template +inline utf8_output_iterator make_utf32_out(OutputIterator o, std::integral_constant const*) +{ + return o; +} + +template +OutputIterator do_regex_replace(OutputIterator out, + std::pair const& in, + const u32regex& e, + const std::pair& fmt, + match_flag_type flags + ) +{ + // unfortunately we have to copy the format string in order to pass in onward: + std::vector f; + f.assign(fmt.first, fmt.second); + + regex_iterator i(in.first, in.second, e, flags); + regex_iterator j; + if(i == j) + { + if(!(flags & regex_constants::format_no_copy)) + out = std::copy(in.first, in.second, out); + } + else + { + I1 last_m = in.first; + while(i != j) + { + if(!(flags & regex_constants::format_no_copy)) + out = std::copy(i->prefix().first, i->prefix().second, out); + if(!f.empty()) + out = ::boost::BOOST_REGEX_DETAIL_NS::regex_format_imp(out, *i, &*f.begin(), &*f.begin() + f.size(), flags, e.get_traits()); + else + out = ::boost::BOOST_REGEX_DETAIL_NS::regex_format_imp(out, *i, static_cast(0), static_cast(0), flags, e.get_traits()); + last_m = (*i)[0].second; + if(flags & regex_constants::format_first_only) + break; + ++i; + } + if(!(flags & regex_constants::format_no_copy)) + out = std::copy(last_m, in.second, out); + } + return out; +} +template +inline const BaseIterator& extract_output_base(const BaseIterator& b) +{ + return b; +} +template +inline BaseIterator extract_output_base(const utf8_output_iterator& b) +{ + return b.base(); +} +template +inline BaseIterator extract_output_base(const utf16_output_iterator& b) +{ + return b.base(); +} +} // BOOST_REGEX_DETAIL_NS + +template +inline OutputIterator u32regex_replace(OutputIterator out, + BidirectionalIterator first, + BidirectionalIterator last, + const u32regex& e, + const charT* fmt, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::extract_output_base + ( + BOOST_REGEX_DETAIL_NS::do_regex_replace( + BOOST_REGEX_DETAIL_NS::make_utf32_out(out, static_cast const*>(0)), + BOOST_REGEX_DETAIL_NS::make_utf32_seq(first, last, static_cast const*>(0)), + e, + BOOST_REGEX_DETAIL_NS::make_utf32_seq(fmt, static_cast const*>(0)), + flags) + ); +} + +template +inline OutputIterator u32regex_replace(OutputIterator out, + Iterator first, + Iterator last, + const u32regex& e, + const std::basic_string& fmt, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::extract_output_base + ( + BOOST_REGEX_DETAIL_NS::do_regex_replace( + BOOST_REGEX_DETAIL_NS::make_utf32_out(out, static_cast const*>(0)), + BOOST_REGEX_DETAIL_NS::make_utf32_seq(first, last, static_cast const*>(0)), + e, + BOOST_REGEX_DETAIL_NS::make_utf32_seq(fmt.begin(), fmt.end(), static_cast const*>(0)), + flags) + ); +} + +template +inline OutputIterator u32regex_replace(OutputIterator out, + Iterator first, + Iterator last, + const u32regex& e, + const U_NAMESPACE_QUALIFIER UnicodeString& fmt, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::extract_output_base + ( + BOOST_REGEX_DETAIL_NS::do_regex_replace( + BOOST_REGEX_DETAIL_NS::make_utf32_out(out, static_cast const*>(0)), + BOOST_REGEX_DETAIL_NS::make_utf32_seq(first, last, static_cast const*>(0)), + e, + BOOST_REGEX_DETAIL_NS::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast const*>(0)), + flags) + ); +} + +template +std::basic_string u32regex_replace(const std::basic_string& s, + const u32regex& e, + const charT* fmt, + match_flag_type flags = match_default) +{ + std::basic_string result; + BOOST_REGEX_DETAIL_NS::string_out_iterator > i(result); + u32regex_replace(i, s.begin(), s.end(), e, fmt, flags); + return result; +} + +template +std::basic_string u32regex_replace(const std::basic_string& s, + const u32regex& e, + const std::basic_string& fmt, + match_flag_type flags = match_default) +{ + std::basic_string result; + BOOST_REGEX_DETAIL_NS::string_out_iterator > i(result); + u32regex_replace(i, s.begin(), s.end(), e, fmt.c_str(), flags); + return result; +} + +namespace BOOST_REGEX_DETAIL_NS{ + +class unicode_string_out_iterator +{ + U_NAMESPACE_QUALIFIER UnicodeString* out; +public: + unicode_string_out_iterator(U_NAMESPACE_QUALIFIER UnicodeString& s) : out(&s) {} + unicode_string_out_iterator& operator++() { return *this; } + unicode_string_out_iterator& operator++(int) { return *this; } + unicode_string_out_iterator& operator*() { return *this; } + unicode_string_out_iterator& operator=(UChar v) + { + *out += v; + return *this; + } + typedef std::ptrdiff_t difference_type; + typedef UChar value_type; + typedef value_type* pointer; + typedef value_type& reference; + typedef std::output_iterator_tag iterator_category; +}; + +} + +inline U_NAMESPACE_QUALIFIER UnicodeString u32regex_replace(const U_NAMESPACE_QUALIFIER UnicodeString& s, + const u32regex& e, + const UChar* fmt, + match_flag_type flags = match_default) +{ + U_NAMESPACE_QUALIFIER UnicodeString result; + BOOST_REGEX_DETAIL_NS::unicode_string_out_iterator i(result); + u32regex_replace(i, s.getBuffer(), s.getBuffer()+s.length(), e, fmt, flags); + return result; +} + +inline U_NAMESPACE_QUALIFIER UnicodeString u32regex_replace(const U_NAMESPACE_QUALIFIER UnicodeString& s, + const u32regex& e, + const U_NAMESPACE_QUALIFIER UnicodeString& fmt, + match_flag_type flags = match_default) +{ + U_NAMESPACE_QUALIFIER UnicodeString result; + BOOST_REGEX_DETAIL_NS::unicode_string_out_iterator i(result); + BOOST_REGEX_DETAIL_NS::do_regex_replace( + BOOST_REGEX_DETAIL_NS::make_utf32_out(i, static_cast const*>(0)), + BOOST_REGEX_DETAIL_NS::make_utf32_seq(s.getBuffer(), s.getBuffer()+s.length(), static_cast const*>(0)), + e, + BOOST_REGEX_DETAIL_NS::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast const*>(0)), + flags); + return result; +} + +} // namespace boost. + +#ifdef BOOST_REGEX_MSVC +#pragma warning (pop) +#endif + +#include +#include + +#endif diff --git a/scintilla/include/boost/regex/v5/iterator_category.hpp b/scintilla/include/boost/regex/v5/iterator_category.hpp new file mode 100644 index 0000000000..9bd745781c --- /dev/null +++ b/scintilla/include/boost/regex/v5/iterator_category.hpp @@ -0,0 +1,84 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_match.hpp + * VERSION see + * DESCRIPTION: Iterator traits for selecting an iterator type as + * an integral constant expression. + */ + + +#ifndef BOOST_REGEX_ITERATOR_CATEGORY_HPP +#define BOOST_REGEX_ITERATOR_CATEGORY_HPP + +#include +#include + +namespace boost{ +namespace detail{ + +template +struct is_random_imp +{ +private: + typedef typename std::iterator_traits::iterator_category cat; +public: + static const bool value = (std::is_convertible::value); +}; + +template +struct is_random_pointer_imp +{ + static const bool value = true; +}; + +template +struct is_random_imp_selector +{ + template + struct rebind + { + typedef is_random_imp type; + }; +}; + +template <> +struct is_random_imp_selector +{ + template + struct rebind + { + typedef is_random_pointer_imp type; + }; +}; + +} + +template +struct is_random_access_iterator +{ +private: + typedef detail::is_random_imp_selector< std::is_pointer::value> selector; + typedef typename selector::template rebind bound_type; + typedef typename bound_type::type answer; +public: + static const bool value = answer::value; +}; + +template +const bool is_random_access_iterator::value; + +} + +#endif + diff --git a/scintilla/include/boost/regex/v5/iterator_traits.hpp b/scintilla/include/boost/regex/v5/iterator_traits.hpp new file mode 100644 index 0000000000..293db6bf8c --- /dev/null +++ b/scintilla/include/boost/regex/v5/iterator_traits.hpp @@ -0,0 +1,32 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE iterator_traits.cpp + * VERSION see + * DESCRIPTION: Declares iterator traits workarounds. + */ + +#ifndef BOOST_REGEX_V5_ITERATOR_TRAITS_HPP +#define BOOST_REGEX_V5_ITERATOR_TRAITS_HPP + +namespace boost{ +namespace BOOST_REGEX_DETAIL_NS{ + +template +struct regex_iterator_traits : public std::iterator_traits {}; + +} // namespace BOOST_REGEX_DETAIL_NS +} // namespace boost + +#endif + diff --git a/scintilla/include/boost/regex/v5/match_flags.hpp b/scintilla/include/boost/regex/v5/match_flags.hpp new file mode 100644 index 0000000000..6ff236b041 --- /dev/null +++ b/scintilla/include/boost/regex/v5/match_flags.hpp @@ -0,0 +1,156 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE match_flags.hpp + * VERSION see + * DESCRIPTION: Declares match_flags type. + */ + +#ifndef BOOST_REGEX_V5_MATCH_FLAGS +#define BOOST_REGEX_V5_MATCH_FLAGS + +#ifdef __cplusplus +# include +#endif + +#ifdef __cplusplus +namespace boost{ + namespace regex_constants{ +#endif + +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#if BOOST_REGEX_MSVC >= 1800 +#pragma warning(disable : 26812) +#endif +#endif + +typedef enum _match_flags +{ + match_default = 0, + match_not_bol = 1, /* first is not start of line */ + match_not_eol = match_not_bol << 1, /* last is not end of line */ + match_not_bob = match_not_eol << 1, /* first is not start of buffer */ + match_not_eob = match_not_bob << 1, /* last is not end of buffer */ + match_not_bow = match_not_eob << 1, /* first is not start of word */ + match_not_eow = match_not_bow << 1, /* last is not end of word */ + match_not_dot_newline = match_not_eow << 1, /* \n is not matched by '.' */ + match_not_dot_null = match_not_dot_newline << 1, /* '\0' is not matched by '.' */ + match_prev_avail = match_not_dot_null << 1, /* *--first is a valid expression */ + match_init = match_prev_avail << 1, /* internal use */ + match_any = match_init << 1, /* don't care what we match */ + match_not_null = match_any << 1, /* string can't be null */ + match_continuous = match_not_null << 1, /* each grep match must continue from */ + /* uninterrupted from the previous one */ + match_partial = match_continuous << 1, /* find partial matches */ + + match_stop = match_partial << 1, /* stop after first match (grep) V3 only */ + match_not_initial_null = match_stop, /* don't match initial null, V4 only */ + match_all = match_stop << 1, /* must find the whole of input even if match_any is set */ + match_perl = match_all << 1, /* Use perl matching rules */ + match_posix = match_perl << 1, /* Use POSIX matching rules */ + match_nosubs = match_posix << 1, /* don't trap marked subs */ + match_extra = match_nosubs << 1, /* include full capture information for repeated captures */ + match_single_line = match_extra << 1, /* treat text as single line and ignore any \n's when matching ^ and $. */ + match_unused1 = match_single_line << 1, /* unused */ + match_unused2 = match_unused1 << 1, /* unused */ + match_unused3 = match_unused2 << 1, /* unused */ + match_max = match_unused3, + + format_perl = 0, /* perl style replacement */ + format_default = 0, /* ditto. */ + format_sed = match_max << 1, /* sed style replacement. */ + format_all = format_sed << 1, /* enable all extensions to syntax. */ + format_no_copy = format_all << 1, /* don't copy non-matching segments. */ + format_first_only = format_no_copy << 1, /* Only replace first occurrence. */ + format_is_if = format_first_only << 1, /* internal use only. */ + format_literal = format_is_if << 1, /* treat string as a literal */ + + match_not_any = match_not_bol | match_not_eol | match_not_bob + | match_not_eob | match_not_bow | match_not_eow | match_not_dot_newline + | match_not_dot_null | match_prev_avail | match_init | match_not_null + | match_continuous | match_partial | match_stop | match_not_initial_null + | match_stop | match_all | match_perl | match_posix | match_nosubs + | match_extra | match_single_line | match_unused1 | match_unused2 + | match_unused3 | match_max | format_perl | format_default | format_sed + | format_all | format_no_copy | format_first_only | format_is_if + | format_literal + + +} match_flags; + +typedef match_flags match_flag_type; + +#ifdef __cplusplus +inline match_flags operator&(match_flags m1, match_flags m2) +{ return static_cast(static_cast(m1) & static_cast(m2)); } +inline match_flags operator|(match_flags m1, match_flags m2) +{ return static_cast(static_cast(m1) | static_cast(m2)); } +inline match_flags operator^(match_flags m1, match_flags m2) +{ return static_cast(static_cast(m1) ^ static_cast(m2)); } +inline match_flags operator~(match_flags m1) +{ return static_cast(~static_cast(m1)); } +inline match_flags& operator&=(match_flags& m1, match_flags m2) +{ m1 = m1&m2; return m1; } +inline match_flags& operator|=(match_flags& m1, match_flags m2) +{ m1 = m1|m2; return m1; } +inline match_flags& operator^=(match_flags& m1, match_flags m2) +{ m1 = m1^m2; return m1; } +#endif + +#ifdef __cplusplus +} /* namespace regex_constants */ +/* + * import names into boost for backwards compatibility: + */ +using regex_constants::match_flag_type; +using regex_constants::match_default; +using regex_constants::match_not_bol; +using regex_constants::match_not_eol; +using regex_constants::match_not_bob; +using regex_constants::match_not_eob; +using regex_constants::match_not_bow; +using regex_constants::match_not_eow; +using regex_constants::match_not_dot_newline; +using regex_constants::match_not_dot_null; +using regex_constants::match_prev_avail; +/* using regex_constants::match_init; */ +using regex_constants::match_any; +using regex_constants::match_not_null; +using regex_constants::match_continuous; +using regex_constants::match_partial; +/*using regex_constants::match_stop; */ +using regex_constants::match_all; +using regex_constants::match_perl; +using regex_constants::match_posix; +using regex_constants::match_nosubs; +using regex_constants::match_extra; +using regex_constants::match_single_line; +/*using regex_constants::match_max; */ +using regex_constants::format_all; +using regex_constants::format_sed; +using regex_constants::format_perl; +using regex_constants::format_default; +using regex_constants::format_no_copy; +using regex_constants::format_first_only; +/*using regex_constants::format_is_if;*/ + +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif + + +} /* namespace boost */ +#endif /* __cplusplus */ +#endif /* include guard */ + diff --git a/scintilla/include/boost/regex/v5/match_results.hpp b/scintilla/include/boost/regex/v5/match_results.hpp new file mode 100644 index 0000000000..3f9cd0129f --- /dev/null +++ b/scintilla/include/boost/regex/v5/match_results.hpp @@ -0,0 +1,667 @@ +/* + * + * Copyright (c) 1998-2009 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE match_results.cpp + * VERSION see + * DESCRIPTION: Declares template class match_results. + */ + +#ifndef BOOST_REGEX_V5_MATCH_RESULTS_HPP +#define BOOST_REGEX_V5_MATCH_RESULTS_HPP + +namespace boost{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable : 4251 4459) +#if BOOST_REGEX_MSVC < 1700 +# pragma warning(disable : 4231) +#endif +# if BOOST_REGEX_MSVC < 1600 +# pragma warning(disable : 4660) +# endif +#endif + +namespace BOOST_REGEX_DETAIL_NS{ + +class named_subexpressions; + +} + +template +class match_results +{ +private: + typedef std::vector, Allocator> vector_type; +public: + typedef sub_match value_type; + typedef typename std::allocator_traits::value_type const & const_reference; + typedef const_reference reference; + typedef typename vector_type::const_iterator const_iterator; + typedef const_iterator iterator; + typedef typename std::iterator_traits< + BidiIterator>::difference_type difference_type; + typedef typename std::allocator_traits::size_type size_type; + typedef Allocator allocator_type; + typedef typename std::iterator_traits< + BidiIterator>::value_type char_type; + typedef std::basic_string string_type; + typedef BOOST_REGEX_DETAIL_NS::named_subexpressions named_sub_type; + + // construct/copy/destroy: + explicit match_results(const Allocator& a = Allocator()) + : m_subs(a), m_base(), m_null(), m_last_closed_paren(0), m_is_singular(true) {} + // + // IMPORTANT: in the code below, the crazy looking checks around m_is_singular are + // all required because it is illegal to copy a singular iterator. + // See https://svn.boost.org/trac/boost/ticket/3632. + // + match_results(const match_results& m) + : m_subs(m.m_subs), m_base(), m_null(), m_named_subs(m.m_named_subs), m_last_closed_paren(m.m_last_closed_paren), m_is_singular(m.m_is_singular) + { + if(!m_is_singular) + { + m_base = m.m_base; + m_null = m.m_null; + } + } + match_results& operator=(const match_results& m) + { + m_subs = m.m_subs; + m_named_subs = m.m_named_subs; + m_last_closed_paren = m.m_last_closed_paren; + m_is_singular = m.m_is_singular; + if(!m_is_singular) + { + m_base = m.m_base; + m_null = m.m_null; + } + return *this; + } + ~match_results(){} + + // size: + size_type size() const + { return empty() ? 0 : m_subs.size() - 2; } + size_type max_size() const + { return m_subs.max_size(); } + bool empty() const + { return m_subs.size() < 2; } + // element access: + difference_type length(int sub = 0) const + { + if(m_is_singular) + raise_logic_error(); + sub += 2; + if((sub < (int)m_subs.size()) && (sub > 0)) + return m_subs[sub].length(); + return 0; + } + difference_type length(const char_type* sub) const + { + if(m_is_singular) + raise_logic_error(); + const char_type* sub_end = sub; + while(*sub_end) ++sub_end; + return length(named_subexpression_index(sub, sub_end)); + } + template + difference_type length(const charT* sub) const + { + if(m_is_singular) + raise_logic_error(); + const charT* sub_end = sub; + while(*sub_end) ++sub_end; + return length(named_subexpression_index(sub, sub_end)); + } + template + difference_type length(const std::basic_string& sub) const + { + return length(sub.c_str()); + } + difference_type position(size_type sub = 0) const + { + if(m_is_singular) + raise_logic_error(); + sub += 2; + if(sub < m_subs.size()) + { + const sub_match& s = m_subs[sub]; + if(s.matched || (sub == 2)) + { + return std::distance((BidiIterator)(m_base), (BidiIterator)(s.first)); + } + } + return ~static_cast(0); + } + difference_type position(const char_type* sub) const + { + const char_type* sub_end = sub; + while(*sub_end) ++sub_end; + return position(named_subexpression_index(sub, sub_end)); + } + template + difference_type position(const charT* sub) const + { + const charT* sub_end = sub; + while(*sub_end) ++sub_end; + return position(named_subexpression_index(sub, sub_end)); + } + template + difference_type position(const std::basic_string& sub) const + { + return position(sub.c_str()); + } + string_type str(int sub = 0) const + { + if(m_is_singular) + raise_logic_error(); + sub += 2; + string_type result; + if(sub < (int)m_subs.size() && (sub > 0)) + { + const sub_match& s = m_subs[sub]; + if(s.matched) + { + result = s.str(); + } + } + return result; + } + string_type str(const char_type* sub) const + { + return (*this)[sub].str(); + } + template + string_type str(const std::basic_string& sub) const + { + return (*this)[sub].str(); + } + template + string_type str(const charT* sub) const + { + return (*this)[sub].str(); + } + template + string_type str(const std::basic_string& sub) const + { + return (*this)[sub].str(); + } + const_reference operator[](int sub) const + { + if(m_is_singular && m_subs.empty()) + raise_logic_error(); + sub += 2; + if(sub < (int)m_subs.size() && (sub >= 0)) + { + return m_subs[sub]; + } + return m_null; + } + // + // Named sub-expressions: + // + const_reference named_subexpression(const char_type* i, const char_type* j) const + { + // + // Scan for the leftmost *matched* subexpression with the specified named: + // + if(m_is_singular) + raise_logic_error(); + BOOST_REGEX_DETAIL_NS::named_subexpressions::range_type r = m_named_subs->equal_range(i, j); + while((r.first != r.second) && ((*this)[r.first->index].matched == false)) + ++r.first; + return r.first != r.second ? (*this)[r.first->index] : m_null; + } + template + const_reference named_subexpression(const charT* i, const charT* j) const + { + static_assert(sizeof(charT) <= sizeof(char_type), "Failed internal logic"); + if(i == j) + return m_null; + std::vector s; + while(i != j) + s.insert(s.end(), *i++); + return named_subexpression(&*s.begin(), &*s.begin() + s.size()); + } + int named_subexpression_index(const char_type* i, const char_type* j) const + { + // + // Scan for the leftmost *matched* subexpression with the specified named. + // If none found then return the leftmost expression with that name, + // otherwise an invalid index: + // + if(m_is_singular) + raise_logic_error(); + BOOST_REGEX_DETAIL_NS::named_subexpressions::range_type s, r; + s = r = m_named_subs->equal_range(i, j); + while((r.first != r.second) && ((*this)[r.first->index].matched == false)) + ++r.first; + if(r.first == r.second) + r = s; + return r.first != r.second ? r.first->index : -20; + } + template + int named_subexpression_index(const charT* i, const charT* j) const + { + static_assert(sizeof(charT) <= sizeof(char_type), "Failed internal logic"); + if(i == j) + return -20; + std::vector s; + while(i != j) + s.insert(s.end(), *i++); + return named_subexpression_index(&*s.begin(), &*s.begin() + s.size()); + } + template + const_reference operator[](const std::basic_string& s) const + { + return named_subexpression(s.c_str(), s.c_str() + s.size()); + } + const_reference operator[](const char_type* p) const + { + const char_type* e = p; + while(*e) ++e; + return named_subexpression(p, e); + } + + template + const_reference operator[](const charT* p) const + { + static_assert(sizeof(charT) <= sizeof(char_type), "Failed internal logic"); + if(*p == 0) + return m_null; + std::vector s; + while(*p) + s.insert(s.end(), *p++); + return named_subexpression(&*s.begin(), &*s.begin() + s.size()); + } + template + const_reference operator[](const std::basic_string& ns) const + { + static_assert(sizeof(charT) <= sizeof(char_type), "Failed internal logic"); + if(ns.empty()) + return m_null; + std::vector s; + for(unsigned i = 0; i < ns.size(); ++i) + s.insert(s.end(), ns[i]); + return named_subexpression(&*s.begin(), &*s.begin() + s.size()); + } + + const_reference prefix() const + { + if(m_is_singular) + raise_logic_error(); + return (*this)[-1]; + } + + const_reference suffix() const + { + if(m_is_singular) + raise_logic_error(); + return (*this)[-2]; + } + const_iterator begin() const + { + return (m_subs.size() > 2) ? (m_subs.begin() + 2) : m_subs.end(); + } + const_iterator end() const + { + return m_subs.end(); + } + // format: + template + OutputIterator format(OutputIterator out, + Functor fmt, + match_flag_type flags = format_default) const + { + if(m_is_singular) + raise_logic_error(); + typedef typename BOOST_REGEX_DETAIL_NS::compute_functor_type, OutputIterator>::type F; + F func(fmt); + return func(*this, out, flags); + } + template + string_type format(Functor fmt, match_flag_type flags = format_default) const + { + if(m_is_singular) + raise_logic_error(); + std::basic_string result; + BOOST_REGEX_DETAIL_NS::string_out_iterator > i(result); + + typedef typename BOOST_REGEX_DETAIL_NS::compute_functor_type, BOOST_REGEX_DETAIL_NS::string_out_iterator > >::type F; + F func(fmt); + + func(*this, i, flags); + return result; + } + // format with locale: + template + OutputIterator format(OutputIterator out, + Functor fmt, + match_flag_type flags, + const RegexT& re) const + { + if(m_is_singular) + raise_logic_error(); + typedef ::boost::regex_traits_wrapper traits_type; + typedef typename BOOST_REGEX_DETAIL_NS::compute_functor_type, OutputIterator, traits_type>::type F; + F func(fmt); + return func(*this, out, flags, re.get_traits()); + } + template + string_type format(Functor fmt, + match_flag_type flags, + const RegexT& re) const + { + if(m_is_singular) + raise_logic_error(); + typedef ::boost::regex_traits_wrapper traits_type; + std::basic_string result; + BOOST_REGEX_DETAIL_NS::string_out_iterator > i(result); + + typedef typename BOOST_REGEX_DETAIL_NS::compute_functor_type, BOOST_REGEX_DETAIL_NS::string_out_iterator >, traits_type >::type F; + F func(fmt); + + func(*this, i, flags, re.get_traits()); + return result; + } + + const_reference get_last_closed_paren()const + { + if(m_is_singular) + raise_logic_error(); + return m_last_closed_paren == 0 ? m_null : (*this)[m_last_closed_paren]; + } + + allocator_type get_allocator() const + { + return m_subs.get_allocator(); + } + void swap(match_results& that) + { + std::swap(m_subs, that.m_subs); + std::swap(m_named_subs, that.m_named_subs); + std::swap(m_last_closed_paren, that.m_last_closed_paren); + if(m_is_singular) + { + if(!that.m_is_singular) + { + m_base = that.m_base; + m_null = that.m_null; + } + } + else if(that.m_is_singular) + { + that.m_base = m_base; + that.m_null = m_null; + } + else + { + std::swap(m_base, that.m_base); + std::swap(m_null, that.m_null); + } + std::swap(m_is_singular, that.m_is_singular); + } + bool operator==(const match_results& that)const + { + if(m_is_singular) + { + return that.m_is_singular; + } + else if(that.m_is_singular) + { + return false; + } + return (m_subs == that.m_subs) && (m_base == that.m_base) && (m_last_closed_paren == that.m_last_closed_paren); + } + bool operator!=(const match_results& that)const + { return !(*this == that); } + +#ifdef BOOST_REGEX_MATCH_EXTRA + typedef typename sub_match::capture_sequence_type capture_sequence_type; + + const capture_sequence_type& captures(int i)const + { + if(m_is_singular) + raise_logic_error(); + return (*this)[i].captures(); + } +#endif + + // + // private access functions: + void set_second(BidiIterator i) + { + BOOST_REGEX_ASSERT(m_subs.size() > 2); + m_subs[2].second = i; + m_subs[2].matched = true; + m_subs[0].first = i; + m_subs[0].matched = (m_subs[0].first != m_subs[0].second); + m_null.first = i; + m_null.second = i; + m_null.matched = false; + m_is_singular = false; + } + + void set_second(BidiIterator i, size_type pos, bool m = true, bool escape_k = false) + { + if(pos) + m_last_closed_paren = static_cast(pos); + pos += 2; + BOOST_REGEX_ASSERT(m_subs.size() > pos); + m_subs[pos].second = i; + m_subs[pos].matched = m; + if((pos == 2) && !escape_k) + { + m_subs[0].first = i; + m_subs[0].matched = (m_subs[0].first != m_subs[0].second); + m_null.first = i; + m_null.second = i; + m_null.matched = false; + m_is_singular = false; + } + } + void set_size(size_type n, BidiIterator i, BidiIterator j) + { + value_type v(j); + size_type len = m_subs.size(); + if(len > n + 2) + { + m_subs.erase(m_subs.begin()+n+2, m_subs.end()); + std::fill(m_subs.begin(), m_subs.end(), v); + } + else + { + std::fill(m_subs.begin(), m_subs.end(), v); + if(n+2 != len) + m_subs.insert(m_subs.end(), n+2-len, v); + } + m_subs[1].first = i; + m_last_closed_paren = 0; + } + void set_base(BidiIterator pos) + { + m_base = pos; + } + BidiIterator base()const + { + return m_base; + } + void set_first(BidiIterator i) + { + BOOST_REGEX_ASSERT(m_subs.size() > 2); + // set up prefix: + m_subs[1].second = i; + m_subs[1].matched = (m_subs[1].first != i); + // set up $0: + m_subs[2].first = i; + // zero out everything else: + for(size_type n = 3; n < m_subs.size(); ++n) + { + m_subs[n].first = m_subs[n].second = m_subs[0].second; + m_subs[n].matched = false; + } + } + void set_first(BidiIterator i, size_type pos, bool escape_k = false) + { + BOOST_REGEX_ASSERT(pos+2 < m_subs.size()); + if(pos || escape_k) + { + m_subs[pos+2].first = i; + if(escape_k) + { + m_subs[1].second = i; + m_subs[1].matched = (m_subs[1].first != m_subs[1].second); + } + } + else + set_first(i); + } + void maybe_assign(const match_results& m); + + void set_named_subs(std::shared_ptr subs) + { + m_named_subs = subs; + } + +private: + // + // Error handler called when an uninitialized match_results is accessed: + // + static void raise_logic_error() + { + std::logic_error e("Attempt to access an uninitialized boost::match_results<> class."); +#ifndef BOOST_REGEX_STANDALONE + boost::throw_exception(e); +#else + throw e; +#endif + } + + + vector_type m_subs; // subexpressions + BidiIterator m_base; // where the search started from + sub_match m_null; // a null match + std::shared_ptr m_named_subs; // Shared copy of named subs in the regex object + int m_last_closed_paren; // Last ) to be seen - used for formatting + bool m_is_singular; // True if our stored iterators are singular +}; + +template +void match_results::maybe_assign(const match_results& m) +{ + if(m_is_singular) + { + *this = m; + return; + } + const_iterator p1, p2; + p1 = begin(); + p2 = m.begin(); + // + // Distances are measured from the start of *this* match, unless this isn't + // a valid match in which case we use the start of the whole sequence. Note that + // no subsequent match-candidate can ever be to the left of the first match found. + // This ensures that when we are using bidirectional iterators, that distances + // measured are as short as possible, and therefore as efficient as possible + // to compute. Finally note that we don't use the "matched" data member to test + // whether a sub-expression is a valid match, because partial matches set this + // to false for sub-expression 0. + // + BidiIterator l_end = this->suffix().second; + BidiIterator l_base = (p1->first == l_end) ? this->prefix().first : (*this)[0].first; + difference_type len1 = 0; + difference_type len2 = 0; + difference_type base1 = 0; + difference_type base2 = 0; + std::size_t i; + for(i = 0; i < size(); ++i, ++p1, ++p2) + { + // + // Leftmost takes priority over longest; handle special cases + // where distances need not be computed first (an optimisation + // for bidirectional iterators: ensure that we don't accidently + // compute the length of the whole sequence, as this can be really + // expensive). + // + if(p1->first == l_end) + { + if(p2->first != l_end) + { + // p2 must be better than p1, and no need to calculate + // actual distances: + base1 = 1; + base2 = 0; + break; + } + else + { + // *p1 and *p2 are either unmatched or match end-of sequence, + // either way no need to calculate distances: + if((p1->matched == false) && (p2->matched == true)) + break; + if((p1->matched == true) && (p2->matched == false)) + return; + continue; + } + } + else if(p2->first == l_end) + { + // p1 better than p2, and no need to calculate distances: + return; + } + base1 = std::distance(l_base, p1->first); + base2 = std::distance(l_base, p2->first); + BOOST_REGEX_ASSERT(base1 >= 0); + BOOST_REGEX_ASSERT(base2 >= 0); + if(base1 < base2) return; + if(base2 < base1) break; + + len1 = std::distance((BidiIterator)p1->first, (BidiIterator)p1->second); + len2 = std::distance((BidiIterator)p2->first, (BidiIterator)p2->second); + BOOST_REGEX_ASSERT(len1 >= 0); + BOOST_REGEX_ASSERT(len2 >= 0); + if((len1 != len2) || ((p1->matched == false) && (p2->matched == true))) + break; + if((p1->matched == true) && (p2->matched == false)) + return; + } + if(i == size()) + return; + if(base2 < base1) + *this = m; + else if((len2 > len1) || ((p1->matched == false) && (p2->matched == true)) ) + *this = m; +} + +template +void swap(match_results& a, match_results& b) +{ + a.swap(b); +} + +template +std::basic_ostream& + operator << (std::basic_ostream& os, + const match_results& s) +{ + return (os << s.str()); +} + +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} // namespace boost + +#endif + + diff --git a/scintilla/include/boost/regex/v5/mem_block_cache.hpp b/scintilla/include/boost/regex/v5/mem_block_cache.hpp new file mode 100644 index 0000000000..3e1216d06a --- /dev/null +++ b/scintilla/include/boost/regex/v5/mem_block_cache.hpp @@ -0,0 +1,173 @@ + /* + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE mem_block_cache.hpp + * VERSION see + * DESCRIPTION: memory block cache used by the non-recursive matcher. + */ + +#ifndef BOOST_REGEX_V5_MEM_BLOCK_CACHE_HPP +#define BOOST_REGEX_V5_MEM_BLOCK_CACHE_HPP + +#include +#ifdef BOOST_HAS_THREADS +#include +#endif + +#ifndef BOOST_NO_CXX11_HDR_ATOMIC + #include + #if ATOMIC_POINTER_LOCK_FREE == 2 + #define BOOST_REGEX_MEM_BLOCK_CACHE_LOCK_FREE + #define BOOST_REGEX_ATOMIC_POINTER std::atomic + #endif +#endif + +namespace boost{ +namespace BOOST_REGEX_DETAIL_NS{ + +#if BOOST_REGEX_MAX_CACHE_BLOCKS != 0 +#ifdef BOOST_REGEX_MEM_BLOCK_CACHE_LOCK_FREE /* lock free implementation */ +struct mem_block_cache +{ + std::atomic cache[BOOST_REGEX_MAX_CACHE_BLOCKS]; + + ~mem_block_cache() + { + for (size_t i = 0;i < BOOST_REGEX_MAX_CACHE_BLOCKS; ++i) { + if (cache[i].load()) ::operator delete(cache[i].load()); + } + } + void* get() + { + for (size_t i = 0;i < BOOST_REGEX_MAX_CACHE_BLOCKS; ++i) { + void* p = cache[i].load(); + if (p != NULL) { + if (cache[i].compare_exchange_strong(p, NULL)) return p; + } + } + return ::operator new(BOOST_REGEX_BLOCKSIZE); + } + void put(void* ptr) + { + for (size_t i = 0;i < BOOST_REGEX_MAX_CACHE_BLOCKS; ++i) { + void* p = cache[i].load(); + if (p == NULL) { + if (cache[i].compare_exchange_strong(p, ptr)) return; + } + } + ::operator delete(ptr); + } + + static mem_block_cache& instance() + { + static mem_block_cache block_cache = { { {nullptr} } }; + return block_cache; + } +}; + + +#else /* lock-based implementation */ + + +struct mem_block_node +{ + mem_block_node* next; +}; + +struct mem_block_cache +{ + // this member has to be statically initialsed: + mem_block_node* next { nullptr }; + unsigned cached_blocks { 0 }; +#ifdef BOOST_HAS_THREADS + std::mutex mut; +#endif + + ~mem_block_cache() + { + while(next) + { + mem_block_node* old = next; + next = next->next; + ::operator delete(old); + } + } + void* get() + { +#ifdef BOOST_HAS_THREADS + std::lock_guard g(mut); +#endif + if(next) + { + mem_block_node* result = next; + next = next->next; + --cached_blocks; + return result; + } + return ::operator new(BOOST_REGEX_BLOCKSIZE); + } + void put(void* p) + { +#ifdef BOOST_HAS_THREADS + std::lock_guard g(mut); +#endif + if(cached_blocks >= BOOST_REGEX_MAX_CACHE_BLOCKS) + { + ::operator delete(p); + } + else + { + mem_block_node* old = static_cast(p); + old->next = next; + next = old; + ++cached_blocks; + } + } + static mem_block_cache& instance() + { + static mem_block_cache block_cache; + return block_cache; + } +}; +#endif +#endif + +#if BOOST_REGEX_MAX_CACHE_BLOCKS == 0 + +inline void* get_mem_block() +{ + return ::operator new(BOOST_REGEX_BLOCKSIZE); +} + +inline void put_mem_block(void* p) +{ + ::operator delete(p); +} + +#else + +inline void* get_mem_block() +{ + return mem_block_cache::instance().get(); +} + +inline void put_mem_block(void* p) +{ + mem_block_cache::instance().put(p); +} + +#endif +} +} // namespace boost + +#endif + diff --git a/scintilla/include/boost/regex/v5/object_cache.hpp b/scintilla/include/boost/regex/v5/object_cache.hpp new file mode 100644 index 0000000000..6ebef9b34e --- /dev/null +++ b/scintilla/include/boost/regex/v5/object_cache.hpp @@ -0,0 +1,160 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE object_cache.hpp + * VERSION see + * DESCRIPTION: Implements a generic object cache. + */ + +#ifndef BOOST_REGEX_OBJECT_CACHE_HPP +#define BOOST_REGEX_OBJECT_CACHE_HPP + +#include +#include +#include +#include +#include +#include +#ifdef BOOST_HAS_THREADS +#include +#endif + +namespace boost{ + +template +class object_cache +{ +public: + typedef std::pair< ::std::shared_ptr, Key const*> value_type; + typedef std::list list_type; + typedef typename list_type::iterator list_iterator; + typedef std::map map_type; + typedef typename map_type::iterator map_iterator; + typedef typename list_type::size_type size_type; + static std::shared_ptr get(const Key& k, size_type l_max_cache_size); + +private: + static std::shared_ptr do_get(const Key& k, size_type l_max_cache_size); + + struct data + { + list_type cont; + map_type index; + }; + + // Needed by compilers not implementing the resolution to DR45. For reference, + // see http://www.open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#45. + friend struct data; +}; + +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable: 4702) +#endif +template +std::shared_ptr object_cache::get(const Key& k, size_type l_max_cache_size) +{ +#ifdef BOOST_HAS_THREADS + static std::mutex mut; + std::lock_guard l(mut); + return do_get(k, l_max_cache_size); +#else + return do_get(k, l_max_cache_size); +#endif +} +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif + +template +std::shared_ptr object_cache::do_get(const Key& k, size_type l_max_cache_size) +{ + typedef typename object_cache::data object_data; + typedef typename map_type::size_type map_size_type; + static object_data s_data; + + // + // see if the object is already in the cache: + // + map_iterator mpos = s_data.index.find(k); + if(mpos != s_data.index.end()) + { + // + // Eureka! + // We have a cached item, bump it up the list and return it: + // + if(--(s_data.cont.end()) != mpos->second) + { + // splice out the item we want to move: + list_type temp; + temp.splice(temp.end(), s_data.cont, mpos->second); + // and now place it at the end of the list: + s_data.cont.splice(s_data.cont.end(), temp, temp.begin()); + BOOST_REGEX_ASSERT(*(s_data.cont.back().second) == k); + // update index with new position: + mpos->second = --(s_data.cont.end()); + BOOST_REGEX_ASSERT(&(mpos->first) == mpos->second->second); + BOOST_REGEX_ASSERT(&(mpos->first) == s_data.cont.back().second); + } + return s_data.cont.back().first; + } + // + // if we get here then the item is not in the cache, + // so create it: + // + std::shared_ptr result(new Object(k)); + // + // Add it to the list, and index it: + // + s_data.cont.push_back(value_type(result, static_cast(0))); + s_data.index.insert(std::make_pair(k, --(s_data.cont.end()))); + s_data.cont.back().second = &(s_data.index.find(k)->first); + map_size_type s = s_data.index.size(); + BOOST_REGEX_ASSERT(s_data.index[k]->first.get() == result.get()); + BOOST_REGEX_ASSERT(&(s_data.index.find(k)->first) == s_data.cont.back().second); + BOOST_REGEX_ASSERT(s_data.index.find(k)->first == k); + if(s > l_max_cache_size) + { + // + // We have too many items in the list, so we need to start + // popping them off the back of the list, but only if they're + // being held uniquely by us: + // + list_iterator pos = s_data.cont.begin(); + list_iterator last = s_data.cont.end(); + while((pos != last) && (s > l_max_cache_size)) + { + if(pos->first.use_count() == 1) + { + list_iterator condemmed(pos); + ++pos; + // now remove the items from our containers, + // then order has to be as follows: + BOOST_REGEX_ASSERT(s_data.index.find(*(condemmed->second)) != s_data.index.end()); + s_data.index.erase(*(condemmed->second)); + s_data.cont.erase(condemmed); + --s; + } + else + ++pos; + } + BOOST_REGEX_ASSERT(s_data.index[k]->first.get() == result.get()); + BOOST_REGEX_ASSERT(&(s_data.index.find(k)->first) == s_data.cont.back().second); + BOOST_REGEX_ASSERT(s_data.index.find(k)->first == k); + } + return result; +} + +} + +#endif diff --git a/scintilla/include/boost/regex/v5/pattern_except.hpp b/scintilla/include/boost/regex/v5/pattern_except.hpp new file mode 100644 index 0000000000..3fbdca0a22 --- /dev/null +++ b/scintilla/include/boost/regex/v5/pattern_except.hpp @@ -0,0 +1,106 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE pattern_except.hpp + * VERSION see + * DESCRIPTION: Declares pattern-matching exception classes. + */ + +#ifndef BOOST_RE_V5_PAT_EXCEPT_HPP +#define BOOST_RE_V5_PAT_EXCEPT_HPP + +#ifndef BOOST_REGEX_CONFIG_HPP +#include +#endif + +#include +#include +#include +#include + +namespace boost{ + +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable : 4275) +#if BOOST_REGEX_MSVC >= 1800 +#pragma warning(disable : 26812 4459) +#endif +#endif +class regex_error : public std::runtime_error +{ +public: + explicit regex_error(const std::string& s, regex_constants::error_type err = regex_constants::error_unknown, std::ptrdiff_t pos = 0) + : std::runtime_error(s) + , m_error_code(err) + , m_position(pos) + { + } + explicit regex_error(regex_constants::error_type err) + : std::runtime_error(::boost::BOOST_REGEX_DETAIL_NS::get_default_error_string(err)) + , m_error_code(err) + , m_position(0) + { + } + ~regex_error() noexcept override {} + regex_constants::error_type code()const + { return m_error_code; } + std::ptrdiff_t position()const + { return m_position; } + void raise()const + { +#ifndef BOOST_NO_EXCEPTIONS +#ifndef BOOST_REGEX_STANDALONE + ::boost::throw_exception(*this); +#else + throw* this; +#endif +#endif + } +private: + regex_constants::error_type m_error_code; + std::ptrdiff_t m_position; +}; + +typedef regex_error bad_pattern; +typedef regex_error bad_expression; + +namespace BOOST_REGEX_DETAIL_NS{ + +template +inline void raise_runtime_error(const E& ex) +{ +#ifndef BOOST_REGEX_STANDALONE + ::boost::throw_exception(ex); +#else + throw ex; +#endif +} + +template +void raise_error(const traits& t, regex_constants::error_type code) +{ + (void)t; // warning suppression + regex_error e(t.error_string(code), code, 0); + ::boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(e); +} + +} + +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif + +} // namespace boost + +#endif diff --git a/scintilla/include/boost/regex/v5/perl_matcher.hpp b/scintilla/include/boost/regex/v5/perl_matcher.hpp new file mode 100644 index 0000000000..deeffa589b --- /dev/null +++ b/scintilla/include/boost/regex/v5/perl_matcher.hpp @@ -0,0 +1,576 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + +#ifndef BOOST_REGEX_MATCHER_HPP +#define BOOST_REGEX_MATCHER_HPP + +#include + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#pragma warning(disable : 4251 4459) +#if BOOST_REGEX_MSVC < 1700 +# pragma warning(disable : 4231) +#endif +# if BOOST_REGEX_MSVC < 1600 +# pragma warning(disable : 4660) +# endif +#if BOOST_REGEX_MSVC < 1910 +#pragma warning(disable:4800) +#endif +#endif + +namespace boost{ +namespace BOOST_REGEX_DETAIL_NS{ + +// +// error checking API: +// +inline void verify_options(boost::regex_constants::syntax_option_type, match_flag_type mf) +{ + // + // can't mix match_extra with POSIX matching rules: + // + if ((mf & match_extra) && (mf & match_posix)) + { + std::logic_error msg("Usage Error: Can't mix regular expression captures with POSIX matching rules"); +#ifndef BOOST_REGEX_STANDALONE + throw_exception(msg); +#else + throw msg; +#endif + } +} +// +// function can_start: +// +template +inline bool can_start(charT c, const unsigned char* map, unsigned char mask) +{ + return ((c < static_cast(0)) ? true : ((c >= static_cast(1 << CHAR_BIT)) ? true : map[c] & mask)); +} +inline bool can_start(char c, const unsigned char* map, unsigned char mask) +{ + return map[(unsigned char)c] & mask; +} +inline bool can_start(signed char c, const unsigned char* map, unsigned char mask) +{ + return map[(unsigned char)c] & mask; +} +inline bool can_start(unsigned char c, const unsigned char* map, unsigned char mask) +{ + return map[c] & mask; +} +inline bool can_start(unsigned short c, const unsigned char* map, unsigned char mask) +{ + return ((c >= (1 << CHAR_BIT)) ? true : map[c] & mask); +} +#if defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T) +inline bool can_start(wchar_t c, const unsigned char* map, unsigned char mask) +{ + return ((c >= static_cast(1u << CHAR_BIT)) ? true : map[c] & mask); +} +#endif +#if !defined(BOOST_NO_INTRINSIC_WCHAR_T) +inline bool can_start(unsigned int c, const unsigned char* map, unsigned char mask) +{ + return (((c >= static_cast(1u << CHAR_BIT)) ? true : map[c] & mask)); +} +#endif + +template +inline int string_compare(const std::basic_string& s, const C* p) +{ + if(0 == *p) + { + if(s.empty() || ((s.size() == 1) && (s[0] == 0))) + return 0; + } + return s.compare(p); +} +template +inline int string_compare(const Seq& s, const C* p) +{ + std::size_t i = 0; + while((i < s.size()) && (p[i] == s[i])) + { + ++i; + } + return (i == s.size()) ? -(int)p[i] : (int)s[i] - (int)p[i]; +} +# define STR_COMP(s,p) string_compare(s,p) + +template +inline const charT* re_skip_past_null(const charT* p) +{ + while (*p != static_cast(0)) ++p; + return ++p; +} + +template +iterator re_is_set_member(iterator next, + iterator last, + const re_set_long* set_, + const regex_data& e, bool icase) +{ + const charT* p = reinterpret_cast(set_+1); + iterator ptr; + unsigned int i; + //bool icase = e.m_flags & regex_constants::icase; + + if(next == last) return next; + + typedef typename traits_type::string_type traits_string_type; + const ::boost::regex_traits_wrapper& traits_inst = *(e.m_ptraits); + + // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never + // referenced + (void)traits_inst; + + // try and match a single character, could be a multi-character + // collating element... + for(i = 0; i < set_->csingles; ++i) + { + ptr = next; + if(*p == static_cast(0)) + { + // treat null string as special case: + if(traits_inst.translate(*ptr, icase)) + { + ++p; + continue; + } + return set_->isnot ? next : (ptr == next) ? ++next : ptr; + } + else + { + while(*p && (ptr != last)) + { + if(traits_inst.translate(*ptr, icase) != *p) + break; + ++p; + ++ptr; + } + + if(*p == static_cast(0)) // if null we've matched + return set_->isnot ? next : (ptr == next) ? ++next : ptr; + + p = re_skip_past_null(p); // skip null + } + } + + charT col = traits_inst.translate(*next, icase); + + + if(set_->cranges || set_->cequivalents) + { + traits_string_type s1; + // + // try and match a range, NB only a single character can match + if(set_->cranges) + { + if((e.m_flags & regex_constants::collate) == 0) + s1.assign(1, col); + else + { + charT a[2] = { col, charT(0), }; + s1 = traits_inst.transform(a, a + 1); + } + for(i = 0; i < set_->cranges; ++i) + { + if(STR_COMP(s1, p) >= 0) + { + do{ ++p; }while(*p); + ++p; + if(STR_COMP(s1, p) <= 0) + return set_->isnot ? next : ++next; + } + else + { + // skip first string + do{ ++p; }while(*p); + ++p; + } + // skip second string + do{ ++p; }while(*p); + ++p; + } + } + // + // try and match an equivalence class, NB only a single character can match + if(set_->cequivalents) + { + charT a[2] = { col, charT(0), }; + s1 = traits_inst.transform_primary(a, a +1); + for(i = 0; i < set_->cequivalents; ++i) + { + if(STR_COMP(s1, p) == 0) + return set_->isnot ? next : ++next; + // skip string + do{ ++p; }while(*p); + ++p; + } + } + } + if(traits_inst.isctype(col, set_->cclasses) == true) + return set_->isnot ? next : ++next; + if((set_->cnclasses != 0) && (traits_inst.isctype(col, set_->cnclasses) == false)) + return set_->isnot ? next : ++next; + return set_->isnot ? ++next : next; +} + +template +class repeater_count +{ + repeater_count** stack; + repeater_count* next; + int state_id; + std::size_t count; // the number of iterations so far + BidiIterator start_pos; // where the last repeat started + + repeater_count* unwind_until(int n, repeater_count* p, int current_recursion_id) + { + while(p && (p->state_id != n)) + { + if(-2 - current_recursion_id == p->state_id) + return 0; + p = p->next; + if(p && (p->state_id < 0)) + { + p = unwind_until(p->state_id, p, current_recursion_id); + if(!p) + return p; + p = p->next; + } + } + return p; + } +public: + repeater_count(repeater_count** s) : stack(s), next(0), state_id(-1), count(0), start_pos() {} + + repeater_count(int i, repeater_count** s, BidiIterator start, int current_recursion_id) + : start_pos(start) + { + state_id = i; + stack = s; + next = *stack; + *stack = this; + if((state_id > next->state_id) && (next->state_id >= 0)) + count = 0; + else + { + repeater_count* p = next; + p = unwind_until(state_id, p, current_recursion_id); + if(p) + { + count = p->count; + start_pos = p->start_pos; + } + else + count = 0; + } + } + ~repeater_count() + { + if(next) + *stack = next; + } + std::size_t get_count() { return count; } + int get_id() { return state_id; } + std::size_t operator++() { return ++count; } + bool check_null_repeat(const BidiIterator& pos, std::size_t max) + { + // this is called when we are about to start a new repeat, + // if the last one was NULL move our count to max, + // otherwise save the current position. + bool result = (count == 0) ? false : (pos == start_pos); + if(result) + count = max; + else + start_pos = pos; + return result; + } +}; + +struct saved_state; + +enum saved_state_type +{ + saved_type_end = 0, + saved_type_paren = 1, + saved_type_recurse = 2, + saved_type_assertion = 3, + saved_state_alt = 4, + saved_state_repeater_count = 5, + saved_state_extra_block = 6, + saved_state_greedy_single_repeat = 7, + saved_state_rep_slow_dot = 8, + saved_state_rep_fast_dot = 9, + saved_state_rep_char = 10, + saved_state_rep_short_set = 11, + saved_state_rep_long_set = 12, + saved_state_non_greedy_long_repeat = 13, + saved_state_count = 14 +}; + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#if BOOST_REGEX_MSVC >= 1800 +#pragma warning(disable:26495) +#endif +#endif +template +struct recursion_info +{ + typedef typename Results::value_type value_type; + typedef typename value_type::iterator iterator; + int idx; + const re_syntax_base* preturn_address; + Results results; + repeater_count* repeater_stack; + iterator location_of_start; +}; +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + +template +class perl_matcher +{ +public: + typedef typename traits::char_type char_type; + typedef perl_matcher self_type; + typedef bool (self_type::*matcher_proc_type)(); + typedef std::size_t traits_size_type; + typedef typename is_byte::width_type width_type; + typedef typename std::iterator_traits::difference_type difference_type; + typedef match_results results_type; + + perl_matcher(BidiIterator first, BidiIterator end, + match_results& what, + const basic_regex& e, + match_flag_type f, + BidiIterator l_base) + : m_result(what), base(first), last(end), + position(first), backstop(l_base), re(e), traits_inst(e.get_traits()), + m_independent(false), next_count(&rep_obj), rep_obj(&next_count) + , m_recursions(0) + { + construct_init(e, f); + } + + bool match(); + bool find(); + + void setf(match_flag_type f) + { m_match_flags |= f; } + void unsetf(match_flag_type f) + { m_match_flags &= ~f; } + +private: + void construct_init(const basic_regex& e, match_flag_type f); + + bool find_imp(); + bool match_imp(); + void estimate_max_state_count(std::random_access_iterator_tag*); + void estimate_max_state_count(void*); + bool match_prefix(); + bool match_all_states(); + + // match procs, stored in s_match_vtable: + bool match_startmark(); + bool match_endmark(); + bool match_literal(); + bool match_start_line(); + bool match_end_line(); + bool match_wild(); + bool match_match(); + bool match_word_boundary(); + bool match_within_word(); + bool match_word_start(); + bool match_word_end(); + bool match_buffer_start(); + bool match_buffer_end(); + bool match_backref(); + bool match_long_set(); + bool match_set(); + bool match_jump(); + bool match_alt(); + bool match_rep(); + bool match_combining(); + bool match_soft_buffer_end(); + bool match_restart_continue(); + bool match_long_set_repeat(); + bool match_set_repeat(); + bool match_char_repeat(); + bool match_dot_repeat_fast(); + bool match_dot_repeat_slow(); + bool match_dot_repeat_dispatch() + { + return ::boost::is_random_access_iterator::value ? match_dot_repeat_fast() : match_dot_repeat_slow(); + } + bool match_backstep(); + bool match_assert_backref(); + bool match_toggle_case(); + bool match_recursion(); + bool match_fail(); + bool match_accept(); + bool match_commit(); + bool match_then(); + bool skip_until_paren(int index, bool match = true); + + // find procs stored in s_find_vtable: + bool find_restart_any(); + bool find_restart_word(); + bool find_restart_line(); + bool find_restart_buf(); + bool find_restart_lit(); + +private: + // final result structure to be filled in: + match_results& m_result; + // temporary result for POSIX matches: + std::unique_ptr > m_temp_match; + // pointer to actual result structure to fill in: + match_results* m_presult; + // start of sequence being searched: + BidiIterator base; + // end of sequence being searched: + BidiIterator last; + // current character being examined: + BidiIterator position; + // where to restart next search after failed match attempt: + BidiIterator restart; + // where the current search started from, acts as base for $` during grep: + BidiIterator search_base; + // how far we can go back when matching lookbehind: + BidiIterator backstop; + // the expression being examined: + const basic_regex& re; + // the expression's traits class: + const ::boost::regex_traits_wrapper& traits_inst; + // the next state in the machine being matched: + const re_syntax_base* pstate; + // matching flags in use: + match_flag_type m_match_flags; + // how many states we have examined so far: + std::ptrdiff_t state_count; + // max number of states to examine before giving up: + std::ptrdiff_t max_state_count; + // whether we should ignore case or not: + bool icase; + // set to true when (position == last), indicates that we may have a partial match: + bool m_has_partial_match; + // set to true whenever we get a match: + bool m_has_found_match; + // set to true whenever we're inside an independent sub-expression: + bool m_independent; + // the current repeat being examined: + repeater_count* next_count; + // the first repeat being examined (top of linked list): + repeater_count rep_obj; + // the mask to pass when matching word boundaries: + typename traits::char_class_type m_word_mask; + // the bitmask to use when determining whether a match_any matches a newline or not: + unsigned char match_any_mask; + // recursion information: + std::vector > recursion_stack; + // + // additional members for non-recursive version: + // + typedef bool (self_type::*unwind_proc_type)(bool); + + void extend_stack(); + bool unwind(bool); + bool unwind_end(bool); + bool unwind_paren(bool); + bool unwind_recursion_stopper(bool); + bool unwind_assertion(bool); + bool unwind_alt(bool); + bool unwind_repeater_counter(bool); + bool unwind_extra_block(bool); + bool unwind_greedy_single_repeat(bool); + bool unwind_slow_dot_repeat(bool); + bool unwind_fast_dot_repeat(bool); + bool unwind_char_repeat(bool); + bool unwind_short_set_repeat(bool); + bool unwind_long_set_repeat(bool); + bool unwind_non_greedy_repeat(bool); + bool unwind_recursion(bool); + bool unwind_recursion_pop(bool); + bool unwind_commit(bool); + bool unwind_then(bool); + bool unwind_case(bool); + void destroy_single_repeat(); + void push_matched_paren(int index, const sub_match& sub); + void push_recursion_stopper(); + void push_assertion(const re_syntax_base* ps, bool positive); + void push_alt(const re_syntax_base* ps); + void push_repeater_count(int i, repeater_count** s); + void push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int state_id); + void push_non_greedy_repeat(const re_syntax_base* ps); + void push_recursion(int idx, const re_syntax_base* p, results_type* presults, results_type* presults2); + void push_recursion_pop(); + void push_case_change(bool); + + // pointer to base of stack: + saved_state* m_stack_base; + // pointer to current stack position: + saved_state* m_backup_state; + // how many memory blocks have we used up?: + unsigned used_block_count; + // determines what value to return when unwinding from recursion, + // allows for mixed recursive/non-recursive algorithm: + bool m_recursive_result; + // We have unwound to a lookahead/lookbehind, used by COMMIT/PRUNE/SKIP: + bool m_unwound_lookahead; + // We have unwound to an alternative, used by THEN: + bool m_unwound_alt; + // We are unwinding a commit - used by independent subs to determine whether to stop there or carry on unwinding: + //bool m_unwind_commit; + // Recursion limit: + unsigned m_recursions; + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#if BOOST_REGEX_MSVC >= 1800 +#pragma warning(disable:26495) +#endif +#endif + // these operations aren't allowed, so are declared private, + // bodies are provided to keep explicit-instantiation requests happy: + perl_matcher& operator=(const perl_matcher&) + { + return *this; + } + perl_matcher(const perl_matcher& that) + : m_result(that.m_result), re(that.re), traits_inst(that.traits_inst), rep_obj(0) {} +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif +}; + +} // namespace BOOST_REGEX_DETAIL_NS + +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + +} // namespace boost + +// +// include the implementation of perl_matcher: +// +#include +// this one has to be last: +#include + +#endif diff --git a/scintilla/include/boost/regex/v5/perl_matcher_common.hpp b/scintilla/include/boost/regex/v5/perl_matcher_common.hpp new file mode 100644 index 0000000000..dcce9e6ac1 --- /dev/null +++ b/scintilla/include/boost/regex/v5/perl_matcher_common.hpp @@ -0,0 +1,921 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE perl_matcher_common.cpp + * VERSION see + * DESCRIPTION: Definitions of perl_matcher member functions that are + * common to both the recursive and non-recursive versions. + */ + +#ifndef BOOST_REGEX_V5_PERL_MATCHER_COMMON_HPP +#define BOOST_REGEX_V5_PERL_MATCHER_COMMON_HPP + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#pragma warning(disable:4459) +#if BOOST_REGEX_MSVC < 1910 +#pragma warning(disable:4800) +#endif +#endif + +namespace boost{ +namespace BOOST_REGEX_DETAIL_NS{ + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#pragma warning(disable:26812) +#endif + template +void perl_matcher::construct_init(const basic_regex& e, match_flag_type f) +{ + typedef typename std::iterator_traits::iterator_category category; + typedef typename basic_regex::flag_type expression_flag_type; + + if(e.empty()) + { + // precondition failure: e is not a valid regex. + std::invalid_argument ex("Invalid regular expression object"); +#ifndef BOOST_REGEX_STANDALONE + boost::throw_exception(ex); +#else + throw e; +#endif + } + pstate = 0; + m_match_flags = f; + estimate_max_state_count(static_cast(0)); + expression_flag_type re_f = re.flags(); + icase = re_f & regex_constants::icase; + if(!(m_match_flags & (match_perl|match_posix))) + { + if((re_f & (regbase::main_option_type|regbase::no_perl_ex)) == 0) + m_match_flags |= match_perl; + else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex)) + m_match_flags |= match_perl; + else if((re_f & (regbase::main_option_type|regbase::literal)) == (regbase::literal)) + m_match_flags |= match_perl; + else + m_match_flags |= match_posix; + } + if(m_match_flags & match_posix) + { + m_temp_match.reset(new match_results()); + m_presult = m_temp_match.get(); + } + else + m_presult = &m_result; + m_stack_base = 0; + m_backup_state = 0; + // find the value to use for matching word boundaries: + m_word_mask = re.get_data().m_word_mask; + // find bitmask to use for matching '.': + match_any_mask = static_cast((f & match_not_dot_newline) ? BOOST_REGEX_DETAIL_NS::test_not_newline : BOOST_REGEX_DETAIL_NS::test_newline); + // Disable match_any if requested in the state machine: + if(e.get_data().m_disable_match_any) + m_match_flags &= regex_constants::match_not_any; +} +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + +template +void perl_matcher::estimate_max_state_count(std::random_access_iterator_tag*) +{ + // + // How many states should we allow our machine to visit before giving up? + // This is a heuristic: it takes the greater of O(N^2) and O(NS^2) + // where N is the length of the string, and S is the number of states + // in the machine. It's tempting to up this to O(N^2S) or even O(N^2S^2) + // but these take unreasonably amounts of time to bale out in pathological + // cases. + // + // Calculate NS^2 first: + // + static const std::ptrdiff_t k = 100000; + std::ptrdiff_t dist = std::distance(base, last); + if(dist == 0) + dist = 1; + std::ptrdiff_t states = re.size(); + if(states == 0) + states = 1; + if ((std::numeric_limits::max)() / states < states) + { + max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits::max)() - 2); + return; + } + states *= states; + if((std::numeric_limits::max)() / dist < states) + { + max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits::max)() - 2); + return; + } + states *= dist; + if((std::numeric_limits::max)() - k < states) + { + max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits::max)() - 2); + return; + } + states += k; + + max_state_count = states; + + // + // Now calculate N^2: + // + states = dist; + if((std::numeric_limits::max)() / dist < states) + { + max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits::max)() - 2); + return; + } + states *= dist; + if((std::numeric_limits::max)() - k < states) + { + max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits::max)() - 2); + return; + } + states += k; + // + // N^2 can be a very large number indeed, to prevent things getting out + // of control, cap the max states: + // + if(states > BOOST_REGEX_MAX_STATE_COUNT) + states = BOOST_REGEX_MAX_STATE_COUNT; + // + // If (the possibly capped) N^2 is larger than our first estimate, + // use this instead: + // + if(states > max_state_count) + max_state_count = states; +} + +template +inline void perl_matcher::estimate_max_state_count(void*) +{ + // we don't know how long the sequence is: + max_state_count = BOOST_REGEX_MAX_STATE_COUNT; +} + +template +inline bool perl_matcher::match() +{ + return match_imp(); +} + +template +bool perl_matcher::match_imp() +{ + // initialise our stack if we are non-recursive: + save_state_init init(&m_stack_base, &m_backup_state); + used_block_count = BOOST_REGEX_MAX_BLOCKS; +#if !defined(BOOST_NO_EXCEPTIONS) + try{ +#endif + + // reset our state machine: + position = base; + search_base = base; + state_count = 0; + m_match_flags |= regex_constants::match_all; + m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast(1u + re.mark_count()), search_base, last); + m_presult->set_base(base); + m_presult->set_named_subs(this->re.get_named_subs()); + if(m_match_flags & match_posix) + m_result = *m_presult; + verify_options(re.flags(), m_match_flags); + if(0 == match_prefix()) + return false; + return (m_result[0].second == last) && (m_result[0].first == base); + +#if !defined(BOOST_NO_EXCEPTIONS) + } + catch(...) + { + // unwind all pushed states, apart from anything else this + // ensures that all the states are correctly destructed + // not just the memory freed. + while(unwind(true)){} + throw; + } +#endif +} + +template +inline bool perl_matcher::find() +{ + return find_imp(); +} + +template +bool perl_matcher::find_imp() +{ + static matcher_proc_type const s_find_vtable[7] = + { + &perl_matcher::find_restart_any, + &perl_matcher::find_restart_word, + &perl_matcher::find_restart_line, + &perl_matcher::find_restart_buf, + &perl_matcher::match_prefix, + &perl_matcher::find_restart_lit, + &perl_matcher::find_restart_lit, + }; + + // initialise our stack if we are non-recursive: + save_state_init init(&m_stack_base, &m_backup_state); + used_block_count = BOOST_REGEX_MAX_BLOCKS; +#if !defined(BOOST_NO_EXCEPTIONS) + try{ +#endif + + state_count = 0; + if((m_match_flags & regex_constants::match_init) == 0) + { + // reset our state machine: + search_base = position = base; + pstate = re.get_first_state(); + m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast(1u + re.mark_count()), base, last); + m_presult->set_base(base); + m_presult->set_named_subs(this->re.get_named_subs()); + m_match_flags |= regex_constants::match_init; + } + else + { + // start again: + search_base = position = m_result[0].second; + // If last match was null and match_not_null was not set then increment + // our start position, otherwise we go into an infinite loop: + if(((m_match_flags & match_not_null) == 0) && (m_result.length() == 0)) + { + if(position == last) + return false; + else + ++position; + } + // reset $` start: + m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast(1u + re.mark_count()), search_base, last); + //if((base != search_base) && (base == backstop)) + // m_match_flags |= match_prev_avail; + } + if(m_match_flags & match_posix) + { + m_result.set_size(static_cast(1u + re.mark_count()), base, last); + m_result.set_base(base); + } + + verify_options(re.flags(), m_match_flags); + // find out what kind of expression we have: + unsigned type = (m_match_flags & match_continuous) ? + static_cast(regbase::restart_continue) + : static_cast(re.get_restart_type()); + + // call the appropriate search routine: + matcher_proc_type proc = s_find_vtable[type]; + return (this->*proc)(); + +#if !defined(BOOST_NO_EXCEPTIONS) + } + catch(...) + { + // unwind all pushed states, apart from anything else this + // ensures that all the states are correctly destructed + // not just the memory freed. + while(unwind(true)){} + throw; + } +#endif +} + +template +bool perl_matcher::match_prefix() +{ + m_has_partial_match = false; + m_has_found_match = false; + pstate = re.get_first_state(); + m_presult->set_first(position); + restart = position; + match_all_states(); + if(!m_has_found_match && m_has_partial_match && (m_match_flags & match_partial)) + { + m_has_found_match = true; + m_presult->set_second(last, 0, false); + position = last; + if((m_match_flags & match_posix) == match_posix) + { + m_result.maybe_assign(*m_presult); + } + } +#ifdef BOOST_REGEX_MATCH_EXTRA + if(m_has_found_match && (match_extra & m_match_flags)) + { + // + // we have a match, reverse the capture information: + // + for(unsigned i = 0; i < m_presult->size(); ++i) + { + typename sub_match::capture_sequence_type & seq = ((*m_presult)[i]).get_captures(); + std::reverse(seq.begin(), seq.end()); + } + } +#endif + if(!m_has_found_match) + position = restart; // reset search postion + return m_has_found_match; +} + +template +bool perl_matcher::match_literal() +{ + unsigned int len = static_cast(pstate)->length; + const char_type* what = reinterpret_cast(static_cast(pstate) + 1); + // + // compare string with what we stored in + // our records: + for(unsigned int i = 0; i < len; ++i, ++position) + { + if((position == last) || (traits_inst.translate(*position, icase) != what[i])) + return false; + } + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_start_line() +{ + if(position == backstop) + { + if((m_match_flags & match_prev_avail) == 0) + { + if((m_match_flags & match_not_bol) == 0) + { + pstate = pstate->next.p; + return true; + } + return false; + } + } + else if(m_match_flags & match_single_line) + return false; + + // check the previous value character: + BidiIterator t(position); + --t; + if(position != last) + { + if(is_separator(*t) && !((*t == static_cast('\r')) && (*position == static_cast('\n'))) ) + { + pstate = pstate->next.p; + return true; + } + } + else if(is_separator(*t)) + { + pstate = pstate->next.p; + return true; + } + return false; +} + +template +bool perl_matcher::match_end_line() +{ + if(position != last) + { + if(m_match_flags & match_single_line) + return false; + // we're not yet at the end so *first is always valid: + if(is_separator(*position)) + { + if((position != backstop) || (m_match_flags & match_prev_avail)) + { + // check that we're not in the middle of \r\n sequence + BidiIterator t(position); + --t; + if((*t == static_cast('\r')) && (*position == static_cast('\n'))) + { + return false; + } + } + pstate = pstate->next.p; + return true; + } + } + else if((m_match_flags & match_not_eol) == 0) + { + pstate = pstate->next.p; + return true; + } + return false; +} + +template +bool perl_matcher::match_wild() +{ + if(position == last) + return false; + if(is_separator(*position) && ((match_any_mask & static_cast(pstate)->mask) == 0)) + return false; + if((*position == char_type(0)) && (m_match_flags & match_not_dot_null)) + return false; + pstate = pstate->next.p; + ++position; + return true; +} + +template +bool perl_matcher::match_word_boundary() +{ + bool b; // indcates whether next character is a word character + if(position != last) + { + // prev and this character must be opposites: + b = traits_inst.isctype(*position, m_word_mask); + } + else + { + if (m_match_flags & match_not_eow) + return false; + b = false; + } + if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) + { + if(m_match_flags & match_not_bow) + return false; + else + b ^= false; + } + else + { + --position; + b ^= traits_inst.isctype(*position, m_word_mask); + ++position; + } + if(b) + { + pstate = pstate->next.p; + return true; + } + return false; // no match if we get to here... +} + +template +bool perl_matcher::match_within_word() +{ + bool b = !match_word_boundary(); + if(b) + pstate = pstate->next.p; + return b; + /* + if(position == last) + return false; + // both prev and this character must be m_word_mask: + bool prev = traits_inst.isctype(*position, m_word_mask); + { + bool b; + if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) + return false; + else + { + --position; + b = traits_inst.isctype(*position, m_word_mask); + ++position; + } + if(b == prev) + { + pstate = pstate->next.p; + return true; + } + } + return false; + */ +} + +template +bool perl_matcher::match_word_start() +{ + if(position == last) + return false; // can't be starting a word if we're already at the end of input + if(!traits_inst.isctype(*position, m_word_mask)) + return false; // next character isn't a word character + if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) + { + if(m_match_flags & match_not_bow) + return false; // no previous input + } + else + { + // otherwise inside buffer: + BidiIterator t(position); + --t; + if(traits_inst.isctype(*t, m_word_mask)) + return false; // previous character not non-word + } + // OK we have a match: + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_word_end() +{ + if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) + return false; // start of buffer can't be end of word + BidiIterator t(position); + --t; + if(traits_inst.isctype(*t, m_word_mask) == false) + return false; // previous character wasn't a word character + + if(position == last) + { + if(m_match_flags & match_not_eow) + return false; // end of buffer but not end of word + } + else + { + // otherwise inside buffer: + if(traits_inst.isctype(*position, m_word_mask)) + return false; // next character is a word character + } + pstate = pstate->next.p; + return true; // if we fall through to here then we've succeeded +} + +template +bool perl_matcher::match_buffer_start() +{ + if((position != backstop) || (m_match_flags & match_not_bob)) + return false; + // OK match: + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_buffer_end() +{ + if((position != last) || (m_match_flags & match_not_eob)) + return false; + // OK match: + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_backref() +{ + // + // Compare with what we previously matched. + // Note that this succeeds if the backref did not partisipate + // in the match, this is in line with ECMAScript, but not Perl + // or PCRE. + // + int index = static_cast(pstate)->index; + if(index >= hash_value_mask) + { + named_subexpressions::range_type r = re.get_data().equal_range(index); + BOOST_REGEX_ASSERT(r.first != r.second); + do + { + index = r.first->index; + ++r.first; + }while((r.first != r.second) && ((*m_presult)[index].matched != true)); + } + + if((m_match_flags & match_perl) && !(*m_presult)[index].matched) + return false; + + BidiIterator i = (*m_presult)[index].first; + BidiIterator j = (*m_presult)[index].second; + while(i != j) + { + if((position == last) || (traits_inst.translate(*position, icase) != traits_inst.translate(*i, icase))) + return false; + ++i; + ++position; + } + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_long_set() +{ + typedef typename traits::char_class_type char_class_type; + // let the traits class do the work: + if(position == last) + return false; + BidiIterator t = re_is_set_member(position, last, static_cast*>(pstate), re.get_data(), icase); + if(t != position) + { + pstate = pstate->next.p; + position = t; + return true; + } + return false; +} + +template +bool perl_matcher::match_set() +{ + if(position == last) + return false; + if(static_cast(pstate)->_map[static_cast(traits_inst.translate(*position, icase))]) + { + pstate = pstate->next.p; + ++position; + return true; + } + return false; +} + +template +bool perl_matcher::match_jump() +{ + pstate = static_cast(pstate)->alt.p; + return true; +} + +template +bool perl_matcher::match_combining() +{ + if(position == last) + return false; + if(is_combining(traits_inst.translate(*position, icase))) + return false; + ++position; + while((position != last) && is_combining(traits_inst.translate(*position, icase))) + ++position; + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_soft_buffer_end() +{ + if(m_match_flags & match_not_eob) + return false; + BidiIterator p(position); + while((p != last) && is_separator(traits_inst.translate(*p, icase)))++p; + if(p != last) + return false; + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_restart_continue() +{ + if(position == search_base) + { + pstate = pstate->next.p; + return true; + } + return false; +} + +template +bool perl_matcher::match_backstep() +{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + if( ::boost::is_random_access_iterator::value) + { + std::ptrdiff_t maxlen = std::distance(backstop, position); + if(maxlen < static_cast(pstate)->index) + return false; + std::advance(position, -static_cast(pstate)->index); + } + else + { + int c = static_cast(pstate)->index; + while(c--) + { + if(position == backstop) + return false; + --position; + } + } + pstate = pstate->next.p; + return true; +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} + +template +inline bool perl_matcher::match_assert_backref() +{ + // return true if marked sub-expression N has been matched: + int index = static_cast(pstate)->index; + bool result = false; + if(index == 9999) + { + // Magic value for a (DEFINE) block: + return false; + } + else if(index > 0) + { + // Have we matched subexpression "index"? + // Check if index is a hash value: + if(index >= hash_value_mask) + { + named_subexpressions::range_type r = re.get_data().equal_range(index); + while(r.first != r.second) + { + if((*m_presult)[r.first->index].matched) + { + result = true; + break; + } + ++r.first; + } + } + else + { + result = (*m_presult)[index].matched; + } + pstate = pstate->next.p; + } + else + { + // Have we recursed into subexpression "index"? + // If index == 0 then check for any recursion at all, otherwise for recursion to -index-1. + int idx = -(index+1); + if(idx >= hash_value_mask) + { + named_subexpressions::range_type r = re.get_data().equal_range(idx); + int stack_index = recursion_stack.empty() ? -1 : recursion_stack.back().idx; + while(r.first != r.second) + { + result |= (stack_index == r.first->index); + if(result)break; + ++r.first; + } + } + else + { + result = !recursion_stack.empty() && ((recursion_stack.back().idx == idx) || (index == 0)); + } + pstate = pstate->next.p; + } + return result; +} + +template +bool perl_matcher::match_fail() +{ + // Just force a backtrack: + return false; +} + +template +bool perl_matcher::match_accept() +{ + if(!recursion_stack.empty()) + { + return skip_until_paren(recursion_stack.back().idx); + } + else + { + return skip_until_paren(INT_MAX); + } +} + +template +bool perl_matcher::find_restart_any() +{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + const unsigned char* _map = re.get_map(); + while(true) + { + // skip everything we can't match: + while((position != last) && !can_start(*position, _map, (unsigned char)mask_any) ) + ++position; + if(position == last) + { + // run out of characters, try a null match if possible: + if(re.can_be_null()) + return match_prefix(); + break; + } + // now try and obtain a match: + if(match_prefix()) + return true; + if(position == last) + return false; + ++position; + } + return false; +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::find_restart_word() +{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + // do search optimised for word starts: + const unsigned char* _map = re.get_map(); + if((m_match_flags & match_prev_avail) || (position != base)) + --position; + else if(match_prefix()) + return true; + do + { + while((position != last) && traits_inst.isctype(*position, m_word_mask)) + ++position; + while((position != last) && !traits_inst.isctype(*position, m_word_mask)) + ++position; + if(position == last) + break; + + if(can_start(*position, _map, (unsigned char)mask_any) ) + { + if(match_prefix()) + return true; + } + if(position == last) + break; + } while(true); + return false; +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::find_restart_line() +{ + // do search optimised for line starts: + const unsigned char* _map = re.get_map(); + if(match_prefix()) + return true; + while(position != last) + { + while((position != last) && !is_separator(*position)) + ++position; + if(position == last) + return false; + ++position; + if(position == last) + { + if(re.can_be_null() && match_prefix()) + return true; + return false; + } + + if( can_start(*position, _map, (unsigned char)mask_any) ) + { + if(match_prefix()) + return true; + } + if(position == last) + return false; + //++position; + } + return false; +} + +template +bool perl_matcher::find_restart_buf() +{ + if((position == base) && ((m_match_flags & match_not_bob) == 0)) + return match_prefix(); + return false; +} + +template +bool perl_matcher::find_restart_lit() +{ + return false; +} + +} // namespace BOOST_REGEX_DETAIL_NS + +} // namespace boost + +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + +#endif + diff --git a/scintilla/include/boost/regex/v5/perl_matcher_non_recursive.hpp b/scintilla/include/boost/regex/v5/perl_matcher_non_recursive.hpp new file mode 100644 index 0000000000..28d6c462fe --- /dev/null +++ b/scintilla/include/boost/regex/v5/perl_matcher_non_recursive.hpp @@ -0,0 +1,1874 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE perl_matcher_common.cpp + * VERSION see + * DESCRIPTION: Definitions of perl_matcher member functions that are + * specific to the non-recursive implementation. + */ + +#ifndef BOOST_REGEX_V5_PERL_MATCHER_NON_RECURSIVE_HPP +#define BOOST_REGEX_V5_PERL_MATCHER_NON_RECURSIVE_HPP + +#include + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +# pragma warning(disable: 4706 4459) +#if BOOST_REGEX_MSVC < 1910 +#pragma warning(disable:4800) +#endif +#endif + +namespace boost{ +namespace BOOST_REGEX_DETAIL_NS{ + +template +inline void inplace_destroy(T* p) +{ + (void)p; // warning suppression + p->~T(); +} + +struct saved_state +{ + union{ + unsigned int state_id; + // this padding ensures correct alignment on 64-bit platforms: + std::size_t padding1; + std::ptrdiff_t padding2; + void* padding3; + }; + saved_state(unsigned i) : state_id(i) {} +}; + +template +struct saved_matched_paren : public saved_state +{ + int index; + sub_match sub; + saved_matched_paren(int i, const sub_match& s) : saved_state(1), index(i), sub(s){} +}; + +template +struct saved_position : public saved_state +{ + const re_syntax_base* pstate; + BidiIterator position; + saved_position(const re_syntax_base* ps, BidiIterator pos, int i) : saved_state(i), pstate(ps), position(pos){} +}; + +template +struct saved_assertion : public saved_position +{ + bool positive; + saved_assertion(bool p, const re_syntax_base* ps, BidiIterator pos) + : saved_position(ps, pos, saved_type_assertion), positive(p){} +}; + +template +struct saved_repeater : public saved_state +{ + repeater_count count; + saved_repeater(int i, repeater_count** s, BidiIterator start, int current_recursion_id) + : saved_state(saved_state_repeater_count), count(i, s, start, current_recursion_id){} +}; + +struct saved_extra_block : public saved_state +{ + saved_state *base, *end; + saved_extra_block(saved_state* b, saved_state* e) + : saved_state(saved_state_extra_block), base(b), end(e) {} +}; + +struct save_state_init +{ + saved_state** stack; + save_state_init(saved_state** base, saved_state** end) + : stack(base) + { + *base = static_cast(get_mem_block()); + *end = reinterpret_cast(reinterpret_cast(*base)+BOOST_REGEX_BLOCKSIZE); + --(*end); + (void) new (*end)saved_state(0); + BOOST_REGEX_ASSERT(*end > *base); + } + ~save_state_init() + { + put_mem_block(*stack); + *stack = 0; + } +}; + +template +struct saved_single_repeat : public saved_state +{ + std::size_t count; + const re_repeat* rep; + BidiIterator last_position; + saved_single_repeat(std::size_t c, const re_repeat* r, BidiIterator lp, int arg_id) + : saved_state(arg_id), count(c), rep(r), last_position(lp){} +}; + +template +struct saved_recursion : public saved_state +{ + saved_recursion(int idx, const re_syntax_base* p, Results* pr, Results* pr2) + : saved_state(14), recursion_id(idx), preturn_address(p), internal_results(*pr), prior_results(*pr2) {} + int recursion_id; + const re_syntax_base* preturn_address; + Results internal_results, prior_results; +}; + +struct saved_change_case : public saved_state +{ + bool icase; + saved_change_case(bool c) : saved_state(18), icase(c) {} +}; + +struct incrementer +{ + incrementer(unsigned* pu) : m_pu(pu) { ++*m_pu; } + ~incrementer() { --*m_pu; } + bool operator > (unsigned i) { return *m_pu > i; } +private: + unsigned* m_pu; +}; + +template +bool perl_matcher::match_all_states() +{ + static matcher_proc_type const s_match_vtable[34] = + { + (&perl_matcher::match_startmark), + &perl_matcher::match_endmark, + &perl_matcher::match_literal, + &perl_matcher::match_start_line, + &perl_matcher::match_end_line, + &perl_matcher::match_wild, + &perl_matcher::match_match, + &perl_matcher::match_word_boundary, + &perl_matcher::match_within_word, + &perl_matcher::match_word_start, + &perl_matcher::match_word_end, + &perl_matcher::match_buffer_start, + &perl_matcher::match_buffer_end, + &perl_matcher::match_backref, + &perl_matcher::match_long_set, + &perl_matcher::match_set, + &perl_matcher::match_jump, + &perl_matcher::match_alt, + &perl_matcher::match_rep, + &perl_matcher::match_combining, + &perl_matcher::match_soft_buffer_end, + &perl_matcher::match_restart_continue, + // Although this next line *should* be evaluated at compile time, in practice + // some compilers (VC++) emit run-time initialisation which breaks thread + // safety, so use a dispatch function instead: + //(::boost::is_random_access_iterator::value ? &perl_matcher::match_dot_repeat_fast : &perl_matcher::match_dot_repeat_slow), + &perl_matcher::match_dot_repeat_dispatch, + &perl_matcher::match_char_repeat, + &perl_matcher::match_set_repeat, + &perl_matcher::match_long_set_repeat, + &perl_matcher::match_backstep, + &perl_matcher::match_assert_backref, + &perl_matcher::match_toggle_case, + &perl_matcher::match_recursion, + &perl_matcher::match_fail, + &perl_matcher::match_accept, + &perl_matcher::match_commit, + &perl_matcher::match_then, + }; + incrementer inc(&m_recursions); + if(inc > 80) + raise_error(traits_inst, regex_constants::error_complexity); + push_recursion_stopper(); + do{ + while(pstate) + { + matcher_proc_type proc = s_match_vtable[pstate->type]; + ++state_count; + if(!(this->*proc)()) + { + if(state_count > max_state_count) + raise_error(traits_inst, regex_constants::error_complexity); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + bool successful_unwind = unwind(false); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(!successful_unwind) + return m_recursive_result; + } + } + }while(unwind(true)); + return m_recursive_result; +} + +template +void perl_matcher::extend_stack() +{ + if(used_block_count) + { + --used_block_count; + saved_state* stack_base; + saved_state* backup_state; + stack_base = static_cast(get_mem_block()); + backup_state = reinterpret_cast(reinterpret_cast(stack_base)+BOOST_REGEX_BLOCKSIZE); + saved_extra_block* block = static_cast(backup_state); + --block; + (void) new (block) saved_extra_block(m_stack_base, m_backup_state); + m_stack_base = stack_base; + m_backup_state = block; + } + else + raise_error(traits_inst, regex_constants::error_stack); +} + +template +inline void perl_matcher::push_matched_paren(int index, const sub_match& sub) +{ + //BOOST_REGEX_ASSERT(index); + saved_matched_paren* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_matched_paren(index, sub); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_case_change(bool c) +{ + //BOOST_REGEX_ASSERT(index); + saved_change_case* pmp = static_cast(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast(m_backup_state); + --pmp; + } + (void) new (pmp)saved_change_case(c); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_recursion_stopper() +{ + saved_state* pmp = m_backup_state; + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = m_backup_state; + --pmp; + } + (void) new (pmp)saved_state(saved_type_recurse); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_assertion(const re_syntax_base* ps, bool positive) +{ + saved_assertion* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_assertion(positive, ps, position); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_alt(const re_syntax_base* ps) +{ + saved_position* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_position(ps, position, saved_state_alt); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_non_greedy_repeat(const re_syntax_base* ps) +{ + saved_position* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_position(ps, position, saved_state_non_greedy_long_repeat); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_repeater_count(int i, repeater_count** s) +{ + saved_repeater* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_repeater(i, s, position, this->recursion_stack.empty() ? (INT_MIN + 3) : this->recursion_stack.back().idx); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int state_id) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_single_repeat(c, r, last_position, state_id); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_recursion(int idx, const re_syntax_base* p, results_type* presults, results_type* presults2) +{ + saved_recursion* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_recursion(idx, p, presults, presults2); + m_backup_state = pmp; +} + +template +bool perl_matcher::match_toggle_case() +{ + // change our case sensitivity: + push_case_change(this->icase); + this->icase = static_cast(pstate)->icase; + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_startmark() +{ + int index = static_cast(pstate)->index; + icase = static_cast(pstate)->icase; + switch(index) + { + case 0: + pstate = pstate->next.p; + break; + case -1: + case -2: + { + // forward lookahead assert: + const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + push_assertion(next_pstate, index == -1); + break; + } + case -3: + { + // independent sub-expression, currently this is always recursive: + bool old_independent = m_independent; + m_independent = true; + const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + bool r = false; +#if !defined(BOOST_NO_EXCEPTIONS) + try{ +#endif + r = match_all_states(); + if(!r && !m_independent) + { + // Must be unwinding from a COMMIT/SKIP/PRUNE and the independent + // sub failed, need to unwind everything else: + while (m_backup_state->state_id) + unwind(false); + return false; + } +#if !defined(BOOST_NO_EXCEPTIONS) + } + catch(...) + { + pstate = next_pstate; + // unwind all pushed states, apart from anything else this + // ensures that all the states are correctly destructed + // not just the memory freed. + while(unwind(true)) {} + throw; + } +#endif + pstate = next_pstate; + m_independent = old_independent; +#ifdef BOOST_REGEX_MATCH_EXTRA + if(r && (m_match_flags & match_extra)) + { + // + // our captures have been stored in *m_presult + // we need to unpack them, and insert them + // back in the right order when we unwind the stack: + // + match_results temp_match(*m_presult); + unsigned i; + for(i = 0; i < temp_match.size(); ++i) + (*m_presult)[i].get_captures().clear(); + // match everything else: +#if !defined(BOOST_NO_EXCEPTIONS) + try{ +#endif + r = match_all_states(); +#if !defined(BOOST_NO_EXCEPTIONS) + } + catch(...) + { + pstate = next_pstate; + // unwind all pushed states, apart from anything else this + // ensures that all the states are correctly destructed + // not just the memory freed. + while(unwind(true)) {} + throw; + } +#endif + // now place the stored captures back: + for(i = 0; i < temp_match.size(); ++i) + { + typedef typename sub_match::capture_sequence_type seq; + seq& s1 = (*m_presult)[i].get_captures(); + const seq& s2 = temp_match[i].captures(); + s1.insert( + s1.end(), + s2.begin(), + s2.end()); + } + } +#endif + return r; + } + case -4: + { + // conditional expression: + const re_alt* alt = static_cast(pstate->next.p); + BOOST_REGEX_ASSERT(alt->type == syntax_element_alt); + pstate = alt->next.p; + if(pstate->type == syntax_element_assert_backref) + { + if(!match_assert_backref()) + pstate = alt->alt.p; + break; + } + else + { + // zero width assertion, have to match this recursively: + BOOST_REGEX_ASSERT(pstate->type == syntax_element_startmark); + bool negated = static_cast(pstate)->index == -2; + BidiIterator saved_position = position; + const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; +#if !defined(BOOST_NO_EXCEPTIONS) + try{ +#endif + bool r = match_all_states(); + position = saved_position; + if(negated) + r = !r; + if(r) + pstate = next_pstate; + else + pstate = alt->alt.p; +#if !defined(BOOST_NO_EXCEPTIONS) + } + catch(...) + { + pstate = next_pstate; + // unwind all pushed states, apart from anything else this + // ensures that all the states are correctly destructed + // not just the memory freed. + while(unwind(true)){} + throw; + } +#endif + break; + } + } + case -5: + { + push_matched_paren(0, (*m_presult)[0]); + m_presult->set_first(position, 0, true); + pstate = pstate->next.p; + break; + } + default: + { + BOOST_REGEX_ASSERT(index > 0); + if((m_match_flags & match_nosubs) == 0) + { + push_matched_paren(index, (*m_presult)[index]); + m_presult->set_first(position, index); + } + pstate = pstate->next.p; + break; + } + } + return true; +} + +template +bool perl_matcher::match_alt() +{ + bool take_first, take_second; + const re_alt* jmp = static_cast(pstate); + + // find out which of these two alternatives we need to take: + if(position == last) + { + take_first = jmp->can_be_null & mask_take; + take_second = jmp->can_be_null & mask_skip; + } + else + { + take_first = can_start(*position, jmp->_map, (unsigned char)mask_take); + take_second = can_start(*position, jmp->_map, (unsigned char)mask_skip); + } + + if(take_first) + { + // we can take the first alternative, + // see if we need to push next alternative: + if(take_second) + { + push_alt(jmp->alt.p); + } + pstate = pstate->next.p; + return true; + } + if(take_second) + { + pstate = jmp->alt.p; + return true; + } + return false; // neither option is possible +} + +template +bool perl_matcher::match_rep() +{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4127 4244) +#endif +#ifdef BOOST_BORLANDC +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + const re_repeat* rep = static_cast(pstate); + + // find out which of these two alternatives we need to take: + bool take_first, take_second; + if(position == last) + { + take_first = rep->can_be_null & mask_take; + take_second = rep->can_be_null & mask_skip; + } + else + { + take_first = can_start(*position, rep->_map, (unsigned char)mask_take); + take_second = can_start(*position, rep->_map, (unsigned char)mask_skip); + } + + if((m_backup_state->state_id != saved_state_repeater_count) + || (static_cast*>(m_backup_state)->count.get_id() != rep->state_id) + || (next_count->get_id() != rep->state_id)) + { + // we're moving to a different repeat from the last + // one, so set up a counter object: + push_repeater_count(rep->state_id, &next_count); + } + // + // If we've had at least one repeat already, and the last one + // matched the NULL string then set the repeat count to + // maximum: + // + next_count->check_null_repeat(position, rep->max); + + if(next_count->get_count() < rep->min) + { + // we must take the repeat: + if(take_first) + { + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + return true; + } + return false; + } + + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + if(greedy) + { + // try and take the repeat if we can: + if((next_count->get_count() < rep->max) && take_first) + { + if(take_second) + { + // store position in case we fail: + push_alt(rep->alt.p); + } + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + return true; + } + else if(take_second) + { + pstate = rep->alt.p; + return true; + } + return false; // can't take anything, fail... + } + else // non-greedy + { + // try and skip the repeat if we can: + if(take_second) + { + if((next_count->get_count() < rep->max) && take_first) + { + // store position in case we fail: + push_non_greedy_repeat(rep->next.p); + } + pstate = rep->alt.p; + return true; + } + if((next_count->get_count() < rep->max) && take_first) + { + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + return true; + } + } + return false; +#ifdef BOOST_BORLANDC +#pragma option pop +#endif +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_dot_repeat_slow() +{ + std::size_t count = 0; + const re_repeat* rep = static_cast(pstate); + re_syntax_base* psingle = rep->next.p; + // match compulsory repeats first: + while(count < rep->min) + { + pstate = psingle; + if(!match_wild()) + return false; + ++count; + } + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + if(greedy) + { + // repeat for as long as we can: + while(count < rep->max) + { + pstate = psingle; + if(!match_wild()) + break; + ++count; + } + // remember where we got to if this is a leading repeat: + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_slow_dot); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +} + +template +bool perl_matcher::match_dot_repeat_fast() +{ + if(m_match_flags & match_not_dot_null) + return match_dot_repeat_slow(); + if((static_cast(pstate->next.p)->mask & match_any_mask) == 0) + return match_dot_repeat_slow(); + + const re_repeat* rep = static_cast(pstate); + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t count = static_cast((std::min)(static_cast(std::distance(position, last)), greedy ? rep->max : rep->min)); + if(rep->min > count) + { + position = last; + return false; // not enough text left to match + } + std::advance(position, count); + + if(greedy) + { + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_fast_dot); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +} + +template +bool perl_matcher::match_char_repeat() +{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif +#ifdef BOOST_BORLANDC +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + const re_repeat* rep = static_cast(pstate); + BOOST_REGEX_ASSERT(1 == static_cast(rep->next.p)->length); + const char_type what = *reinterpret_cast(static_cast(rep->next.p) + 1); + std::size_t count = 0; + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t desired = greedy ? rep->max : rep->min; + if(::boost::is_random_access_iterator::value) + { + BidiIterator end = position; + // Move end forward by "desired", preferably without using distance or advance if we can + // as these can be slow for some iterator types. + std::size_t len = (desired == (std::numeric_limits::max)()) ? 0u : std::distance(position, last); + if(desired >= len) + end = last; + else + std::advance(end, desired); + BidiIterator origin(position); + while((position != end) && (traits_inst.translate(*position, icase) == what)) + { + ++position; + } + count = (unsigned)std::distance(origin, position); + } + else + { + while((count < desired) && (position != last) && (traits_inst.translate(*position, icase) == what)) + { + ++position; + ++count; + } + } + + if(count < rep->min) + return false; + + if(greedy) + { + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_char); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +#ifdef BOOST_BORLANDC +#pragma option pop +#endif +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_set_repeat() +{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif +#ifdef BOOST_BORLANDC +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + const re_repeat* rep = static_cast(pstate); + const unsigned char* map = static_cast(rep->next.p)->_map; + std::size_t count = 0; + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t desired = greedy ? rep->max : rep->min; + if(::boost::is_random_access_iterator::value) + { + BidiIterator end = position; + // Move end forward by "desired", preferably without using distance or advance if we can + // as these can be slow for some iterator types. + std::size_t len = (desired == (std::numeric_limits::max)()) ? 0u : std::distance(position, last); + if(desired >= len) + end = last; + else + std::advance(end, desired); + BidiIterator origin(position); + while((position != end) && map[static_cast(traits_inst.translate(*position, icase))]) + { + ++position; + } + count = (unsigned)std::distance(origin, position); + } + else + { + while((count < desired) && (position != last) && map[static_cast(traits_inst.translate(*position, icase))]) + { + ++position; + ++count; + } + } + + if(count < rep->min) + return false; + + if(greedy) + { + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_short_set); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +#ifdef BOOST_BORLANDC +#pragma option pop +#endif +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_long_set_repeat() +{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif +#ifdef BOOST_BORLANDC +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + typedef typename traits::char_class_type m_type; + const re_repeat* rep = static_cast(pstate); + const re_set_long* set = static_cast*>(pstate->next.p); + std::size_t count = 0; + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t desired = greedy ? rep->max : rep->min; + if(::boost::is_random_access_iterator::value) + { + BidiIterator end = position; + // Move end forward by "desired", preferably without using distance or advance if we can + // as these can be slow for some iterator types. + std::size_t len = (desired == (std::numeric_limits::max)()) ? 0u : std::distance(position, last); + if(desired >= len) + end = last; + else + std::advance(end, desired); + BidiIterator origin(position); + while((position != end) && (position != re_is_set_member(position, last, set, re.get_data(), icase))) + { + ++position; + } + count = (unsigned)std::distance(origin, position); + } + else + { + while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data(), icase))) + { + ++position; + ++count; + } + } + + if(count < rep->min) + return false; + + if(greedy) + { + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_long_set); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +#ifdef BOOST_BORLANDC +#pragma option pop +#endif +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_recursion() +{ + BOOST_REGEX_ASSERT(pstate->type == syntax_element_recurse); + // + // See if we've seen this recursion before at this location, if we have then + // we need to prevent infinite recursion: + // + for(typename std::vector >::reverse_iterator i = recursion_stack.rbegin(); i != recursion_stack.rend(); ++i) + { + if(i->idx == static_cast(static_cast(pstate)->alt.p)->index) + { + if(i->location_of_start == position) + return false; + break; + } + } + // + // Backup call stack: + // + push_recursion_pop(); + // + // Set new call stack: + // + if(recursion_stack.capacity() == 0) + { + recursion_stack.reserve(50); + } + recursion_stack.push_back(recursion_info()); + recursion_stack.back().preturn_address = pstate->next.p; + recursion_stack.back().results = *m_presult; + pstate = static_cast(pstate)->alt.p; + recursion_stack.back().idx = static_cast(pstate)->index; + recursion_stack.back().location_of_start = position; + //if(static_cast(pstate)->state_id > 0) + { + push_repeater_count(-(2 + static_cast(pstate)->index), &next_count); + } + + return true; +} + +template +bool perl_matcher::match_endmark() +{ + int index = static_cast(pstate)->index; + icase = static_cast(pstate)->icase; + if(index > 0) + { + if((m_match_flags & match_nosubs) == 0) + { + m_presult->set_second(position, index); + } + if(!recursion_stack.empty()) + { + if(index == recursion_stack.back().idx) + { + pstate = recursion_stack.back().preturn_address; + *m_presult = recursion_stack.back().results; + push_recursion(recursion_stack.back().idx, recursion_stack.back().preturn_address, m_presult, &recursion_stack.back().results); + recursion_stack.pop_back(); + push_repeater_count(-(2 + index), &next_count); + } + } + } + else if((index < 0) && (index != -4)) + { + // matched forward lookahead: + pstate = 0; + return true; + } + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_match() +{ + if(!recursion_stack.empty()) + { + BOOST_REGEX_ASSERT(0 == recursion_stack.back().idx); + pstate = recursion_stack.back().preturn_address; + push_recursion(recursion_stack.back().idx, recursion_stack.back().preturn_address, m_presult, &recursion_stack.back().results); + *m_presult = recursion_stack.back().results; + recursion_stack.pop_back(); + return true; + } + if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first)) + return false; + if((m_match_flags & match_all) && (position != last)) + return false; + if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base)) + return false; + m_presult->set_second(position); + pstate = 0; + m_has_found_match = true; + if((m_match_flags & match_posix) == match_posix) + { + m_result.maybe_assign(*m_presult); + if((m_match_flags & match_any) == 0) + return false; + } +#ifdef BOOST_REGEX_MATCH_EXTRA + if(match_extra & m_match_flags) + { + for(unsigned i = 0; i < m_presult->size(); ++i) + if((*m_presult)[i].matched) + ((*m_presult)[i]).get_captures().push_back((*m_presult)[i]); + } +#endif + return true; +} + +template +bool perl_matcher::match_commit() +{ + // Ideally we would just junk all the states that are on the stack, + // however we might not unwind correctly in that case, so for now, + // just mark that we don't backtrack into whatever is left (or rather + // we'll unwind it unconditionally without pausing to try other matches). + + switch(static_cast(pstate)->action) + { + case commit_commit: + restart = last; + break; + case commit_skip: + if(base != position) + { + restart = position; + // Have to decrement restart since it will get incremented again later: + --restart; + } + break; + case commit_prune: + break; + } + + saved_state* pmp = m_backup_state; + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = m_backup_state; + --pmp; + } + (void) new (pmp)saved_state(16); + m_backup_state = pmp; + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_then() +{ + // Just leave a mark that we need to skip to next alternative: + saved_state* pmp = m_backup_state; + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = m_backup_state; + --pmp; + } + (void) new (pmp)saved_state(17); + m_backup_state = pmp; + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::skip_until_paren(int index, bool have_match) +{ + while(pstate) + { + if(pstate->type == syntax_element_endmark) + { + if(static_cast(pstate)->index == index) + { + if(have_match) + return this->match_endmark(); + pstate = pstate->next.p; + return true; + } + else + { + // Unenclosed closing ), occurs when (*ACCEPT) is inside some other + // parenthesis which may or may not have other side effects associated with it. + const re_syntax_base* sp = pstate; + match_endmark(); + if(!pstate) + { + unwind(true); + // unwind may leave pstate NULL if we've unwound a forward lookahead, in which + // case just move to the next state and keep looking... + if (!pstate) + pstate = sp->next.p; + } + } + continue; + } + else if(pstate->type == syntax_element_match) + return true; + else if(pstate->type == syntax_element_startmark) + { + int idx = static_cast(pstate)->index; + pstate = pstate->next.p; + skip_until_paren(idx, false); + continue; + } + pstate = pstate->next.p; + } + return true; +} + +/**************************************************************************** + +Unwind and associated procedures follow, these perform what normal stack +unwinding does in the recursive implementation. + +****************************************************************************/ + +template +bool perl_matcher::unwind(bool have_match) +{ + static unwind_proc_type const s_unwind_table[19] = + { + &perl_matcher::unwind_end, + &perl_matcher::unwind_paren, + &perl_matcher::unwind_recursion_stopper, + &perl_matcher::unwind_assertion, + &perl_matcher::unwind_alt, + &perl_matcher::unwind_repeater_counter, + &perl_matcher::unwind_extra_block, + &perl_matcher::unwind_greedy_single_repeat, + &perl_matcher::unwind_slow_dot_repeat, + &perl_matcher::unwind_fast_dot_repeat, + &perl_matcher::unwind_char_repeat, + &perl_matcher::unwind_short_set_repeat, + &perl_matcher::unwind_long_set_repeat, + &perl_matcher::unwind_non_greedy_repeat, + &perl_matcher::unwind_recursion, + &perl_matcher::unwind_recursion_pop, + &perl_matcher::unwind_commit, + &perl_matcher::unwind_then, + &perl_matcher::unwind_case, + }; + + m_recursive_result = have_match; + m_unwound_lookahead = false; + m_unwound_alt = false; + unwind_proc_type unwinder; + bool cont; + // + // keep unwinding our stack until we have something to do: + // + do + { + unwinder = s_unwind_table[m_backup_state->state_id]; + cont = (this->*unwinder)(m_recursive_result); + }while(cont); + // + // return true if we have more states to try: + // + return pstate ? true : false; +} + +template +bool perl_matcher::unwind_end(bool) +{ + pstate = 0; // nothing left to search + return false; // end of stack nothing more to search +} + +template +bool perl_matcher::unwind_case(bool) +{ + saved_change_case* pmp = static_cast(m_backup_state); + icase = pmp->icase; + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template +bool perl_matcher::unwind_paren(bool have_match) +{ + saved_matched_paren* pmp = static_cast*>(m_backup_state); + // restore previous values if no match was found: + if(!have_match) + { + m_presult->set_first(pmp->sub.first, pmp->index, pmp->index == 0); + m_presult->set_second(pmp->sub.second, pmp->index, pmp->sub.matched, pmp->index == 0); + } +#ifdef BOOST_REGEX_MATCH_EXTRA + // + // we have a match, push the capture information onto the stack: + // + else if(pmp->sub.matched && (match_extra & m_match_flags)) + ((*m_presult)[pmp->index]).get_captures().push_back(pmp->sub); +#endif + // unwind stack: + m_backup_state = pmp+1; + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp); + return true; // keep looking +} + +template +bool perl_matcher::unwind_recursion_stopper(bool) +{ + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(m_backup_state++); + pstate = 0; // nothing left to search + return false; // end of stack nothing more to search +} + +template +bool perl_matcher::unwind_assertion(bool r) +{ + saved_assertion* pmp = static_cast*>(m_backup_state); + pstate = pmp->pstate; + position = pmp->position; + bool result = (r == pmp->positive); + m_recursive_result = pmp->positive ? r : !r; + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++); + m_backup_state = pmp; + m_unwound_lookahead = true; + return !result; // return false if the assertion was matched to stop search. +} + +template +bool perl_matcher::unwind_alt(bool r) +{ + saved_position* pmp = static_cast*>(m_backup_state); + if(!r) + { + pstate = pmp->pstate; + position = pmp->position; + } + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++); + m_backup_state = pmp; + m_unwound_alt = !r; + return r; +} + +template +bool perl_matcher::unwind_repeater_counter(bool) +{ + saved_repeater* pmp = static_cast*>(m_backup_state); + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; // keep looking +} + +template +bool perl_matcher::unwind_extra_block(bool) +{ + ++used_block_count; + saved_extra_block* pmp = static_cast(m_backup_state); + void* condemmed = m_stack_base; + m_stack_base = pmp->base; + m_backup_state = pmp->end; + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp); + put_mem_block(condemmed); + return true; // keep looking +} + +template +inline void perl_matcher::destroy_single_repeat() +{ + saved_single_repeat* p = static_cast*>(m_backup_state); + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(p++); + m_backup_state = p; +} + +template +bool perl_matcher::unwind_greedy_single_repeat(bool r) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + BOOST_REGEX_ASSERT(rep->next.p != 0); + BOOST_REGEX_ASSERT(rep->alt.p != 0); + + count -= rep->min; + + if((m_match_flags & match_partial) && (position == last)) + m_has_partial_match = true; + + BOOST_REGEX_ASSERT(count); + position = pmp->last_position; + + // backtrack till we can skip out: + do + { + --position; + --count; + ++state_count; + }while(count && !can_start(*position, rep->_map, mask_skip)); + + // if we've hit base, destroy this state: + if(count == 0) + { + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count + rep->min; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_slow_dot_repeat(bool r) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + BOOST_REGEX_ASSERT(rep->type == syntax_element_dot_rep); + BOOST_REGEX_ASSERT(rep->next.p != 0); + BOOST_REGEX_ASSERT(rep->alt.p != 0); + BOOST_REGEX_ASSERT(rep->next.p->type == syntax_element_wild); + + BOOST_REGEX_ASSERT(count < rep->max); + pstate = rep->next.p; + position = pmp->last_position; + + if(position != last) + { + // wind forward until we can skip out of the repeat: + do + { + if(!match_wild()) + { + // failed repeat match, discard this state and look for another: + destroy_single_repeat(); + return true; + } + ++count; + ++state_count; + pstate = rep->next.p; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_fast_dot_repeat(bool r) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + + BOOST_REGEX_ASSERT(count < rep->max); + position = pmp->last_position; + if(position != last) + { + + // wind forward until we can skip out of the repeat: + do + { + ++position; + ++count; + ++state_count; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + + // remember where we got to if this is a leading repeat: + if((rep->leading) && (count < rep->max)) + restart = position; + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_char_repeat(bool r) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + pstate = rep->next.p; + const char_type what = *reinterpret_cast(static_cast(pstate) + 1); + position = pmp->last_position; + + BOOST_REGEX_ASSERT(rep->type == syntax_element_char_rep); + BOOST_REGEX_ASSERT(rep->next.p != 0); + BOOST_REGEX_ASSERT(rep->alt.p != 0); + BOOST_REGEX_ASSERT(rep->next.p->type == syntax_element_literal); + BOOST_REGEX_ASSERT(count < rep->max); + + if(position != last) + { + // wind forward until we can skip out of the repeat: + do + { + if(traits_inst.translate(*position, icase) != what) + { + // failed repeat match, discard this state and look for another: + destroy_single_repeat(); + return true; + } + ++count; + ++ position; + ++state_count; + pstate = rep->next.p; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + // remember where we got to if this is a leading repeat: + if((rep->leading) && (count < rep->max)) + restart = position; + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_short_set_repeat(bool r) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + pstate = rep->next.p; + const unsigned char* map = static_cast(rep->next.p)->_map; + position = pmp->last_position; + + BOOST_REGEX_ASSERT(rep->type == syntax_element_short_set_rep); + BOOST_REGEX_ASSERT(rep->next.p != 0); + BOOST_REGEX_ASSERT(rep->alt.p != 0); + BOOST_REGEX_ASSERT(rep->next.p->type == syntax_element_set); + BOOST_REGEX_ASSERT(count < rep->max); + + if(position != last) + { + // wind forward until we can skip out of the repeat: + do + { + if(!map[static_cast(traits_inst.translate(*position, icase))]) + { + // failed repeat match, discard this state and look for another: + destroy_single_repeat(); + return true; + } + ++count; + ++ position; + ++state_count; + pstate = rep->next.p; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + // remember where we got to if this is a leading repeat: + if((rep->leading) && (count < rep->max)) + restart = position; + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_long_set_repeat(bool r) +{ + typedef typename traits::char_class_type m_type; + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + pstate = rep->next.p; + const re_set_long* set = static_cast*>(pstate); + position = pmp->last_position; + + BOOST_REGEX_ASSERT(rep->type == syntax_element_long_set_rep); + BOOST_REGEX_ASSERT(rep->next.p != 0); + BOOST_REGEX_ASSERT(rep->alt.p != 0); + BOOST_REGEX_ASSERT(rep->next.p->type == syntax_element_long_set); + BOOST_REGEX_ASSERT(count < rep->max); + + if(position != last) + { + // wind forward until we can skip out of the repeat: + do + { + if(position == re_is_set_member(position, last, set, re.get_data(), icase)) + { + // failed repeat match, discard this state and look for another: + destroy_single_repeat(); + return true; + } + ++position; + ++count; + ++state_count; + pstate = rep->next.p; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + // remember where we got to if this is a leading repeat: + if((rep->leading) && (count < rep->max)) + restart = position; + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_non_greedy_repeat(bool r) +{ + saved_position* pmp = static_cast*>(m_backup_state); + if(!r) + { + position = pmp->position; + pstate = pmp->pstate; + ++(*next_count); + } + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++); + m_backup_state = pmp; + return r; +} + +template +bool perl_matcher::unwind_recursion(bool r) +{ + // We are backtracking back inside a recursion, need to push the info + // back onto the recursion stack, and do so unconditionally, otherwise + // we can get mismatched pushes and pops... + saved_recursion* pmp = static_cast*>(m_backup_state); + if (!r) + { + recursion_stack.push_back(recursion_info()); + recursion_stack.back().idx = pmp->recursion_id; + recursion_stack.back().preturn_address = pmp->preturn_address; + recursion_stack.back().results = pmp->prior_results; + recursion_stack.back().location_of_start = position; + *m_presult = pmp->internal_results; + } + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template +bool perl_matcher::unwind_recursion_pop(bool r) +{ + // Backtracking out of a recursion, we must pop state off the recursion + // stack unconditionally to ensure matched pushes and pops: + saved_state* pmp = static_cast(m_backup_state); + if (!r && !recursion_stack.empty()) + { + *m_presult = recursion_stack.back().results; + position = recursion_stack.back().location_of_start; + recursion_stack.pop_back(); + } + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template +void perl_matcher::push_recursion_pop() +{ + saved_state* pmp = static_cast(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast(m_backup_state); + --pmp; + } + (void) new (pmp)saved_state(15); + m_backup_state = pmp; +} + +template +bool perl_matcher::unwind_commit(bool b) +{ + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(m_backup_state++); + while(unwind(b) && !m_unwound_lookahead){} + if(m_unwound_lookahead && pstate) + { + // + // If we stop because we just unwound an assertion, put the + // commit state back on the stack again: + // + m_unwound_lookahead = false; + saved_state* pmp = m_backup_state; + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = m_backup_state; + --pmp; + } + (void) new (pmp)saved_state(16); + m_backup_state = pmp; + } + // This prevents us from stopping when we exit from an independent sub-expression: + m_independent = false; + return false; +} + +template +bool perl_matcher::unwind_then(bool b) +{ + // Unwind everything till we hit an alternative: + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(m_backup_state++); + bool result = false; + result = unwind(b); + while(result && !m_unwound_alt) + { + result = unwind(b); + } + // We're now pointing at the next alternative, need one more backtrack + // since *all* the other alternatives must fail once we've reached a THEN clause: + if(result && m_unwound_alt) + unwind(b); + return false; +} + +} // namespace BOOST_REGEX_DETAIL_NS +} // namespace boost + +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + +#endif diff --git a/scintilla/include/boost/regex/v5/primary_transform.hpp b/scintilla/include/boost/regex/v5/primary_transform.hpp new file mode 100644 index 0000000000..ed46f39221 --- /dev/null +++ b/scintilla/include/boost/regex/v5/primary_transform.hpp @@ -0,0 +1,120 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE: primary_transform.hpp + * VERSION: see + * DESCRIPTION: Heuristically determines the sort string format in use + * by the current locale. + */ + +#ifndef BOOST_REGEX_PRIMARY_TRANSFORM +#define BOOST_REGEX_PRIMARY_TRANSFORM + +namespace boost{ + namespace BOOST_REGEX_DETAIL_NS{ + + +enum{ + sort_C, + sort_fixed, + sort_delim, + sort_unknown +}; + +template +unsigned count_chars(const S& s, charT c) +{ + // + // Count how many occurrences of character c occur + // in string s: if c is a delimeter between collation + // fields, then this should be the same value for all + // sort keys: + // + unsigned int count = 0; + for(unsigned pos = 0; pos < s.size(); ++pos) + { + if(s[pos] == c) ++count; + } + return count; +} + + +template +unsigned find_sort_syntax(const traits* pt, charT* delim) +{ + // + // compare 'a' with 'A' to see how similar they are, + // should really use a-accute but we can't portably do that, + // + typedef typename traits::string_type string_type; + typedef typename traits::char_type char_type; + + // Suppress incorrect warning for MSVC + (void)pt; + + char_type a[2] = {'a', '\0', }; + string_type sa(pt->transform(a, a+1)); + if(sa == a) + { + *delim = 0; + return sort_C; + } + char_type A[2] = { 'A', '\0', }; + string_type sA(pt->transform(A, A+1)); + char_type c[2] = { ';', '\0', }; + string_type sc(pt->transform(c, c+1)); + + int pos = 0; + while((pos <= static_cast(sa.size())) && (pos <= static_cast(sA.size())) && (sa[pos] == sA[pos])) ++pos; + --pos; + if(pos < 0) + { + *delim = 0; + return sort_unknown; + } + // + // at this point sa[pos] is either the end of a fixed width field + // or the character that acts as a delimiter: + // + charT maybe_delim = sa[pos]; + if((pos != 0) && (count_chars(sa, maybe_delim) == count_chars(sA, maybe_delim)) && (count_chars(sa, maybe_delim) == count_chars(sc, maybe_delim))) + { + *delim = maybe_delim; + return sort_delim; + } + // + // OK doen't look like a delimiter, try for fixed width field: + // + if((sa.size() == sA.size()) && (sa.size() == sc.size())) + { + // note assumes that the fixed width field is less than + // (numeric_limits::max)(), should be true for all types + // I can't imagine 127 character fields... + *delim = static_cast(++pos); + return sort_fixed; + } + // + // don't know what it is: + // + *delim = 0; + return sort_unknown; +} + + + } // namespace BOOST_REGEX_DETAIL_NS +} // namespace boost + +#endif + + + diff --git a/scintilla/include/boost/regex/v5/regbase.hpp b/scintilla/include/boost/regex/v5/regbase.hpp new file mode 100644 index 0000000000..5cefb36b4c --- /dev/null +++ b/scintilla/include/boost/regex/v5/regbase.hpp @@ -0,0 +1,158 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regbase.cpp + * VERSION see + * DESCRIPTION: Declares class regbase. + */ + +#ifndef BOOST_REGEX_V5_REGBASE_HPP +#define BOOST_REGEX_V5_REGBASE_HPP + +namespace boost{ +// +// class regbase +// handles error codes and flags +// +class regbase +{ +public: + enum flag_type_ + { + // + // Divide the flags up into logical groups: + // bits 0-7 indicate main synatx type. + // bits 8-15 indicate syntax subtype. + // bits 16-31 indicate options that are common to all + // regex syntaxes. + // In all cases the default is 0. + // + // Main synatx group: + // + perl_syntax_group = 0, // default + basic_syntax_group = 1, // POSIX basic + literal = 2, // all characters are literals + main_option_type = literal | basic_syntax_group | perl_syntax_group, // everything! + // + // options specific to perl group: + // + no_bk_refs = 1 << 8, // \d not allowed + no_perl_ex = 1 << 9, // disable perl extensions + no_mod_m = 1 << 10, // disable Perl m modifier + mod_x = 1 << 11, // Perl x modifier + mod_s = 1 << 12, // force s modifier on (overrides match_not_dot_newline) + no_mod_s = 1 << 13, // force s modifier off (overrides match_not_dot_newline) + + // + // options specific to basic group: + // + no_char_classes = 1 << 8, // [[:CLASS:]] not allowed + no_intervals = 1 << 9, // {x,y} not allowed + bk_plus_qm = 1 << 10, // uses \+ and \? + bk_vbar = 1 << 11, // use \| for alternatives + emacs_ex = 1 << 12, // enables emacs extensions + + // + // options common to all groups: + // + no_escape_in_lists = 1 << 16, // '\' not special inside [...] + newline_alt = 1 << 17, // \n is the same as | + no_except = 1 << 18, // no exception on error + failbit = 1 << 19, // error flag + icase = 1 << 20, // characters are matched regardless of case + nocollate = 0, // don't use locale specific collation (deprecated) + collate = 1 << 21, // use locale specific collation + nosubs = 1 << 22, // don't mark sub-expressions + save_subexpression_location = 1 << 23, // save subexpression locations + no_empty_expressions = 1 << 24, // no empty expressions allowed + optimize = 0, // not really supported + + + + basic = basic_syntax_group | collate | no_escape_in_lists, + extended = no_bk_refs | collate | no_perl_ex | no_escape_in_lists, + normal = 0, + emacs = basic_syntax_group | collate | emacs_ex | bk_vbar, + awk = no_bk_refs | collate | no_perl_ex, + grep = basic | newline_alt, + egrep = extended | newline_alt, + sed = basic, + perl = normal, + ECMAScript = normal, + JavaScript = normal, + JScript = normal + }; + typedef unsigned int flag_type; + + enum restart_info + { + restart_any = 0, + restart_word = 1, + restart_line = 2, + restart_buf = 3, + restart_continue = 4, + restart_lit = 5, + restart_fixed_lit = 6, + restart_count = 7 + }; +}; + +// +// provide std lib proposal compatible constants: +// +namespace regex_constants{ + + enum flag_type_ + { + + no_except = ::boost::regbase::no_except, + failbit = ::boost::regbase::failbit, + literal = ::boost::regbase::literal, + icase = ::boost::regbase::icase, + nocollate = ::boost::regbase::nocollate, + collate = ::boost::regbase::collate, + nosubs = ::boost::regbase::nosubs, + optimize = ::boost::regbase::optimize, + bk_plus_qm = ::boost::regbase::bk_plus_qm, + bk_vbar = ::boost::regbase::bk_vbar, + no_intervals = ::boost::regbase::no_intervals, + no_char_classes = ::boost::regbase::no_char_classes, + no_escape_in_lists = ::boost::regbase::no_escape_in_lists, + no_mod_m = ::boost::regbase::no_mod_m, + mod_x = ::boost::regbase::mod_x, + mod_s = ::boost::regbase::mod_s, + no_mod_s = ::boost::regbase::no_mod_s, + save_subexpression_location = ::boost::regbase::save_subexpression_location, + no_empty_expressions = ::boost::regbase::no_empty_expressions, + + basic = ::boost::regbase::basic, + extended = ::boost::regbase::extended, + normal = ::boost::regbase::normal, + emacs = ::boost::regbase::emacs, + awk = ::boost::regbase::awk, + grep = ::boost::regbase::grep, + egrep = ::boost::regbase::egrep, + sed = basic, + perl = normal, + ECMAScript = normal, + JavaScript = normal, + JScript = normal + }; + typedef ::boost::regbase::flag_type syntax_option_type; + +} // namespace regex_constants + +} // namespace boost + +#endif + diff --git a/scintilla/include/boost/regex/v5/regex.hpp b/scintilla/include/boost/regex/v5/regex.hpp new file mode 100644 index 0000000000..eb6dc72822 --- /dev/null +++ b/scintilla/include/boost/regex/v5/regex.hpp @@ -0,0 +1,106 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex.cpp + * VERSION see + * DESCRIPTION: Declares boost::basic_regex<> and associated + * functions and classes. This header is the main + * entry point for the template regex code. + */ + +#ifndef BOOST_RE_REGEX_HPP_INCLUDED +#define BOOST_RE_REGEX_HPP_INCLUDED + +#ifdef __cplusplus + +// what follows is all C++ don't include in C builds!! + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace boost{ +#ifdef BOOST_REGEX_NO_FWD +typedef basic_regex > regex; +#ifndef BOOST_NO_WREGEX +typedef basic_regex > wregex; +#endif +#endif + +typedef match_results cmatch; +typedef match_results smatch; +#ifndef BOOST_NO_WREGEX +typedef match_results wcmatch; +typedef match_results wsmatch; +#endif + +} // namespace boost + +#include +#include +#include +#include +#include +#include +#include +#include + +#endif // __cplusplus + +#endif // include + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/scintilla/include/boost/regex/v5/regex_format.hpp b/scintilla/include/boost/regex/v5/regex_format.hpp new file mode 100644 index 0000000000..4c82d185aa --- /dev/null +++ b/scintilla/include/boost/regex/v5/regex_format.hpp @@ -0,0 +1,1124 @@ +/* + * + * Copyright (c) 1998-2009 John Maddock + * Copyright 2008 Eric Niebler. + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_format.hpp + * VERSION see + * DESCRIPTION: Provides formatting output routines for search and replace + * operations. Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + +#ifndef BOOST_REGEX_FORMAT_HPP +#define BOOST_REGEX_FORMAT_HPP + +#include +#include + +namespace boost{ + +// +// Forward declaration: +// + template >::allocator_type > +class match_results; + +namespace BOOST_REGEX_DETAIL_NS{ + +// +// struct trivial_format_traits: +// defines minimum localisation support for formatting +// in the case that the actual regex traits is unavailable. +// +template +struct trivial_format_traits +{ + typedef charT char_type; + + static std::ptrdiff_t length(const charT* p) + { + return global_length(p); + } + static charT tolower(charT c) + { + return ::boost::BOOST_REGEX_DETAIL_NS::global_lower(c); + } + static charT toupper(charT c) + { + return ::boost::BOOST_REGEX_DETAIL_NS::global_upper(c); + } + static int value(const charT c, int radix) + { + int result = global_value(c); + return result >= radix ? -1 : result; + } + int toi(const charT*& p1, const charT* p2, int radix)const + { + return (int)global_toi(p1, p2, radix, *this); + } +}; + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#pragma warning(disable:26812) +#endif +template +class basic_regex_formatter +{ +public: + typedef typename traits::char_type char_type; + basic_regex_formatter(OutputIterator o, const Results& r, const traits& t) + : m_traits(t), m_results(r), m_out(o), m_position(), m_end(), m_flags(), m_state(output_copy), m_restore_state(output_copy), m_have_conditional(false) {} + OutputIterator format(ForwardIter p1, ForwardIter p2, match_flag_type f); + OutputIterator format(ForwardIter p1, match_flag_type f) + { + return format(p1, p1 + m_traits.length(p1), f); + } +private: + typedef typename Results::value_type sub_match_type; + enum output_state + { + output_copy, + output_next_lower, + output_next_upper, + output_lower, + output_upper, + output_none + }; + + void put(char_type c); + void put(const sub_match_type& sub); + void format_all(); + void format_perl(); + void format_escape(); + void format_conditional(); + void format_until_scope_end(); + bool handle_perl_verb(bool have_brace); + + inline typename Results::value_type const& get_named_sub(ForwardIter i, ForwardIter j, const std::integral_constant&) + { + std::vector v(i, j); + return (i != j) ? this->m_results.named_subexpression(&v[0], &v[0] + v.size()) + : this->m_results.named_subexpression(static_cast(0), static_cast(0)); + } + inline typename Results::value_type const& get_named_sub(ForwardIter i, ForwardIter j, const std::integral_constant&) + { + return this->m_results.named_subexpression(i, j); + } + inline typename Results::value_type const& get_named_sub(ForwardIter i, ForwardIter j) + { + typedef typename std::is_convertible::type tag_type; + return get_named_sub(i, j, tag_type()); + } + inline int get_named_sub_index(ForwardIter i, ForwardIter j, const std::integral_constant&) + { + std::vector v(i, j); + return (i != j) ? this->m_results.named_subexpression_index(&v[0], &v[0] + v.size()) + : this->m_results.named_subexpression_index(static_cast(0), static_cast(0)); + } + inline int get_named_sub_index(ForwardIter i, ForwardIter j, const std::integral_constant&) + { + return this->m_results.named_subexpression_index(i, j); + } + inline int get_named_sub_index(ForwardIter i, ForwardIter j) + { + typedef typename std::is_convertible::type tag_type; + return get_named_sub_index(i, j, tag_type()); + } +#ifdef BOOST_REGEX_MSVC + // msvc-8.0 issues a spurious warning on the call to std::advance here: +#pragma warning(push) +#pragma warning(disable:4244) +#endif + inline int toi(ForwardIter& i, ForwardIter j, int base, const std::integral_constant&) + { + if(i != j) + { + std::vector v(i, j); + const char_type* start = &v[0]; + const char_type* pos = start; + int r = (int)m_traits.toi(pos, &v[0] + v.size(), base); + std::advance(i, pos - start); + return r; + } + return -1; + } +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif + inline int toi(ForwardIter& i, ForwardIter j, int base, const std::integral_constant&) + { + return m_traits.toi(i, j, base); + } + inline int toi(ForwardIter& i, ForwardIter j, int base) + { +#if defined(_MSC_VER) && defined(__INTEL_COMPILER) && ((__INTEL_COMPILER == 9999) || (__INTEL_COMPILER == 1210)) + // Workaround for Intel support issue #656654. + // See also https://svn.boost.org/trac/boost/ticket/6359 + return toi(i, j, base, std::integral_constant()); +#else + typedef typename std::is_convertible::type tag_type; + return toi(i, j, base, tag_type()); +#endif + } + + const traits& m_traits; // the traits class for localised formatting operations + const Results& m_results; // the match_results being used. + OutputIterator m_out; // where to send output. + ForwardIter m_position; // format string, current position + ForwardIter m_end; // format string end + match_flag_type m_flags; // format flags to use + output_state m_state; // what to do with the next character + output_state m_restore_state; // what state to restore to. + bool m_have_conditional; // we are parsing a conditional +private: + basic_regex_formatter(const basic_regex_formatter&); + basic_regex_formatter& operator=(const basic_regex_formatter&); +}; +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + +template +OutputIterator basic_regex_formatter::format(ForwardIter p1, ForwardIter p2, match_flag_type f) +{ + m_position = p1; + m_end = p2; + m_flags = f; + format_all(); + return m_out; +} + +template +void basic_regex_formatter::format_all() +{ + // over and over: + while(m_position != m_end) + { + switch(*m_position) + { + case '&': + if(m_flags & ::boost::regex_constants::format_sed) + { + ++m_position; + put(m_results[0]); + break; + } + put(*m_position++); + break; + case '\\': + format_escape(); + break; + case '(': + if(m_flags & boost::regex_constants::format_all) + { + ++m_position; + bool have_conditional = m_have_conditional; + m_have_conditional = false; + format_until_scope_end(); + m_have_conditional = have_conditional; + if(m_position == m_end) + return; + BOOST_REGEX_ASSERT(*m_position == static_cast(')')); + ++m_position; // skip the closing ')' + break; + } + put(*m_position); + ++m_position; + break; + case ')': + if(m_flags & boost::regex_constants::format_all) + { + return; + } + put(*m_position); + ++m_position; + break; + case ':': + if((m_flags & boost::regex_constants::format_all) && m_have_conditional) + { + return; + } + put(*m_position); + ++m_position; + break; + case '?': + if(m_flags & boost::regex_constants::format_all) + { + ++m_position; + format_conditional(); + break; + } + put(*m_position); + ++m_position; + break; + case '$': + if((m_flags & format_sed) == 0) + { + format_perl(); + break; + } + // not a special character: + BOOST_REGEX_FALLTHROUGH; + default: + put(*m_position); + ++m_position; + break; + } + } +} + +template +void basic_regex_formatter::format_perl() +{ + // + // On entry *m_position points to a '$' character + // output the information that goes with it: + // + BOOST_REGEX_ASSERT(*m_position == '$'); + // + // see if this is a trailing '$': + // + if(++m_position == m_end) + { + --m_position; + put(*m_position); + ++m_position; + return; + } + // + // OK find out what kind it is: + // + bool have_brace = false; + ForwardIter save_position = m_position; + switch(*m_position) + { + case '&': + ++m_position; + put(this->m_results[0]); + break; + case '`': + ++m_position; + put(this->m_results.prefix()); + break; + case '\'': + ++m_position; + put(this->m_results.suffix()); + break; + case '$': + put(*m_position++); + break; + case '+': + if((++m_position != m_end) && (*m_position == '{')) + { + ForwardIter base = ++m_position; + while((m_position != m_end) && (*m_position != '}')) ++m_position; + if(m_position != m_end) + { + // Named sub-expression: + put(get_named_sub(base, m_position)); + ++m_position; + break; + } + else + { + m_position = --base; + } + } + put((this->m_results)[this->m_results.size() > 1 ? static_cast(this->m_results.size() - 1) : 1]); + break; + case '{': + have_brace = true; + ++m_position; + BOOST_REGEX_FALLTHROUGH; + default: + // see if we have a number: + { + std::ptrdiff_t len = std::distance(m_position, m_end); + //len = (std::min)(static_cast(2), len); + int v = this->toi(m_position, m_position + len, 10); + if((v < 0) || (have_brace && ((m_position == m_end) || (*m_position != '}')))) + { + // Look for a Perl-5.10 verb: + if(!handle_perl_verb(have_brace)) + { + // leave the $ as is, and carry on: + m_position = --save_position; + put(*m_position); + ++m_position; + } + break; + } + // otherwise output sub v: + put(this->m_results[v]); + if(have_brace) + ++m_position; + } + } +} + +template +bool basic_regex_formatter::handle_perl_verb(bool have_brace) +{ + // + // We may have a capitalised string containing a Perl action: + // + static const char_type MATCH[] = { 'M', 'A', 'T', 'C', 'H' }; + static const char_type PREMATCH[] = { 'P', 'R', 'E', 'M', 'A', 'T', 'C', 'H' }; + static const char_type POSTMATCH[] = { 'P', 'O', 'S', 'T', 'M', 'A', 'T', 'C', 'H' }; + static const char_type LAST_PAREN_MATCH[] = { 'L', 'A', 'S', 'T', '_', 'P', 'A', 'R', 'E', 'N', '_', 'M', 'A', 'T', 'C', 'H' }; + static const char_type LAST_SUBMATCH_RESULT[] = { 'L', 'A', 'S', 'T', '_', 'S', 'U', 'B', 'M', 'A', 'T', 'C', 'H', '_', 'R', 'E', 'S', 'U', 'L', 'T' }; + static const char_type LAST_SUBMATCH_RESULT_ALT[] = { '^', 'N' }; + + if(m_position == m_end) + return false; + if(have_brace && (*m_position == '^')) + ++m_position; + + std::ptrdiff_t max_len = m_end - m_position; + + if((max_len >= 5) && std::equal(m_position, m_position + 5, MATCH)) + { + m_position += 5; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 5; + return false; + } + } + put(this->m_results[0]); + return true; + } + if((max_len >= 8) && std::equal(m_position, m_position + 8, PREMATCH)) + { + m_position += 8; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 8; + return false; + } + } + put(this->m_results.prefix()); + return true; + } + if((max_len >= 9) && std::equal(m_position, m_position + 9, POSTMATCH)) + { + m_position += 9; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 9; + return false; + } + } + put(this->m_results.suffix()); + return true; + } + if((max_len >= 16) && std::equal(m_position, m_position + 16, LAST_PAREN_MATCH)) + { + m_position += 16; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 16; + return false; + } + } + put((this->m_results)[this->m_results.size() > 1 ? static_cast(this->m_results.size() - 1) : 1]); + return true; + } + if((max_len >= 20) && std::equal(m_position, m_position + 20, LAST_SUBMATCH_RESULT)) + { + m_position += 20; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 20; + return false; + } + } + put(this->m_results.get_last_closed_paren()); + return true; + } + if((max_len >= 2) && std::equal(m_position, m_position + 2, LAST_SUBMATCH_RESULT_ALT)) + { + m_position += 2; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 2; + return false; + } + } + put(this->m_results.get_last_closed_paren()); + return true; + } + return false; +} + +template +void basic_regex_formatter::format_escape() +{ + // skip the escape and check for trailing escape: + if(++m_position == m_end) + { + put(static_cast('\\')); + return; + } + // now switch on the escape type: + switch(*m_position) + { + case 'a': + put(static_cast('\a')); + ++m_position; + break; + case 'f': + put(static_cast('\f')); + ++m_position; + break; + case 'n': + put(static_cast('\n')); + ++m_position; + break; + case 'r': + put(static_cast('\r')); + ++m_position; + break; + case 't': + put(static_cast('\t')); + ++m_position; + break; + case 'v': + put(static_cast('\v')); + ++m_position; + break; + case 'x': + if(++m_position == m_end) + { + put(static_cast('x')); + return; + } + // maybe have \x{ddd} + if(*m_position == static_cast('{')) + { + ++m_position; + int val = this->toi(m_position, m_end, 16); + if(val < 0) + { + // invalid value treat everything as literals: + put(static_cast('x')); + put(static_cast('{')); + return; + } + if((m_position == m_end) || (*m_position != static_cast('}'))) + { + --m_position; + while(*m_position != static_cast('\\')) + --m_position; + ++m_position; + put(*m_position++); + return; + } + ++m_position; + put(static_cast(val)); + return; + } + else + { + std::ptrdiff_t len = std::distance(m_position, m_end); + len = (std::min)(static_cast(2), len); + int val = this->toi(m_position, m_position + len, 16); + if(val < 0) + { + --m_position; + put(*m_position++); + return; + } + put(static_cast(val)); + } + break; + case 'c': + if(++m_position == m_end) + { + --m_position; + put(*m_position++); + return; + } + put(static_cast(*m_position++ % 32)); + break; + case 'e': + put(static_cast(27)); + ++m_position; + break; + default: + // see if we have a perl specific escape: + if((m_flags & boost::regex_constants::format_sed) == 0) + { + bool breakout = false; + switch(*m_position) + { + case 'l': + ++m_position; + m_restore_state = m_state; + m_state = output_next_lower; + breakout = true; + break; + case 'L': + ++m_position; + m_state = output_lower; + breakout = true; + break; + case 'u': + ++m_position; + m_restore_state = m_state; + m_state = output_next_upper; + breakout = true; + break; + case 'U': + ++m_position; + m_state = output_upper; + breakout = true; + break; + case 'E': + ++m_position; + m_state = output_copy; + breakout = true; + break; + } + if(breakout) + break; + } + // see if we have a \n sed style backreference: + std::ptrdiff_t len = std::distance(m_position, m_end); + len = (std::min)(static_cast(1), len); + int v = this->toi(m_position, m_position+len, 10); + if((v > 0) || ((v == 0) && (m_flags & ::boost::regex_constants::format_sed))) + { + put(m_results[v]); + break; + } + else if(v == 0) + { + // octal ecape sequence: + --m_position; + len = std::distance(m_position, m_end); + len = (std::min)(static_cast(4), len); + v = this->toi(m_position, m_position + len, 8); + BOOST_REGEX_ASSERT(v >= 0); + put(static_cast(v)); + break; + } + // Otherwise output the character "as is": + put(*m_position++); + break; + } +} + +template +void basic_regex_formatter::format_conditional() +{ + if(m_position == m_end) + { + // oops trailing '?': + put(static_cast('?')); + return; + } + int v; + if(*m_position == '{') + { + ForwardIter base = m_position; + ++m_position; + v = this->toi(m_position, m_end, 10); + if(v < 0) + { + // Try a named subexpression: + while((m_position != m_end) && (*m_position != '}')) + ++m_position; + v = this->get_named_sub_index(base + 1, m_position); + } + if((v < 0) || (*m_position != '}')) + { + m_position = base; + // oops trailing '?': + put(static_cast('?')); + return; + } + // Skip trailing '}': + ++m_position; + } + else + { + std::ptrdiff_t len = std::distance(m_position, m_end); + len = (std::min)(static_cast(2), len); + v = this->toi(m_position, m_position + len, 10); + } + if(v < 0) + { + // oops not a number: + put(static_cast('?')); + return; + } + + // output varies depending upon whether sub-expression v matched or not: + if(m_results[v].matched) + { + m_have_conditional = true; + format_all(); + m_have_conditional = false; + if((m_position != m_end) && (*m_position == static_cast(':'))) + { + // skip the ':': + ++m_position; + // save output state, then turn it off: + output_state saved_state = m_state; + m_state = output_none; + // format the rest of this scope: + format_until_scope_end(); + // restore output state: + m_state = saved_state; + } + } + else + { + // save output state, then turn it off: + output_state saved_state = m_state; + m_state = output_none; + // format until ':' or ')': + m_have_conditional = true; + format_all(); + m_have_conditional = false; + // restore state: + m_state = saved_state; + if((m_position != m_end) && (*m_position == static_cast(':'))) + { + // skip the ':': + ++m_position; + // format the rest of this scope: + format_until_scope_end(); + } + } +} + +template +void basic_regex_formatter::format_until_scope_end() +{ + do + { + format_all(); + if((m_position == m_end) || (*m_position == static_cast(')'))) + return; + put(*m_position++); + }while(m_position != m_end); +} + +template +void basic_regex_formatter::put(char_type c) +{ + // write a single character to output + // according to which case translation mode we are in: + switch(this->m_state) + { + case output_none: + return; + case output_next_lower: + c = m_traits.tolower(c); + this->m_state = m_restore_state; + break; + case output_next_upper: + c = m_traits.toupper(c); + this->m_state = m_restore_state; + break; + case output_lower: + c = m_traits.tolower(c); + break; + case output_upper: + c = m_traits.toupper(c); + break; + default: + break; + } + *m_out = c; + ++m_out; +} + +template +void basic_regex_formatter::put(const sub_match_type& sub) +{ + typedef typename sub_match_type::iterator iterator_type; + iterator_type i = sub.first; + while(i != sub.second) + { + put(*i); + ++i; + } +} + +template +class string_out_iterator +{ + S* out; +public: + string_out_iterator(S& s) : out(&s) {} + string_out_iterator& operator++() { return *this; } + string_out_iterator& operator++(int) { return *this; } + string_out_iterator& operator*() { return *this; } + string_out_iterator& operator=(typename S::value_type v) + { + out->append(1, v); + return *this; + } + + typedef std::ptrdiff_t difference_type; + typedef typename S::value_type value_type; + typedef value_type* pointer; + typedef value_type& reference; + typedef std::output_iterator_tag iterator_category; +}; + +template +OutputIterator regex_format_imp(OutputIterator out, + const match_results& m, + ForwardIter p1, ForwardIter p2, + match_flag_type flags, + const traits& t + ) +{ + if(flags & regex_constants::format_literal) + { + return BOOST_REGEX_DETAIL_NS::copy(p1, p2, out); + } + + BOOST_REGEX_DETAIL_NS::basic_regex_formatter< + OutputIterator, + match_results, + traits, ForwardIter> f(out, m, t); + return f.format(p1, p2, flags); +} + +template +struct has_const_iterator +{ + template + static typename U::const_iterator tester(U*); + static char tester(...); + + static T* get(); + + static const bool value = sizeof(tester(get())) != sizeof(char); +}; + +struct any_type +{ + template + any_type(const T&); + template + any_type(const T&, const U&); + template + any_type(const T&, const U&, const V&); +}; +typedef char no_type; +typedef char (&unary_type)[2]; +typedef char (&binary_type)[3]; +typedef char (&ternary_type)[4]; + +no_type check_is_formatter(unary_type, binary_type, ternary_type); +template +unary_type check_is_formatter(T const &, binary_type, ternary_type); +template +binary_type check_is_formatter(unary_type, T const &, ternary_type); +template +binary_type check_is_formatter(T const &, U const &, ternary_type); +template +ternary_type check_is_formatter(unary_type, binary_type, T const &); +template +ternary_type check_is_formatter(T const &, binary_type, U const &); +template +ternary_type check_is_formatter(unary_type, T const &, U const &); +template +ternary_type check_is_formatter(T const &, U const &, V const &); + +struct unary_binary_ternary +{ + typedef unary_type (*unary_fun)(any_type); + typedef binary_type (*binary_fun)(any_type, any_type); + typedef ternary_type (*ternary_fun)(any_type, any_type, any_type); + operator unary_fun(); + operator binary_fun(); + operator ternary_fun(); +}; + +template::value> +struct formatter_wrapper + : Formatter + , unary_binary_ternary +{ + formatter_wrapper(){} +}; + +template +struct formatter_wrapper + : unary_binary_ternary +{ + operator Formatter *(); +}; + +template +struct formatter_wrapper + : unary_binary_ternary +{ + operator Formatter *(); +}; + +template +struct do_unwrap_reference +{ + typedef T type; +}; +template +struct do_unwrap_reference > +{ + typedef T type; +}; + +template +T& do_unwrap_ref(T& r) { return r; } +template +T& do_unwrap_ref(std::reference_wrapper const& r) { return r.get(); } + +template +struct format_traits_imp +{ +private: + // + // F must be a pointer, a function, or a class with a function call operator: + // + static_assert((::std::is_pointer::value || ::std::is_function::value || ::std::is_class::value), "The functor must be a pointer or a class with a function call operator"); + static formatter_wrapper::type> f; + static M m; + static O out; + static boost::regex_constants::match_flag_type flags; +public: + static const int value = sizeof(check_is_formatter(f(m), f(m, out), f(m, out, flags))); +}; + +template +struct format_traits +{ +public: + // + // Type is std::integral_constant where N is one of: + // + // 0 : F is a pointer to a presumably null-terminated string. + // 1 : F is a character-container such as a std::string. + // 2 : F is a Unary Functor. + // 3 : F is a Binary Functor. + // 4 : F is a Ternary Functor. + // + typedef typename std::conditional< + std::is_pointer::value && !std::is_function::type>::value, + std::integral_constant, + typename std::conditional< + has_const_iterator::value, + std::integral_constant, + std::integral_constant::value> + >::type + >::type type; + // + // This static assertion will fail if the functor passed does not accept + // the same type of arguments passed. + // + static_assert( std::is_class::value && !has_const_iterator::value ? (type::value > 1) : true, "Argument mismatch in Functor type"); +}; + +template +struct format_functor3 +{ + format_functor3(Base b) : func(b) {} + template + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f) + { + return do_unwrap_ref(func)(m, i, f); + } + template + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f, const Traits&) + { + return (*this)(m, i, f); + } +private: + Base func; + format_functor3(const format_functor3&); + format_functor3& operator=(const format_functor3&); +}; + +template +struct format_functor2 +{ + format_functor2(Base b) : func(b) {} + template + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type /*f*/) + { + return do_unwrap_ref(func)(m, i); + } + template + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f, const Traits&) + { + return (*this)(m, i, f); + } +private: + Base func; + format_functor2(const format_functor2&); + format_functor2& operator=(const format_functor2&); +}; + +template +struct format_functor1 +{ + format_functor1(Base b) : func(b) {} + + template + OutputIter do_format_string(const S& s, OutputIter i) + { + return std::copy(s.begin(), s.end(), i); + } + template + inline OutputIter do_format_string(const S* s, OutputIter i) + { + while(s && *s) + { + *i = *s; + ++i; + ++s; + } + return i; + } + template + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type /*f*/) + { + return do_format_string(do_unwrap_ref(func)(m), i); + } + template + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f, const Traits&) + { + return (*this)(m, i, f); + } +private: + Base func; + format_functor1(const format_functor1&); + format_functor1& operator=(const format_functor1&); +}; + +template +struct format_functor_c_string +{ + format_functor_c_string(const charT* ps) : func(ps) {} + + template + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f, const Traits& t = Traits()) + { + //typedef typename Match::char_type char_type; + const charT* end = func; + while(*end) ++end; + return regex_format_imp(i, m, func, end, f, t); + } +private: + const charT* func; + format_functor_c_string(const format_functor_c_string&); + format_functor_c_string& operator=(const format_functor_c_string&); +}; + +template +struct format_functor_container +{ + format_functor_container(const Container& c) : func(c) {} + + template + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f, const Traits& t = Traits()) + { + //typedef typename Match::char_type char_type; + return BOOST_REGEX_DETAIL_NS::regex_format_imp(i, m, func.begin(), func.end(), f, t); + } +private: + const Container& func; + format_functor_container(const format_functor_container&); + format_functor_container& operator=(const format_functor_container&); +}; + +template > +struct compute_functor_type +{ + typedef typename format_traits::type tag; + typedef typename std::remove_cv< typename std::remove_pointer::type>::type maybe_char_type; + + typedef typename std::conditional< + tag::value == 0, format_functor_c_string, + typename std::conditional< + tag::value == 1, format_functor_container, + typename std::conditional< + tag::value == 2, format_functor1, + typename std::conditional< + tag::value == 3, format_functor2, + format_functor3 + >::type + >::type + >::type + >::type type; +}; + +} // namespace BOOST_REGEX_DETAIL_NS + +template +inline OutputIterator regex_format(OutputIterator out, + const match_results& m, + Functor fmt, + match_flag_type flags = format_all + ) +{ + return m.format(out, fmt, flags); +} + +template +inline std::basic_string::char_type> regex_format(const match_results& m, + Functor fmt, + match_flag_type flags = format_all) +{ + return m.format(fmt, flags); +} + +} // namespace boost + +#endif // BOOST_REGEX_FORMAT_HPP + + + + + + diff --git a/scintilla/include/boost/regex/v5/regex_fwd.hpp b/scintilla/include/boost/regex/v5/regex_fwd.hpp new file mode 100644 index 0000000000..3076b069ac --- /dev/null +++ b/scintilla/include/boost/regex/v5/regex_fwd.hpp @@ -0,0 +1,73 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_fwd.cpp + * VERSION see + * DESCRIPTION: Forward declares boost::basic_regex<> and + * associated typedefs. + */ + +#ifndef BOOST_REGEX_FWD_HPP_INCLUDED +#define BOOST_REGEX_FWD_HPP_INCLUDED + +#ifndef BOOST_REGEX_CONFIG_HPP +#include +#endif + +// +// define BOOST_REGEX_NO_FWD if this +// header doesn't work! +// +#ifdef BOOST_REGEX_NO_FWD +# ifndef BOOST_RE_REGEX_HPP +# include +# endif +#else + +namespace boost{ + +template +class cpp_regex_traits; +template +struct c_regex_traits; +template +class w32_regex_traits; + +#ifdef BOOST_REGEX_USE_WIN32_LOCALE +template > +struct regex_traits; +#elif defined(BOOST_REGEX_USE_CPP_LOCALE) +template > +struct regex_traits; +#else +template > +struct regex_traits; +#endif + +template > +class basic_regex; + +typedef basic_regex > regex; +#ifndef BOOST_NO_WREGEX +typedef basic_regex > wregex; +#endif + +} // namespace boost + +#endif // BOOST_REGEX_NO_FWD + +#endif + + + + diff --git a/scintilla/include/boost/regex/v5/regex_grep.hpp b/scintilla/include/boost/regex/v5/regex_grep.hpp new file mode 100644 index 0000000000..6cd625d606 --- /dev/null +++ b/scintilla/include/boost/regex/v5/regex_grep.hpp @@ -0,0 +1,98 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_grep.hpp + * VERSION see + * DESCRIPTION: Provides regex_grep implementation. + */ + +#ifndef BOOST_REGEX_V5_REGEX_GREP_HPP +#define BOOST_REGEX_V5_REGEX_GREP_HPP + + +namespace boost{ + +// +// regex_grep: +// find all non-overlapping matches within the sequence first last: +// +template +inline unsigned int regex_grep(Predicate foo, + BidiIterator first, + BidiIterator last, + const basic_regex& e, + match_flag_type flags = match_default) +{ + if(e.flags() & regex_constants::failbit) + return false; + + typedef typename match_results::allocator_type match_allocator_type; + + match_results m; + BOOST_REGEX_DETAIL_NS::perl_matcher matcher(first, last, m, e, flags, first); + unsigned int count = 0; + while(matcher.find()) + { + ++count; + if(0 == foo(m)) + return count; // caller doesn't want to go on + if(m[0].second == last) + return count; // we've reached the end, don't try and find an extra null match. + if(m.length() == 0) + { + if(m[0].second == last) + return count; + // we found a NULL-match, now try to find + // a non-NULL one at the same position: + match_results m2(m); + matcher.setf(match_not_null | match_continuous); + if(matcher.find()) + { + ++count; + if(0 == foo(m)) + return count; + } + else + { + // reset match back to where it was: + m = m2; + } + matcher.unsetf((match_not_null | match_continuous) & ~flags); + } + } + return count; +} + +// +// regex_grep convenience interfaces: +// +template +inline unsigned int regex_grep(Predicate foo, const charT* str, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_grep(foo, str, str + traits::length(str), e, flags); +} + +template +inline unsigned int regex_grep(Predicate foo, const std::basic_string& s, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_grep(foo, s.begin(), s.end(), e, flags); +} + +} // namespace boost + +#endif // BOOST_REGEX_V5_REGEX_GREP_HPP + diff --git a/scintilla/include/boost/regex/v5/regex_iterator.hpp b/scintilla/include/boost/regex/v5/regex_iterator.hpp new file mode 100644 index 0000000000..b2e9d2ced5 --- /dev/null +++ b/scintilla/include/boost/regex/v5/regex_iterator.hpp @@ -0,0 +1,173 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_iterator.hpp + * VERSION see + * DESCRIPTION: Provides regex_iterator implementation. + */ + +#ifndef BOOST_REGEX_V5_REGEX_ITERATOR_HPP +#define BOOST_REGEX_V5_REGEX_ITERATOR_HPP + +#include + +namespace boost{ + +template +class regex_iterator_implementation +{ + typedef basic_regex regex_type; + + match_results what; // current match + BidirectionalIterator base; // start of sequence + BidirectionalIterator end; // end of sequence + const regex_type re; // the expression + match_flag_type flags; // flags for matching + +public: + regex_iterator_implementation(const regex_type* p, BidirectionalIterator last, match_flag_type f) + : base(), end(last), re(*p), flags(f){} + regex_iterator_implementation(const regex_iterator_implementation& other) + :what(other.what), base(other.base), end(other.end), re(other.re), flags(other.flags){} + bool init(BidirectionalIterator first) + { + base = first; + return regex_search(first, end, what, re, flags); + } + bool compare(const regex_iterator_implementation& that) + { + if(this == &that) return true; + return (&re.get_data() == &that.re.get_data()) && (end == that.end) && (flags == that.flags) && (what[0].first == that.what[0].first) && (what[0].second == that.what[0].second); + } + const match_results& get() + { return what; } + bool next() + { + //if(what.prefix().first != what[0].second) + // flags |= match_prev_avail; + BidirectionalIterator next_start = what[0].second; + match_flag_type f(flags); + if(!what.length() || (f & regex_constants::match_posix)) + f |= regex_constants::match_not_initial_null; + //if(base != next_start) + // f |= regex_constants::match_not_bob; + bool result = regex_search(next_start, end, what, re, f, base); + if(result) + what.set_base(base); + return result; + } +private: + regex_iterator_implementation& operator=(const regex_iterator_implementation&); +}; + +template ::value_type, + class traits = regex_traits > +class regex_iterator +{ +private: + typedef regex_iterator_implementation impl; + typedef std::shared_ptr pimpl; +public: + typedef basic_regex regex_type; + typedef match_results value_type; + typedef typename std::iterator_traits::difference_type + difference_type; + typedef const value_type* pointer; + typedef const value_type& reference; + typedef std::forward_iterator_tag iterator_category; + + regex_iterator(){} + regex_iterator(BidirectionalIterator a, BidirectionalIterator b, + const regex_type& re, + match_flag_type m = match_default) + : pdata(new impl(&re, b, m)) + { + if(!pdata->init(a)) + { + pdata.reset(); + } + } + regex_iterator(const regex_iterator& that) + : pdata(that.pdata) {} + regex_iterator& operator=(const regex_iterator& that) + { + pdata = that.pdata; + return *this; + } + bool operator==(const regex_iterator& that)const + { + if((pdata.get() == 0) || (that.pdata.get() == 0)) + return pdata.get() == that.pdata.get(); + return pdata->compare(*(that.pdata.get())); + } + bool operator!=(const regex_iterator& that)const + { return !(*this == that); } + const value_type& operator*()const + { return pdata->get(); } + const value_type* operator->()const + { return &(pdata->get()); } + regex_iterator& operator++() + { + cow(); + if(0 == pdata->next()) + { + pdata.reset(); + } + return *this; + } + regex_iterator operator++(int) + { + regex_iterator result(*this); + ++(*this); + return result; + } +private: + + pimpl pdata; + + void cow() + { + // copy-on-write + if(pdata.get() && (pdata.use_count() > 1)) + { + pdata.reset(new impl(*(pdata.get()))); + } + } +}; + +typedef regex_iterator cregex_iterator; +typedef regex_iterator sregex_iterator; +#ifndef BOOST_NO_WREGEX +typedef regex_iterator wcregex_iterator; +typedef regex_iterator wsregex_iterator; +#endif + +// make_regex_iterator: +template +inline regex_iterator make_regex_iterator(const charT* p, const basic_regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_iterator(p, p+traits::length(p), e, m); +} +template +inline regex_iterator::const_iterator, charT, traits> make_regex_iterator(const std::basic_string& p, const basic_regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_iterator::const_iterator, charT, traits>(p.begin(), p.end(), e, m); +} + +} // namespace boost + +#endif // BOOST_REGEX_V5_REGEX_ITERATOR_HPP + diff --git a/scintilla/include/boost/regex/v5/regex_match.hpp b/scintilla/include/boost/regex/v5/regex_match.hpp new file mode 100644 index 0000000000..217535769b --- /dev/null +++ b/scintilla/include/boost/regex/v5/regex_match.hpp @@ -0,0 +1,92 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_match.hpp + * VERSION see + * DESCRIPTION: Regular expression matching algorithms. + * Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + + +#ifndef BOOST_REGEX_MATCH_HPP +#define BOOST_REGEX_MATCH_HPP + +namespace boost{ + +// +// proc regex_match +// returns true if the specified regular expression matches +// the whole of the input. Fills in what matched in m. +// +template +bool regex_match(BidiIterator first, BidiIterator last, + match_results& m, + const basic_regex& e, + match_flag_type flags = match_default) +{ + BOOST_REGEX_DETAIL_NS::perl_matcher matcher(first, last, m, e, flags, first); + return matcher.match(); +} +template +bool regex_match(iterator first, iterator last, + const basic_regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return regex_match(first, last, m, e, flags | regex_constants::match_any); +} +// +// query_match convenience interfaces: +// +template +inline bool regex_match(const charT* str, + match_results& m, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_match(str, str + traits::length(str), m, e, flags); +} + +template +inline bool regex_match(const std::basic_string& s, + match_results::const_iterator, Allocator>& m, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_match(s.begin(), s.end(), m, e, flags); +} +template +inline bool regex_match(const charT* str, + const basic_regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return regex_match(str, str + traits::length(str), m, e, flags | regex_constants::match_any); +} + +template +inline bool regex_match(const std::basic_string& s, + const basic_regex& e, + match_flag_type flags = match_default) +{ + typedef typename std::basic_string::const_iterator iterator; + match_results m; + return regex_match(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} + + +} // namespace boost + +#endif // BOOST_REGEX_MATCH_HPP + diff --git a/scintilla/include/boost/regex/v5/regex_merge.hpp b/scintilla/include/boost/regex/v5/regex_merge.hpp new file mode 100644 index 0000000000..a5103c53e8 --- /dev/null +++ b/scintilla/include/boost/regex/v5/regex_merge.hpp @@ -0,0 +1,71 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_format.hpp + * VERSION see + * DESCRIPTION: Provides formatting output routines for search and replace + * operations. Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + +#ifndef BOOST_REGEX_V5_REGEX_MERGE_HPP +#define BOOST_REGEX_V5_REGEX_MERGE_HPP + + +namespace boost{ + +template +inline OutputIterator regex_merge(OutputIterator out, + Iterator first, + Iterator last, + const basic_regex& e, + const charT* fmt, + match_flag_type flags = match_default) +{ + return regex_replace(out, first, last, e, fmt, flags); +} + +template +inline OutputIterator regex_merge(OutputIterator out, + Iterator first, + Iterator last, + const basic_regex& e, + const std::basic_string& fmt, + match_flag_type flags = match_default) +{ + return regex_merge(out, first, last, e, fmt.c_str(), flags); +} + +template +inline std::basic_string regex_merge(const std::basic_string& s, + const basic_regex& e, + const charT* fmt, + match_flag_type flags = match_default) +{ + return regex_replace(s, e, fmt, flags); +} + +template +inline std::basic_string regex_merge(const std::basic_string& s, + const basic_regex& e, + const std::basic_string& fmt, + match_flag_type flags = match_default) +{ + return regex_replace(s, e, fmt, flags); +} + +} // namespace boost + +#endif // BOOST_REGEX_V5_REGEX_MERGE_HPP + + diff --git a/scintilla/include/boost/regex/v5/regex_raw_buffer.hpp b/scintilla/include/boost/regex/v5/regex_raw_buffer.hpp new file mode 100644 index 0000000000..443c57a25b --- /dev/null +++ b/scintilla/include/boost/regex/v5/regex_raw_buffer.hpp @@ -0,0 +1,213 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_raw_buffer.hpp + * VERSION see + * DESCRIPTION: Raw character buffer for regex code. + * Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + +#ifndef BOOST_REGEX_RAW_BUFFER_HPP +#define BOOST_REGEX_RAW_BUFFER_HPP + +#ifndef BOOST_REGEX_CONFIG_HPP +#include +#endif + +#include +#include + +namespace boost{ + namespace BOOST_REGEX_DETAIL_NS{ + +struct empty_padding{}; + +union padding +{ + void* p; + unsigned int i; +}; + +template +struct padding3 +{ + enum{ + padding_size = 8, + padding_mask = 7 + }; +}; + +template<> +struct padding3<2> +{ + enum{ + padding_size = 2, + padding_mask = 1 + }; +}; + +template<> +struct padding3<4> +{ + enum{ + padding_size = 4, + padding_mask = 3 + }; +}; + +template<> +struct padding3<8> +{ + enum{ + padding_size = 8, + padding_mask = 7 + }; +}; + +template<> +struct padding3<16> +{ + enum{ + padding_size = 16, + padding_mask = 15 + }; +}; + +enum{ + padding_size = padding3::padding_size, + padding_mask = padding3::padding_mask +}; + +// +// class raw_storage +// basically this is a simplified vector +// this is used by basic_regex for expression storage +// + +class raw_storage +{ +public: + typedef std::size_t size_type; + typedef unsigned char* pointer; +private: + pointer last, start, end; +public: + + raw_storage(); + raw_storage(size_type n); + + ~raw_storage() + { + ::operator delete(start); + } + + void resize(size_type n) + { + size_type newsize = start ? last - start : 1024; + while (newsize < n) + newsize *= 2; + size_type datasize = end - start; + // extend newsize to WORD/DWORD boundary: + newsize = (newsize + padding_mask) & ~(padding_mask); + + // allocate and copy data: + pointer ptr = static_cast(::operator new(newsize)); + BOOST_REGEX_NOEH_ASSERT(ptr) + if (start) + std::memcpy(ptr, start, datasize); + + // get rid of old buffer: + ::operator delete(start); + + // and set up pointers: + start = ptr; + end = ptr + datasize; + last = ptr + newsize; + } + + void* extend(size_type n) + { + if(size_type(last - end) < n) + resize(n + (end - start)); + pointer result = end; + end += n; + return result; + } + + void* insert(size_type pos, size_type n) + { + BOOST_REGEX_ASSERT(pos <= size_type(end - start)); + if (size_type(last - end) < n) + resize(n + (end - start)); + void* result = start + pos; + std::memmove(start + pos + n, start + pos, (end - start) - pos); + end += n; + return result; + } + + size_type size() + { + return size_type(end - start); + } + + size_type capacity() + { + return size_type(last - start); + } + + void* data()const + { + return start; + } + + size_type index(void* ptr) + { + return size_type(static_cast(ptr) - static_cast(data())); + } + + void clear() + { + end = start; + } + + void align() + { + // move end up to a boundary: + end = start + (((end - start) + padding_mask) & ~padding_mask); + } + void swap(raw_storage& that) + { + std::swap(start, that.start); + std::swap(end, that.end); + std::swap(last, that.last); + } +}; + +inline raw_storage::raw_storage() +{ + last = start = end = 0; +} + +inline raw_storage::raw_storage(size_type n) +{ + start = end = static_cast(::operator new(n)); + BOOST_REGEX_NOEH_ASSERT(start) + last = start + n; +} + +} // namespace BOOST_REGEX_DETAIL_NS +} // namespace boost + +#endif + diff --git a/scintilla/include/boost/regex/v5/regex_replace.hpp b/scintilla/include/boost/regex/v5/regex_replace.hpp new file mode 100644 index 0000000000..75f8894fe9 --- /dev/null +++ b/scintilla/include/boost/regex/v5/regex_replace.hpp @@ -0,0 +1,77 @@ +/* + * + * Copyright (c) 1998-2009 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_format.hpp + * VERSION see + * DESCRIPTION: Provides formatting output routines for search and replace + * operations. Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + +#ifndef BOOST_REGEX_V5_REGEX_REPLACE_HPP +#define BOOST_REGEX_V5_REGEX_REPLACE_HPP + + +namespace boost{ + +template +OutputIterator regex_replace(OutputIterator out, + BidirectionalIterator first, + BidirectionalIterator last, + const basic_regex& e, + Formatter fmt, + match_flag_type flags = match_default) +{ + regex_iterator i(first, last, e, flags); + regex_iterator j; + if(i == j) + { + if(!(flags & regex_constants::format_no_copy)) + out = BOOST_REGEX_DETAIL_NS::copy(first, last, out); + } + else + { + BidirectionalIterator last_m(first); + while(i != j) + { + if(!(flags & regex_constants::format_no_copy)) + out = BOOST_REGEX_DETAIL_NS::copy(i->prefix().first, i->prefix().second, out); + out = i->format(out, fmt, flags, e); + last_m = (*i)[0].second; + if(flags & regex_constants::format_first_only) + break; + ++i; + } + if(!(flags & regex_constants::format_no_copy)) + out = BOOST_REGEX_DETAIL_NS::copy(last_m, last, out); + } + return out; +} + +template +std::basic_string regex_replace(const std::basic_string& s, + const basic_regex& e, + Formatter fmt, + match_flag_type flags = match_default) +{ + std::basic_string result; + BOOST_REGEX_DETAIL_NS::string_out_iterator > i(result); + regex_replace(i, s.begin(), s.end(), e, fmt, flags); + return result; +} + +} // namespace boost + +#endif // BOOST_REGEX_V5_REGEX_REPLACE_HPP + + diff --git a/scintilla/include/boost/regex/v5/regex_search.hpp b/scintilla/include/boost/regex/v5/regex_search.hpp new file mode 100644 index 0000000000..d0addb1948 --- /dev/null +++ b/scintilla/include/boost/regex/v5/regex_search.hpp @@ -0,0 +1,103 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_search.hpp + * VERSION see + * DESCRIPTION: Provides regex_search implementation. + */ + +#ifndef BOOST_REGEX_V5_REGEX_SEARCH_HPP +#define BOOST_REGEX_V5_REGEX_SEARCH_HPP + + +namespace boost{ + +template +bool regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_search(first, last, m, e, flags, first); +} + +template +bool regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const basic_regex& e, + match_flag_type flags, + BidiIterator base) +{ + if(e.flags() & regex_constants::failbit) + return false; + + BOOST_REGEX_DETAIL_NS::perl_matcher matcher(first, last, m, e, flags, base); + return matcher.find(); +} + +// +// regex_search convenience interfaces: +// +template +inline bool regex_search(const charT* str, + match_results& m, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_search(str, str + traits::length(str), m, e, flags); +} + +template +inline bool regex_search(const std::basic_string& s, + match_results::const_iterator, Allocator>& m, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_search(s.begin(), s.end(), m, e, flags); +} + +template +bool regex_search(BidiIterator first, BidiIterator last, + const basic_regex& e, + match_flag_type flags = match_default) +{ + if(e.flags() & regex_constants::failbit) + return false; + + match_results m; + typedef typename match_results::allocator_type match_alloc_type; + BOOST_REGEX_DETAIL_NS::perl_matcher matcher(first, last, m, e, flags | regex_constants::match_any, first); + return matcher.find(); +} + +template +inline bool regex_search(const charT* str, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_search(str, str + traits::length(str), e, flags); +} + +template +inline bool regex_search(const std::basic_string& s, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_search(s.begin(), s.end(), e, flags); +} + +} // namespace boost + +#endif // BOOST_REGEX_V5_REGEX_SEARCH_HPP + + diff --git a/scintilla/include/boost/regex/v5/regex_split.hpp b/scintilla/include/boost/regex/v5/regex_split.hpp new file mode 100644 index 0000000000..8d93289649 --- /dev/null +++ b/scintilla/include/boost/regex/v5/regex_split.hpp @@ -0,0 +1,152 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_split.hpp + * VERSION see + * DESCRIPTION: Implements regex_split and associated functions. + * Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + +#ifndef BOOST_REGEX_SPLIT_HPP +#define BOOST_REGEX_SPLIT_HPP + +namespace boost{ + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#if BOOST_REGEX_MSVC < 1910 +#pragma warning(disable:4800) +#endif +#endif + +namespace BOOST_REGEX_DETAIL_NS{ + +template +const basic_regex& get_default_expression(charT) +{ + static const charT expression_text[4] = { '\\', 's', '+', '\00', }; + static const basic_regex e(expression_text); + return e; +} + +template +class split_pred +{ + typedef std::basic_string string_type; + typedef typename string_type::const_iterator iterator_type; + iterator_type* p_last; + OutputIterator* p_out; + std::size_t* p_max; + std::size_t initial_max; +public: + split_pred(iterator_type* a, OutputIterator* b, std::size_t* c) + : p_last(a), p_out(b), p_max(c), initial_max(*c) {} + + bool operator()(const match_results& what); +}; + +template +bool split_pred::operator() + (const match_results& what) +{ + *p_last = what[0].second; + if(what.size() > 1) + { + // output sub-expressions only: + for(unsigned i = 1; i < what.size(); ++i) + { + *(*p_out) = what.str(i); + ++(*p_out); + if(0 == --*p_max) return false; + } + return *p_max != 0; + } + else + { + // output $` only if it's not-null or not at the start of the input: + const sub_match& sub = what[-1]; + if((sub.first != sub.second) || (*p_max != initial_max)) + { + *(*p_out) = sub.str(); + ++(*p_out); + return --*p_max; + } + } + // + // initial null, do nothing: + return true; +} + +} // namespace BOOST_REGEX_DETAIL_NS + +template +std::size_t regex_split(OutputIterator out, + std::basic_string& s, + const basic_regex& e, + match_flag_type flags, + std::size_t max_split) +{ + typedef typename std::basic_string::const_iterator ci_t; + //typedef typename match_results::allocator_type match_allocator; + ci_t last = s.begin(); + std::size_t init_size = max_split; + BOOST_REGEX_DETAIL_NS::split_pred pred(&last, &out, &max_split); + ci_t i, j; + i = s.begin(); + j = s.end(); + regex_grep(pred, i, j, e, flags); + // + // if there is still input left, do a final push as long as max_split + // is not exhausted, and we're not splitting sub-expressions rather + // than whitespace: + if(max_split && (last != s.end()) && (e.mark_count() == 0)) + { + *out = std::basic_string((ci_t)last, (ci_t)s.end()); + ++out; + last = s.end(); + --max_split; + } + // + // delete from the string everything that has been processed so far: + s.erase(0, last - s.begin()); + // + // return the number of new records pushed: + return init_size - max_split; +} + +template +inline std::size_t regex_split(OutputIterator out, + std::basic_string& s, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_split(out, s, e, flags, UINT_MAX); +} + +template +inline std::size_t regex_split(OutputIterator out, + std::basic_string& s) +{ + return regex_split(out, s, BOOST_REGEX_DETAIL_NS::get_default_expression(charT(0)), match_default, UINT_MAX); +} + +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + +} // namespace boost + +#endif + + diff --git a/scintilla/include/boost/regex/v5/regex_token_iterator.hpp b/scintilla/include/boost/regex/v5/regex_token_iterator.hpp new file mode 100644 index 0000000000..16832746f7 --- /dev/null +++ b/scintilla/include/boost/regex/v5/regex_token_iterator.hpp @@ -0,0 +1,255 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_token_iterator.hpp + * VERSION see + * DESCRIPTION: Provides regex_token_iterator implementation. + */ + +#ifndef BOOST_REGEX_V5_REGEX_TOKEN_ITERATOR_HPP +#define BOOST_REGEX_V5_REGEX_TOKEN_ITERATOR_HPP + +#include + +namespace boost{ + +template +class regex_token_iterator_implementation +{ + typedef basic_regex regex_type; + typedef sub_match value_type; + + match_results what; // current match + BidirectionalIterator base; // start of search area + BidirectionalIterator end; // end of search area + const regex_type re; // the expression + match_flag_type flags; // match flags + value_type result; // the current string result + int N; // the current sub-expression being enumerated + std::vector subs; // the sub-expressions to enumerate + +public: + regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, int sub, match_flag_type f) + : end(last), re(*p), flags(f), N(0){ subs.push_back(sub); } + regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const std::vector& v, match_flag_type f) + : end(last), re(*p), flags(f), N(0), subs(v){} + template + regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const int (&submatches)[CN], match_flag_type f) + : end(last), re(*p), flags(f), N(0) + { + for(std::size_t i = 0; i < CN; ++i) + { + subs.push_back(submatches[i]); + } + } + regex_token_iterator_implementation(const regex_token_iterator_implementation& other) = default; + bool init(BidirectionalIterator first) + { + N = 0; + base = first; + if(regex_search(first, end, what, re, flags, base) == true) + { + N = 0; + result = ((subs[N] == -1) ? what.prefix() : what[(int)subs[N]]); + return true; + } + else if((subs[N] == -1) && (first != end)) + { + result.first = first; + result.second = end; + result.matched = (first != end); + N = -1; + return true; + } + return false; + } + bool compare(const regex_token_iterator_implementation& that) + { + if(this == &that) return true; + return (&re.get_data() == &that.re.get_data()) + && (end == that.end) + && (flags == that.flags) + && (N == that.N) + && (what[0].first == that.what[0].first) + && (what[0].second == that.what[0].second); + } + const value_type& get() + { return result; } + bool next() + { + if(N == -1) + return false; + if(N+1 < (int)subs.size()) + { + ++N; + result =((subs[N] == -1) ? what.prefix() : what[subs[N]]); + return true; + } + //if(what.prefix().first != what[0].second) + // flags |= /*match_prev_avail |*/ regex_constants::match_not_bob; + BidirectionalIterator last_end(what[0].second); + if(regex_search(last_end, end, what, re, ((what[0].first == what[0].second) ? flags | regex_constants::match_not_initial_null : flags), base)) + { + N =0; + result =((subs[N] == -1) ? what.prefix() : what[subs[N]]); + return true; + } + else if((last_end != end) && (subs[0] == -1)) + { + N =-1; + result.first = last_end; + result.second = end; + result.matched = (last_end != end); + return true; + } + return false; + } +private: + regex_token_iterator_implementation& operator=(const regex_token_iterator_implementation&); +}; + +template ::value_type, + class traits = regex_traits > +class regex_token_iterator +{ +private: + typedef regex_token_iterator_implementation impl; + typedef std::shared_ptr pimpl; +public: + typedef basic_regex regex_type; + typedef sub_match value_type; + typedef typename std::iterator_traits::difference_type + difference_type; + typedef const value_type* pointer; + typedef const value_type& reference; + typedef std::forward_iterator_tag iterator_category; + + regex_token_iterator(){} + regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + int submatch = 0, match_flag_type m = match_default) + : pdata(new impl(&re, b, submatch, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } + regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + const std::vector& submatches, match_flag_type m = match_default) + : pdata(new impl(&re, b, submatches, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } + template + regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + const int (&submatches)[N], match_flag_type m = match_default) + : pdata(new impl(&re, b, submatches, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } + regex_token_iterator(const regex_token_iterator& that) + : pdata(that.pdata) {} + regex_token_iterator& operator=(const regex_token_iterator& that) + { + pdata = that.pdata; + return *this; + } + bool operator==(const regex_token_iterator& that)const + { + if((pdata.get() == 0) || (that.pdata.get() == 0)) + return pdata.get() == that.pdata.get(); + return pdata->compare(*(that.pdata.get())); + } + bool operator!=(const regex_token_iterator& that)const + { return !(*this == that); } + const value_type& operator*()const + { return pdata->get(); } + const value_type* operator->()const + { return &(pdata->get()); } + regex_token_iterator& operator++() + { + cow(); + if(0 == pdata->next()) + { + pdata.reset(); + } + return *this; + } + regex_token_iterator operator++(int) + { + regex_token_iterator result(*this); + ++(*this); + return result; + } +private: + + pimpl pdata; + + void cow() + { + // copy-on-write + if(pdata.get() && (pdata.use_count() > 1)) + { + pdata.reset(new impl(*(pdata.get()))); + } + } +}; + +typedef regex_token_iterator cregex_token_iterator; +typedef regex_token_iterator sregex_token_iterator; +#ifndef BOOST_NO_WREGEX +typedef regex_token_iterator wcregex_token_iterator; +typedef regex_token_iterator wsregex_token_iterator; +#endif + +template +inline regex_token_iterator make_regex_token_iterator(const charT* p, const basic_regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_token_iterator(p, p+traits::length(p), e, submatch, m); +} +template +inline regex_token_iterator::const_iterator, charT, traits> make_regex_token_iterator(const std::basic_string& p, const basic_regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_token_iterator::const_iterator, charT, traits>(p.begin(), p.end(), e, submatch, m); +} +template +inline regex_token_iterator make_regex_token_iterator(const charT* p, const basic_regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_token_iterator(p, p+traits::length(p), e, submatch, m); +} +template +inline regex_token_iterator::const_iterator, charT, traits> make_regex_token_iterator(const std::basic_string& p, const basic_regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_token_iterator::const_iterator, charT, traits>(p.begin(), p.end(), e, submatch, m); +} +template +inline regex_token_iterator make_regex_token_iterator(const charT* p, const basic_regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_token_iterator(p, p+traits::length(p), e, submatch, m); +} +template +inline regex_token_iterator::const_iterator, charT, traits> make_regex_token_iterator(const std::basic_string& p, const basic_regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_token_iterator::const_iterator, charT, traits>(p.begin(), p.end(), e, submatch, m); +} + +} // namespace boost + +#endif // BOOST_REGEX_V5_REGEX_TOKEN_ITERATOR_HPP + + + + diff --git a/scintilla/include/boost/regex/v5/regex_traits.hpp b/scintilla/include/boost/regex/v5/regex_traits.hpp new file mode 100644 index 0000000000..6c64695b61 --- /dev/null +++ b/scintilla/include/boost/regex/v5/regex_traits.hpp @@ -0,0 +1,130 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_traits.hpp + * VERSION see + * DESCRIPTION: Declares regular expression traits classes. + */ + +#ifndef BOOST_REGEX_TRAITS_HPP_INCLUDED +#define BOOST_REGEX_TRAITS_HPP_INCLUDED + +#include +#include +#include +#include +#include +#include +#include +#if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32) +# include +#endif +#include + +namespace boost{ + +template +struct regex_traits : public implementationT +{ + regex_traits() : implementationT() {} +}; + +// +// class regex_traits_wrapper. +// this is what our implementation will actually store; +// it provides default implementations of the "optional" +// interfaces that we support, in addition to the +// required "standard" ones: +// +namespace BOOST_REGEX_DETAIL_NS{ + + template + struct has_boost_extensions_tag + { + template + static double checker(U*, typename U::boost_extensions_tag* = nullptr); + static char checker(...); + static T* get(); + + static const bool value = sizeof(checker(get())) > 1; + }; + + +template +struct default_wrapper : public BaseT +{ + typedef typename BaseT::char_type char_type; + std::string error_string(::boost::regex_constants::error_type e)const + { + return ::boost::BOOST_REGEX_DETAIL_NS::get_default_error_string(e); + } + ::boost::regex_constants::syntax_type syntax_type(char_type c)const + { + return (char_type(c & 0x7f) == c) ? get_default_syntax_type(static_cast(c)) : ::boost::regex_constants::syntax_char; + } + ::boost::regex_constants::escape_syntax_type escape_syntax_type(char_type c)const + { + return (char_type(c & 0x7f) == c) ? get_default_escape_syntax_type(static_cast(c)) : ::boost::regex_constants::escape_type_identity; + } + std::intmax_t toi(const char_type*& p1, const char_type* p2, int radix)const + { + return ::boost::BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this); + } + char_type translate(char_type c, bool icase)const + { + return (icase ? this->translate_nocase(c) : this->translate(c)); + } + char_type translate(char_type c)const + { + return BaseT::translate(c); + } + char_type tolower(char_type c)const + { + return ::boost::BOOST_REGEX_DETAIL_NS::global_lower(c); + } + char_type toupper(char_type c)const + { + return ::boost::BOOST_REGEX_DETAIL_NS::global_upper(c); + } +}; + +template +struct compute_wrapper_base +{ + typedef BaseT type; +}; +template +struct compute_wrapper_base +{ + typedef default_wrapper type; +}; + +} // namespace BOOST_REGEX_DETAIL_NS + +template +struct regex_traits_wrapper + : public ::boost::BOOST_REGEX_DETAIL_NS::compute_wrapper_base< + BaseT, + ::boost::BOOST_REGEX_DETAIL_NS::has_boost_extensions_tag::value + >::type +{ + regex_traits_wrapper(){} +private: + regex_traits_wrapper(const regex_traits_wrapper&); + regex_traits_wrapper& operator=(const regex_traits_wrapper&); +}; + +} // namespace boost + +#endif // include + diff --git a/scintilla/include/boost/regex/v5/regex_traits_defaults.hpp b/scintilla/include/boost/regex/v5/regex_traits_defaults.hpp new file mode 100644 index 0000000000..4b81441ffe --- /dev/null +++ b/scintilla/include/boost/regex/v5/regex_traits_defaults.hpp @@ -0,0 +1,996 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_traits_defaults.hpp + * VERSION see + * DESCRIPTION: Declares API's for access to regex_traits default properties. + */ + +#ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED +#define BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace boost{ namespace BOOST_REGEX_DETAIL_NS{ + + +// +// helpers to suppress warnings: +// +template +inline bool is_extended(charT c) +{ + typedef typename std::make_unsigned::type unsigned_type; + return (sizeof(charT) > 1) && (static_cast(c) >= 256u); +} +inline bool is_extended(char) +{ return false; } + +inline const char* get_default_syntax(regex_constants::syntax_type n) +{ + // if the user hasn't supplied a message catalog, then this supplies + // default "messages" for us to load in the range 1-100. + const char* messages[] = { + "", + "(", + ")", + "$", + "^", + ".", + "*", + "+", + "?", + "[", + "]", + "|", + "\\", + "#", + "-", + "{", + "}", + "0123456789", + "b", + "B", + "<", + ">", + "", + "", + "A`", + "z'", + "\n", + ",", + "a", + "f", + "n", + "r", + "t", + "v", + "x", + "c", + ":", + "=", + "e", + "", + "", + "", + "", + "", + "", + "", + "", + "E", + "Q", + "X", + "C", + "Z", + "G", + "!", + "p", + "P", + "N", + "gk", + "K", + "R", + }; + + return ((n >= (sizeof(messages) / sizeof(messages[1]))) ? "" : messages[n]); +} + +inline const char* get_default_error_string(regex_constants::error_type n) +{ + static const char* const s_default_error_messages[] = { + "Success", /* REG_NOERROR 0 error_ok */ + "No match", /* REG_NOMATCH 1 error_no_match */ + "Invalid regular expression.", /* REG_BADPAT 2 error_bad_pattern */ + "Invalid collation character.", /* REG_ECOLLATE 3 error_collate */ + "Invalid character class name, collating name, or character range.", /* REG_ECTYPE 4 error_ctype */ + "Invalid or unterminated escape sequence.", /* REG_EESCAPE 5 error_escape */ + "Invalid back reference: specified capturing group does not exist.", /* REG_ESUBREG 6 error_backref */ + "Unmatched [ or [^ in character class declaration.", /* REG_EBRACK 7 error_brack */ + "Unmatched marking parenthesis ( or \\(.", /* REG_EPAREN 8 error_paren */ + "Unmatched quantified repeat operator { or \\{.", /* REG_EBRACE 9 error_brace */ + "Invalid content of repeat range.", /* REG_BADBR 10 error_badbrace */ + "Invalid range end in character class", /* REG_ERANGE 11 error_range */ + "Out of memory.", /* REG_ESPACE 12 error_space NOT USED */ + "Invalid preceding regular expression prior to repetition operator.", /* REG_BADRPT 13 error_badrepeat */ + "Premature end of regular expression", /* REG_EEND 14 error_end NOT USED */ + "Regular expression is too large.", /* REG_ESIZE 15 error_size NOT USED */ + "Unmatched ) or \\)", /* REG_ERPAREN 16 error_right_paren NOT USED */ + "Empty regular expression.", /* REG_EMPTY 17 error_empty */ + "The complexity of matching the regular expression exceeded predefined bounds. " + "Try refactoring the regular expression to make each choice made by the state machine unambiguous. " + "This exception is thrown to prevent \"eternal\" matches that take an " + "indefinite period time to locate.", /* REG_ECOMPLEXITY 18 error_complexity */ + "Ran out of stack space trying to match the regular expression.", /* REG_ESTACK 19 error_stack */ + "Invalid or unterminated Perl (?...) sequence.", /* REG_E_PERL 20 error_perl */ + "Unknown error.", /* REG_E_UNKNOWN 21 error_unknown */ + }; + + return (n > ::boost::regex_constants::error_unknown) ? s_default_error_messages[::boost::regex_constants::error_unknown] : s_default_error_messages[n]; +} + +inline regex_constants::syntax_type get_default_syntax_type(char c) +{ + // + // char_syntax determines how the compiler treats a given character + // in a regular expression. + // + static regex_constants::syntax_type char_syntax[] = { + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_newline, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /* */ // 32 + regex_constants::syntax_not, /*!*/ + regex_constants::syntax_char, /*"*/ + regex_constants::syntax_hash, /*#*/ + regex_constants::syntax_dollar, /*$*/ + regex_constants::syntax_char, /*%*/ + regex_constants::syntax_char, /*&*/ + regex_constants::escape_type_end_buffer, /*'*/ + regex_constants::syntax_open_mark, /*(*/ + regex_constants::syntax_close_mark, /*)*/ + regex_constants::syntax_star, /***/ + regex_constants::syntax_plus, /*+*/ + regex_constants::syntax_comma, /*,*/ + regex_constants::syntax_dash, /*-*/ + regex_constants::syntax_dot, /*.*/ + regex_constants::syntax_char, /*/*/ + regex_constants::syntax_digit, /*0*/ + regex_constants::syntax_digit, /*1*/ + regex_constants::syntax_digit, /*2*/ + regex_constants::syntax_digit, /*3*/ + regex_constants::syntax_digit, /*4*/ + regex_constants::syntax_digit, /*5*/ + regex_constants::syntax_digit, /*6*/ + regex_constants::syntax_digit, /*7*/ + regex_constants::syntax_digit, /*8*/ + regex_constants::syntax_digit, /*9*/ + regex_constants::syntax_colon, /*:*/ + regex_constants::syntax_char, /*;*/ + regex_constants::escape_type_left_word, /*<*/ + regex_constants::syntax_equal, /*=*/ + regex_constants::escape_type_right_word, /*>*/ + regex_constants::syntax_question, /*?*/ + regex_constants::syntax_char, /*@*/ + regex_constants::syntax_char, /*A*/ + regex_constants::syntax_char, /*B*/ + regex_constants::syntax_char, /*C*/ + regex_constants::syntax_char, /*D*/ + regex_constants::syntax_char, /*E*/ + regex_constants::syntax_char, /*F*/ + regex_constants::syntax_char, /*G*/ + regex_constants::syntax_char, /*H*/ + regex_constants::syntax_char, /*I*/ + regex_constants::syntax_char, /*J*/ + regex_constants::syntax_char, /*K*/ + regex_constants::syntax_char, /*L*/ + regex_constants::syntax_char, /*M*/ + regex_constants::syntax_char, /*N*/ + regex_constants::syntax_char, /*O*/ + regex_constants::syntax_char, /*P*/ + regex_constants::syntax_char, /*Q*/ + regex_constants::syntax_char, /*R*/ + regex_constants::syntax_char, /*S*/ + regex_constants::syntax_char, /*T*/ + regex_constants::syntax_char, /*U*/ + regex_constants::syntax_char, /*V*/ + regex_constants::syntax_char, /*W*/ + regex_constants::syntax_char, /*X*/ + regex_constants::syntax_char, /*Y*/ + regex_constants::syntax_char, /*Z*/ + regex_constants::syntax_open_set, /*[*/ + regex_constants::syntax_escape, /*\*/ + regex_constants::syntax_close_set, /*]*/ + regex_constants::syntax_caret, /*^*/ + regex_constants::syntax_char, /*_*/ + regex_constants::syntax_char, /*`*/ + regex_constants::syntax_char, /*a*/ + regex_constants::syntax_char, /*b*/ + regex_constants::syntax_char, /*c*/ + regex_constants::syntax_char, /*d*/ + regex_constants::syntax_char, /*e*/ + regex_constants::syntax_char, /*f*/ + regex_constants::syntax_char, /*g*/ + regex_constants::syntax_char, /*h*/ + regex_constants::syntax_char, /*i*/ + regex_constants::syntax_char, /*j*/ + regex_constants::syntax_char, /*k*/ + regex_constants::syntax_char, /*l*/ + regex_constants::syntax_char, /*m*/ + regex_constants::syntax_char, /*n*/ + regex_constants::syntax_char, /*o*/ + regex_constants::syntax_char, /*p*/ + regex_constants::syntax_char, /*q*/ + regex_constants::syntax_char, /*r*/ + regex_constants::syntax_char, /*s*/ + regex_constants::syntax_char, /*t*/ + regex_constants::syntax_char, /*u*/ + regex_constants::syntax_char, /*v*/ + regex_constants::syntax_char, /*w*/ + regex_constants::syntax_char, /*x*/ + regex_constants::syntax_char, /*y*/ + regex_constants::syntax_char, /*z*/ + regex_constants::syntax_open_brace, /*{*/ + regex_constants::syntax_or, /*|*/ + regex_constants::syntax_close_brace, /*}*/ + regex_constants::syntax_char, /*~*/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + }; + + return char_syntax[(unsigned char)c]; +} + +inline regex_constants::escape_syntax_type get_default_escape_syntax_type(char c) +{ + // + // char_syntax determines how the compiler treats a given character + // in a regular expression. + // + static regex_constants::escape_syntax_type char_syntax[] = { + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /* */ // 32 + regex_constants::escape_type_identity, /*!*/ + regex_constants::escape_type_identity, /*"*/ + regex_constants::escape_type_identity, /*#*/ + regex_constants::escape_type_identity, /*$*/ + regex_constants::escape_type_identity, /*%*/ + regex_constants::escape_type_identity, /*&*/ + regex_constants::escape_type_end_buffer, /*'*/ + regex_constants::syntax_open_mark, /*(*/ + regex_constants::syntax_close_mark, /*)*/ + regex_constants::escape_type_identity, /***/ + regex_constants::syntax_plus, /*+*/ + regex_constants::escape_type_identity, /*,*/ + regex_constants::escape_type_identity, /*-*/ + regex_constants::escape_type_identity, /*.*/ + regex_constants::escape_type_identity, /*/*/ + regex_constants::escape_type_decimal, /*0*/ + regex_constants::escape_type_backref, /*1*/ + regex_constants::escape_type_backref, /*2*/ + regex_constants::escape_type_backref, /*3*/ + regex_constants::escape_type_backref, /*4*/ + regex_constants::escape_type_backref, /*5*/ + regex_constants::escape_type_backref, /*6*/ + regex_constants::escape_type_backref, /*7*/ + regex_constants::escape_type_backref, /*8*/ + regex_constants::escape_type_backref, /*9*/ + regex_constants::escape_type_identity, /*:*/ + regex_constants::escape_type_identity, /*;*/ + regex_constants::escape_type_left_word, /*<*/ + regex_constants::escape_type_identity, /*=*/ + regex_constants::escape_type_right_word, /*>*/ + regex_constants::syntax_question, /*?*/ + regex_constants::escape_type_identity, /*@*/ + regex_constants::escape_type_start_buffer, /*A*/ + regex_constants::escape_type_not_word_assert, /*B*/ + regex_constants::escape_type_C, /*C*/ + regex_constants::escape_type_not_class, /*D*/ + regex_constants::escape_type_E, /*E*/ + regex_constants::escape_type_not_class, /*F*/ + regex_constants::escape_type_G, /*G*/ + regex_constants::escape_type_not_class, /*H*/ + regex_constants::escape_type_not_class, /*I*/ + regex_constants::escape_type_not_class, /*J*/ + regex_constants::escape_type_reset_start_mark, /*K*/ + regex_constants::escape_type_not_class, /*L*/ + regex_constants::escape_type_not_class, /*M*/ + regex_constants::escape_type_named_char, /*N*/ + regex_constants::escape_type_not_class, /*O*/ + regex_constants::escape_type_not_property, /*P*/ + regex_constants::escape_type_Q, /*Q*/ + regex_constants::escape_type_line_ending, /*R*/ + regex_constants::escape_type_not_class, /*S*/ + regex_constants::escape_type_not_class, /*T*/ + regex_constants::escape_type_not_class, /*U*/ + regex_constants::escape_type_not_class, /*V*/ + regex_constants::escape_type_not_class, /*W*/ + regex_constants::escape_type_X, /*X*/ + regex_constants::escape_type_not_class, /*Y*/ + regex_constants::escape_type_Z, /*Z*/ + regex_constants::escape_type_identity, /*[*/ + regex_constants::escape_type_identity, /*\*/ + regex_constants::escape_type_identity, /*]*/ + regex_constants::escape_type_identity, /*^*/ + regex_constants::escape_type_identity, /*_*/ + regex_constants::escape_type_start_buffer, /*`*/ + regex_constants::escape_type_control_a, /*a*/ + regex_constants::escape_type_word_assert, /*b*/ + regex_constants::escape_type_ascii_control, /*c*/ + regex_constants::escape_type_class, /*d*/ + regex_constants::escape_type_e, /*e*/ + regex_constants::escape_type_control_f, /*f*/ + regex_constants::escape_type_extended_backref, /*g*/ + regex_constants::escape_type_class, /*h*/ + regex_constants::escape_type_class, /*i*/ + regex_constants::escape_type_class, /*j*/ + regex_constants::escape_type_extended_backref, /*k*/ + regex_constants::escape_type_class, /*l*/ + regex_constants::escape_type_class, /*m*/ + regex_constants::escape_type_control_n, /*n*/ + regex_constants::escape_type_class, /*o*/ + regex_constants::escape_type_property, /*p*/ + regex_constants::escape_type_class, /*q*/ + regex_constants::escape_type_control_r, /*r*/ + regex_constants::escape_type_class, /*s*/ + regex_constants::escape_type_control_t, /*t*/ + regex_constants::escape_type_class, /*u*/ + regex_constants::escape_type_control_v, /*v*/ + regex_constants::escape_type_class, /*w*/ + regex_constants::escape_type_hex, /*x*/ + regex_constants::escape_type_class, /*y*/ + regex_constants::escape_type_end_buffer, /*z*/ + regex_constants::syntax_open_brace, /*{*/ + regex_constants::syntax_or, /*|*/ + regex_constants::syntax_close_brace, /*}*/ + regex_constants::escape_type_identity, /*~*/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + }; + + return char_syntax[(unsigned char)c]; +} + +// is charT c a combining character? +inline bool is_combining_implementation(std::uint_least16_t c) +{ + const std::uint_least16_t combining_ranges[] = { 0x0300, 0x0361, + 0x0483, 0x0486, + 0x0903, 0x0903, + 0x093E, 0x0940, + 0x0949, 0x094C, + 0x0982, 0x0983, + 0x09BE, 0x09C0, + 0x09C7, 0x09CC, + 0x09D7, 0x09D7, + 0x0A3E, 0x0A40, + 0x0A83, 0x0A83, + 0x0ABE, 0x0AC0, + 0x0AC9, 0x0ACC, + 0x0B02, 0x0B03, + 0x0B3E, 0x0B3E, + 0x0B40, 0x0B40, + 0x0B47, 0x0B4C, + 0x0B57, 0x0B57, + 0x0B83, 0x0B83, + 0x0BBE, 0x0BBF, + 0x0BC1, 0x0BCC, + 0x0BD7, 0x0BD7, + 0x0C01, 0x0C03, + 0x0C41, 0x0C44, + 0x0C82, 0x0C83, + 0x0CBE, 0x0CBE, + 0x0CC0, 0x0CC4, + 0x0CC7, 0x0CCB, + 0x0CD5, 0x0CD6, + 0x0D02, 0x0D03, + 0x0D3E, 0x0D40, + 0x0D46, 0x0D4C, + 0x0D57, 0x0D57, + 0x0F7F, 0x0F7F, + 0x20D0, 0x20E1, + 0x3099, 0x309A, + 0xFE20, 0xFE23, + 0xffff, 0xffff, }; + + const std::uint_least16_t* p = combining_ranges + 1; + while (*p < c) p += 2; + --p; + if ((c >= *p) && (c <= *(p + 1))) + return true; + return false; +} + +template +inline bool is_combining(charT c) +{ + return (c <= static_cast(0)) ? false : ((c >= static_cast((std::numeric_limits::max)())) ? false : is_combining_implementation(static_cast(c))); +} +template <> +inline bool is_combining(char) +{ + return false; +} +template <> +inline bool is_combining(signed char) +{ + return false; +} +template <> +inline bool is_combining(unsigned char) +{ + return false; +} +#ifdef _MSC_VER +template<> +inline bool is_combining(wchar_t c) +{ + return is_combining_implementation(static_cast(c)); +} +#elif !defined(__DECCXX) && !defined(__osf__) && !defined(__OSF__) && defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T) +#if defined(WCHAR_MAX) && (WCHAR_MAX <= USHRT_MAX) +template<> +inline bool is_combining(wchar_t c) +{ + return is_combining_implementation(static_cast(c)); +} +#else +template<> +inline bool is_combining(wchar_t c) +{ + return (c >= (std::numeric_limits::max)()) ? false : is_combining_implementation(static_cast(c)); +} +#endif +#endif + +// +// is a charT c a line separator? +// +template +inline bool is_separator(charT c) +{ + return BOOST_REGEX_MAKE_BOOL( + (c == static_cast('\n')) + || (c == static_cast('\r')) + || (c == static_cast('\f')) + || (static_cast(c) == 0x2028u) + || (static_cast(c) == 0x2029u) + || (static_cast(c) == 0x85u)); +} +template <> +inline bool is_separator(char c) +{ + return BOOST_REGEX_MAKE_BOOL((c == '\n') || (c == '\r') || (c == '\f')); +} + +// +// get a default collating element: +// +inline std::string lookup_default_collate_name(const std::string& name) +{ + // + // these are the POSIX collating names: + // + static const char* def_coll_names[] = { + "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "alert", "backspace", "tab", "newline", + "vertical-tab", "form-feed", "carriage-return", "SO", "SI", "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", + "SYN", "ETB", "CAN", "EM", "SUB", "ESC", "IS4", "IS3", "IS2", "IS1", "space", "exclamation-mark", + "quotation-mark", "number-sign", "dollar-sign", "percent-sign", "ampersand", "apostrophe", + "left-parenthesis", "right-parenthesis", "asterisk", "plus-sign", "comma", "hyphen", + "period", "slash", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", + "colon", "semicolon", "less-than-sign", "equals-sign", "greater-than-sign", + "question-mark", "commercial-at", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", + "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "left-square-bracket", "backslash", + "right-square-bracket", "circumflex", "underscore", "grave-accent", "a", "b", "c", "d", "e", "f", + "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "left-curly-bracket", + "vertical-line", "right-curly-bracket", "tilde", "DEL", "", + }; + + // these multi-character collating elements + // should keep most Western-European locales + // happy - we should really localise these a + // little more - but this will have to do for + // now: + + static const char* def_multi_coll[] = { + "ae", + "Ae", + "AE", + "ch", + "Ch", + "CH", + "ll", + "Ll", + "LL", + "ss", + "Ss", + "SS", + "nj", + "Nj", + "NJ", + "dz", + "Dz", + "DZ", + "lj", + "Lj", + "LJ", + "", + }; + + unsigned int i = 0; + while (*def_coll_names[i]) + { + if (def_coll_names[i] == name) + { + return std::string(1, char(i)); + } + ++i; + } + i = 0; + while (*def_multi_coll[i]) + { + if (def_multi_coll[i] == name) + { + return def_multi_coll[i]; + } + ++i; + } + return std::string(); +} + +// +// get the state_id of a character classification, the individual +// traits classes then transform that state_id into a bitmask: +// +template +struct character_pointer_range +{ + const charT* p1; + const charT* p2; + + bool operator < (const character_pointer_range& r)const + { + return std::lexicographical_compare(p1, p2, r.p1, r.p2); + } + bool operator == (const character_pointer_range& r)const + { + // Not only do we check that the ranges are of equal size before + // calling std::equal, but there is no other algorithm available: + // not even a non-standard MS one. So forward to unchecked_equal + // in the MS case. +#ifdef __cpp_lib_robust_nonmodifying_seq_ops + return std::equal(p1, p2, r.p1, r.p2); +#elif defined(BOOST_REGEX_MSVC) + if (((p2 - p1) != (r.p2 - r.p1))) + return false; + const charT* with = r.p1; + const charT* pos = p1; + while (pos != p2) + if (*pos++ != *with++) return false; + return true; + +#else + return ((p2 - p1) == (r.p2 - r.p1)) && std::equal(p1, p2, r.p1); +#endif + } +}; +template +int get_default_class_id(const charT* p1, const charT* p2) +{ + static const charT data[73] = { + 'a', 'l', 'n', 'u', 'm', + 'a', 'l', 'p', 'h', 'a', + 'b', 'l', 'a', 'n', 'k', + 'c', 'n', 't', 'r', 'l', + 'd', 'i', 'g', 'i', 't', + 'g', 'r', 'a', 'p', 'h', + 'l', 'o', 'w', 'e', 'r', + 'p', 'r', 'i', 'n', 't', + 'p', 'u', 'n', 'c', 't', + 's', 'p', 'a', 'c', 'e', + 'u', 'n', 'i', 'c', 'o', 'd', 'e', + 'u', 'p', 'p', 'e', 'r', + 'v', + 'w', 'o', 'r', 'd', + 'x', 'd', 'i', 'g', 'i', 't', + }; + + static const character_pointer_range ranges[21] = + { + {data+0, data+5,}, // alnum + {data+5, data+10,}, // alpha + {data+10, data+15,}, // blank + {data+15, data+20,}, // cntrl + {data+20, data+21,}, // d + {data+20, data+25,}, // digit + {data+25, data+30,}, // graph + {data+29, data+30,}, // h + {data+30, data+31,}, // l + {data+30, data+35,}, // lower + {data+35, data+40,}, // print + {data+40, data+45,}, // punct + {data+45, data+46,}, // s + {data+45, data+50,}, // space + {data+57, data+58,}, // u + {data+50, data+57,}, // unicode + {data+57, data+62,}, // upper + {data+62, data+63,}, // v + {data+63, data+64,}, // w + {data+63, data+67,}, // word + {data+67, data+73,}, // xdigit + }; + const character_pointer_range* ranges_begin = ranges; + const character_pointer_range* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0])); + + character_pointer_range t = { p1, p2, }; + const character_pointer_range* p = std::lower_bound(ranges_begin, ranges_end, t); + if((p != ranges_end) && (t == *p)) + return static_cast(p - ranges); + return -1; +} + +// +// helper functions: +// +template +std::ptrdiff_t global_length(const charT* p) +{ + std::ptrdiff_t n = 0; + while(*p) + { + ++p; + ++n; + } + return n; +} +template<> +inline std::ptrdiff_t global_length(const char* p) +{ + return (std::strlen)(p); +} +#ifndef BOOST_NO_WREGEX +template<> +inline std::ptrdiff_t global_length(const wchar_t* p) +{ + return (std::ptrdiff_t)(std::wcslen)(p); +} +#endif +template +inline charT global_lower(charT c) +{ + return c; +} +template +inline charT global_upper(charT c) +{ + return c; +} + +inline char do_global_lower(char c) +{ + return static_cast((std::tolower)((unsigned char)c)); +} + +inline char do_global_upper(char c) +{ + return static_cast((std::toupper)((unsigned char)c)); +} +#ifndef BOOST_NO_WREGEX +inline wchar_t do_global_lower(wchar_t c) +{ + return (std::towlower)(c); +} + +inline wchar_t do_global_upper(wchar_t c) +{ + return (std::towupper)(c); +} +#endif +// +// This sucks: declare template specialisations of global_lower/global_upper +// that just forward to the non-template implementation functions. We do +// this because there is one compiler (Compaq Tru64 C++) that doesn't seem +// to differentiate between templates and non-template overloads.... +// what's more, the primary template, plus all overloads have to be +// defined in the same translation unit (if one is inline they all must be) +// otherwise the "local template instantiation" compiler option can pick +// the wrong instantiation when linking: +// +template<> inline char global_lower(char c) { return do_global_lower(c); } +template<> inline char global_upper(char c) { return do_global_upper(c); } +#ifndef BOOST_NO_WREGEX +template<> inline wchar_t global_lower(wchar_t c) { return do_global_lower(c); } +template<> inline wchar_t global_upper(wchar_t c) { return do_global_upper(c); } +#endif + +template +int global_value(charT c) +{ + static const charT zero = '0'; + static const charT nine = '9'; + static const charT a = 'a'; + static const charT f = 'f'; + static const charT A = 'A'; + static const charT F = 'F'; + + if(c > f) return -1; + if(c >= a) return 10 + (c - a); + if(c > F) return -1; + if(c >= A) return 10 + (c - A); + if(c > nine) return -1; + if(c >= zero) return c - zero; + return -1; +} +template +std::intmax_t global_toi(const charT*& p1, const charT* p2, int radix, const traits& t) +{ + (void)t; // warning suppression + std::intmax_t limit = (std::numeric_limits::max)() / radix; + std::intmax_t next_value = t.value(*p1, radix); + if((p1 == p2) || (next_value < 0) || (next_value >= radix)) + return -1; + std::intmax_t result = 0; + while(p1 != p2) + { + next_value = t.value(*p1, radix); + if((next_value < 0) || (next_value >= radix)) + break; + result *= radix; + result += next_value; + ++p1; + if (result > limit) + return -1; + } + return result; +} + +template +inline typename std::enable_if<(sizeof(charT) > 1), const charT*>::type get_escape_R_string() +{ +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +# pragma warning(disable:4309 4245) +#endif + static const charT e1[] = { '(', '?', '-', 'x', ':', '(', '?', '>', '\x0D', '\x0A', '?', + '|', '[', '\x0A', '\x0B', '\x0C', static_cast(0x85), static_cast(0x2028), + static_cast(0x2029), ']', ')', ')', '\0' }; + static const charT e2[] = { '(', '?', '-', 'x', ':', '(', '?', '>', '\x0D', '\x0A', '?', + '|', '[', '\x0A', '\x0B', '\x0C', static_cast(0x85), ']', ')', ')', '\0' }; + + charT c = static_cast(0x2029u); + bool b = (static_cast(c) == 0x2029u); + + return (b ? e1 : e2); +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif +} + +template +inline typename std::enable_if<(sizeof(charT) == 1), const charT*>::type get_escape_R_string() +{ +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +# pragma warning(disable:4309 4245) +#endif + static const charT e2[] = { + static_cast('('), + static_cast('?'), + static_cast('-'), + static_cast('x'), + static_cast(':'), + static_cast('('), + static_cast('?'), + static_cast('>'), + static_cast('\x0D'), + static_cast('\x0A'), + static_cast('?'), + static_cast('|'), + static_cast('['), + static_cast('\x0A'), + static_cast('\x0B'), + static_cast('\x0C'), + static_cast('\x85'), + static_cast(']'), + static_cast(')'), + static_cast(')'), + static_cast('\0') + }; + return e2; +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif +} + +} // BOOST_REGEX_DETAIL_NS +} // boost + +#endif diff --git a/scintilla/include/boost/regex/v5/regex_workaround.hpp b/scintilla/include/boost/regex/v5/regex_workaround.hpp new file mode 100644 index 0000000000..8cd9044ce5 --- /dev/null +++ b/scintilla/include/boost/regex/v5/regex_workaround.hpp @@ -0,0 +1,159 @@ +/* + * + * Copyright (c) 1998-2005 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_workarounds.cpp + * VERSION see + * DESCRIPTION: Declares Misc workarounds. + */ + +#ifndef BOOST_REGEX_WORKAROUND_HPP +#define BOOST_REGEX_WORKAROUND_HPP + +#include +#include +#include +#include + +#ifndef BOOST_REGEX_STANDALONE +#include +#include +#endif + +#ifdef BOOST_REGEX_NO_BOOL +# define BOOST_REGEX_MAKE_BOOL(x) static_cast((x) ? true : false) +#else +# define BOOST_REGEX_MAKE_BOOL(x) static_cast(x) +#endif + +/***************************************************************************** + * + * helper functions pointer_construct/pointer_destroy: + * + ****************************************************************************/ + +#ifdef __cplusplus +namespace boost{ namespace BOOST_REGEX_DETAIL_NS{ + +#ifdef BOOST_REGEX_MSVC +#pragma warning (push) +#pragma warning (disable : 4100) +#endif + +template +inline void pointer_destroy(T* p) +{ p->~T(); (void)p; } + +#ifdef BOOST_REGEX_MSVC +#pragma warning (pop) +#endif + +template +inline void pointer_construct(T* p, const T& t) +{ new (p) T(t); } + +}} // namespaces +#endif + +/***************************************************************************** + * + * helper function copy: + * + ****************************************************************************/ + +#if defined(BOOST_WORKAROUND) +#if BOOST_WORKAROUND(BOOST_REGEX_MSVC, >= 1400) && defined(__STDC_WANT_SECURE_LIB__) && __STDC_WANT_SECURE_LIB__ +#define BOOST_REGEX_HAS_STRCPY_S +#endif +#endif + +#ifdef __cplusplus +namespace boost{ namespace BOOST_REGEX_DETAIL_NS{ + +#if defined(BOOST_REGEX_MSVC) && (BOOST_REGEX_MSVC < 1910) + // + // MSVC 10 will either emit warnings or else refuse to compile + // code that makes perfectly legitimate use of std::copy, when + // the OutputIterator type is a user-defined class (apparently all user + // defined iterators are "unsafe"). What's more Microsoft have removed their + // non-standard "unchecked" versions, even though they are still in the MS + // documentation!! Work around this as best we can: + // + template + inline OutputIterator copy( + InputIterator first, + InputIterator last, + OutputIterator dest + ) + { + while (first != last) + *dest++ = *first++; + return dest; + } +#else + using std::copy; +#endif + + +#if defined(BOOST_REGEX_HAS_STRCPY_S) + + // use safe versions of strcpy etc: + using ::strcpy_s; + using ::strcat_s; +#else + inline std::size_t strcpy_s( + char *strDestination, + std::size_t sizeInBytes, + const char *strSource + ) + { + std::size_t lenSourceWithNull = std::strlen(strSource) + 1; + if (lenSourceWithNull > sizeInBytes) + return 1; + std::memcpy(strDestination, strSource, lenSourceWithNull); + return 0; + } + inline std::size_t strcat_s( + char *strDestination, + std::size_t sizeInBytes, + const char *strSource + ) + { + std::size_t lenSourceWithNull = std::strlen(strSource) + 1; + std::size_t lenDestination = std::strlen(strDestination); + if (lenSourceWithNull + lenDestination > sizeInBytes) + return 1; + std::memcpy(strDestination + lenDestination, strSource, lenSourceWithNull); + return 0; + } + +#endif + + inline void overflow_error_if_not_zero(std::size_t i) + { + if(i) + { + std::overflow_error e("String buffer too small"); +#ifndef BOOST_REGEX_STANDALONE + boost::throw_exception(e); +#else + throw e; +#endif + } + } + +}} // namespaces + +#endif // __cplusplus + +#endif // include guard + diff --git a/scintilla/include/boost/regex/v5/states.hpp b/scintilla/include/boost/regex/v5/states.hpp new file mode 100644 index 0000000000..60fc99d0a8 --- /dev/null +++ b/scintilla/include/boost/regex/v5/states.hpp @@ -0,0 +1,299 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE states.cpp + * VERSION see + * DESCRIPTION: Declares internal state machine structures. + */ + +#ifndef BOOST_REGEX_V5_STATES_HPP +#define BOOST_REGEX_V5_STATES_HPP + +namespace boost{ +namespace BOOST_REGEX_DETAIL_NS{ + +/*** mask_type ******************************************************* +Whenever we have a choice of two alternatives, we use an array of bytes +to indicate which of the two alternatives it is possible to take for any +given input character. If mask_take is set, then we can take the next +state, and if mask_skip is set then we can take the alternative. +***********************************************************************/ +enum mask_type +{ + mask_take = 1, + mask_skip = 2, + mask_init = 4, + mask_any = mask_skip | mask_take, + mask_all = mask_any +}; + +/*** helpers ********************************************************** +These helpers let us use function overload resolution to detect whether +we have narrow or wide character strings: +***********************************************************************/ +struct _narrow_type{}; +struct _wide_type{}; +template struct is_byte; +template<> struct is_byte { typedef _narrow_type width_type; }; +template<> struct is_byte{ typedef _narrow_type width_type; }; +template<> struct is_byte { typedef _narrow_type width_type; }; +template struct is_byte { typedef _wide_type width_type; }; + +/*** enum syntax_element_type ****************************************** +Every record in the state machine falls into one of the following types: +***********************************************************************/ +enum syntax_element_type +{ + // start of a marked sub-expression, or perl-style (?...) extension + syntax_element_startmark = 0, + // end of a marked sub-expression, or perl-style (?...) extension + syntax_element_endmark = syntax_element_startmark + 1, + // any sequence of literal characters + syntax_element_literal = syntax_element_endmark + 1, + // start of line assertion: ^ + syntax_element_start_line = syntax_element_literal + 1, + // end of line assertion $ + syntax_element_end_line = syntax_element_start_line + 1, + // match any character: . + syntax_element_wild = syntax_element_end_line + 1, + // end of expression: we have a match when we get here + syntax_element_match = syntax_element_wild + 1, + // perl style word boundary: \b + syntax_element_word_boundary = syntax_element_match + 1, + // perl style within word boundary: \B + syntax_element_within_word = syntax_element_word_boundary + 1, + // start of word assertion: \< + syntax_element_word_start = syntax_element_within_word + 1, + // end of word assertion: \> + syntax_element_word_end = syntax_element_word_start + 1, + // start of buffer assertion: \` + syntax_element_buffer_start = syntax_element_word_end + 1, + // end of buffer assertion: \' + syntax_element_buffer_end = syntax_element_buffer_start + 1, + // backreference to previously matched sub-expression + syntax_element_backref = syntax_element_buffer_end + 1, + // either a wide character set [..] or one with multicharacter collating elements: + syntax_element_long_set = syntax_element_backref + 1, + // narrow character set: [...] + syntax_element_set = syntax_element_long_set + 1, + // jump to a new state in the machine: + syntax_element_jump = syntax_element_set + 1, + // choose between two production states: + syntax_element_alt = syntax_element_jump + 1, + // a repeat + syntax_element_rep = syntax_element_alt + 1, + // match a combining character sequence + syntax_element_combining = syntax_element_rep + 1, + // perl style soft buffer end: \z + syntax_element_soft_buffer_end = syntax_element_combining + 1, + // perl style continuation: \G + syntax_element_restart_continue = syntax_element_soft_buffer_end + 1, + // single character repeats: + syntax_element_dot_rep = syntax_element_restart_continue + 1, + syntax_element_char_rep = syntax_element_dot_rep + 1, + syntax_element_short_set_rep = syntax_element_char_rep + 1, + syntax_element_long_set_rep = syntax_element_short_set_rep + 1, + // a backstep for lookbehind repeats: + syntax_element_backstep = syntax_element_long_set_rep + 1, + // an assertion that a mark was matched: + syntax_element_assert_backref = syntax_element_backstep + 1, + syntax_element_toggle_case = syntax_element_assert_backref + 1, + // a recursive expression: + syntax_element_recurse = syntax_element_toggle_case + 1, + // Verbs: + syntax_element_fail = syntax_element_recurse + 1, + syntax_element_accept = syntax_element_fail + 1, + syntax_element_commit = syntax_element_accept + 1, + syntax_element_then = syntax_element_commit + 1 +}; + +#ifdef BOOST_REGEX_DEBUG +// dwa 09/26/00 - This is needed to suppress warnings about an ambiguous conversion +std::ostream& operator<<(std::ostream&, syntax_element_type); +#endif + +struct re_syntax_base; + +/*** union offset_type ************************************************ +Points to another state in the machine. During machine construction +we use integral offsets, but these are converted to pointers before +execution of the machine. +***********************************************************************/ +union offset_type +{ + re_syntax_base* p; + std::ptrdiff_t i; +}; + +/*** struct re_syntax_base ******************************************** +Base class for all states in the machine. +***********************************************************************/ +struct re_syntax_base +{ + syntax_element_type type; // what kind of state this is + offset_type next; // next state in the machine +}; + +/*** struct re_brace ************************************************** +A marked parenthesis. +***********************************************************************/ +struct re_brace : public re_syntax_base +{ + // The index to match, can be zero (don't mark the sub-expression) + // or negative (for perl style (?...) extensions): + int index; + bool icase; +}; + +/*** struct re_dot ************************************************** +Match anything. +***********************************************************************/ +enum +{ + dont_care = 1, + force_not_newline = 0, + force_newline = 2, + + test_not_newline = 2, + test_newline = 3 +}; +struct re_dot : public re_syntax_base +{ + unsigned char mask; +}; + +/*** struct re_literal ************************************************ +A string of literals, following this structure will be an +array of characters: charT[length] +***********************************************************************/ +struct re_literal : public re_syntax_base +{ + unsigned int length; +}; + +/*** struct re_case ************************************************ +Indicates whether we are moving to a case insensive block or not +***********************************************************************/ +struct re_case : public re_syntax_base +{ + bool icase; +}; + +/*** struct re_set_long *********************************************** +A wide character set of characters, following this structure will be +an array of type charT: +First csingles null-terminated strings +Then 2 * cranges NULL terminated strings +Then cequivalents NULL terminated strings +***********************************************************************/ +template +struct re_set_long : public re_syntax_base +{ + unsigned int csingles, cranges, cequivalents; + mask_type cclasses; + mask_type cnclasses; + bool isnot; + bool singleton; +}; + +/*** struct re_set **************************************************** +A set of narrow-characters, matches any of _map which is none-zero +***********************************************************************/ +struct re_set : public re_syntax_base +{ + unsigned char _map[1 << CHAR_BIT]; +}; + +/*** struct re_jump *************************************************** +Jump to a new location in the machine (not next). +***********************************************************************/ +struct re_jump : public re_syntax_base +{ + offset_type alt; // location to jump to +}; + +/*** struct re_alt *************************************************** +Jump to a new location in the machine (possibly next). +***********************************************************************/ +struct re_alt : public re_jump +{ + unsigned char _map[1 << CHAR_BIT]; // which characters can take the jump + unsigned int can_be_null; // true if we match a NULL string +}; + +/*** struct re_repeat ************************************************* +Repeat a section of the machine +***********************************************************************/ +struct re_repeat : public re_alt +{ + std::size_t min, max; // min and max allowable repeats + int state_id; // Unique identifier for this repeat + bool leading; // True if this repeat is at the start of the machine (lets us optimize some searches) + bool greedy; // True if this is a greedy repeat +}; + +/*** struct re_recurse ************************************************ +Recurse to a particular subexpression. +**********************************************************************/ +struct re_recurse : public re_jump +{ + int state_id; // identifier of first nested repeat within the recursion. +}; + +/*** struct re_commit ************************************************* +Used for the PRUNE, SKIP and COMMIT verbs which basically differ only in what happens +if no match is found and we start searching forward. +**********************************************************************/ +enum commit_type +{ + commit_prune, + commit_skip, + commit_commit +}; +struct re_commit : public re_syntax_base +{ + commit_type action; +}; + +/*** enum re_jump_size_type ******************************************* +Provides compiled size of re_jump structure (allowing for trailing alignment). +We provide this so we know how manybytes to insert when constructing the machine +(The value of padding_mask is defined in regex_raw_buffer.hpp). +***********************************************************************/ +enum re_jump_size_type +{ + re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask), + re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask), + re_alt_size = (sizeof(re_alt) + padding_mask) & ~(padding_mask) +}; + +/*** proc re_is_set_member ********************************************* +Forward declaration: we'll need this one later... +***********************************************************************/ + +template +struct regex_data; + +template +iterator re_is_set_member(iterator next, + iterator last, + const re_set_long* set_, + const regex_data& e, bool icase); + +} // namespace BOOST_REGEX_DETAIL_NS + +} // namespace boost + +#endif + + diff --git a/scintilla/include/boost/regex/v5/sub_match.hpp b/scintilla/include/boost/regex/v5/sub_match.hpp new file mode 100644 index 0000000000..aa61b560dd --- /dev/null +++ b/scintilla/include/boost/regex/v5/sub_match.hpp @@ -0,0 +1,382 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE sub_match.cpp + * VERSION see + * DESCRIPTION: Declares template class sub_match. + */ + +#ifndef BOOST_REGEX_V5_SUB_MATCH_HPP +#define BOOST_REGEX_V5_SUB_MATCH_HPP + +namespace boost{ + +template +struct sub_match : public std::pair +{ + typedef typename std::iterator_traits::value_type value_type; + typedef typename std::iterator_traits::difference_type difference_type; + typedef BidiIterator iterator_type; + typedef BidiIterator iterator; + typedef BidiIterator const_iterator; + + bool matched; + + sub_match() : std::pair(), matched(false) {} + sub_match(BidiIterator i) : std::pair(i, i), matched(false) {} + template + operator std::basic_string ()const + { + return matched ? std::basic_string(this->first, this->second) : std::basic_string(); + } + difference_type length()const + { + difference_type n = matched ? std::distance((BidiIterator)this->first, (BidiIterator)this->second) : 0; + return n; + } + std::basic_string str()const + { + std::basic_string result; + if(matched) + { + std::size_t len = std::distance((BidiIterator)this->first, (BidiIterator)this->second); + result.reserve(len); + BidiIterator i = this->first; + while(i != this->second) + { + result.append(1, *i); + ++i; + } + } + return result; + } + int compare(const sub_match& s)const + { + if(matched != s.matched) + return static_cast(matched) - static_cast(s.matched); + return str().compare(s.str()); + } + int compare(const std::basic_string& s)const + { + return str().compare(s); + } + int compare(const value_type* p)const + { + return str().compare(p); + } + + bool operator==(const sub_match& that)const + { return compare(that) == 0; } + bool operator !=(const sub_match& that)const + { return compare(that) != 0; } + bool operator<(const sub_match& that)const + { return compare(that) < 0; } + bool operator>(const sub_match& that)const + { return compare(that) > 0; } + bool operator<=(const sub_match& that)const + { return compare(that) <= 0; } + bool operator>=(const sub_match& that)const + { return compare(that) >= 0; } + +#ifdef BOOST_REGEX_MATCH_EXTRA + typedef std::vector > capture_sequence_type; + + const capture_sequence_type& captures()const + { + if(!m_captures) + m_captures.reset(new capture_sequence_type()); + return *m_captures; + } + // + // Private implementation API: DO NOT USE! + // + capture_sequence_type& get_captures()const + { + if(!m_captures) + m_captures.reset(new capture_sequence_type()); + return *m_captures; + } + +private: + mutable std::unique_ptr m_captures; +public: + +#endif + sub_match(const sub_match& that, bool +#ifdef BOOST_REGEX_MATCH_EXTRA + deep_copy +#endif + = true + ) + : std::pair(that), + matched(that.matched) + { +#ifdef BOOST_REGEX_MATCH_EXTRA + if(that.m_captures) + if(deep_copy) + m_captures.reset(new capture_sequence_type(*(that.m_captures))); +#endif + } + sub_match& operator=(const sub_match& that) + { + this->first = that.first; + this->second = that.second; + matched = that.matched; +#ifdef BOOST_REGEX_MATCH_EXTRA + if(that.m_captures) + get_captures() = *(that.m_captures); +#endif + return *this; + } + // + // Make this type a range, for both Boost.Range, and C++11: + // + BidiIterator begin()const { return this->first; } + BidiIterator end()const { return this->second; } +}; + +typedef sub_match csub_match; +typedef sub_match ssub_match; +#ifndef BOOST_NO_WREGEX +typedef sub_match wcsub_match; +typedef sub_match wssub_match; +#endif + +// comparison to std::basic_string<> part 1: +template +inline bool operator == (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ return s.compare(m.str()) == 0; } +template +inline bool operator != (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ return s.compare(m.str()) != 0; } +template +inline bool operator < (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ return s.compare(m.str()) < 0; } +template +inline bool operator <= (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ return s.compare(m.str()) <= 0; } +template +inline bool operator >= (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ return s.compare(m.str()) >= 0; } +template +inline bool operator > (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ return s.compare(m.str()) > 0; } +// comparison to std::basic_string<> part 2: +template +inline bool operator == (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ return m.str().compare(s) == 0; } +template +inline bool operator != (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ return m.str().compare(s) != 0; } +template +inline bool operator < (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ return m.str().compare(s) < 0; } +template +inline bool operator > (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ return m.str().compare(s) > 0; } +template +inline bool operator <= (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ return m.str().compare(s) <= 0; } +template +inline bool operator >= (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ return m.str().compare(s) >= 0; } +// comparison to const charT* part 1: +template +inline bool operator == (const sub_match& m, + typename std::iterator_traits::value_type const* s) +{ return m.str().compare(s) == 0; } +template +inline bool operator != (const sub_match& m, + typename std::iterator_traits::value_type const* s) +{ return m.str().compare(s) != 0; } +template +inline bool operator > (const sub_match& m, + typename std::iterator_traits::value_type const* s) +{ return m.str().compare(s) > 0; } +template +inline bool operator < (const sub_match& m, + typename std::iterator_traits::value_type const* s) +{ return m.str().compare(s) < 0; } +template +inline bool operator >= (const sub_match& m, + typename std::iterator_traits::value_type const* s) +{ return m.str().compare(s) >= 0; } +template +inline bool operator <= (const sub_match& m, + typename std::iterator_traits::value_type const* s) +{ return m.str().compare(s) <= 0; } +// comparison to const charT* part 2: +template +inline bool operator == (typename std::iterator_traits::value_type const* s, + const sub_match& m) +{ return m.str().compare(s) == 0; } +template +inline bool operator != (typename std::iterator_traits::value_type const* s, + const sub_match& m) +{ return m.str().compare(s) != 0; } +template +inline bool operator < (typename std::iterator_traits::value_type const* s, + const sub_match& m) +{ return m.str().compare(s) > 0; } +template +inline bool operator > (typename std::iterator_traits::value_type const* s, + const sub_match& m) +{ return m.str().compare(s) < 0; } +template +inline bool operator <= (typename std::iterator_traits::value_type const* s, + const sub_match& m) +{ return m.str().compare(s) >= 0; } +template +inline bool operator >= (typename std::iterator_traits::value_type const* s, + const sub_match& m) +{ return m.str().compare(s) <= 0; } + +// comparison to const charT& part 1: +template +inline bool operator == (const sub_match& m, + typename std::iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) == 0; } +template +inline bool operator != (const sub_match& m, + typename std::iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) != 0; } +template +inline bool operator > (const sub_match& m, + typename std::iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) > 0; } +template +inline bool operator < (const sub_match& m, + typename std::iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) < 0; } +template +inline bool operator >= (const sub_match& m, + typename std::iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) >= 0; } +template +inline bool operator <= (const sub_match& m, + typename std::iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) <= 0; } +// comparison to const charT* part 2: +template +inline bool operator == (typename std::iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) == 0; } +template +inline bool operator != (typename std::iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) != 0; } +template +inline bool operator < (typename std::iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) > 0; } +template +inline bool operator > (typename std::iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) < 0; } +template +inline bool operator <= (typename std::iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) >= 0; } +template +inline bool operator >= (typename std::iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) <= 0; } + +// addition operators: +template +inline std::basic_string::value_type, traits, Allocator> +operator + (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ + std::basic_string::value_type, traits, Allocator> result; + result.reserve(s.size() + m.length() + 1); + return result.append(s).append(m.first, m.second); +} +template +inline std::basic_string::value_type, traits, Allocator> +operator + (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ + std::basic_string::value_type, traits, Allocator> result; + result.reserve(s.size() + m.length() + 1); + return result.append(m.first, m.second).append(s); +} +template +inline std::basic_string::value_type> +operator + (typename std::iterator_traits::value_type const* s, + const sub_match& m) +{ + std::basic_string::value_type> result; + result.reserve(std::char_traits::value_type>::length(s) + m.length() + 1); + return result.append(s).append(m.first, m.second); +} +template +inline std::basic_string::value_type> +operator + (const sub_match& m, + typename std::iterator_traits::value_type const * s) +{ + std::basic_string::value_type> result; + result.reserve(std::char_traits::value_type>::length(s) + m.length() + 1); + return result.append(m.first, m.second).append(s); +} +template +inline std::basic_string::value_type> +operator + (typename std::iterator_traits::value_type const& s, + const sub_match& m) +{ + std::basic_string::value_type> result; + result.reserve(m.length() + 2); + return result.append(1, s).append(m.first, m.second); +} +template +inline std::basic_string::value_type> +operator + (const sub_match& m, + typename std::iterator_traits::value_type const& s) +{ + std::basic_string::value_type> result; + result.reserve(m.length() + 2); + return result.append(m.first, m.second).append(1, s); +} +template +inline std::basic_string::value_type> +operator + (const sub_match& m1, + const sub_match& m2) +{ + std::basic_string::value_type> result; + result.reserve(m1.length() + m2.length() + 1); + return result.append(m1.first, m1.second).append(m2.first, m2.second); +} +template +std::basic_ostream& + operator << (std::basic_ostream& os, + const sub_match& s) +{ + return (os << s.str()); +} + +} // namespace boost + +#endif + diff --git a/scintilla/include/boost/regex/v5/syntax_type.hpp b/scintilla/include/boost/regex/v5/syntax_type.hpp new file mode 100644 index 0000000000..3efdf0b0f9 --- /dev/null +++ b/scintilla/include/boost/regex/v5/syntax_type.hpp @@ -0,0 +1,105 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE syntax_type.hpp + * VERSION see + * DESCRIPTION: Declares regular expression synatx type enumerator. + */ + +#ifndef BOOST_REGEX_SYNTAX_TYPE_HPP +#define BOOST_REGEX_SYNTAX_TYPE_HPP + +namespace boost{ +namespace regex_constants{ + +typedef unsigned char syntax_type; + +// +// values chosen are binary compatible with previous version: +// +static const syntax_type syntax_char = 0; +static const syntax_type syntax_open_mark = 1; +static const syntax_type syntax_close_mark = 2; +static const syntax_type syntax_dollar = 3; +static const syntax_type syntax_caret = 4; +static const syntax_type syntax_dot = 5; +static const syntax_type syntax_star = 6; +static const syntax_type syntax_plus = 7; +static const syntax_type syntax_question = 8; +static const syntax_type syntax_open_set = 9; +static const syntax_type syntax_close_set = 10; +static const syntax_type syntax_or = 11; +static const syntax_type syntax_escape = 12; +static const syntax_type syntax_dash = 14; +static const syntax_type syntax_open_brace = 15; +static const syntax_type syntax_close_brace = 16; +static const syntax_type syntax_digit = 17; +static const syntax_type syntax_comma = 27; +static const syntax_type syntax_equal = 37; +static const syntax_type syntax_colon = 36; +static const syntax_type syntax_not = 53; + +// extensions: + +static const syntax_type syntax_hash = 13; +static const syntax_type syntax_newline = 26; + +// escapes: + +typedef syntax_type escape_syntax_type; + +static const escape_syntax_type escape_type_word_assert = 18; +static const escape_syntax_type escape_type_not_word_assert = 19; +static const escape_syntax_type escape_type_control_f = 29; +static const escape_syntax_type escape_type_control_n = 30; +static const escape_syntax_type escape_type_control_r = 31; +static const escape_syntax_type escape_type_control_t = 32; +static const escape_syntax_type escape_type_control_v = 33; +static const escape_syntax_type escape_type_ascii_control = 35; +static const escape_syntax_type escape_type_hex = 34; +static const escape_syntax_type escape_type_unicode = 0; // not used +static const escape_syntax_type escape_type_identity = 0; // not used +static const escape_syntax_type escape_type_backref = syntax_digit; +static const escape_syntax_type escape_type_decimal = syntax_digit; // not used +static const escape_syntax_type escape_type_class = 22; +static const escape_syntax_type escape_type_not_class = 23; + +// extensions: + +static const escape_syntax_type escape_type_left_word = 20; +static const escape_syntax_type escape_type_right_word = 21; +static const escape_syntax_type escape_type_start_buffer = 24; // for \` +static const escape_syntax_type escape_type_end_buffer = 25; // for \' +static const escape_syntax_type escape_type_control_a = 28; // for \a +static const escape_syntax_type escape_type_e = 38; // for \e +static const escape_syntax_type escape_type_E = 47; // for \Q\E +static const escape_syntax_type escape_type_Q = 48; // for \Q\E +static const escape_syntax_type escape_type_X = 49; // for \X +static const escape_syntax_type escape_type_C = 50; // for \C +static const escape_syntax_type escape_type_Z = 51; // for \Z +static const escape_syntax_type escape_type_G = 52; // for \G + +static const escape_syntax_type escape_type_property = 54; // for \p +static const escape_syntax_type escape_type_not_property = 55; // for \P +static const escape_syntax_type escape_type_named_char = 56; // for \N +static const escape_syntax_type escape_type_extended_backref = 57; // for \g +static const escape_syntax_type escape_type_reset_start_mark = 58; // for \K +static const escape_syntax_type escape_type_line_ending = 59; // for \R + +static const escape_syntax_type syntax_max = 60; + +} +} + + +#endif diff --git a/scintilla/include/boost/regex/v5/u32regex_iterator.hpp b/scintilla/include/boost/regex/v5/u32regex_iterator.hpp new file mode 100644 index 0000000000..6677019aca --- /dev/null +++ b/scintilla/include/boost/regex/v5/u32regex_iterator.hpp @@ -0,0 +1,177 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE u32regex_iterator.hpp + * VERSION see + * DESCRIPTION: Provides u32regex_iterator implementation. + */ + +#ifndef BOOST_REGEX_V5_U32REGEX_ITERATOR_HPP +#define BOOST_REGEX_V5_U32REGEX_ITERATOR_HPP + +namespace boost{ + +template +class u32regex_iterator_implementation +{ + typedef u32regex regex_type; + + match_results what; // current match + BidirectionalIterator base; // start of sequence + BidirectionalIterator end; // end of sequence + const regex_type re; // the expression + match_flag_type flags; // flags for matching + +public: + u32regex_iterator_implementation(const regex_type* p, BidirectionalIterator last, match_flag_type f) + : base(), end(last), re(*p), flags(f){} + bool init(BidirectionalIterator first) + { + base = first; + return u32regex_search(first, end, what, re, flags, base); + } + bool compare(const u32regex_iterator_implementation& that) + { + if(this == &that) return true; + return (&re.get_data() == &that.re.get_data()) && (end == that.end) && (flags == that.flags) && (what[0].first == that.what[0].first) && (what[0].second == that.what[0].second); + } + const match_results& get() + { return what; } + bool next() + { + //if(what.prefix().first != what[0].second) + // flags |= match_prev_avail; + BidirectionalIterator next_start = what[0].second; + match_flag_type f(flags); + if(!what.length()) + f |= regex_constants::match_not_initial_null; + //if(base != next_start) + // f |= regex_constants::match_not_bob; + bool result = u32regex_search(next_start, end, what, re, f, base); + if(result) + what.set_base(base); + return result; + } +private: + u32regex_iterator_implementation& operator=(const u32regex_iterator_implementation&); +}; + +template +class u32regex_iterator +{ +private: + typedef u32regex_iterator_implementation impl; + typedef std::shared_ptr pimpl; +public: + typedef u32regex regex_type; + typedef match_results value_type; + typedef typename std::iterator_traits::difference_type + difference_type; + typedef const value_type* pointer; + typedef const value_type& reference; + typedef std::forward_iterator_tag iterator_category; + + u32regex_iterator(){} + u32regex_iterator(BidirectionalIterator a, BidirectionalIterator b, + const regex_type& re, + match_flag_type m = match_default) + : pdata(new impl(&re, b, m)) + { + if(!pdata->init(a)) + { + pdata.reset(); + } + } + u32regex_iterator(const u32regex_iterator& that) + : pdata(that.pdata) {} + u32regex_iterator& operator=(const u32regex_iterator& that) + { + pdata = that.pdata; + return *this; + } + bool operator==(const u32regex_iterator& that)const + { + if((pdata.get() == 0) || (that.pdata.get() == 0)) + return pdata.get() == that.pdata.get(); + return pdata->compare(*(that.pdata.get())); + } + bool operator!=(const u32regex_iterator& that)const + { return !(*this == that); } + const value_type& operator*()const + { return pdata->get(); } + const value_type* operator->()const + { return &(pdata->get()); } + u32regex_iterator& operator++() + { + cow(); + if(0 == pdata->next()) + { + pdata.reset(); + } + return *this; + } + u32regex_iterator operator++(int) + { + u32regex_iterator result(*this); + ++(*this); + return result; + } +private: + + pimpl pdata; + + void cow() + { + // copy-on-write + if(pdata.get() && (pdata.use_count() > 1)) + { + pdata.reset(new impl(*(pdata.get()))); + } + } +}; + +typedef u32regex_iterator utf8regex_iterator; +typedef u32regex_iterator utf16regex_iterator; +typedef u32regex_iterator utf32regex_iterator; + +inline u32regex_iterator make_u32regex_iterator(const char* p, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_iterator(p, p+std::strlen(p), e, m); +} +#ifndef BOOST_NO_WREGEX +inline u32regex_iterator make_u32regex_iterator(const wchar_t* p, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_iterator(p, p+std::wcslen(p), e, m); +} +#endif +#if !defined(BOOST_REGEX_UCHAR_IS_WCHAR_T) +inline u32regex_iterator make_u32regex_iterator(const UChar* p, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_iterator(p, p+u_strlen(p), e, m); +} +#endif +template +inline u32regex_iterator::const_iterator> make_u32regex_iterator(const std::basic_string& p, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + typedef typename std::basic_string::const_iterator iter_type; + return u32regex_iterator(p.begin(), p.end(), e, m); +} +inline u32regex_iterator make_u32regex_iterator(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_iterator(s.getBuffer(), s.getBuffer() + s.length(), e, m); +} + +} // namespace boost + +#endif // BOOST_REGEX_V5_REGEX_ITERATOR_HPP + diff --git a/scintilla/include/boost/regex/v5/u32regex_token_iterator.hpp b/scintilla/include/boost/regex/v5/u32regex_token_iterator.hpp new file mode 100644 index 0000000000..3d3032652f --- /dev/null +++ b/scintilla/include/boost/regex/v5/u32regex_token_iterator.hpp @@ -0,0 +1,312 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE u32regex_token_iterator.hpp + * VERSION see + * DESCRIPTION: Provides u32regex_token_iterator implementation. + */ + +#ifndef BOOST_REGEX_V5_U32REGEX_TOKEN_ITERATOR_HPP +#define BOOST_REGEX_V5_U32REGEX_TOKEN_ITERATOR_HPP + +namespace boost{ + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +# pragma warning(disable:4700) +#endif + +template +class u32regex_token_iterator_implementation +{ + typedef u32regex regex_type; + typedef sub_match value_type; + + match_results what; // current match + BidirectionalIterator end; // end of search area + BidirectionalIterator base; // start of search area + const regex_type re; // the expression + match_flag_type flags; // match flags + value_type result; // the current string result + int N; // the current sub-expression being enumerated + std::vector subs; // the sub-expressions to enumerate + +public: + u32regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, int sub, match_flag_type f) + : end(last), re(*p), flags(f){ subs.push_back(sub); } + u32regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const std::vector& v, match_flag_type f) + : end(last), re(*p), flags(f), subs(v){} + template + u32regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const int (&submatches)[CN], match_flag_type f) + : end(last), re(*p), flags(f) + { + for(std::size_t i = 0; i < CN; ++i) + { + subs.push_back(submatches[i]); + } + } + + bool init(BidirectionalIterator first) + { + base = first; + N = 0; + if(u32regex_search(first, end, what, re, flags, base) == true) + { + N = 0; + result = ((subs[N] == -1) ? what.prefix() : what[(int)subs[N]]); + return true; + } + else if((subs[N] == -1) && (first != end)) + { + result.first = first; + result.second = end; + result.matched = (first != end); + N = -1; + return true; + } + return false; + } + bool compare(const u32regex_token_iterator_implementation& that) + { + if(this == &that) return true; + return (&re.get_data() == &that.re.get_data()) + && (end == that.end) + && (flags == that.flags) + && (N == that.N) + && (what[0].first == that.what[0].first) + && (what[0].second == that.what[0].second); + } + const value_type& get() + { return result; } + bool next() + { + if(N == -1) + return false; + if(N+1 < (int)subs.size()) + { + ++N; + result =((subs[N] == -1) ? what.prefix() : what[subs[N]]); + return true; + } + //if(what.prefix().first != what[0].second) + // flags |= match_prev_avail | regex_constants::match_not_bob; + BidirectionalIterator last_end(what[0].second); + if(u32regex_search(last_end, end, what, re, ((what[0].first == what[0].second) ? flags | regex_constants::match_not_initial_null : flags), base)) + { + N =0; + result =((subs[N] == -1) ? what.prefix() : what[subs[N]]); + return true; + } + else if((last_end != end) && (subs[0] == -1)) + { + N =-1; + result.first = last_end; + result.second = end; + result.matched = (last_end != end); + return true; + } + return false; + } +private: + u32regex_token_iterator_implementation& operator=(const u32regex_token_iterator_implementation&); +}; + +template +class u32regex_token_iterator +{ +private: + typedef u32regex_token_iterator_implementation impl; + typedef std::shared_ptr pimpl; +public: + typedef u32regex regex_type; + typedef sub_match value_type; + typedef typename std::iterator_traits::difference_type + difference_type; + typedef const value_type* pointer; + typedef const value_type& reference; + typedef std::forward_iterator_tag iterator_category; + + u32regex_token_iterator(){} + u32regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + int submatch = 0, match_flag_type m = match_default) + : pdata(new impl(&re, b, submatch, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } + u32regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + const std::vector& submatches, match_flag_type m = match_default) + : pdata(new impl(&re, b, submatches, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } + template + u32regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + const int (&submatches)[N], match_flag_type m = match_default) + : pdata(new impl(&re, b, submatches, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } + u32regex_token_iterator(const u32regex_token_iterator& that) + : pdata(that.pdata) {} + u32regex_token_iterator& operator=(const u32regex_token_iterator& that) + { + pdata = that.pdata; + return *this; + } + bool operator==(const u32regex_token_iterator& that)const + { + if((pdata.get() == 0) || (that.pdata.get() == 0)) + return pdata.get() == that.pdata.get(); + return pdata->compare(*(that.pdata.get())); + } + bool operator!=(const u32regex_token_iterator& that)const + { return !(*this == that); } + const value_type& operator*()const + { return pdata->get(); } + const value_type* operator->()const + { return &(pdata->get()); } + u32regex_token_iterator& operator++() + { + cow(); + if(0 == pdata->next()) + { + pdata.reset(); + } + return *this; + } + u32regex_token_iterator operator++(int) + { + u32regex_token_iterator result(*this); + ++(*this); + return result; + } +private: + + pimpl pdata; + + void cow() + { + // copy-on-write + if(pdata.get() && (pdata.use_count() > 1)) + { + pdata.reset(new impl(*(pdata.get()))); + } + } +}; + +typedef u32regex_token_iterator utf8regex_token_iterator; +typedef u32regex_token_iterator utf16regex_token_iterator; +typedef u32regex_token_iterator utf32regex_token_iterator; + +// construction from an integral sub_match state_id: +inline u32regex_token_iterator make_u32regex_token_iterator(const char* p, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+std::strlen(p), e, submatch, m); +} +#ifndef BOOST_NO_WREGEX +inline u32regex_token_iterator make_u32regex_token_iterator(const wchar_t* p, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+std::wcslen(p), e, submatch, m); +} +#endif +#if !defined(BOOST_REGEX_UCHAR_IS_WCHAR_T) +inline u32regex_token_iterator make_u32regex_token_iterator(const UChar* p, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+u_strlen(p), e, submatch, m); +} +#endif +template +inline u32regex_token_iterator::const_iterator> make_u32regex_token_iterator(const std::basic_string& p, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + typedef typename std::basic_string::const_iterator iter_type; + return u32regex_token_iterator(p.begin(), p.end(), e, submatch, m); +} +inline u32regex_token_iterator make_u32regex_token_iterator(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(s.getBuffer(), s.getBuffer() + s.length(), e, submatch, m); +} + +// construction from a reference to an array: +template +inline u32regex_token_iterator make_u32regex_token_iterator(const char* p, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+std::strlen(p), e, submatch, m); +} +#ifndef BOOST_NO_WREGEX +template +inline u32regex_token_iterator make_u32regex_token_iterator(const wchar_t* p, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+std::wcslen(p), e, submatch, m); +} +#endif +#if !defined(BOOST_REGEX_UCHAR_IS_WCHAR_T) +template +inline u32regex_token_iterator make_u32regex_token_iterator(const UChar* p, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+u_strlen(p), e, submatch, m); +} +#endif +template +inline u32regex_token_iterator::const_iterator> make_u32regex_token_iterator(const std::basic_string& p, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + typedef typename std::basic_string::const_iterator iter_type; + return u32regex_token_iterator(p.begin(), p.end(), e, submatch, m); +} +template +inline u32regex_token_iterator make_u32regex_token_iterator(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(s.getBuffer(), s.getBuffer() + s.length(), e, submatch, m); +} + +// construction from a vector of sub_match state_id's: +inline u32regex_token_iterator make_u32regex_token_iterator(const char* p, const u32regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+std::strlen(p), e, submatch, m); +} +#ifndef BOOST_NO_WREGEX +inline u32regex_token_iterator make_u32regex_token_iterator(const wchar_t* p, const u32regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+std::wcslen(p), e, submatch, m); +} +#endif +#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) +inline u32regex_token_iterator make_u32regex_token_iterator(const UChar* p, const u32regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+u_strlen(p), e, submatch, m); +} +#endif +template +inline u32regex_token_iterator::const_iterator> make_u32regex_token_iterator(const std::basic_string& p, const u32regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + typedef typename std::basic_string::const_iterator iter_type; + return u32regex_token_iterator(p.begin(), p.end(), e, submatch, m); +} +inline u32regex_token_iterator make_u32regex_token_iterator(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(s.getBuffer(), s.getBuffer() + s.length(), e, submatch, m); +} + +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + +} // namespace boost + +#endif // BOOST_REGEX_V5_REGEX_TOKEN_ITERATOR_HPP + + + + diff --git a/scintilla/include/boost/regex/v5/unicode_iterator.hpp b/scintilla/include/boost/regex/v5/unicode_iterator.hpp new file mode 100644 index 0000000000..9d3dd8b962 --- /dev/null +++ b/scintilla/include/boost/regex/v5/unicode_iterator.hpp @@ -0,0 +1,862 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE unicode_iterator.hpp + * VERSION see + * DESCRIPTION: Iterator adapters for converting between different Unicode encodings. + */ + +/**************************************************************************** + +Contents: +~~~~~~~~~ + +1) Read Only, Input Adapters: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +template +class u32_to_u8_iterator; + +Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-8. + +template +class u8_to_u32_iterator; + +Adapts sequence of UTF-8 code points to "look like" a sequence of UTF-32. + +template +class u32_to_u16_iterator; + +Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-16. + +template +class u16_to_u32_iterator; + +Adapts sequence of UTF-16 code points to "look like" a sequence of UTF-32. + +2) Single pass output iterator adapters: + +template +class utf8_output_iterator; + +Accepts UTF-32 code points and forwards them on as UTF-8 code points. + +template +class utf16_output_iterator; + +Accepts UTF-32 code points and forwards them on as UTF-16 code points. + +****************************************************************************/ + +#ifndef BOOST_REGEX_UNICODE_ITERATOR_HPP +#define BOOST_REGEX_UNICODE_ITERATOR_HPP +#include +#include +#include +#include +#include +#include // CHAR_BIT + +#ifndef BOOST_REGEX_STANDALONE +#include +#endif + +namespace boost{ + +namespace detail{ + +static const std::uint16_t high_surrogate_base = 0xD7C0u; +static const std::uint16_t low_surrogate_base = 0xDC00u; +static const std::uint32_t ten_bit_mask = 0x3FFu; + +inline bool is_high_surrogate(std::uint16_t v) +{ + return (v & 0xFFFFFC00u) == 0xd800u; +} +inline bool is_low_surrogate(std::uint16_t v) +{ + return (v & 0xFFFFFC00u) == 0xdc00u; +} +template +inline bool is_surrogate(T v) +{ + return (v & 0xFFFFF800u) == 0xd800; +} + +inline unsigned utf8_byte_count(std::uint8_t c) +{ + // if the most significant bit with a zero in it is in position + // 8-N then there are N bytes in this UTF-8 sequence: + std::uint8_t mask = 0x80u; + unsigned result = 0; + while(c & mask) + { + ++result; + mask >>= 1; + } + return (result == 0) ? 1 : ((result > 4) ? 4 : result); +} + +inline unsigned utf8_trailing_byte_count(std::uint8_t c) +{ + return utf8_byte_count(c) - 1; +} + +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4100) +#endif +#ifndef BOOST_NO_EXCEPTIONS +BOOST_REGEX_NORETURN +#endif +inline void invalid_utf32_code_point(std::uint32_t val) +{ + std::stringstream ss; + ss << "Invalid UTF-32 code point U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-16 sequence"; + std::out_of_range e(ss.str()); +#ifndef BOOST_REGEX_STANDALONE + boost::throw_exception(e); +#else + throw e; +#endif +} +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif + + +} // namespace detail + +template +class u32_to_u16_iterator +{ + typedef typename std::iterator_traits::value_type base_value_type; + + static_assert(sizeof(base_value_type)*CHAR_BIT == 32, "Incorrectly sized template argument"); + static_assert(sizeof(U16Type)*CHAR_BIT == 16, "Incorrectly sized template argument"); + +public: + typedef std::ptrdiff_t difference_type; + typedef U16Type value_type; + typedef value_type const* pointer; + typedef value_type const reference; + typedef std::bidirectional_iterator_tag iterator_category; + + reference operator*()const + { + if(m_current == 2) + extract_current(); + return m_values[m_current]; + } + bool operator==(const u32_to_u16_iterator& that)const + { + if(m_position == that.m_position) + { + // Both m_currents must be equal, or both even + // this is the same as saying their sum must be even: + return (m_current + that.m_current) & 1u ? false : true; + } + return false; + } + bool operator!=(const u32_to_u16_iterator& that)const + { + return !(*this == that); + } + u32_to_u16_iterator& operator++() + { + // if we have a pending read then read now, so that we know whether + // to skip a position, or move to a low-surrogate: + if(m_current == 2) + { + // pending read: + extract_current(); + } + // move to the next surrogate position: + ++m_current; + // if we've reached the end skip a position: + if(m_values[m_current] == 0) + { + m_current = 2; + ++m_position; + } + return *this; + } + u32_to_u16_iterator operator++(int) + { + u32_to_u16_iterator r(*this); + ++(*this); + return r; + } + u32_to_u16_iterator& operator--() + { + if(m_current != 1) + { + // decrementing an iterator always leads to a valid position: + --m_position; + extract_current(); + m_current = m_values[1] ? 1 : 0; + } + else + { + m_current = 0; + } + return *this; + } + u32_to_u16_iterator operator--(int) + { + u32_to_u16_iterator r(*this); + --(*this); + return r; + } + BaseIterator base()const + { + return m_position; + } + // construct: + u32_to_u16_iterator() : m_position(), m_current(0) + { + m_values[0] = 0; + m_values[1] = 0; + m_values[2] = 0; + } + u32_to_u16_iterator(BaseIterator b) : m_position(b), m_current(2) + { + m_values[0] = 0; + m_values[1] = 0; + m_values[2] = 0; + } +private: + + void extract_current()const + { + // begin by checking for a code point out of range: + std::uint32_t v = *m_position; + if(v >= 0x10000u) + { + if(v > 0x10FFFFu) + detail::invalid_utf32_code_point(*m_position); + // split into two surrogates: + m_values[0] = static_cast(v >> 10) + detail::high_surrogate_base; + m_values[1] = static_cast(v & detail::ten_bit_mask) + detail::low_surrogate_base; + m_current = 0; + BOOST_REGEX_ASSERT(detail::is_high_surrogate(m_values[0])); + BOOST_REGEX_ASSERT(detail::is_low_surrogate(m_values[1])); + } + else + { + // 16-bit code point: + m_values[0] = static_cast(*m_position); + m_values[1] = 0; + m_current = 0; + // value must not be a surrogate: + if(detail::is_surrogate(m_values[0])) + detail::invalid_utf32_code_point(*m_position); + } + } + BaseIterator m_position; + mutable U16Type m_values[3]; + mutable unsigned m_current; +}; + +template +class u16_to_u32_iterator +{ + // special values for pending iterator reads: + static const U32Type pending_read = 0xffffffffu; + + typedef typename std::iterator_traits::value_type base_value_type; + + static_assert(sizeof(base_value_type)*CHAR_BIT == 16, "Incorrectly sized template argument"); + static_assert(sizeof(U32Type)*CHAR_BIT == 32, "Incorrectly sized template argument"); + +public: + typedef std::ptrdiff_t difference_type; + typedef U32Type value_type; + typedef value_type const* pointer; + typedef value_type const reference; + typedef std::bidirectional_iterator_tag iterator_category; + + reference operator*()const + { + if(m_value == pending_read) + extract_current(); + return m_value; + } + bool operator==(const u16_to_u32_iterator& that)const + { + return m_position == that.m_position; + } + bool operator!=(const u16_to_u32_iterator& that)const + { + return !(*this == that); + } + u16_to_u32_iterator& operator++() + { + // skip high surrogate first if there is one: + if(detail::is_high_surrogate(*m_position)) ++m_position; + ++m_position; + m_value = pending_read; + return *this; + } + u16_to_u32_iterator operator++(int) + { + u16_to_u32_iterator r(*this); + ++(*this); + return r; + } + u16_to_u32_iterator& operator--() + { + --m_position; + // if we have a low surrogate then go back one more: + if(detail::is_low_surrogate(*m_position)) + --m_position; + m_value = pending_read; + return *this; + } + u16_to_u32_iterator operator--(int) + { + u16_to_u32_iterator r(*this); + --(*this); + return r; + } + BaseIterator base()const + { + return m_position; + } + // construct: + u16_to_u32_iterator() : m_position() + { + m_value = pending_read; + } + u16_to_u32_iterator(BaseIterator b) : m_position(b) + { + m_value = pending_read; + } + // + // Range checked version: + // + u16_to_u32_iterator(BaseIterator b, BaseIterator start, BaseIterator end) : m_position(b) + { + m_value = pending_read; + // + // The range must not start with a low surrogate, or end in a high surrogate, + // otherwise we run the risk of running outside the underlying input range. + // Likewise b must not be located at a low surrogate. + // + std::uint16_t val; + if(start != end) + { + if((b != start) && (b != end)) + { + val = *b; + if(detail::is_surrogate(val) && ((val & 0xFC00u) == 0xDC00u)) + invalid_code_point(val); + } + val = *start; + if(detail::is_surrogate(val) && ((val & 0xFC00u) == 0xDC00u)) + invalid_code_point(val); + val = *--end; + if(detail::is_high_surrogate(val)) + invalid_code_point(val); + } + } +private: + static void invalid_code_point(std::uint16_t val) + { + std::stringstream ss; + ss << "Misplaced UTF-16 surrogate U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-32 sequence"; + std::out_of_range e(ss.str()); +#ifndef BOOST_REGEX_STANDALONE + boost::throw_exception(e); +#else + throw e; +#endif + } + void extract_current()const + { + m_value = static_cast(static_cast< std::uint16_t>(*m_position)); + // if the last value is a high surrogate then adjust m_position and m_value as needed: + if(detail::is_high_surrogate(*m_position)) + { + // precondition; next value must have be a low-surrogate: + BaseIterator next(m_position); + std::uint16_t t = *++next; + if((t & 0xFC00u) != 0xDC00u) + invalid_code_point(t); + m_value = (m_value - detail::high_surrogate_base) << 10; + m_value |= (static_cast(static_cast< std::uint16_t>(t)) & detail::ten_bit_mask); + } + // postcondition; result must not be a surrogate: + if(detail::is_surrogate(m_value)) + invalid_code_point(static_cast< std::uint16_t>(m_value)); + } + BaseIterator m_position; + mutable U32Type m_value; +}; + +template +class u32_to_u8_iterator +{ + typedef typename std::iterator_traits::value_type base_value_type; + + static_assert(sizeof(base_value_type)*CHAR_BIT == 32, "Incorrectly sized template argument"); + static_assert(sizeof(U8Type)*CHAR_BIT == 8, "Incorrectly sized template argument"); + +public: + typedef std::ptrdiff_t difference_type; + typedef U8Type value_type; + typedef value_type const* pointer; + typedef value_type const reference; + typedef std::bidirectional_iterator_tag iterator_category; + + reference operator*()const + { + if(m_current == 4) + extract_current(); + return m_values[m_current]; + } + bool operator==(const u32_to_u8_iterator& that)const + { + if(m_position == that.m_position) + { + // either the m_current's must be equal, or one must be 0 and + // the other 4: which means neither must have bits 1 or 2 set: + return (m_current == that.m_current) + || (((m_current | that.m_current) & 3) == 0); + } + return false; + } + bool operator!=(const u32_to_u8_iterator& that)const + { + return !(*this == that); + } + u32_to_u8_iterator& operator++() + { + // if we have a pending read then read now, so that we know whether + // to skip a position, or move to a low-surrogate: + if(m_current == 4) + { + // pending read: + extract_current(); + } + // move to the next surrogate position: + ++m_current; + // if we've reached the end skip a position: + if(m_values[m_current] == 0) + { + m_current = 4; + ++m_position; + } + return *this; + } + u32_to_u8_iterator operator++(int) + { + u32_to_u8_iterator r(*this); + ++(*this); + return r; + } + u32_to_u8_iterator& operator--() + { + if((m_current & 3) == 0) + { + --m_position; + extract_current(); + m_current = 3; + while(m_current && (m_values[m_current] == 0)) + --m_current; + } + else + --m_current; + return *this; + } + u32_to_u8_iterator operator--(int) + { + u32_to_u8_iterator r(*this); + --(*this); + return r; + } + BaseIterator base()const + { + return m_position; + } + // construct: + u32_to_u8_iterator() : m_position(), m_current(0) + { + m_values[0] = 0; + m_values[1] = 0; + m_values[2] = 0; + m_values[3] = 0; + m_values[4] = 0; + } + u32_to_u8_iterator(BaseIterator b) : m_position(b), m_current(4) + { + m_values[0] = 0; + m_values[1] = 0; + m_values[2] = 0; + m_values[3] = 0; + m_values[4] = 0; + } +private: + + void extract_current()const + { + std::uint32_t c = *m_position; + if(c > 0x10FFFFu) + detail::invalid_utf32_code_point(c); + if(c < 0x80u) + { + m_values[0] = static_cast(c); + m_values[1] = static_cast(0u); + m_values[2] = static_cast(0u); + m_values[3] = static_cast(0u); + } + else if(c < 0x800u) + { + m_values[0] = static_cast(0xC0u + (c >> 6)); + m_values[1] = static_cast(0x80u + (c & 0x3Fu)); + m_values[2] = static_cast(0u); + m_values[3] = static_cast(0u); + } + else if(c < 0x10000u) + { + m_values[0] = static_cast(0xE0u + (c >> 12)); + m_values[1] = static_cast(0x80u + ((c >> 6) & 0x3Fu)); + m_values[2] = static_cast(0x80u + (c & 0x3Fu)); + m_values[3] = static_cast(0u); + } + else + { + m_values[0] = static_cast(0xF0u + (c >> 18)); + m_values[1] = static_cast(0x80u + ((c >> 12) & 0x3Fu)); + m_values[2] = static_cast(0x80u + ((c >> 6) & 0x3Fu)); + m_values[3] = static_cast(0x80u + (c & 0x3Fu)); + } + m_current= 0; + } + BaseIterator m_position; + mutable U8Type m_values[5]; + mutable unsigned m_current; +}; + +template +class u8_to_u32_iterator +{ + // special values for pending iterator reads: + static const U32Type pending_read = 0xffffffffu; + + typedef typename std::iterator_traits::value_type base_value_type; + + static_assert(sizeof(base_value_type)*CHAR_BIT == 8, "Incorrectly sized template argument"); + static_assert(sizeof(U32Type)*CHAR_BIT == 32, "Incorrectly sized template argument"); + +public: + typedef std::ptrdiff_t difference_type; + typedef U32Type value_type; + typedef value_type const* pointer; + typedef value_type const reference; + typedef std::bidirectional_iterator_tag iterator_category; + + reference operator*()const + { + if(m_value == pending_read) + extract_current(); + return m_value; + } + bool operator==(const u8_to_u32_iterator& that)const + { + return m_position == that.m_position; + } + bool operator!=(const u8_to_u32_iterator& that)const + { + return !(*this == that); + } + u8_to_u32_iterator& operator++() + { + // We must not start with a continuation character: + if((static_cast(*m_position) & 0xC0) == 0x80) + invalid_sequence(); + // skip high surrogate first if there is one: + unsigned c = detail::utf8_byte_count(*m_position); + if(m_value == pending_read) + { + // Since we haven't read in a value, we need to validate the code points: + for(unsigned i = 0; i < c; ++i) + { + ++m_position; + // We must have a continuation byte: + if((i != c - 1) && ((static_cast(*m_position) & 0xC0) != 0x80)) + invalid_sequence(); + } + } + else + { + std::advance(m_position, c); + } + m_value = pending_read; + return *this; + } + u8_to_u32_iterator operator++(int) + { + u8_to_u32_iterator r(*this); + ++(*this); + return r; + } + u8_to_u32_iterator& operator--() + { + // Keep backtracking until we don't have a trailing character: + unsigned count = 0; + while((*--m_position & 0xC0u) == 0x80u) ++count; + // now check that the sequence was valid: + if(count != detail::utf8_trailing_byte_count(*m_position)) + invalid_sequence(); + m_value = pending_read; + return *this; + } + u8_to_u32_iterator operator--(int) + { + u8_to_u32_iterator r(*this); + --(*this); + return r; + } + BaseIterator base()const + { + return m_position; + } + // construct: + u8_to_u32_iterator() : m_position() + { + m_value = pending_read; + } + u8_to_u32_iterator(BaseIterator b) : m_position(b) + { + m_value = pending_read; + } + // + // Checked constructor: + // + u8_to_u32_iterator(BaseIterator b, BaseIterator start, BaseIterator end) : m_position(b) + { + m_value = pending_read; + // + // We must not start with a continuation character, or end with a + // truncated UTF-8 sequence otherwise we run the risk of going past + // the start/end of the underlying sequence: + // + if(start != end) + { + unsigned char v = *start; + if((v & 0xC0u) == 0x80u) + invalid_sequence(); + if((b != start) && (b != end) && ((*b & 0xC0u) == 0x80u)) + invalid_sequence(); + BaseIterator pos = end; + do + { + v = *--pos; + } + while((start != pos) && ((v & 0xC0u) == 0x80u)); + std::ptrdiff_t extra = detail::utf8_byte_count(v); + if(std::distance(pos, end) < extra) + invalid_sequence(); + } + } +private: + static void invalid_sequence() + { + std::out_of_range e("Invalid UTF-8 sequence encountered while trying to encode UTF-32 character"); +#ifndef BOOST_REGEX_STANDALONE + boost::throw_exception(e); +#else + throw e; +#endif + } + void extract_current()const + { + m_value = static_cast(static_cast< std::uint8_t>(*m_position)); + // we must not have a continuation character: + if((m_value & 0xC0u) == 0x80u) + invalid_sequence(); + // see how many extra bytes we have: + unsigned extra = detail::utf8_trailing_byte_count(*m_position); + // extract the extra bits, 6 from each extra byte: + BaseIterator next(m_position); + for(unsigned c = 0; c < extra; ++c) + { + ++next; + m_value <<= 6; + // We must have a continuation byte: + if((static_cast(*next) & 0xC0) != 0x80) + invalid_sequence(); + m_value += static_cast(*next) & 0x3Fu; + } + // we now need to remove a few of the leftmost bits, but how many depends + // upon how many extra bytes we've extracted: + static const std::uint32_t masks[4] = + { + 0x7Fu, + 0x7FFu, + 0xFFFFu, + 0x1FFFFFu, + }; + m_value &= masks[extra]; + // check the result is in range: + if(m_value > static_cast(0x10FFFFu)) + invalid_sequence(); + // The result must not be a surrogate: + if((m_value >= static_cast(0xD800)) && (m_value <= static_cast(0xDFFF))) + invalid_sequence(); + // We should not have had an invalidly encoded UTF8 sequence: + if((extra > 0) && (m_value <= static_cast(masks[extra - 1]))) + invalid_sequence(); + } + BaseIterator m_position; + mutable U32Type m_value; +}; + +template +class utf16_output_iterator +{ +public: + typedef void difference_type; + typedef void value_type; + typedef std::uint32_t* pointer; + typedef std::uint32_t& reference; + typedef std::output_iterator_tag iterator_category; + + utf16_output_iterator(const BaseIterator& b) + : m_position(b){} + utf16_output_iterator(const utf16_output_iterator& that) + : m_position(that.m_position){} + utf16_output_iterator& operator=(const utf16_output_iterator& that) + { + m_position = that.m_position; + return *this; + } + const utf16_output_iterator& operator*()const + { + return *this; + } + void operator=(std::uint32_t val)const + { + push(val); + } + utf16_output_iterator& operator++() + { + return *this; + } + utf16_output_iterator& operator++(int) + { + return *this; + } + BaseIterator base()const + { + return m_position; + } +private: + void push(std::uint32_t v)const + { + if(v >= 0x10000u) + { + // begin by checking for a code point out of range: + if(v > 0x10FFFFu) + detail::invalid_utf32_code_point(v); + // split into two surrogates: + *m_position++ = static_cast(v >> 10) + detail::high_surrogate_base; + *m_position++ = static_cast(v & detail::ten_bit_mask) + detail::low_surrogate_base; + } + else + { + // 16-bit code point: + // value must not be a surrogate: + if(detail::is_surrogate(v)) + detail::invalid_utf32_code_point(v); + *m_position++ = static_cast(v); + } + } + mutable BaseIterator m_position; +}; + +template +class utf8_output_iterator +{ +public: + typedef void difference_type; + typedef void value_type; + typedef std::uint32_t* pointer; + typedef std::uint32_t& reference; + typedef std::output_iterator_tag iterator_category; + + utf8_output_iterator(const BaseIterator& b) + : m_position(b){} + utf8_output_iterator(const utf8_output_iterator& that) + : m_position(that.m_position){} + utf8_output_iterator& operator=(const utf8_output_iterator& that) + { + m_position = that.m_position; + return *this; + } + const utf8_output_iterator& operator*()const + { + return *this; + } + void operator=(std::uint32_t val)const + { + push(val); + } + utf8_output_iterator& operator++() + { + return *this; + } + utf8_output_iterator& operator++(int) + { + return *this; + } + BaseIterator base()const + { + return m_position; + } +private: + void push(std::uint32_t c)const + { + if(c > 0x10FFFFu) + detail::invalid_utf32_code_point(c); + if(c < 0x80u) + { + *m_position++ = static_cast(c); + } + else if(c < 0x800u) + { + *m_position++ = static_cast(0xC0u + (c >> 6)); + *m_position++ = static_cast(0x80u + (c & 0x3Fu)); + } + else if(c < 0x10000u) + { + *m_position++ = static_cast(0xE0u + (c >> 12)); + *m_position++ = static_cast(0x80u + ((c >> 6) & 0x3Fu)); + *m_position++ = static_cast(0x80u + (c & 0x3Fu)); + } + else + { + *m_position++ = static_cast(0xF0u + (c >> 18)); + *m_position++ = static_cast(0x80u + ((c >> 12) & 0x3Fu)); + *m_position++ = static_cast(0x80u + ((c >> 6) & 0x3Fu)); + *m_position++ = static_cast(0x80u + (c & 0x3Fu)); + } + } + mutable BaseIterator m_position; +}; + +} // namespace boost + +#endif // BOOST_REGEX_UNICODE_ITERATOR_HPP + diff --git a/scintilla/include/boost/regex/v5/w32_regex_traits.hpp b/scintilla/include/boost/regex/v5/w32_regex_traits.hpp new file mode 100644 index 0000000000..16f7ee4eaa --- /dev/null +++ b/scintilla/include/boost/regex/v5/w32_regex_traits.hpp @@ -0,0 +1,1311 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE w32_regex_traits.hpp + * VERSION see + * DESCRIPTION: Declares regular expression traits class w32_regex_traits. + */ + +#ifndef BOOST_W32_REGEX_TRAITS_HPP_INCLUDED +#define BOOST_W32_REGEX_TRAITS_HPP_INCLUDED + +#ifndef BOOST_REGEX_NO_WIN32_LOCALE + +#include +#include +#ifdef BOOST_HAS_THREADS +#include +#endif +#include +#include + +#if defined(_MSC_VER) && !defined(_WIN32_WCE) && !defined(UNDER_CE) +#pragma comment(lib, "user32.lib") +#endif + +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4786) +#if BOOST_REGEX_MSVC < 1910 +#pragma warning(disable:4800) +#endif +#endif + +#ifndef BASETYPES +// +// windows.h not included, so lets forward declare what we need: +// +#ifndef NO_STRICT +#ifndef STRICT +#define STRICT 1 +#endif +#endif + +#if defined(STRICT) +#define BOOST_RE_DETAIL_DECLARE_HANDLE(x) struct x##__; typedef struct x##__ *x +#else +#define BOOST_RE_DETAIL_DECLARE_HANDLE(x) typedef void* x +#endif +// +// This must be in the global namespace: +// +extern "C" { + + BOOST_RE_DETAIL_DECLARE_HANDLE(HINSTANCE); + typedef HINSTANCE HMODULE; +} +#endif + +namespace boost{ + +// +// forward declaration is needed by some compilers: +// +template +class w32_regex_traits; + +namespace BOOST_REGEX_DETAIL_NS{ + +// +// start by typedeffing the types we'll need: +// +typedef unsigned long lcid_type; // placeholder for LCID. +typedef std::shared_ptr cat_type; // placeholder for dll HANDLE. + +// +// then add wrappers around the actual Win32 API's (ie implementation hiding): +// +lcid_type w32_get_default_locale(); +bool w32_is_lower(char, lcid_type); +#ifndef BOOST_NO_WREGEX +bool w32_is_lower(wchar_t, lcid_type); +#endif +bool w32_is_upper(char, lcid_type); +#ifndef BOOST_NO_WREGEX +bool w32_is_upper(wchar_t, lcid_type); +#endif +cat_type w32_cat_open(const std::string& name); +std::string w32_cat_get(const cat_type& cat, lcid_type state_id, int i, const std::string& def); +#ifndef BOOST_NO_WREGEX +std::wstring w32_cat_get(const cat_type& cat, lcid_type state_id, int i, const std::wstring& def); +#endif +std::string w32_transform(lcid_type state_id, const char* p1, const char* p2); +#ifndef BOOST_NO_WREGEX +std::wstring w32_transform(lcid_type state_id, const wchar_t* p1, const wchar_t* p2); +#endif +char w32_tolower(char c, lcid_type); +#ifndef BOOST_NO_WREGEX +wchar_t w32_tolower(wchar_t c, lcid_type); +#endif +char w32_toupper(char c, lcid_type); +#ifndef BOOST_NO_WREGEX +wchar_t w32_toupper(wchar_t c, lcid_type); +#endif +bool w32_is(lcid_type, std::uint32_t mask, char c); +#ifndef BOOST_NO_WREGEX +bool w32_is(lcid_type, std::uint32_t mask, wchar_t c); +#endif + +#ifndef BASETYPES +// +// Forward declarations of the small number of windows types and API's we use: +// + +#if !defined(__LP64__) +using dword = unsigned long; +#else +using DWORD = unsigned int; +#endif +using word = unsigned short; +using lctype = dword; + +static constexpr dword ct_ctype1 = 0x00000001; +static constexpr dword c1_upper = 0x0001; // upper case +static constexpr dword c1_lower = 0x0002; // lower case +static constexpr dword c1_digit = 0x0004; // decimal digits +static constexpr dword c1_space = 0x0008; // spacing characters +static constexpr dword c1_punct = 0x0010; // punctuation characters +static constexpr dword c1_cntrl = 0x0020; // control characters +static constexpr dword c1_blank = 0x0040; // blank characters +static constexpr dword c1_xdigit = 0x0080; // other digits +static constexpr dword c1_alpha = 0x0100; // any linguistic character +static constexpr dword c1_defined = 0x0200; // defined character +static constexpr unsigned int cp_acp = 0; +static constexpr dword lcmap_lowercase = 0x00000100; +static constexpr dword lcmap_uppercase = 0x00000200; +static constexpr dword lcmap_sortkey = 0x00000400; // WC sort key (normalize) +static constexpr lctype locale_idefaultansicodepage = 0x00001004; + +# ifdef UNDER_CE +# ifndef WINAPI +# ifndef _WIN32_WCE_EMULATION +# define BOOST_RE_STDCALL __cdecl // Note this doesn't match the desktop definition +# else +# define BOOST_RE_STDCALL __stdcall +# endif +# endif +# else +# if defined(_M_IX86) || defined(__i386__) +# define BOOST_RE_STDCALL __stdcall +# else + // On architectures other than 32-bit x86 __stdcall is ignored. Clang also issues a warning. +# define BOOST_RE_STDCALL +# endif +# endif + +#if defined (WIN32_PLATFORM_PSPC) +#define BOOST_RE_IMPORT __declspec( dllimport ) +#elif defined (_WIN32_WCE) +#define BOOST_RE_IMPORT +#else +#define BOOST_RE_IMPORT __declspec( dllimport ) +#endif + +extern "C" { + + BOOST_RE_IMPORT int BOOST_RE_STDCALL FreeLibrary(HMODULE hLibModule); + BOOST_RE_IMPORT int BOOST_RE_STDCALL LCMapStringA(lcid_type Locale, dword dwMapFlags, const char* lpSrcStr, int cchSrc, char* lpDestStr, int cchDest); + BOOST_RE_IMPORT int BOOST_RE_STDCALL LCMapStringW(lcid_type Locale, dword dwMapFlags, const wchar_t* lpSrcStr, int cchSrc, wchar_t* lpDestStr, int cchDest); + BOOST_RE_IMPORT int BOOST_RE_STDCALL MultiByteToWideChar(unsigned int CodePage, dword dwFlags, const char* lpMultiByteStr, int cbMultiByte, wchar_t* lpWideCharStr, int cchWideChar); + BOOST_RE_IMPORT int BOOST_RE_STDCALL LCMapStringW(lcid_type Locale, dword dwMapFlags, const wchar_t* lpSrcStr, int cchSrc, wchar_t* lpDestStr, int cchDest); + BOOST_RE_IMPORT int BOOST_RE_STDCALL WideCharToMultiByte(unsigned int CodePage, dword dwFlags, const wchar_t* lpWideCharStr, int cchWideChar, char* lpMultiByteStr, int cbMultiByte, const char* lpDefaultChar, int* lpUsedDefaultChar); + BOOST_RE_IMPORT int BOOST_RE_STDCALL GetStringTypeExA(lcid_type Locale, dword dwInfoType, const char* lpSrcStr, int cchSrc, word* lpCharType); + BOOST_RE_IMPORT int BOOST_RE_STDCALL GetStringTypeExW(lcid_type Locale, dword dwInfoType, const wchar_t* lpSrcStr, int cchSrc, word* lpCharType); + BOOST_RE_IMPORT lcid_type BOOST_RE_STDCALL GetUserDefaultLCID(); + BOOST_RE_IMPORT int BOOST_RE_STDCALL GetStringTypeExA(lcid_type Locale, dword dwInfoType, const char* lpSrcStr, int cchSrc, word* lpCharType); + BOOST_RE_IMPORT int BOOST_RE_STDCALL GetStringTypeExW(lcid_type Locale, dword dwInfoType, const wchar_t* lpSrcStr, int cchSrc, word* lpCharType); + BOOST_RE_IMPORT HMODULE BOOST_RE_STDCALL LoadLibraryA(const char* lpLibFileName); + BOOST_RE_IMPORT HMODULE BOOST_RE_STDCALL LoadLibraryW(const wchar_t* lpLibFileName); + BOOST_RE_IMPORT int BOOST_RE_STDCALL LoadStringW(HINSTANCE hInstance, unsigned int uID, wchar_t* lpBuffer, int cchBufferMax); + BOOST_RE_IMPORT int BOOST_RE_STDCALL LoadStringA(HINSTANCE hInstance, unsigned int uID, char* lpBuffer, int cchBufferMax); + BOOST_RE_IMPORT int BOOST_RE_STDCALL GetLocaleInfoW(lcid_type Locale, lctype LCType, wchar_t* lpLCData, int cchData); +} + +#else +// +// We have windows.h already included: +// +using dword = DWORD; +using word = WORD; +using lctype = LCTYPE; + +static constexpr dword ct_ctype1 = 0x00000001; +static constexpr dword c1_upper = 0x0001; // upper case +static constexpr dword c1_lower = 0x0002; // lower case +static constexpr dword c1_digit = 0x0004; // decimal digits +static constexpr dword c1_space = 0x0008; // spacing characters +static constexpr dword c1_punct = 0x0010; // punctuation characters +static constexpr dword c1_cntrl = 0x0020; // control characters +static constexpr dword c1_blank = 0x0040; // blank characters +static constexpr dword c1_xdigit = 0x0080; // other digits +static constexpr dword c1_alpha = 0x0100; // any linguistic character +static constexpr dword c1_defined = 0x0200; // defined character +static constexpr unsigned int cp_acp = 0; +static constexpr dword lcmap_lowercase = 0x00000100; +static constexpr dword lcmap_uppercase = 0x00000200; +static constexpr dword lcmap_sortkey = 0x00000400; // WC sort key (normalize) +static constexpr lctype locale_idefaultansicodepage = 0x00001004; + +using ::FreeLibrary; +using ::LCMapStringA; +using ::LCMapStringW; +using ::MultiByteToWideChar; +using ::LCMapStringW; +using ::WideCharToMultiByte; +using ::GetStringTypeExA; +using ::GetStringTypeExW; +using ::GetUserDefaultLCID; +using ::GetStringTypeExA; +using ::GetStringTypeExW; +using ::LoadLibraryA; +using ::LoadLibraryW; +using ::LoadStringW; +using ::LoadStringA; +using ::GetLocaleInfoW; + +#endif +// +// class w32_regex_traits_base: +// acts as a container for locale and the facets we are using. +// +template +struct w32_regex_traits_base +{ + w32_regex_traits_base(lcid_type l) + { imbue(l); } + lcid_type imbue(lcid_type l); + + lcid_type m_locale; +}; + +template +inline lcid_type w32_regex_traits_base::imbue(lcid_type l) +{ + lcid_type result(m_locale); + m_locale = l; + return result; +} + +// +// class w32_regex_traits_char_layer: +// implements methods that require specialisation for narrow characters: +// +template +class w32_regex_traits_char_layer : public w32_regex_traits_base +{ + typedef std::basic_string string_type; + typedef std::map map_type; + typedef typename map_type::const_iterator map_iterator_type; +public: + w32_regex_traits_char_layer(const lcid_type l); + + regex_constants::syntax_type syntax_type(charT c)const + { + map_iterator_type i = m_char_map.find(c); + return ((i == m_char_map.end()) ? 0 : i->second); + } + regex_constants::escape_syntax_type escape_syntax_type(charT c) const + { + map_iterator_type i = m_char_map.find(c); + if(i == m_char_map.end()) + { + if(::boost::BOOST_REGEX_DETAIL_NS::w32_is_lower(c, this->m_locale)) return regex_constants::escape_type_class; + if(::boost::BOOST_REGEX_DETAIL_NS::w32_is_upper(c, this->m_locale)) return regex_constants::escape_type_not_class; + return 0; + } + return i->second; + } + charT tolower(charT c)const + { + return ::boost::BOOST_REGEX_DETAIL_NS::w32_tolower(c, this->m_locale); + } + bool isctype(std::uint32_t mask, charT c)const + { + return ::boost::BOOST_REGEX_DETAIL_NS::w32_is(this->m_locale, mask, c); + } + +private: + string_type get_default_message(regex_constants::syntax_type); + // TODO: use a hash table when available! + map_type m_char_map; +}; + +template +w32_regex_traits_char_layer::w32_regex_traits_char_layer(::boost::BOOST_REGEX_DETAIL_NS::lcid_type l) + : w32_regex_traits_base(l) +{ + // we need to start by initialising our syntax map so we know which + // character is used for which purpose: + cat_type cat; + std::string cat_name(w32_regex_traits::get_catalog_name()); + if(cat_name.size()) + { + cat = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_open(cat_name); + if(!cat) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if(cat) + { + for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + string_type mss = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_get(cat, this->m_locale, i, get_default_message(i)); + for(typename string_type::size_type j = 0; j < mss.size(); ++j) + { + this->m_char_map[mss[j]] = i; + } + } + } + else + { + for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + const char* ptr = get_default_syntax(i); + while(ptr && *ptr) + { + this->m_char_map[static_cast(*ptr)] = i; + ++ptr; + } + } + } +} + +template +typename w32_regex_traits_char_layer::string_type + w32_regex_traits_char_layer::get_default_message(regex_constants::syntax_type i) +{ + const char* ptr = get_default_syntax(i); + string_type result; + while(ptr && *ptr) + { + result.append(1, static_cast(*ptr)); + ++ptr; + } + return result; +} + +// +// specialised version for narrow characters: +// +template <> +class w32_regex_traits_char_layer : public w32_regex_traits_base +{ + typedef std::string string_type; +public: + w32_regex_traits_char_layer(::boost::BOOST_REGEX_DETAIL_NS::lcid_type l) + : w32_regex_traits_base(l) + { + init(); + } + + regex_constants::syntax_type syntax_type(char c)const + { + return m_char_map[static_cast(c)]; + } + regex_constants::escape_syntax_type escape_syntax_type(char c) const + { + return m_char_map[static_cast(c)]; + } + char tolower(char c)const + { + return m_lower_map[static_cast(c)]; + } + bool isctype(std::uint32_t mask, char c)const + { + return m_type_map[static_cast(c)] & mask; + } + +private: + regex_constants::syntax_type m_char_map[1u << CHAR_BIT]; + char m_lower_map[1u << CHAR_BIT]; + std::uint16_t m_type_map[1u << CHAR_BIT]; + template + void init(); +}; + +// +// class w32_regex_traits_implementation: +// provides pimpl implementation for w32_regex_traits. +// +template +class w32_regex_traits_implementation : public w32_regex_traits_char_layer +{ +public: + typedef typename w32_regex_traits::char_class_type char_class_type; + static const char_class_type mask_word = 0x0400; // must be C1_DEFINED << 1 + static const char_class_type mask_unicode = 0x0800; // must be C1_DEFINED << 2 + static const char_class_type mask_horizontal = 0x1000; // must be C1_DEFINED << 3 + static const char_class_type mask_vertical = 0x2000; // must be C1_DEFINED << 4 + static const char_class_type mask_base = 0x3ff; // all the masks used by the CT_CTYPE1 group + + typedef std::basic_string string_type; + typedef charT char_type; + w32_regex_traits_implementation(::boost::BOOST_REGEX_DETAIL_NS::lcid_type l); + std::string error_string(regex_constants::error_type n) const + { + if(!m_error_strings.empty()) + { + std::map::const_iterator p = m_error_strings.find(n); + return (p == m_error_strings.end()) ? std::string(get_default_error_string(n)) : p->second; + } + return get_default_error_string(n); + } + char_class_type lookup_classname(const charT* p1, const charT* p2) const + { + char_class_type result = lookup_classname_imp(p1, p2); + if(result == 0) + { + typedef typename string_type::size_type size_type; + string_type temp(p1, p2); + for(size_type i = 0; i < temp.size(); ++i) + temp[i] = this->tolower(temp[i]); + result = lookup_classname_imp(&*temp.begin(), &*temp.begin() + temp.size()); + } + return result; + } + string_type lookup_collatename(const charT* p1, const charT* p2) const; + string_type transform_primary(const charT* p1, const charT* p2) const; + string_type transform(const charT* p1, const charT* p2) const + { + return ::boost::BOOST_REGEX_DETAIL_NS::w32_transform(this->m_locale, p1, p2); + } +private: + std::map m_error_strings; // error messages indexed by numberic ID + std::map m_custom_class_names; // character class names + std::map m_custom_collate_names; // collating element names + unsigned m_collate_type; // the form of the collation string + charT m_collate_delim; // the collation group delimiter + // + // helpers: + // + char_class_type lookup_classname_imp(const charT* p1, const charT* p2) const; +}; + +template +typename w32_regex_traits_implementation::string_type + w32_regex_traits_implementation::transform_primary(const charT* p1, const charT* p2) const +{ + string_type result; + // + // What we do here depends upon the format of the sort key returned by + // sort key returned by this->transform: + // + switch(m_collate_type) + { + case sort_C: + case sort_unknown: + // the best we can do is translate to lower case, then get a regular sort key: + { + result.assign(p1, p2); + typedef typename string_type::size_type size_type; + for(size_type i = 0; i < result.size(); ++i) + result[i] = this->tolower(result[i]); + result = this->transform(&*result.begin(), &*result.begin() + result.size()); + break; + } + case sort_fixed: + { + // get a regular sort key, and then truncate it: + result.assign(this->transform(p1, p2)); + result.erase(this->m_collate_delim); + break; + } + case sort_delim: + // get a regular sort key, and then truncate everything after the delim: + result.assign(this->transform(p1, p2)); + std::size_t i; + for(i = 0; i < result.size(); ++i) + { + if(result[i] == m_collate_delim) + break; + } + result.erase(i); + break; + } + if(result.empty()) + result = string_type(1, charT(0)); + return result; +} + +template +typename w32_regex_traits_implementation::string_type + w32_regex_traits_implementation::lookup_collatename(const charT* p1, const charT* p2) const +{ + typedef typename std::map::const_iterator iter_type; + if(m_custom_collate_names.size()) + { + iter_type pos = m_custom_collate_names.find(string_type(p1, p2)); + if(pos != m_custom_collate_names.end()) + return pos->second; + } + std::string name(p1, p2); + name = lookup_default_collate_name(name); + if(name.size()) + return string_type(name.begin(), name.end()); + if(p2 - p1 == 1) + return string_type(1, *p1); + return string_type(); +} + +template +w32_regex_traits_implementation::w32_regex_traits_implementation(::boost::BOOST_REGEX_DETAIL_NS::lcid_type l) +: w32_regex_traits_char_layer(l) +{ + cat_type cat; + std::string cat_name(w32_regex_traits::get_catalog_name()); + if(cat_name.size()) + { + cat = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_open(cat_name); + if(!cat) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if(cat) + { + // + // Error messages: + // + for(boost::regex_constants::error_type i = static_cast(0); + i <= boost::regex_constants::error_unknown; + i = static_cast(i + 1)) + { + const char* p = get_default_error_string(i); + string_type default_message; + while(*p) + { + default_message.append(1, static_cast(*p)); + ++p; + } + string_type s = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_get(cat, this->m_locale, i+200, default_message); + std::string result; + for(std::string::size_type j = 0; j < s.size(); ++j) + { + result.append(1, static_cast(s[j])); + } + m_error_strings[i] = result; + } + // + // Custom class names: + // + static const char_class_type masks[14] = + { + 0x0104u, // C1_ALPHA | C1_DIGIT + 0x0100u, // C1_ALPHA + 0x0020u, // C1_CNTRL + 0x0004u, // C1_DIGIT + (~(0x0020u|0x0008u) & 0x01ffu) | 0x0400u, // not C1_CNTRL or C1_SPACE + 0x0002u, // C1_LOWER + (~0x0020u & 0x01ffu) | 0x0400, // not C1_CNTRL + 0x0010u, // C1_PUNCT + 0x0008u, // C1_SPACE + 0x0001u, // C1_UPPER + 0x0080u, // C1_XDIGIT + 0x0040u, // C1_BLANK + w32_regex_traits_implementation::mask_word, + w32_regex_traits_implementation::mask_unicode, + }; + static const string_type null_string; + for(unsigned int j = 0; j <= 13; ++j) + { + string_type s(::boost::BOOST_REGEX_DETAIL_NS::w32_cat_get(cat, this->m_locale, j+300, null_string)); + if(s.size()) + this->m_custom_class_names[s] = masks[j]; + } + } + // + // get the collation format used by m_pcollate: + // + m_collate_type = BOOST_REGEX_DETAIL_NS::find_sort_syntax(this, &m_collate_delim); +} + +template +typename w32_regex_traits_implementation::char_class_type + w32_regex_traits_implementation::lookup_classname_imp(const charT* p1, const charT* p2) const +{ + static const char_class_type masks[22] = + { + 0, + 0x0104u, // C1_ALPHA | C1_DIGIT + 0x0100u, // C1_ALPHA + 0x0040u, // C1_BLANK + 0x0020u, // C1_CNTRL + 0x0004u, // C1_DIGIT + 0x0004u, // C1_DIGIT + (~(0x0020u|0x0008u|0x0040) & 0x01ffu) | 0x0400u, // not C1_CNTRL or C1_SPACE or C1_BLANK + w32_regex_traits_implementation::mask_horizontal, + 0x0002u, // C1_LOWER + 0x0002u, // C1_LOWER + (~0x0020u & 0x01ffu) | 0x0400, // not C1_CNTRL + 0x0010u, // C1_PUNCT + 0x0008u, // C1_SPACE + 0x0008u, // C1_SPACE + 0x0001u, // C1_UPPER + w32_regex_traits_implementation::mask_unicode, + 0x0001u, // C1_UPPER + w32_regex_traits_implementation::mask_vertical, + 0x0104u | w32_regex_traits_implementation::mask_word, + 0x0104u | w32_regex_traits_implementation::mask_word, + 0x0080u, // C1_XDIGIT + }; + if(m_custom_class_names.size()) + { + typedef typename std::map, char_class_type>::const_iterator map_iter; + map_iter pos = m_custom_class_names.find(string_type(p1, p2)); + if(pos != m_custom_class_names.end()) + return pos->second; + } + std::size_t state_id = 1u + (std::size_t)BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2); + if(state_id < sizeof(masks) / sizeof(masks[0])) + return masks[state_id]; + return masks[0]; +} + + +template +std::shared_ptr > create_w32_regex_traits(::boost::BOOST_REGEX_DETAIL_NS::lcid_type l) +{ + // TODO: create a cache for previously constructed objects. + return boost::object_cache< ::boost::BOOST_REGEX_DETAIL_NS::lcid_type, w32_regex_traits_implementation >::get(l, 5); +} + +} // BOOST_REGEX_DETAIL_NS + +template +class w32_regex_traits +{ +public: + typedef charT char_type; + typedef std::size_t size_type; + typedef std::basic_string string_type; + typedef ::boost::BOOST_REGEX_DETAIL_NS::lcid_type locale_type; + typedef std::uint_least32_t char_class_type; + + struct boost_extensions_tag{}; + + w32_regex_traits() + : m_pimpl(BOOST_REGEX_DETAIL_NS::create_w32_regex_traits(::boost::BOOST_REGEX_DETAIL_NS::w32_get_default_locale())) + { } + static size_type length(const char_type* p) + { + return std::char_traits::length(p); + } + regex_constants::syntax_type syntax_type(charT c)const + { + return m_pimpl->syntax_type(c); + } + regex_constants::escape_syntax_type escape_syntax_type(charT c) const + { + return m_pimpl->escape_syntax_type(c); + } + charT translate(charT c) const + { + return c; + } + charT translate_nocase(charT c) const + { + return this->m_pimpl->tolower(c); + } + charT translate(charT c, bool icase) const + { + return icase ? this->m_pimpl->tolower(c) : c; + } + charT tolower(charT c) const + { + return this->m_pimpl->tolower(c); + } + charT toupper(charT c) const + { + return ::boost::BOOST_REGEX_DETAIL_NS::w32_toupper(c, this->m_pimpl->m_locale); + } + string_type transform(const charT* p1, const charT* p2) const + { + return ::boost::BOOST_REGEX_DETAIL_NS::w32_transform(this->m_pimpl->m_locale, p1, p2); + } + string_type transform_primary(const charT* p1, const charT* p2) const + { + return m_pimpl->transform_primary(p1, p2); + } + char_class_type lookup_classname(const charT* p1, const charT* p2) const + { + return m_pimpl->lookup_classname(p1, p2); + } + string_type lookup_collatename(const charT* p1, const charT* p2) const + { + return m_pimpl->lookup_collatename(p1, p2); + } + bool isctype(charT c, char_class_type f) const + { + if((f & BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation::mask_base) + && (this->m_pimpl->isctype(f & BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation::mask_base, c))) + return true; + else if((f & BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation::mask_unicode) && BOOST_REGEX_DETAIL_NS::is_extended(c)) + return true; + else if((f & BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation::mask_word) && (c == '_')) + return true; + else if((f & BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation::mask_vertical) + && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == '\v'))) + return true; + else if((f & BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation::mask_horizontal) + && this->isctype(c, 0x0008u) && !this->isctype(c, BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation::mask_vertical)) + return true; + return false; + } + std::intmax_t toi(const charT*& p1, const charT* p2, int radix)const + { + return ::boost::BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this); + } + int value(charT c, int radix)const + { + int result = (int)::boost::BOOST_REGEX_DETAIL_NS::global_value(c); + return result < radix ? result : -1; + } + locale_type imbue(locale_type l) + { + ::boost::BOOST_REGEX_DETAIL_NS::lcid_type result(getloc()); + m_pimpl = BOOST_REGEX_DETAIL_NS::create_w32_regex_traits(l); + return result; + } + locale_type getloc()const + { + return m_pimpl->m_locale; + } + std::string error_string(regex_constants::error_type n) const + { + return m_pimpl->error_string(n); + } + + // + // extension: + // set the name of the message catalog in use (defaults to "boost_regex"). + // + static std::string catalog_name(const std::string& name); + static std::string get_catalog_name(); + +private: + std::shared_ptr > m_pimpl; + // + // catalog name handler: + // + static std::string& get_catalog_name_inst(); + +#ifdef BOOST_HAS_THREADS + static std::mutex& get_mutex_inst(); +#endif +}; + +template +std::string w32_regex_traits::catalog_name(const std::string& name) +{ +#ifdef BOOST_HAS_THREADS + std::lock_guard lk(get_mutex_inst()); +#endif + std::string result(get_catalog_name_inst()); + get_catalog_name_inst() = name; + return result; +} + +template +std::string& w32_regex_traits::get_catalog_name_inst() +{ + static std::string s_name; + return s_name; +} + +template +std::string w32_regex_traits::get_catalog_name() +{ +#ifdef BOOST_HAS_THREADS + std::lock_guard lk(get_mutex_inst()); +#endif + std::string result(get_catalog_name_inst()); + return result; +} + +#ifdef BOOST_HAS_THREADS +template +std::mutex& w32_regex_traits::get_mutex_inst() +{ + static std::mutex s_mutex; + return s_mutex; +} +#endif + +namespace BOOST_REGEX_DETAIL_NS { + +#ifdef BOOST_NO_ANSI_APIS + inline unsigned int get_code_page_for_locale_id(lcid_type idx) + { + wchar_t code_page_string[7]; + if (boost::BOOST_REGEX_DETAIL_NS::GetLocaleInfoW(idx, locale_idefaultansicodepage, code_page_string, 7) == 0) + return 0; + + return static_cast(_wtol(code_page_string)); +} +#endif + + template + inline void w32_regex_traits_char_layer::init() + { + // we need to start by initialising our syntax map so we know which + // character is used for which purpose: + std::memset(m_char_map, 0, sizeof(m_char_map)); + cat_type cat; + std::string cat_name(w32_regex_traits::get_catalog_name()); + if (cat_name.size()) + { + cat = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_open(cat_name); + if (!cat) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + ::boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if (cat) + { + for (regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + string_type mss = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_get(cat, this->m_locale, i, get_default_syntax(i)); + for (string_type::size_type j = 0; j < mss.size(); ++j) + { + m_char_map[static_cast(mss[j])] = i; + } + } + } + else + { + for (regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + const char* ptr = get_default_syntax(i); + while (ptr && *ptr) + { + m_char_map[static_cast(*ptr)] = i; + ++ptr; + } + } + } + // + // finish off by calculating our escape types: + // + unsigned char i = 'A'; + do + { + if (m_char_map[i] == 0) + { + if (::boost::BOOST_REGEX_DETAIL_NS::w32_is(this->m_locale, 0x0002u, (char)i)) + m_char_map[i] = regex_constants::escape_type_class; + else if (::boost::BOOST_REGEX_DETAIL_NS::w32_is(this->m_locale, 0x0001u, (char)i)) + m_char_map[i] = regex_constants::escape_type_not_class; + } + } while (0xFF != i++); + + // + // fill in lower case map: + // + char char_map[1 << CHAR_BIT]; + for (int ii = 0; ii < (1 << CHAR_BIT); ++ii) + char_map[ii] = static_cast(ii); +#ifndef BOOST_NO_ANSI_APIS + int r = boost::BOOST_REGEX_DETAIL_NS::LCMapStringA(this->m_locale, lcmap_lowercase, char_map, 1 << CHAR_BIT, this->m_lower_map, 1 << CHAR_BIT); + BOOST_REGEX_ASSERT(r != 0); +#else + unsigned int code_page = get_code_page_for_locale_id(this->m_locale); + BOOST_REGEX_ASSERT(code_page != 0); + + wchar_t wide_char_map[1 << CHAR_BIT]; + int conv_r = boost::BOOST_REGEX_DETAIL_NS::MultiByteToWideChar(code_page, 0, char_map, 1 << CHAR_BIT, wide_char_map, 1 << CHAR_BIT); + BOOST_REGEX_ASSERT(conv_r != 0); + + wchar_t wide_lower_map[1 << CHAR_BIT]; + int r = boost::BOOST_REGEX_DETAIL_NS::LCMapStringW(this->m_locale, lcmap_lowercase, wide_char_map, 1 << CHAR_BIT, wide_lower_map, 1 << CHAR_BIT); + BOOST_REGEX_ASSERT(r != 0); + + conv_r = boost::BOOST_REGEX_DETAIL_NS::WideCharToMultiByte(code_page, 0, wide_lower_map, r, this->m_lower_map, 1 << CHAR_BIT, NULL, NULL); + BOOST_REGEX_ASSERT(conv_r != 0); +#endif + if (r < (1 << CHAR_BIT)) + { + // if we have multibyte characters then not all may have been given + // a lower case mapping: + for (int jj = r; jj < (1 << CHAR_BIT); ++jj) + this->m_lower_map[jj] = static_cast(jj); + } + +#ifndef BOOST_NO_ANSI_APIS + r = boost::BOOST_REGEX_DETAIL_NS::GetStringTypeExA(this->m_locale, ct_ctype1, char_map, 1 << CHAR_BIT, this->m_type_map); +#else + r = boost::BOOST_REGEX_DETAIL_NS::GetStringTypeExW(this->m_locale, ct_ctype1, wide_char_map, 1 << CHAR_BIT, this->m_type_map); +#endif + BOOST_REGEX_ASSERT(0 != r); + } + + inline lcid_type w32_get_default_locale() + { + return boost::BOOST_REGEX_DETAIL_NS::GetUserDefaultLCID(); + } + + inline bool w32_is_lower(char c, lcid_type idx) + { +#ifndef BOOST_NO_ANSI_APIS + word mask; + if (boost::BOOST_REGEX_DETAIL_NS::GetStringTypeExA(idx, ct_ctype1, &c, 1, &mask) && (mask & c1_lower)) + return true; + return false; +#else + unsigned int code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return false; + + wchar_t wide_c; + if (boost::BOOST_REGEX_DETAIL_NS::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) + return false; + + word mask; + if (boost::BOOST_REGEX_DETAIL_NS::GetStringTypeExW(idx, ct_ctype1, &wide_c, 1, &mask) && (mask & c1_lower)) + return true; + return false; +#endif + } + + inline bool w32_is_lower(wchar_t c, lcid_type idx) + { + word mask; + if (boost::BOOST_REGEX_DETAIL_NS::GetStringTypeExW(idx, ct_ctype1, &c, 1, &mask) && (mask & c1_lower)) + return true; + return false; + } + + inline bool w32_is_upper(char c, lcid_type idx) + { +#ifndef BOOST_NO_ANSI_APIS + word mask; + if (boost::BOOST_REGEX_DETAIL_NS::GetStringTypeExA(idx, ct_ctype1, &c, 1, &mask) && (mask & c1_upper)) + return true; + return false; +#else + unsigned int code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return false; + + wchar_t wide_c; + if (boost::BOOST_REGEX_DETAIL_NS::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) + return false; + + word mask; + if (boost::BOOST_REGEX_DETAIL_NS::GetStringTypeExW(idx, ct_ctype1, &wide_c, 1, &mask) && (mask & c1_upper)) + return true; + return false; +#endif + } + + inline bool w32_is_upper(wchar_t c, lcid_type idx) + { + word mask; + if (boost::BOOST_REGEX_DETAIL_NS::GetStringTypeExW(idx, ct_ctype1, &c, 1, &mask) && (mask & c1_upper)) + return true; + return false; + } + + inline void free_module(void* mod) + { + boost::BOOST_REGEX_DETAIL_NS::FreeLibrary(static_cast(mod)); + } + + inline cat_type w32_cat_open(const std::string& name) + { +#ifndef BOOST_NO_ANSI_APIS + cat_type result(boost::BOOST_REGEX_DETAIL_NS::LoadLibraryA(name.c_str()), &free_module); + return result; +#else + wchar_t* wide_name = (wchar_t*)_alloca((name.size() + 1) * sizeof(wchar_t)); + if (boost::BOOST_REGEX_DETAIL_NS::MultiByteToWideChar(cp_acp, 0, name.c_str(), (int)name.size(), wide_name, (int)(name.size() + 1)) == 0) + return cat_type(); + + cat_type result(boost::BOOST_REGEX_DETAIL_NS::LoadLibraryW(wide_name), &free_module); + return result; +#endif + } + + inline std::string w32_cat_get(const cat_type& cat, lcid_type, int i, const std::string& def) + { +#ifndef BOOST_NO_ANSI_APIS + char buf[256]; + if (0 == boost::BOOST_REGEX_DETAIL_NS::LoadStringA( + static_cast(cat.get()), + i, + buf, + 256 + )) + { + return def; + } +#else + wchar_t wbuf[256]; + int r = boost::BOOST_REGEX_DETAIL_NS::LoadStringW( + static_cast(cat.get()), + i, + wbuf, + 256 + ); + if (r == 0) + return def; + + + int buf_size = 1 + boost::BOOST_REGEX_DETAIL_NS::WideCharToMultiByte(cp_acp, 0, wbuf, r, NULL, 0, NULL, NULL); + char* buf = (char*)_alloca(buf_size); + if (boost::BOOST_REGEX_DETAIL_NS::WideCharToMultiByte(cp_acp, 0, wbuf, r, buf, buf_size, NULL, NULL) == 0) + return def; // failed conversion. +#endif + return std::string(buf); + } + +#ifndef BOOST_NO_WREGEX + inline std::wstring w32_cat_get(const cat_type& cat, lcid_type, int i, const std::wstring& def) + { + wchar_t buf[256]; + if (0 == boost::BOOST_REGEX_DETAIL_NS::LoadStringW(static_cast(cat.get()), i, buf, 256)) + { + return def; + } + return std::wstring(buf); + } +#endif + inline std::string w32_transform(lcid_type idx, const char* p1, const char* p2) + { +#ifndef BOOST_NO_ANSI_APIS + int bytes = boost::BOOST_REGEX_DETAIL_NS::LCMapStringA( + idx, // locale identifier + lcmap_sortkey, // mapping transformation type + p1, // source string + static_cast(p2 - p1), // number of characters in source string + 0, // destination buffer + 0 // size of destination buffer + ); + if (!bytes) + return std::string(p1, p2); + std::string result(++bytes, '\0'); + bytes = boost::BOOST_REGEX_DETAIL_NS::LCMapStringA( + idx, // locale identifier + lcmap_sortkey, // mapping transformation type + p1, // source string + static_cast(p2 - p1), // number of characters in source string + &*result.begin(), // destination buffer + bytes // size of destination buffer + ); +#else + unsigned int code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return std::string(p1, p2); + + int src_len = static_cast(p2 - p1); + wchar_t* wide_p1 = (wchar_t*)_alloca((src_len + 1) * 2); + if (boost::BOOST_REGEX_DETAIL_NS::MultiByteToWideChar(code_page, 0, p1, src_len, wide_p1, src_len + 1) == 0) + return std::string(p1, p2); + + int bytes = boost::BOOST_REGEX_DETAIL_NS::LCMapStringW( + idx, // locale identifier + lcmap_sortkey, // mapping transformation type + wide_p1, // source string + src_len, // number of characters in source string + 0, // destination buffer + 0 // size of destination buffer + ); + if (!bytes) + return std::string(p1, p2); + std::string result(++bytes, '\0'); + bytes = boost::BOOST_REGEX_DETAIL_NS::LCMapStringW( + idx, // locale identifier + lcmap_sortkey, // mapping transformation type + wide_p1, // source string + src_len, // number of characters in source string + (wchar_t*) & *result.begin(), // destination buffer + bytes // size of destination buffer + ); +#endif + if (bytes > static_cast(result.size())) + return std::string(p1, p2); + while (result.size() && result[result.size() - 1] == '\0') + { + result.erase(result.size() - 1); + } + return result; + } + +#ifndef BOOST_NO_WREGEX + inline std::wstring w32_transform(lcid_type idx, const wchar_t* p1, const wchar_t* p2) + { + int bytes = boost::BOOST_REGEX_DETAIL_NS::LCMapStringW( + idx, // locale identifier + lcmap_sortkey, // mapping transformation type + p1, // source string + static_cast(p2 - p1), // number of characters in source string + 0, // destination buffer + 0 // size of destination buffer + ); + if (!bytes) + return std::wstring(p1, p2); + std::string result(++bytes, '\0'); + bytes = boost::BOOST_REGEX_DETAIL_NS::LCMapStringW( + idx, // locale identifier + lcmap_sortkey, // mapping transformation type + p1, // source string + static_cast(p2 - p1), // number of characters in source string + reinterpret_cast(&*result.begin()), // destination buffer *of bytes* + bytes // size of destination buffer + ); + if (bytes > static_cast(result.size())) + return std::wstring(p1, p2); + while (result.size() && result[result.size() - 1] == L'\0') + { + result.erase(result.size() - 1); + } + std::wstring r2; + for (std::string::size_type i = 0; i < result.size(); ++i) + r2.append(1, static_cast(static_cast(result[i]))); + return r2; + } +#endif + inline char w32_tolower(char c, lcid_type idx) + { + char result[2]; +#ifndef BOOST_NO_ANSI_APIS + int b = boost::BOOST_REGEX_DETAIL_NS::LCMapStringA( + idx, // locale identifier + lcmap_lowercase, // mapping transformation type + &c, // source string + 1, // number of characters in source string + result, // destination buffer + 1); // size of destination buffer + if (b == 0) + return c; +#else + unsigned int code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return c; + + wchar_t wide_c; + if (boost::BOOST_REGEX_DETAIL_NS::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) + return c; + + wchar_t wide_result; + int b = boost::BOOST_REGEX_DETAIL_NS::LCMapStringW( + idx, // locale identifier + lcmap_lowercase, // mapping transformation type + &wide_c, // source string + 1, // number of characters in source string + &wide_result, // destination buffer + 1); // size of destination buffer + if (b == 0) + return c; + + if (boost::BOOST_REGEX_DETAIL_NS::WideCharToMultiByte(code_page, 0, &wide_result, 1, result, 2, NULL, NULL) == 0) + return c; // No single byte lower case equivalent available +#endif + return result[0]; + } + +#ifndef BOOST_NO_WREGEX + inline wchar_t w32_tolower(wchar_t c, lcid_type idx) + { + wchar_t result[2]; + int b = boost::BOOST_REGEX_DETAIL_NS::LCMapStringW( + idx, // locale identifier + lcmap_lowercase, // mapping transformation type + &c, // source string + 1, // number of characters in source string + result, // destination buffer + 1); // size of destination buffer + if (b == 0) + return c; + return result[0]; + } +#endif + inline char w32_toupper(char c, lcid_type idx) + { + char result[2]; +#ifndef BOOST_NO_ANSI_APIS + int b = boost::BOOST_REGEX_DETAIL_NS::LCMapStringA( + idx, // locale identifier + lcmap_uppercase, // mapping transformation type + &c, // source string + 1, // number of characters in source string + result, // destination buffer + 1); // size of destination buffer + if (b == 0) + return c; +#else + unsigned int code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return c; + + wchar_t wide_c; + if (boost::BOOST_REGEX_DETAIL_NS::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) + return c; + + wchar_t wide_result; + int b = boost::BOOST_REGEX_DETAIL_NS::LCMapStringW( + idx, // locale identifier + lcmap_uppercase, // mapping transformation type + &wide_c, // source string + 1, // number of characters in source string + &wide_result, // destination buffer + 1); // size of destination buffer + if (b == 0) + return c; + + if (boost::BOOST_REGEX_DETAIL_NS::WideCharToMultiByte(code_page, 0, &wide_result, 1, result, 2, NULL, NULL) == 0) + return c; // No single byte upper case equivalent available. +#endif + return result[0]; + } + +#ifndef BOOST_NO_WREGEX + inline wchar_t w32_toupper(wchar_t c, lcid_type idx) + { + wchar_t result[2]; + int b = boost::BOOST_REGEX_DETAIL_NS::LCMapStringW( + idx, // locale identifier + lcmap_uppercase, // mapping transformation type + &c, // source string + 1, // number of characters in source string + result, // destination buffer + 1); // size of destination buffer + if (b == 0) + return c; + return result[0]; + } +#endif + inline bool w32_is(lcid_type idx, std::uint32_t m, char c) + { + word mask; +#ifndef BOOST_NO_ANSI_APIS + if (boost::BOOST_REGEX_DETAIL_NS::GetStringTypeExA(idx, ct_ctype1, &c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) + return true; +#else + unsigned int code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return false; + + wchar_t wide_c; + if (boost::BOOST_REGEX_DETAIL_NS::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) + return false; + + if (boost::BOOST_REGEX_DETAIL_NS::GetStringTypeExW(idx, ct_ctype1, &wide_c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) + return true; +#endif + if ((m & w32_regex_traits_implementation::mask_word) && (c == '_')) + return true; + return false; + } + +#ifndef BOOST_NO_WREGEX + inline bool w32_is(lcid_type idx, std::uint32_t m, wchar_t c) + { + word mask; + if (boost::BOOST_REGEX_DETAIL_NS::GetStringTypeExW(idx, ct_ctype1, &c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) + return true; + if ((m & w32_regex_traits_implementation::mask_word) && (c == '_')) + return true; + if ((m & w32_regex_traits_implementation::mask_unicode) && (c > 0xff)) + return true; + return false; + } +#endif + +} // BOOST_REGEX_DETAIL_NS + + +} // boost + +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif + +#endif // BOOST_REGEX_NO_WIN32_LOCALE + +#endif diff --git a/scintilla/include/boost/regex_fwd.hpp b/scintilla/include/boost/regex_fwd.hpp new file mode 100644 index 0000000000..99371b8cae --- /dev/null +++ b/scintilla/include/boost/regex_fwd.hpp @@ -0,0 +1,37 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org/libs/regex for documentation. + * FILE regex_fwd.cpp + * VERSION see + * DESCRIPTION: Forward declares boost::basic_regex<> and + * associated typedefs. + */ + +#ifndef BOOST_REGEX_FWD_HPP +#define BOOST_REGEX_FWD_HPP + +#ifndef BOOST_REGEX_CONFIG_HPP +#include +#endif + +#ifdef BOOST_REGEX_CXX03 +#include +#else +#include +#endif + +#endif + + + + diff --git a/scintilla/src/Document.cxx b/scintilla/src/Document.cxx index d2d711cea1..753a9a97a8 100644 --- a/scintilla/src/Document.cxx +++ b/scintilla/src/Document.cxx @@ -2157,9 +2157,9 @@ Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, con //const bool forward = direction >= 0; const int increment = (direction >= 0) ? 1 : -1; // table for the condition: forward ? (pos < endSearch) : (pos >= endSearch) - // direction >= 0 direction < 0 - // pos >= endSearch: break continue - // pos < endSearch: continue break + // direction >= 0 direction < 0 + // pos >= endSearch: break continue + // pos < endSearch: continue break // i.e. continue search when direction and (pos - endSearch) have opposite signs, // which can be wrote as: (direction ^ (pos - endSearch)) < 0 @@ -2447,7 +2447,7 @@ void Document::SetCharClassesEx(const unsigned char *chars, size_t length) noexc } int Document::GetCharsOfClass(CharacterClass characterClass, unsigned char *buffer) const noexcept { - return charClass.GetCharsOfClass(characterClass, buffer); + return charClass.GetCharsOfClass(characterClass, buffer); } #if 0 @@ -3259,7 +3259,46 @@ class UTF8Iterator { #endif // _WIN32 #if defined(SCI_OWNREGEX) && defined(BOOST_REGEX_STANDALONE) +boost::regex_constants::match_flag_type MatchFlags(const Document* doc, Sci::Position startPos, Sci::Position endPos) noexcept { + boost::regex_constants::match_flag_type flagsMatch = boost::regex_constants::match_default | boost::regex_constants::match_not_dot_newline; + if (!doc->IsLineStartPosition(startPos)) + flagsMatch |= boost::regex_constants::match_not_bol; + if (!doc->IsLineEndPosition(endPos)) + flagsMatch |= boost::regex_constants::match_not_eol; + return flagsMatch; +} + +template +bool MatchOnLines(const Document* doc, const Regex& regexp, const RESearchRange& resr, RESearch& search) { + boost::match_results match, lastMatch; + const bool findForward = resr.startPos <= resr.endPos; + bool matched; + Iterator itStart(doc, resr.startPos); + Iterator itEnd(doc, resr.endPos); + const boost::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, resr.startPos, resr.endPos); + if (findForward) { + matched = boost::regex_search(itStart, itEnd, match, regexp, flagsMatch); + } + else { + while (boost::regex_search(itEnd, itStart, match, regexp, flagsMatch)) { + lastMatch = match; + const size_t maxTag = std::min(match.size(), RESearch::MAXTAG); + itEnd = match[maxTag].second; + } + matched = (lastMatch.size() != 0); + } + + if (matched) { + if (!findForward) match = lastMatch; + const size_t maxTag = std::min(match.size(), RESearch::MAXTAG); + for (size_t co = 0; co < maxTag; co++) { + search.bopat[co] = match[co].first.Pos(); + search.eopat[co] = match[co].second.PosRoundUp(); + } + } + return matched; +} #elif defined(SCI_OWNREGEX) || !defined(NO_CXX11_REGEX) std::regex_constants::match_flag_type MatchFlags(const Document *doc, Sci::Position startPos, Sci::Position endPos) noexcept { @@ -3495,6 +3534,50 @@ Sci::Position BuiltinRegex::FindText(const Document *doc, Sci::Position minPos, Sci::Position BuiltinRegex::FindText(const Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *pattern, FindOption flags, Sci::Position *length) { #ifdef BOOST_REGEX_STANDALONE + const RESearchRange resr(doc, minPos, maxPos); + try { + // const ElapsedPeriod ep; + boost::regex::flag_type flagsRe = boost::regex::ECMAScript; + + if (!caseSensitive) + flagsRe = flagsRe | boost::regex::icase; + + // Clear the RESearch so can fill in matches + search.Clear(); + + bool matched = false; + if (CpUtf8 == doc->dbcsCodePage) { + const std::wstring ws = WStringFromUTF8(s); + boost::wregex regexp; + regexp.assign(ws, flagsRe); + matched = MatchOnLines(doc, regexp, resr, search); + } + else { + boost::regex regexp; + regexp.assign(s, flagsRe); + matched = MatchOnLines(doc, regexp, resr, search); + } + + Sci::Position posMatch = -1; + if (matched) { + posMatch = search.bopat[0]; + *length = search.eopat[0] - search.bopat[0]; + } + // Example - search in doc/ScintillaHistory.html for + // [[:upper:]]eta[[:space:]] + // On MacBook, normally around 1 second but with locale imbued -> 14 seconds. + // const double durSearch = ep.Duration(); + // Platform::DebugPrintf("Search:%9.6g \n", durSearch); + return posMatch; +} + catch (const boost::regex_error&) { + // Failed to create regular expression + throw RegexError(); + } + catch (...) { + // Failed in some other way + return -1; + } #else return CxxRegexFindText(doc, minPos, maxPos, pattern, flags, length); #endif @@ -3503,6 +3586,9 @@ Sci::Position BuiltinRegex::FindText(const Document *doc, Sci::Position minPos, #endif // SCI_OWNREGEX const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) { + // boost::regex version of this function should be substituted by wrapping format method of + // match_results for max compatibility. eg. catch group $0-$9. see detail: + // https://www.boost.org/doc/libs/1_83_0/libs/regex/doc/html/boost_regex/format/boost_format_syntax.html substituted.clear(); for (Sci::Position j = 0; j < *length; j++) { if (text[j] == '\\') { From 915b972789f197e501993986c8e8f1074e72c0fd Mon Sep 17 00:00:00 2001 From: atauzki Date: Fri, 13 Oct 2023 11:18:55 +0800 Subject: [PATCH 2/4] Fix reverse search bugs, and `\b` behavior. --- scintilla/src/Document.cxx | 57 ++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/scintilla/src/Document.cxx b/scintilla/src/Document.cxx index 753a9a97a8..6b64ee7be9 100644 --- a/scintilla/src/Document.cxx +++ b/scintilla/src/Document.cxx @@ -25,7 +25,9 @@ #include #if defined(SCI_OWNREGEX) && defined(BOOST_REGEX_STANDALONE) +#ifdef _WIN32 #include +#endif #include #elif defined(SCI_OWNREGEX) || !defined(NO_CXX11_REGEX) #include @@ -3260,37 +3262,56 @@ class UTF8Iterator { #if defined(SCI_OWNREGEX) && defined(BOOST_REGEX_STANDALONE) boost::regex_constants::match_flag_type MatchFlags(const Document* doc, Sci::Position startPos, Sci::Position endPos) noexcept { - boost::regex_constants::match_flag_type flagsMatch = boost::regex_constants::match_default | boost::regex_constants::match_not_dot_newline; + boost::regex_constants::match_flag_type flagsMatch = + boost::regex_constants::match_default | boost::regex_constants::match_not_dot_newline; if (!doc->IsLineStartPosition(startPos)) - flagsMatch |= boost::regex_constants::match_not_bol; + flagsMatch |= boost::regex_constants::match_not_bol | boost::regex_constants::match_not_bow; if (!doc->IsLineEndPosition(endPos)) - flagsMatch |= boost::regex_constants::match_not_eol; + flagsMatch |= boost::regex_constants::match_not_eol | boost::regex_constants::match_not_eow; return flagsMatch; } template bool MatchOnLines(const Document* doc, const Regex& regexp, const RESearchRange& resr, RESearch& search) { - boost::match_results match, lastMatch; - const bool findForward = resr.startPos <= resr.endPos; - bool matched; - Iterator itStart(doc, resr.startPos); - Iterator itEnd(doc, resr.endPos); - const boost::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, resr.startPos, resr.endPos); - - if (findForward) { + boost::match_results match; + bool matched = false; + if (resr.increment > 0) { + Iterator itStart(doc, resr.startPos); + Iterator itEnd(doc, resr.endPos); + const boost::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, resr.startPos, resr.endPos); matched = boost::regex_search(itStart, itEnd, match, regexp, flagsMatch); } else { - while (boost::regex_search(itEnd, itStart, match, regexp, flagsMatch)) { - lastMatch = match; - const size_t maxTag = std::min(match.size(), RESearch::MAXTAG); - itEnd = match[maxTag].second; + // Line by line. + for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) { + const Range lineRange = resr.LineRange(line); + Iterator itStart(doc, lineRange.start); + Iterator itEnd(doc, lineRange.end); + boost::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end); + matched = boost::regex_search(itStart, itEnd, match, regexp, flagsMatch); + // Check for the last match on this line. + if (matched) { + Sci::Position endPos = match[0].second.PosRoundUp(); + while (endPos < lineRange.end) { + if (match[0].first == match[0].second) { + endPos = doc->NextPosition(endPos, 1); + } + Iterator itNext(doc, endPos); + flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end); + boost::match_results matchNext; + if (boost::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch)) { + match = matchNext; + endPos = match[0].second.PosRoundUp(); + } + else { + break; + } + } + break; + } } - matched = (lastMatch.size() != 0); } - if (matched) { - if (!findForward) match = lastMatch; const size_t maxTag = std::min(match.size(), RESearch::MAXTAG); for (size_t co = 0; co < maxTag; co++) { search.bopat[co] = match[co].first.Pos(); From 11a4cbd5af11b7ee6e8ab01365b20c187b0ad0f0 Mon Sep 17 00:00:00 2001 From: atauzki Date: Sun, 15 Oct 2023 08:56:58 +0800 Subject: [PATCH 3/4] Sync code changes. --- scintilla/src/Document.cxx | 39 ++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/scintilla/src/Document.cxx b/scintilla/src/Document.cxx index 6b64ee7be9..27c00d30bd 100644 --- a/scintilla/src/Document.cxx +++ b/scintilla/src/Document.cxx @@ -3014,6 +3014,8 @@ class BuiltinRegex final : public RegexSearchBase { private: #if defined(SCI_OWNREGEX) && defined(BOOST_REGEX_STANDALONE) + boost::wregex regexUTF8; + boost::regex regexByte; #elif defined(SCI_OWNREGEX) || !defined(NO_CXX11_REGEX) std::wregex regexUTF8; std::regex regexByte; @@ -3557,26 +3559,35 @@ Sci::Position BuiltinRegex::FindText(const Document *doc, Sci::Position minPos, #ifdef BOOST_REGEX_STANDALONE const RESearchRange resr(doc, minPos, maxPos); try { - // const ElapsedPeriod ep; + //const ElapsedPeriod ep; boost::regex::flag_type flagsRe = boost::regex::ECMAScript; - - if (!caseSensitive) + // Flags that appear to have no effect: + // | std::regex::collate | std::regex::extended; + if (!FlagSet(flags, FindOption::MatchCase)) flagsRe = flagsRe | boost::regex::icase; // Clear the RESearch so can fill in matches search.Clear(); - bool matched = false; + const size_t patternLen = *length; + bool matched = flags != previousFlags || patternLen != cachedPattern.length() + || memcmp(pattern, cachedPattern.data(), patternLen) != 0; if (CpUtf8 == doc->dbcsCodePage) { - const std::wstring ws = WStringFromUTF8(s); - boost::wregex regexp; - regexp.assign(ws, flagsRe); - matched = MatchOnLines(doc, regexp, resr, search); + if (matched) { + const std::wstring ws = WStringFromUTF8(pattern); + regexUTF8.assign(ws, flagsRe); + previousFlags = flags; + cachedPattern.assign(pattern, patternLen); + } + matched = MatchOnLines(doc, regexUTF8, resr, search); } else { - boost::regex regexp; - regexp.assign(s, flagsRe); - matched = MatchOnLines(doc, regexp, resr, search); + if (matched) { + regexByte.assign(pattern, patternLen, flagsRe); + previousFlags = flags; + cachedPattern.assign(pattern, patternLen); + } + matched = MatchOnLines(doc, regexByte, resr, search); } Sci::Position posMatch = -1; @@ -3587,10 +3598,10 @@ Sci::Position BuiltinRegex::FindText(const Document *doc, Sci::Position minPos, // Example - search in doc/ScintillaHistory.html for // [[:upper:]]eta[[:space:]] // On MacBook, normally around 1 second but with locale imbued -> 14 seconds. - // const double durSearch = ep.Duration(); - // Platform::DebugPrintf("Search:%9.6g \n", durSearch); + //const double durSearch = ep.Duration(); + //Platform::DebugPrintf("Search:%9.6g \n", durSearch); return posMatch; -} + } catch (const boost::regex_error&) { // Failed to create regular expression throw RegexError(); From 30d183a4f07de7c338135dfa35793fac182365de Mon Sep 17 00:00:00 2001 From: atauzki Date: Sun, 15 Oct 2023 09:43:01 +0800 Subject: [PATCH 4/4] Fix forward search of line start. --- scintilla/src/Document.cxx | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/scintilla/src/Document.cxx b/scintilla/src/Document.cxx index 27c00d30bd..aedd13a1cb 100644 --- a/scintilla/src/Document.cxx +++ b/scintilla/src/Document.cxx @@ -3265,11 +3265,14 @@ class UTF8Iterator { #if defined(SCI_OWNREGEX) && defined(BOOST_REGEX_STANDALONE) boost::regex_constants::match_flag_type MatchFlags(const Document* doc, Sci::Position startPos, Sci::Position endPos) noexcept { boost::regex_constants::match_flag_type flagsMatch = - boost::regex_constants::match_default | boost::regex_constants::match_not_dot_newline; + boost::regex_constants::match_default + | boost::regex_constants::match_not_dot_newline + | boost::regex_constants::match_not_bol + | boost::regex_constants::match_not_eol; if (!doc->IsLineStartPosition(startPos)) - flagsMatch |= boost::regex_constants::match_not_bol | boost::regex_constants::match_not_bow; + flagsMatch |= boost::regex_constants::match_not_bow; if (!doc->IsLineEndPosition(endPos)) - flagsMatch |= boost::regex_constants::match_not_eol | boost::regex_constants::match_not_eow; + flagsMatch |= boost::regex_constants::match_not_eow; return flagsMatch; }