diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000000..494e6e2cf1 --- /dev/null +++ b/.clang-format @@ -0,0 +1,242 @@ +--- +BasedOnStyle: Microsoft +AccessModifierOffset: -4 +AlignAfterOpenBracket: Align +AlignArrayOfStructures: Left +AlignConsecutiveAssignments: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: true +AlignConsecutiveBitFields: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveDeclarations: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveMacros: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveShortCaseStatements: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCaseColons: false +AlignEscapedNewlines: Right +AlignOperands: Align +AlignTrailingComments: + Kind: Always + OverEmptyLines: 0 +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowBreakBeforeNoexceptSpecifier: Never +AllowShortBlocksOnASingleLine: Empty +AllowShortCaseLabelsOnASingleLine: false +AllowShortCompoundRequirementOnASingleLine: true +AllowShortEnumsOnASingleLine: false +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AttributeMacros: + - __capability +BinPackArguments: true +BinPackParameters: true +BitFieldColonSpacing: Both +BraceWrapping: + AfterCaseLabel: false + AfterClass: true + AfterControlStatement: Always + AfterEnum: true + AfterFunction: true + AfterNamespace: true + 
AfterObjCDeclaration: true + AfterStruct: true + AfterUnion: false + AfterExternBlock: true + BeforeCatch: true + BeforeElse: true + BeforeLambdaBody: true + BeforeWhile: true + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakAdjacentStringLiterals: true +BreakAfterAttributes: Leave +BreakAfterJavaFieldAnnotations: false +BreakArrays: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Custom +BreakBeforeConceptDeclarations: Always +BreakBeforeInlineASMColon: OnlyMultiline +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: BeforeColon +BreakInheritanceList: BeforeColon +BreakStringLiterals: true +ColumnLimit: 120 +CommentPragmas: "^ IWYU pragma:" +CompactNamespaces: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Preserve +IncludeCategories: + - Regex: ^"(llvm|llvm-c|clang|clang-c)/ + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: ^(<|"(gtest|gmock|isl|json)/) + Priority: 3 + SortPriority: 0 + CaseSensitive: false + - Regex: .* + Priority: 1 + SortPriority: 0 + CaseSensitive: false +IncludeIsMainRegex: (Test)?$ +IncludeIsMainSourceRegex: "" +IndentAccessModifiers: false +IndentCaseBlocks: true +IndentCaseLabels: false +IndentExternBlock: NoIndent +IndentGotoLabels: true +IndentPPDirectives: None +IndentRequiresClause: true +IndentWidth: 4 +IndentWrappedFunctionNames: true +InsertBraces: true +InsertNewlineAtEOF: true +InsertTrailingCommas: None +IntegerLiteralSeparator: + Binary: 0 + BinaryMinDigits: 0 + Decimal: 0 + DecimalMinDigits: 0 + Hex: 0 + HexMinDigits: 0 +JavaScriptQuotes: Leave +JavaScriptWrapImports: true 
+LambdaBodyIndentation: OuterScope +Language: Cpp +LineEnding: DeriveLF +MacroBlockBegin: "" +MacroBlockEnd: "" +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: All +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PPIndentWidth: -1 +PackConstructorInitializers: BinPack +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakScopeResolution: 500 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 20 +PenaltyExcessCharacter: 1000000 +PenaltyIndentedWhitespace: 0 +PenaltyReturnTypeOnItsOwnLine: 1000 +PointerAlignment: Left +QualifierAlignment: Leave +ReferenceAlignment: Pointer +ReflowComments: true +RemoveBracesLLVM: false +RemoveParentheses: Leave +RemoveSemicolon: false +RequiresClausePosition: WithPreceding +RequiresExpressionIndentation: OuterScope +SeparateDefinitionBlocks: Leave +ShortNamespaceLines: 1 +SkipMacroDefinitionBody: false +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: LexicographicNumeric +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: false +SpaceAroundPointerQualifiers: Default +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeJsonColon: false +SpaceBeforeParens: ControlStatements +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterForeachMacros: true + AfterFunctionDeclarationName: false + AfterFunctionDefinitionName: false + AfterIfMacros: true + AfterOverloadedOperator: false + AfterPlacementOperator: true + AfterRequiresInClause: true + AfterRequiresInExpression: true + BeforeNonEmptyParentheses: true +SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false 
+SpaceInEmptyBlock: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: Never +SpacesInContainerLiterals: true +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParens: Never +SpacesInParensOptions: + InConditionalStatements: false + InCStyleCasts: false + InEmptyParentheses: false + Other: false +SpacesInSquareBrackets: false +Standard: Latest +StatementAttributeLikeMacros: + - Q_EMIT +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 4 +UseTab: ForContinuationAndIndentation +VerilogBreakBetweenInstancePorts: true +WhitespaceSensitiveMacros: + - BOOST_PP_STRINGIZE + - CF_SWIFT_NAME + - NS_SWIFT_NAME + - PP_STRINGIZE + - STRINGIZE +NamespaceMacros: + - Q_NAMESPACE diff --git a/CMakeLists.txt b/CMakeLists.txt index b07339c4cc..c48f4b3cbf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -67,7 +67,8 @@ option(ENABLE_USE_WOLFSSL "Build with wolfssl support" OFF) option(ENABLE_MOLD "Build using mold" OFF) option(ENABLE_LLD "Build using lld" OFF) -option(ENABLE_STATIC_LINK_TO_GCC "Build static link to gcc" ON) +option(ENABLE_STATIC "Build static binary" OFF) +option(ENABLE_STATIC_LINK_TO_GCC "Build static link to gcc" OFF) option(ENABLE_TCMALLOC_STATIC "Build with Tcmalloc support" OFF) option(ENABLE_JEMALLOC_STATIC "Build with Jemalloc support" OFF) @@ -190,6 +191,8 @@ if (MSVC) set(CMAKE_CXX_STANDARD_REQUIRED ON) message(STATUS "Set default cxx standard: C++17") endif() + + add_compile_options(/utf-8) endif() ################################################################################ @@ -240,10 +243,13 @@ if (NOT MSVC) message(STATUS "clang supports compiler-rt, use it") add_link_options(-rtlib=compiler-rt -unwindlib=libunwind) endif() - else() + elseif(ENABLE_STATIC_LINK_TO_GCC) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libstdc++ -static-libgcc") endif() elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (ENABLE_STATIC) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static") + endif() if (MINGW) 
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static -static-libstdc++") else() @@ -273,14 +279,14 @@ if (NOT MSVC) if (ENABLE_MOLD) if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") if (MOLD_LINKER) - list(APPEND CMAKE_EXE_LINKER_FLAGS "-fuse-ld=mold") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=mold") endif() endif() endif() if (ENABLE_LLD) if (LLD_LINKER) - list(APPEND CMAKE_EXE_LINKER_FLAGS "-fuse-ld=lld") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=lld") endif() endif() endif(NOT MSVC) @@ -410,6 +416,8 @@ if (ENABLE_USE_IO_URING) message(STATUS "Linux using io_uring...") add_definitions(-DBOOST_ASIO_HAS_IO_URING -DBOOST_ASIO_DISABLE_EPOLL) link_libraries(${IOUring_LIBRARIES}) + else() + message(FATAL_ERROR "io_uring librariy not found") endif() endif() @@ -480,6 +488,12 @@ add_subdirectory(third_party/fmt) set(FMT_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/third_party/fmt/include) set(FMT_LIBRARIES fmt) +set(CTRE_BUILD_TESTS OFF) +set(CTRE_BUILD_PACKAGE OFF) +add_subdirectory(third_party/compile-time-regular-expressions) + +link_libraries(ctre) + ################################################################################ # 添加 proxy 项目 diff --git a/proxy/CMakeLists.txt b/proxy/CMakeLists.txt index e55c20e825..99a9a0474c 100644 --- a/proxy/CMakeLists.txt +++ b/proxy/CMakeLists.txt @@ -31,6 +31,7 @@ target_sources(libproxy ${CMAKE_CURRENT_SOURCE_DIR}/src/lib_asio_beast.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/ipip.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/proxy_server.cpp ) target_include_directories(libproxy @@ -55,6 +56,7 @@ target_link_libraries(libproxy Boost::uuid Boost::asio Boost::beast + Boost::hana fmt ${OPENSSL_LIBRARIES} diff --git a/proxy/include/proxy/fileop.hpp b/proxy/include/proxy/fileop.hpp index d2349cb3c9..14be9c92a6 100644 --- a/proxy/include/proxy/fileop.hpp +++ b/proxy/include/proxy/fileop.hpp @@ -23,6 +23,7 @@ # include #endif +#include 
////////////////////////////////////////////////////////////////////////// @@ -117,13 +118,13 @@ namespace fileop { template - std::streamsize read(const std::fstream& file, std::span val) + std::streamsize read(const boost::nowide::fstream& file, std::span val) { return details::read(*file.rdbuf(), val); } inline std::streamsize - read(const std::fstream& file, std::string& val) + read(const boost::nowide::fstream& file, std::string& val) { return details::read(*file.rdbuf(), val); } @@ -131,14 +132,14 @@ namespace fileop { template std::streamsize read(const P& file, std::span val) { - std::fstream f(file.string(), std::ios_base::binary | std::ios_base::in); + boost::nowide::fstream f(file.string(), std::ios_base::binary | std::ios_base::in); return details::read(*f.rdbuf(), val); } template std::streamsize read(const P& file, std::string& val) { - std::fstream f(file.string(), std::ios_base::binary | std::ios_base::in); + boost::nowide::fstream f(file.string(), std::ios_base::binary | std::ios_base::in); std::error_code ec; auto fsize = details::filesize(file); if (fsize < 0) @@ -164,13 +165,13 @@ namespace fileop { template - std::streamsize write(std::fstream& file, std::span val) + std::streamsize write(boost::nowide::fstream& file, std::span val) { return details::write(*file.rdbuf(), val); } inline std::streamsize - write(std::fstream& file, std::string_view val) + write(boost::nowide::fstream& file, std::string_view val) { return details::write(*file.rdbuf(), val); } @@ -181,7 +182,7 @@ namespace fileop { { details::create_parent_directories(file); - std::fstream f(file.string(), + boost::nowide::fstream f(file.string(), std::ios_base::binary | std::ios_base::out | std::ios_base::trunc); @@ -194,7 +195,7 @@ namespace fileop { { details::create_parent_directories(file); - std::fstream f(file.string(), + boost::nowide::fstream f(file.string(), std::ios_base::binary | std::ios_base::out | std::ios_base::trunc); diff --git a/proxy/include/proxy/logging.hpp 
b/proxy/include/proxy/logging.hpp index a4353686eb..4c4d0b3b7a 100644 --- a/proxy/include/proxy/logging.hpp +++ b/proxy/include/proxy/logging.hpp @@ -198,7 +198,7 @@ namespace std { namespace xlogger { - namespace fs = std::filesystem; + namespace fs = boost::filesystem; #ifndef LOGGING_DISABLE_BOOST_ASIO_ENDPOINT namespace net = boost::asio; @@ -745,7 +745,7 @@ class auto_logger_file__ if (!global_logging___) return; - std::error_code ignore_ec; + boost::system::error_code ignore_ec; if (!fs::exists(m_log_path, ignore_ec) && global_write_logging___) fs::create_directories( m_log_path.parent_path(), ignore_ec); @@ -764,7 +764,7 @@ class auto_logger_file__ if (!global_logging___) return; - std::error_code ignore_ec; + boost::system::error_code ignore_ec; if (!fs::exists(m_log_path, ignore_ec) && global_write_logging___) fs::create_directories( m_log_path.parent_path(), ignore_ec); @@ -822,7 +822,7 @@ class auto_logger_file__ m_last_time = time; - std::error_code ec; + boost::system::error_code ec; if (!fs::copy_file(m_log_path, filename, ec)) break; @@ -833,7 +833,7 @@ class auto_logger_file__ auto fn = filename.string(); std::thread th([fn]() { - std::error_code ignore_ec; + boost::system::error_code ignore_ec; std::mutex& m = xlogging_compress__::compress_lock(); std::lock_guard lock(m); if (!xlogging_compress__::do_compress_gz(fn)) @@ -1442,6 +1442,26 @@ class logger___ #endif return strcat_impl(v); } + + inline logger___& operator<<(const std::pmr::string& v) + { +#ifdef LOGGING_ENABLE_AUTO_UTF8 + if (!global_logging___) + return *this; + if (!logger_aux__::utf8_check_is_valid(v)) + { + auto wres = logger_aux__::string_wide(v); + if (wres) + { + auto ret = logger_aux__::utf16_utf8(*wres); + if (ret) + return strcat_impl(*ret); + } + } +#endif + return strcat_impl(v); + } + inline logger___& operator<<(const std::wstring& v) { if (!global_logging___) @@ -1720,7 +1740,7 @@ class logger___ return *this; } #endif - inline logger___& operator<<(const fs::path& p) 
noexcept + inline logger___& operator<<(const std::filesystem::path& p) noexcept { if (!global_logging___) return *this; diff --git a/proxy/include/proxy/proxy_server.hpp b/proxy/include/proxy/proxy_server.hpp index fec228b843..1f4d1ce174 100644 --- a/proxy/include/proxy/proxy_server.hpp +++ b/proxy/include/proxy/proxy_server.hpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -67,13 +68,15 @@ #include #include +#include +#include #ifdef _MSC_VER # pragma warning(push) # pragma warning(disable: 4819) #endif -#include +#include #ifdef _MSC_VER # pragma warning(pop) @@ -107,6 +110,33 @@ #include "proxy/proxy_stream.hpp" +inline bool operator == (const std::string& a, const std::pmr::string& b) +{ + return std::string_view(a) == std::string_view(b); +} + +inline bool operator == (const std::pmr::string& a, const std::string& b) +{ + return std::string_view(a) == std::string_view(b); +} + +template +concept supports_stream_rate_limit = requires(S s, proxy_session p) +{ + p.stream_rate_limit(s, 1); +}; + +template +concept supports_stream_expires_after = requires(S s, proxy_session p) +{ + p.stream_expires_after(s, std::chrono::seconds(0)); +}; + +template +concept supports_shutdown = requires(S s) +{ + s.shutdown(boost::asio::socket_base::shutdown_receive); +}; namespace proxy { @@ -123,22 +153,34 @@ namespace proxy { namespace urls = boost::urls; // form - namespace fs = std::filesystem; + namespace fs = boost::filesystem; + + using pmr_alloc_t = std::pmr::polymorphic_allocator; + using pmr_fields = http::basic_fields; + + using string_body = http::basic_string_body, pmr_alloc_t>; + using span_body = http::span_body; - using string_body = http::string_body; - using dynamic_body = http::dynamic_body; + // using string_body = http::string_body; + using dynamic_body = http::basic_dynamic_body>; using buffer_body = http::buffer_body; - using dynamic_request = http::request; - using string_request = http::request; + using dynamic_request = 
http::request; + using string_request = http::request; - using string_response = http::response; - using buffer_response = http::response; + using string_response = http::response; + using buffer_response = http::response; + using custom_body_response = http::response; + using span_response = http::response; - using request_parser = http::request_parser; + using request_parser = http::request_parser; + using response_parser = http::response_parser; + + using response_serializer = http::response_serializer; + using string_response_serializer = http::response_serializer; + using span_response_serializer = http::response_serializer; + using custom_body_response_serializer = http::response_serializer; - using response_serializer = http::response_serializer; - using string_response_serializer = http::response_serializer; using io_util::read; using io_util::write; @@ -224,7 +266,7 @@ R"x*x*x( )x*x*x"; - inline const char* fake_404_content_fmt = + inline const char fake_404_content_fmt[] = R"x*x*x(HTTP/1.1 404 Not Found Server: nginx/1.20.2 Date: {} @@ -240,7 +282,7 @@ Connection: close )x*x*x"; - inline const char* fake_407_content_fmt = + inline const char fake_407_content_fmt[] = R"x*x*x(HTTP/1.1 407 Proxy Authentication Required Server: nginx/1.20.2 Date: {} @@ -251,7 +293,7 @@ Content-Length: 0 )x*x*x"; - inline const char* fake_416_content = + inline const char fake_416_content[] = R"x*x*x( 416 Requested Range Not Satisfiable @@ -261,7 +303,7 @@ R"x*x*x( )x*x*x"; - inline const char* fake_302_content = + inline const char fake_302_content[] = R"x*x*x( 301 Moved Permanently @@ -272,14 +314,6 @@ R"x*x*x( )x*x*x"; - inline constexpr auto head_fmt = - LR"(Index of {}

Index of {}


)";
-	inline constexpr auto tail_fmt =
-		L"

"; - inline constexpr auto body_fmt = - L"{}{} {} {}\r\n"; - - ////////////////////////////////////////////////////////////////////////// // udp_session_expired_time 用于指定 udp session 的默认过期时间, 单位为秒. @@ -302,7 +336,7 @@ R"x*x*x( 0x16, // ssl }; - inline const std::map global_mimes = + inline const std::map global_mimes = { { ".html", "text/html; charset=utf-8" }, { ".htm", "text/html; charset=utf-8" }, @@ -580,14 +614,15 @@ R"x*x*x( struct http_context { + pmr_alloc_t alloc; // 在 http 请求时, 保存正则表达式命中时匹配的结果列表. - std::vector command_; + std::pmr::vector command_; // 保存 http 客户端的请求信息. string_request& request_; // 保存 http 客户端请求的原始目标. - std::string target_; + std::string_view target_; // 保存 http 客户端请求目标的具体路径, 即: doc 目录 + target_ 组成的路径. std::string target_path_; @@ -627,63 +662,55 @@ R"x*x*x( using http_ranges = std::vector>; // parser_http_ranges 用于解析 http range 请求头. - inline http_ranges parser_http_ranges(std::string range) const noexcept - { - // 去掉前后空白. - range = strutil::remove_spaces(range); - - // range 必须以 bytes= 开头, 否则返回空数组. - if (!range.starts_with("bytes=")) - return {}; - - // 去掉开头的 bytes= 字符串. - boost::ireplace_first(range, "bytes=", ""); + http_ranges parser_http_ranges(std::string_view range_line) noexcept + { http_ranges results; - // 获取其中所有 range 字符串. - auto ranges = strutil::split(range, ","); - for (const auto& str : ranges) + if (!range_line.empty()) { - auto r = strutil::split(std::string(str), "-"); + // 去掉前后空白. + range_line = strutil::remove_spaces(range_line); - // range 只有一个数值. - if (r.size() == 1) - { - if (str.front() == '-') { - auto pos = std::atoll(r.front().data()); - results.emplace_back(-1, pos); - } else { - auto pos = std::atoll(r.front().data()); - results.emplace_back(pos, -1); - } - } - else if (r.size() == 2) + // range 必须以 bytes= 开头, 否则返回空数组. + if (auto regex_match_result = ctre::match<"bytes=([0-9,\\- ]+)">(range_line)) { - // range 有 start 和 end 的情况, 解析成整数到容器. 
- auto& start_str = r[0]; - auto& end_str = r[1]; + std::string_view range = regex_match_result.get<1>(); - if (start_str.empty() && !end_str.empty()) - { - auto end = std::atoll(end_str.data()); - results.emplace_back(-1, end); - } - else + std::array pre_alloc_buf; + std::pmr::monotonic_buffer_resource mbr(pre_alloc_buf.data(), pre_alloc_buf.size()); + std::pmr::polymorphic_allocator alloc(&mbr); + + std::pmr::vector ranges{alloc}; + + // 获取其中所有 range 字符串. + strutil::split(range, ",", std::back_inserter(ranges)); + for (const auto& str : ranges) { - auto start = std::atoll(start_str.data()); - auto end = std::atoll(end_str.data()); - if (end_str.empty()) - end = -1; + if (auto range_match = ctre::match<"(-?[0-9 ]+)(-([0-9 ]*))?">(str)) + { + auto first = range_match.get<1>().to_number(); - results.emplace_back(start, end); + if (first < 0) + { + results.emplace_back(-1, -first); + } + else + { + if (range_match.get<3>().to_view().empty()) + { + results.emplace_back(first, -1); + } + else + { + auto second = range_match.get<3>().to_number(); + + results.emplace_back(first, second); + } + } + } } } - else - { - // 在一个 range 项中不应该存在3个'-', 否则则是无效项. - return {}; - } } return results; @@ -871,14 +898,11 @@ R"x*x*x( if (!ret) co_return; - size_t l2r_transferred = 0; - size_t r2l_transferred = 0; - - co_await( - transfer(m_local_socket, m_remote_socket, l2r_transferred) + auto [l2r_transferred, r2l_transferred] = co_await( + transfer(m_local_socket, m_remote_socket) && - transfer(m_remote_socket, m_local_socket, r2l_transferred) - ); + transfer(m_remote_socket, m_local_socket) + ); XLOG_DBG << "connection id: " << m_connection_id @@ -1810,13 +1834,10 @@ R"x*x*x( // 发起数据传输协程. 
if (command == SOCKS_CMD_CONNECT) { - size_t l2r_transferred = 0; - size_t r2l_transferred = 0; - - co_await( - transfer(m_local_socket, m_remote_socket, l2r_transferred) + auto [l2r_transferred, r2l_transferred ] = co_await( + transfer(m_local_socket, m_remote_socket) && - transfer(m_remote_socket, m_local_socket, r2l_transferred) + transfer(m_remote_socket, m_local_socket) ); XLOG_DBG << "connection id: " @@ -2256,14 +2277,11 @@ R"x*x*x( if (error_code != SOCKS4_REQUEST_GRANTED) co_return; - size_t l2r_transferred = 0; - size_t r2l_transferred = 0; - - co_await( - transfer(m_local_socket, m_remote_socket, l2r_transferred) + auto [l2r_transferred , r2l_transferred ]= co_await( + transfer(m_local_socket, m_remote_socket) && - transfer(m_remote_socket, m_local_socket, r2l_transferred) - ); + transfer(m_remote_socket, m_local_socket) + ); XLOG_DBG << "connection id: " << m_connection_id @@ -2275,2176 +2293,397 @@ R"x*x*x( co_return; } - inline int http_authorization(std::string_view pa) - { - if (m_option.auth_users_.empty()) - return PROXY_AUTH_SUCCESS; - - if (pa.empty()) - return PROXY_AUTH_NONE; + int http_authorization(std::string_view pa); - auto pos = pa.find(' '); - if (pos == std::string::npos) - return PROXY_AUTH_ILLEGAL; + net::awaitable http_proxy_get(); - auto type = pa.substr(0, pos); - auto auth = pa.substr(pos + 1); + net::awaitable http_proxy_connect(); - if (type != "Basic") - return PROXY_AUTH_ILLEGAL; + net::awaitable socks_auth(); - std::string userinfo( - beast::detail::base64::decoded_size(auth.size()), 0); - auto [len, _] = beast::detail::base64::decode( - (char*)userinfo.data(), - auth.data(), - auth.size()); - userinfo.resize(len); - - pos = userinfo.find(':'); + template + net::awaitable transfer(S1& from, S2& to, std::streamsize bytes_to_be_sent = -1) + { - std::string uname = userinfo.substr(0, pos); - std::string passwd = userinfo.substr(pos + 1); + std::size_t bytes_transferred = 0; - bool verify_passed = m_option.auth_users_.empty(); + 
if constexpr (supports_stream_rate_limit) + { + stream_rate_limit(from, m_option.tcp_rate_limit_); + } + if constexpr (supports_stream_rate_limit) + { + stream_rate_limit(to, m_option.tcp_rate_limit_); + } - for (auto [user, pwd] : m_option.auth_users_) + if constexpr (supports_stream_expires_after) { - if (uname == user && passwd == pwd) - { - verify_passed = true; - user_rate_limit_config(user); - break; - } + stream_expires_after(from, std::chrono::seconds(m_option.tcp_timeout_)); } - auto endp = m_local_socket.remote_endpoint(); - auto client = endp.address().to_string(); - client += ":" + std::to_string(endp.port()); + constexpr auto buf_size = 512 * 1024; - if (!verify_passed) - return PROXY_AUTH_FAILED; + std::unique_ptr buf0((char*)std::malloc(buf_size), &std::free); + std::unique_ptr buf1((char*)std::malloc(buf_size), &std::free); - return PROXY_AUTH_SUCCESS; - } + // 分别使用主从缓冲指针用于并发读写. + auto primary_buf = buf0.get(); + auto secondary_buf = buf1.get(); - inline net::awaitable http_proxy_get() - { + // 首先邓读取第一个数据作为预备, 以用于后面的交替读写逻辑. + auto read_size = (bytes_to_be_sent == -1) ? buf_size : std::min(bytes_to_be_sent, buf_size); boost::system::error_code ec; - bool keep_alive = false; - std::optional parser; - bool first = true; - - while (!m_abort) + auto bytes = co_await from.async_read_some(net::buffer(primary_buf, read_size), net_awaitable[ec]); + if (bytes_to_be_sent != -1) bytes_to_be_sent -= bytes; + if (ec || m_abort) { - parser.emplace(); - parser->body_limit(1024 * 1024 * 10); - if (!first) - m_local_buffer.consume(m_local_buffer.size()); + if (bytes > 0) + bytes_transferred += co_await net::async_write(to, + net::buffer(primary_buf, bytes), net_awaitable[ec]); - // 读取 http 请求头. 
- co_await http::async_read( - m_local_socket, - m_local_buffer, - *parser, - net_awaitable[ec]); - if (ec) + to.shutdown(net::socket_base::shutdown_send, ec); + co_return bytes_transferred; + } + + for (; !m_abort;) + { + if constexpr (supports_stream_expires_after) { - XLOG_WARN << "connection id: " - << m_connection_id - << (keep_alive ? ", keepalive" : "") - << ", http_proxy_get request async_read: " - << ec.message(); + stream_expires_after(to, std::chrono::seconds(m_option.tcp_timeout_)); + } - co_return !first; + if constexpr (supports_stream_expires_after) + { + stream_expires_after(from, std::chrono::seconds(m_option.tcp_timeout_)); } - auto req = parser->release(); - auto mth = std::string(req.method_string()); - auto target_view = std::string(req.target()); - auto pa = std::string(req[http::field::proxy_authorization]); + read_size = (bytes_to_be_sent == -1) ? buf_size : std::min(bytes_to_be_sent, buf_size); + + if (read_size > 0) + { + // 并发读写. + auto [write_bytes, read_bytes] = + co_await( + net::async_write(to, + net::buffer(primary_buf, bytes), net_awaitable[ec]) + && + from.async_read_some( + net::buffer(secondary_buf, read_size), net_awaitable[ec]) + ); - keep_alive = req.keep_alive(); + // 交换主从缓冲区. + std::swap(primary_buf, secondary_buf); - XLOG_DBG << "connection id: " - << m_connection_id - << ", method: " << mth - << ", target: " << target_view - << (pa.empty() ? std::string() - : ", proxy_authorization: " + pa); - - // 判定是否为 GET url 代理模式. - bool get_url_proxy = false; - if (boost::istarts_with(target_view, "https://") || - boost::istarts_with(target_view, "http://")) + bytes = read_bytes; + if (bytes_to_be_sent != -1) bytes_to_be_sent -= bytes; + bytes_transferred += write_bytes; + } + else { - get_url_proxy = true; + bytes_transferred += co_await net::async_write(to, + net::buffer(primary_buf, bytes), net_awaitable[ec]); + co_return bytes_transferred; } - // http 代理认证, 如果请求的 rarget 不是 http url 或认证 - // 失败, 则按正常 web 请求处理. 
- auto auth = http_authorization(pa); - if (auth != PROXY_AUTH_SUCCESS || !get_url_proxy) + // 如果 async_write 失败, 则也无需要再读取数据, 如果 + // async_read_some 失败, 则也无数据可用于写, 所以无论哪一种情况 + // 都可以直接退出. + if (ec) { - auto expect_url = urls::parse_absolute_uri(target_view); + if constexpr (supports_shutdown) + to.shutdown(net::socket_base::shutdown_send, ec); + if constexpr (supports_shutdown) + from.shutdown(net::socket_base::shutdown_receive, ec); + co_return bytes_transferred; + } + } + co_return bytes_transferred; + } - if (!expect_url.has_error()) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", proxy err: " - << pauth_error_message(auth); + template + inline bool check_condition( + const boost::system::error_code&, Stream& stream, Endpoint&) const + { + if (!m_bind_interface) + return true; - co_return !first; - } + tcp::endpoint bind_endpoint(*m_bind_interface, 0); + boost::system::error_code err; - // 如果 doc 目录为空, 则不允许访问目录 - // 这里直接返回错误页面. - if (m_option.doc_directory_.empty()) - co_return !first; + stream.open(bind_endpoint.protocol(), err); + if (err) + return false; - // htpasswd 表示需要用户认证. - if (m_option.htpasswd_) - { - // 处理 http 认证, 如果客户没有传递认证信息, 则返回 401. - // 如果用户认证信息没有设置, 则直接返回 401. - auto auth = req[http::field::authorization]; - if (auth.empty() || m_option.auth_users_.empty()) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", auth error: " - << (auth.empty() ? 
"no auth" : "no user"); + stream.bind(bind_endpoint, err); + if (err) + return false; - co_await unauthorized_http_route(req); - co_return true; - } + return true; + } - auto auth_result = http_authorization(auth); - if (auth_result != PROXY_AUTH_SUCCESS) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", auth error: " - << pauth_error_message(auth_result); + net::awaitable connect_bridge_proxy(tcp::socket& remote_socket, std::string target_host, + uint16_t target_port, boost::system::error_code& ec); - co_await unauthorized_http_route(req); - co_return true; - } - } + inline net::awaitable start_connect_host( + std::string target_host, + uint16_t target_port, + boost::system::error_code& ec, + bool resolve = false) + { + auto executor = co_await net::this_coro::executor; - // 如果不允许目录索引, 检查请求的是否为文件, 如果是具体文件则按文 - // 件请求处理, 否则返回 403. - if (!m_option.autoindex_) - { - auto path = make_real_target_path(req.target()); + tcp::socket& remote_socket = + net_tcp_socket(m_remote_socket); - if (!fs::is_directory(path, ec)) - { - co_await normal_web_server(req, parser); - co_return true; - } + if (m_bridge_proxy) + { + auto ret = co_await connect_bridge_proxy( + remote_socket, + target_host, + target_port, + ec); - // 如果不允许目录索引, 则直接返回 403 forbidden. - co_await forbidden_http_route(req); + co_return ret; + } + else + { + net::ip::basic_resolver_results targets; + if (resolve) + { + tcp::resolver resolver{ executor }; - co_return true; - } + targets = co_await resolver.async_resolve( + target_host, + std::to_string(target_port), + net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " + << m_connection_id + << ", resolve: " + << target_host + << ", error: " + << ec.message(); - // 按正常 http 目录请求来处理. 
- co_await normal_web_server(req, parser); - co_return true; + co_return false; + } } + else + { + tcp::endpoint dst_endpoint; - const auto authority_pos = target_view.find_first_of("//") + 2; + dst_endpoint.address( + net::ip::address::from_string(target_host)); + dst_endpoint.port(target_port); - std::string host; + targets = net::ip::basic_resolver_results::create( + dst_endpoint, "", ""); + } - const auto scheme_id = urls::string_to_scheme(target_view.substr(0, authority_pos - 3)); - uint16_t port = urls::default_port(scheme_id); + if (m_option.happyeyeballs_) + { + co_await asio_util::async_connect( + remote_socket, + targets, + [this](const auto& ec, auto& stream, auto& endp) { + return check_condition(ec, stream, endp); + }, + net_awaitable[ec]); + } + else + { + for (auto endpoint : targets) + { + ec = boost::asio::error::host_not_found; - auto host_pos = authority_pos; - auto host_end = std::string::npos; + if (m_option.connect_v4_only_) + { + if (endpoint.endpoint().address().is_v6()) + continue; + } + else if (m_option.connect_v6_only_) + { + if (endpoint.endpoint().address().is_v4()) + continue; + } - auto port_start = std::string::npos; + boost::system::error_code ignore_ec; + remote_socket.close(ignore_ec); - for (auto pos = authority_pos; pos < target_view.size(); pos++) - { - const auto& c = target_view[pos]; - if (c == '@') - { - host_pos = pos + 1; + if (m_bind_interface) + { + tcp::endpoint bind_endpoint( + *m_bind_interface, + 0); - host_end = std::string::npos; - port_start = std::string::npos; - } - else if (c == ':') - { - host_end = pos; - port_start = pos + 1; - } - else if (c == '/' || (pos + 1 == target_view.size())) - { - if (host_end == std::string::npos) - host_end = pos; - host = target_view.substr(host_pos, host_end - host_pos); + remote_socket.open(bind_endpoint.protocol(), ec); + if (ec) + break; - if (port_start != std::string::npos) - port = (uint16_t)std::atoi(target_view.substr(port_start, pos - port_start).c_str()); + 
remote_socket.bind(bind_endpoint, ec); + if (ec) + break; + } - break; + co_await remote_socket.async_connect( + endpoint, + net_awaitable[ec]); + if (!ec) + break; } } - if (!m_remote_socket.is_open()) + if (ec) { - // 连接到目标主机. - co_await start_connect_host(host, - port ? port : 80, ec, true); - if (ec) - { - XLOG_FWARN("connection id: {}," - " connect to target {}:{} error: {}", - m_connection_id, - host, - port, - ec.message()); - - co_return !first; - } - } + XLOG_FWARN("connection id: {}, connect to target {}:{} error: {}", + m_connection_id, + target_host, + target_port, + ec.message()); - // 处理代理请求头. - const auto path_pos = target_view.find_first_of("/", authority_pos); - if (path_pos == std::string_view::npos) - req.target("/"); - else - req.target(std::string(target_view.substr(path_pos))); + co_return false; + } - req.set(http::field::host, host); + m_remote_socket = init_proxy_stream( + std::move(remote_socket)); + } - if (req.find(http::field::connection) == req.end() && - req.find(http::field::proxy_connection) != req.end()) - req.set(http::field::connection, req[http::field::proxy_connection]); + co_return true; + } - req.erase(http::field::proxy_authorization); - req.erase(http::field::proxy_connection); + // is_crytpo_stream 判断当前连接是否为加密连接. 
+ inline bool is_crytpo_stream() const + { + return boost::variant2::holds_alternative(m_remote_socket); + } - co_await http::async_write( - m_remote_socket, req, net_awaitable[ec]); - if (ec) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", http_proxy_get request async_write: " - << ec.message(); - co_return !first; - } + net::awaitable normal_web_server(string_request& req, pmr_alloc_t alloc); - m_local_buffer.consume(m_local_buffer.size()); - beast::flat_buffer buf; + fs::path path_cat(std::string_view doc, std::string_view target); - http::response_parser parser; - parser.body_limit(1024 * 1024 * 10); + template + inline std::string make_unc_path(const Path& path) + { + auto ret = path.string(); - auto bytes = co_await http::async_read( - m_remote_socket, buf, parser, net_awaitable[ec]); - if (ec) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", http_proxy_get response async_read: " - << ec.message(); - co_return !first; - } - - co_await http::async_write( - m_local_socket, parser.release(), net_awaitable[ec]); - if (ec) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", http_proxy_get response async_write: " - << ec.message(); - co_return !first; - } - - XLOG_DBG << "connection id: " - << m_connection_id - << ", transfer completed" - << ", remote to local: " - << bytes; - - first = false; - if (!keep_alive) - break; - } - - co_return true; - } - - inline net::awaitable http_proxy_connect() - { - http::request req; - boost::system::error_code ec; - - // 读取 http 请求头. 
- co_await http::async_read(m_local_socket, - m_local_buffer, req, net_awaitable[ec]); - if (ec) - { - XLOG_ERR << "connection id: " - << m_connection_id - << ", http_proxy_connect async_read: " - << ec.message(); - - co_return false; - } - - auto mth = std::string(req.method_string()); - auto target_view = std::string(req.target()); - auto pa = std::string(req[http::field::proxy_authorization]); - - XLOG_DBG << "connection id: " - << m_connection_id - << ", method: " << mth - << ", target: " << target_view - << (pa.empty() ? std::string() - : ", proxy_authorization: " + pa); - - // http 代理认证. - auto auth = http_authorization(pa); - if (auth != PROXY_AUTH_SUCCESS) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", proxy err: " - << pauth_error_message(auth); - - auto fake_page = fmt::vformat(fake_407_content_fmt, - fmt::make_format_args(server_date_string())); - - co_await net::async_write( - m_local_socket, - net::buffer(fake_page), - net::transfer_all(), - net_awaitable[ec]); - - co_return true; - } - - auto pos = target_view.find(':'); - if (pos == std::string::npos) - { - XLOG_ERR << "connection id: " - << m_connection_id - << ", illegal target: " - << target_view; - co_return false; - } - - std::string host(target_view.substr(0, pos)); - std::string port(target_view.substr(pos + 1)); - - co_await start_connect_host(host, - static_cast(std::atol(port.c_str())), ec, true); - if (ec) - { - XLOG_FWARN("connection id: {}," - " connect to target {}:{} error: {}", - m_connection_id, - host, - port, - ec.message()); - co_return false; - } - - http::response res{ - http::status::ok, req.version() }; - res.reason("Connection established"); - - co_await http::async_write( - m_local_socket, - res, - net_awaitable[ec]); - if (ec) - { - XLOG_FWARN("connection id: {}," - " async write response {}:{} error: {}", - m_connection_id, - host, - port, - ec.message()); - co_return false; - } - - size_t l2r_transferred = 0; - size_t r2l_transferred = 0; - - co_await( 
- transfer(m_local_socket, m_remote_socket, l2r_transferred) - && - transfer(m_remote_socket, m_local_socket, r2l_transferred) - ); - - XLOG_DBG << "connection id: " - << m_connection_id - << ", transfer completed" - << ", local to remote: " - << l2r_transferred - << ", remote to local: " - << r2l_transferred; - - co_return true; - } - - inline net::awaitable socks_auth() - { - // +----+------+----------+------+----------+ - // |VER | ULEN | UNAME | PLEN | PASSWD | - // +----+------+----------+------+----------+ - // | 1 | 1 | 1 to 255 | 1 | 1 to 255 | - // +----+------+----------+------+----------+ - // [ ] - - boost::system::error_code ec; - m_local_buffer.consume(m_local_buffer.size()); - auto bytes = co_await net::async_read(m_local_socket, - m_local_buffer, - net::transfer_exactly(2), - net_awaitable[ec]); - if (ec) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", read client username/passwd error: " - << ec.message(); - co_return false; - } - - auto p = net::buffer_cast(m_local_buffer.data()); - int auth_version = read(p); - if (auth_version != 1) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", socks negotiation, unsupported socks5 protocol"; - co_return false; - } - int name_length = read(p); - if (name_length <= 0 || name_length > 255) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", socks negotiation, invalid name length"; - co_return false; - } - name_length += 1; - - // +----+------+----------+------+----------+ - // |VER | ULEN | UNAME | PLEN | PASSWD | - // +----+------+----------+------+----------+ - // | 1 | 1 | 1 to 255 | 1 | 1 to 255 | - // +----+------+----------+------+----------+ - // [ ] - m_local_buffer.consume(m_local_buffer.size()); - bytes = co_await net::async_read(m_local_socket, - m_local_buffer, - net::transfer_exactly(name_length), - net_awaitable[ec]); - if (ec) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", read client username error: " - << ec.message(); - 
co_return false; - } - - std::string uname; - - p = net::buffer_cast(m_local_buffer.data()); - for (size_t i = 0; i < bytes - 1; i++) - uname.push_back(read(p)); - - int passwd_len = read(p); - if (passwd_len <= 0 || passwd_len > 255) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", socks negotiation, invalid passwd length"; - co_return false; - } - - // +----+------+----------+------+----------+ - // |VER | ULEN | UNAME | PLEN | PASSWD | - // +----+------+----------+------+----------+ - // | 1 | 1 | 1 to 255 | 1 | 1 to 255 | - // +----+------+----------+------+----------+ - // [ ] - m_local_buffer.consume(m_local_buffer.size()); - bytes = co_await net::async_read(m_local_socket, - m_local_buffer, - net::transfer_exactly(passwd_len), - net_awaitable[ec]); - if (ec) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", read client passwd error: " - << ec.message(); - co_return false; - } - - std::string passwd; - - p = net::buffer_cast(m_local_buffer.data()); - for (size_t i = 0; i < bytes; i++) - passwd.push_back(read(p)); - - // SOCKS5验证用户和密码. - auto endp = m_local_socket.remote_endpoint(); - auto client = endp.address().to_string(); - client += ":" + std::to_string(endp.port()); - - // 用户认证逻辑. - bool verify_passed = m_option.auth_users_.empty(); - - for (auto [user, pwd] : m_option.auth_users_) - { - if (uname == user && passwd == pwd) - { - verify_passed = true; - user_rate_limit_config(user); - break; - } - } - - XLOG_DBG << "connection id: " - << m_connection_id - << ", auth: " - << uname - << ", passwd: " - << passwd - << ", client: " - << client; - - net::streambuf wbuf; - auto wp = net::buffer_cast(wbuf.prepare(16)); - write(0x01, wp); // version 只能是1. - if (verify_passed) - { - write(0x00, wp); // 认证通过返回0x00, 其它值为失败. - } - else - { - write(0x01, wp); // 认证返回0x01为失败. - } - - // 返回认证状态. 
- // +----+--------+ - // |VER | STATUS | - // +----+--------+ - // | 1 | 1 | - // +----+--------+ - wbuf.commit(2); - co_await net::async_write(m_local_socket, - wbuf, - net::transfer_exactly(2), - net_awaitable[ec]); - if (ec) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", server write status error: " - << ec.message(); - co_return false; - } - - co_return verify_passed; - } - - template - net::awaitable transfer(S1& from, S2& to, size_t& bytes_transferred) - { - bytes_transferred = 0; - - stream_rate_limit(from, m_option.tcp_rate_limit_); - stream_rate_limit(to, m_option.tcp_rate_limit_); - - stream_expires_after(from, std::chrono::seconds(m_option.tcp_timeout_)); - - constexpr auto buf_size = 512 * 1024; - - std::unique_ptr buf0((char*)std::malloc(buf_size), &std::free); - std::unique_ptr buf1((char*)std::malloc(buf_size), &std::free); - - // 分别使用主从缓冲指针用于并发读写. - auto primary_buf = buf0.get(); - auto secondary_buf = buf1.get(); - - // 首先邓读取第一个数据作为预备, 以用于后面的交替读写逻辑. - boost::system::error_code ec; - auto bytes = co_await from.async_read_some(net::buffer(primary_buf, buf_size), net_awaitable[ec]); - if (ec || m_abort) - { - if (bytes > 0) - co_await net::async_write(to, - net::buffer(primary_buf, bytes), net_awaitable[ec]); - - to.shutdown(net::socket_base::shutdown_send, ec); - co_return; - } - - for (; !m_abort;) - { - stream_expires_after(to, std::chrono::seconds(m_option.tcp_timeout_)); - stream_expires_after(from, std::chrono::seconds(m_option.tcp_timeout_)); - - // 并发读写. - auto [write_bytes, read_bytes] = - co_await( - net::async_write(to, - net::buffer(primary_buf, bytes), net_awaitable[ec]) - && - from.async_read_some( - net::buffer(secondary_buf, buf_size), net_awaitable[ec]) - ); - - // 交换主从缓冲区. - std::swap(primary_buf, secondary_buf); - - bytes = read_bytes; - bytes_transferred += bytes; - - // 如果 async_write 失败, 则也无需要再读取数据, 如果 - // async_read_some 失败, 则也无数据可用于写, 所以无论哪一种情况 - // 都可以直接退出. 
- if (ec) - { - to.shutdown(net::socket_base::shutdown_send, ec); - from.shutdown(net::socket_base::shutdown_receive, ec); - co_return; - } - } - } - - template - inline bool check_condition( - const boost::system::error_code&, Stream& stream, Endpoint&) const - { - if (!m_bind_interface) - return true; - - tcp::endpoint bind_endpoint(*m_bind_interface, 0); - boost::system::error_code err; - - stream.open(bind_endpoint.protocol(), err); - if (err) - return false; - - stream.bind(bind_endpoint, err); - if (err) - return false; - - return true; - } - - inline net::awaitable - connect_bridge_proxy(tcp::socket& remote_socket, - std::string target_host, - uint16_t target_port, - boost::system::error_code& ec) - { - auto executor = co_await net::this_coro::executor; - - tcp::resolver resolver{ executor }; - - auto proxy_host = std::string(m_bridge_proxy->host()); - std::string proxy_port; - if (m_bridge_proxy->port_number() == 0) - proxy_port = std::to_string(urls::default_port(m_bridge_proxy->scheme_id())); - else - proxy_port = std::to_string(m_bridge_proxy->port_number()); - if (proxy_port.empty()) - proxy_port = m_bridge_proxy->scheme(); - - XLOG_DBG << "connection id: " - << m_connection_id - << ", connect to next proxy: " - << proxy_host - << ":" - << proxy_port; - - tcp::resolver::results_type targets; - - if (!detect_hostname(proxy_host)) - { - net::ip::tcp::endpoint endp( - net::ip::address::from_string(proxy_host), - m_bridge_proxy->port_number() ? 
- m_bridge_proxy->port_number() : - urls::default_port(m_bridge_proxy->scheme_id())); - - targets = tcp::resolver::results_type::create( - endp, proxy_host, m_bridge_proxy->scheme()); - } - else - { - targets = co_await resolver.async_resolve( - proxy_host, - proxy_port, - net_awaitable[ec]); - - if (ec) - { - XLOG_FWARN("connection id: {}," - " resolver to next proxy {}:{} error: {}", - m_connection_id, - std::string(m_bridge_proxy->host()), - std::string(m_bridge_proxy->port()), - ec.message()); - - co_return false; - } - } - - if (m_option.happyeyeballs_) - { - co_await asio_util::async_connect( - remote_socket, - targets, - [this](const auto& ec, auto& stream, auto& endp) { - return check_condition(ec, stream, endp); - }, - net_awaitable[ec]); - } - else - { - for (auto endpoint : targets) - { - ec = boost::asio::error::host_not_found; - - if (m_option.connect_v4_only_) - { - if (endpoint.endpoint().address().is_v6()) - continue; - } - else if (m_option.connect_v6_only_) - { - if (endpoint.endpoint().address().is_v4()) - continue; - } - - boost::system::error_code ignore_ec; - remote_socket.close(ignore_ec); - - if (m_bind_interface) - { - tcp::endpoint bind_endpoint( - *m_bind_interface, - 0); - - remote_socket.open( - bind_endpoint.protocol(), - ec); - if (ec) - break; - - remote_socket.bind( - bind_endpoint, - ec); - if (ec) - break; - } - - co_await remote_socket.async_connect( - endpoint, - net_awaitable[ec]); - if (!ec) - break; - } - } - - if (ec) - { - XLOG_FWARN("connection id: {}," - " connect to next proxy {}:{} error: {}", - m_connection_id, - std::string(m_bridge_proxy->host()), - std::string(m_bridge_proxy->port()), - ec.message()); - - co_return false; - } - - XLOG_DBG << "connection id: " - << m_connection_id - << ", connect to next proxy: " - << proxy_host - << ":" - << proxy_port - << " success"; - - // 如果启用了 noise, 则在向上游代理服务器发起 tcp 连接成功后, 发送 noise - // 数据以及接收 noise 数据. 
- if (m_option.scramble_) - { - if (!co_await noise_handshake(remote_socket, m_outin_key, m_outout_key)) - co_return false; - - XLOG_DBG << "connection id: " - << m_connection_id - << ", with upstream noise completed"; - } - - // 使用ssl加密与下一级代理通信. - if (m_option.proxy_pass_use_ssl_) - { - // 设置 ssl cert 证书目录. - if (fs::exists(m_option.ssl_cacert_path_)) - { - m_ssl_cli_context.add_verify_path( - m_option.ssl_cacert_path_, ec); - if (ec) - { - XLOG_FWARN("connection id: {}, " - "load cert path: {}, " - "error: {}", - m_connection_id, - m_option.ssl_cacert_path_, - ec.message()); - - co_return false; - } - } - } - - auto scheme = m_bridge_proxy->scheme(); - - auto instantiate_stream = - [this, - &scheme, - &proxy_host, - &remote_socket, - &ec] - () mutable -> net::awaitable - { - ec = {}; - - XLOG_DBG << "connection id: " - << m_connection_id - << ", connect to next proxy: " - << proxy_host - << " instantiate stream"; - - if (m_option.proxy_pass_use_ssl_ || scheme == "https") - { - m_ssl_cli_context.set_verify_mode(net::ssl::verify_peer); - auto cert = default_root_certificates(); - m_ssl_cli_context.add_certificate_authority( - net::buffer(cert.data(), cert.size()), - ec); - if (ec) - { - XLOG_FWARN("connection id: {}," - " add_certificate_authority error: {}", - m_connection_id, - ec.message()); - } - - m_ssl_cli_context.use_tmp_dh( - net::buffer(default_dh_param()), ec); - - m_ssl_cli_context.set_verify_callback( - net::ssl::rfc2818_verification(proxy_host), ec); - if (ec) - { - XLOG_FWARN("connection id: {}," - " set_verify_callback error: {}", - m_connection_id, - ec.message()); - } - - // 生成 ssl socket 对象. - auto sock_stream = init_proxy_stream( - std::move(remote_socket), m_ssl_cli_context); - - // get origin ssl stream type. 
- ssl_stream& ssl_socket = - boost::variant2::get(sock_stream); - - if (m_option.scramble_) - { - auto& next_layer = ssl_socket.next_layer(); - - using NextLayerType = - std::decay_t; - - if constexpr (!std::same_as) - { - next_layer.set_scramble_key( - m_outout_key - ); - - next_layer.set_unscramble_key( - m_outin_key - ); - } - } - - std::string sni = m_option.proxy_ssl_name_.empty() - ? proxy_host : m_option.proxy_ssl_name_; - - // Set SNI Hostname. - if (!SSL_set_tlsext_host_name( - ssl_socket.native_handle(), sni.c_str())) - { - XLOG_FWARN("connection id: {}," - " SSL_set_tlsext_host_name error: {}", - m_connection_id, - ::ERR_get_error()); - } - - XLOG_DBG << "connection id: " - << m_connection_id - << ", do async ssl handshake..."; - - // do async handshake. - co_await ssl_socket.async_handshake( - net::ssl::stream_base::client, - net_awaitable[ec]); - if (ec) - { - XLOG_FWARN("connection id: {}," - " ssl client protocol handshake error: {}", - m_connection_id, - ec.message()); - } - - XLOG_FDBG("connection id: {}, ssl handshake: {}", - m_connection_id, - proxy_host); - - co_return sock_stream; - } - - auto sock_stream = init_proxy_stream( - std::move(remote_socket)); - - auto& sock = - boost::variant2::get(sock_stream); - - if (m_option.scramble_) - { - using NextLayerType = - std::decay_t; - - if constexpr (!std::same_as) - { - sock.set_scramble_key( - m_outout_key - ); - - sock.set_unscramble_key( - m_outin_key - ); - } - } - - co_return sock_stream; - }; - - m_remote_socket = std::move(co_await instantiate_stream()); - - XLOG_DBG << "connection id: " - << m_connection_id - << ", connect to next proxy: " - << proxy_host - << ":" - << proxy_port - << " start upstream handshake with " - << std::string(scheme); - - if (scheme.starts_with("socks")) - { - socks_client_option opt; - - opt.target_host = target_host; - opt.target_port = target_port; - opt.proxy_hostname = true; - opt.username = std::string(m_bridge_proxy->user()); - opt.password = 
std::string(m_bridge_proxy->password()); - - if (scheme == "socks4") - opt.version = socks4_version; - else if (scheme == "socks4a") - opt.version = socks4a_version; - - co_await async_socks_handshake( - m_remote_socket, - opt, - net_awaitable[ec]); - } - else if (scheme.starts_with("http")) - { - http_proxy_client_option opt; - - opt.target_host = target_host; - opt.target_port = target_port; - opt.username = std::string(m_bridge_proxy->user()); - opt.password = std::string(m_bridge_proxy->password()); - - co_await async_http_proxy_handshake( - m_remote_socket, - opt, - net_awaitable[ec]); - } - - if (ec) - { - XLOG_FWARN("connection id: {}" - ", {} connect to next host {}:{} error: {}", - m_connection_id, - std::string(scheme), - target_host, - target_port, - ec.message()); - - co_return false; - } - - co_return true; - } - - inline net::awaitable start_connect_host( - std::string target_host, - uint16_t target_port, - boost::system::error_code& ec, - bool resolve = false) - { - auto executor = co_await net::this_coro::executor; - - tcp::socket& remote_socket = - net_tcp_socket(m_remote_socket); - - if (m_bridge_proxy) - { - auto ret = co_await connect_bridge_proxy( - remote_socket, - target_host, - target_port, - ec); - - co_return ret; - } - else - { - net::ip::basic_resolver_results targets; - if (resolve) - { - tcp::resolver resolver{ executor }; - - targets = co_await resolver.async_resolve( - target_host, - std::to_string(target_port), - net_awaitable[ec]); - if (ec) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", resolve: " - << target_host - << ", error: " - << ec.message(); - - co_return false; - } - } - else - { - tcp::endpoint dst_endpoint; - - dst_endpoint.address( - net::ip::address::from_string(target_host)); - dst_endpoint.port(target_port); - - targets = net::ip::basic_resolver_results::create( - dst_endpoint, "", ""); - } - - if (m_option.happyeyeballs_) - { - co_await asio_util::async_connect( - remote_socket, - targets, - 
[this](const auto& ec, auto& stream, auto& endp) { - return check_condition(ec, stream, endp); - }, - net_awaitable[ec]); - } - else - { - for (auto endpoint : targets) - { - ec = boost::asio::error::host_not_found; - - if (m_option.connect_v4_only_) - { - if (endpoint.endpoint().address().is_v6()) - continue; - } - else if (m_option.connect_v6_only_) - { - if (endpoint.endpoint().address().is_v4()) - continue; - } - - boost::system::error_code ignore_ec; - remote_socket.close(ignore_ec); - - if (m_bind_interface) - { - tcp::endpoint bind_endpoint( - *m_bind_interface, - 0); - - remote_socket.open( - bind_endpoint.protocol(), - ec); - if (ec) - break; - - remote_socket.bind( - bind_endpoint, - ec); - if (ec) - break; - } - - co_await remote_socket.async_connect( - endpoint, - net_awaitable[ec]); - if (!ec) - break; - } - } - - if (ec) - { - XLOG_FWARN("connection id: {}, connect to target {}:{} error: {}", - m_connection_id, - target_host, - target_port, - ec.message()); - - co_return false; - } - - m_remote_socket = init_proxy_stream( - std::move(remote_socket)); - } - - co_return true; - } - - // is_crytpo_stream 判断当前连接是否为加密连接. - inline bool is_crytpo_stream() const - { - return boost::variant2::holds_alternative(m_remote_socket); - } - - inline net::awaitable - normal_web_server(http::request& req, std::optional& parser) - { - boost::system::error_code ec; - - bool keep_alive = false; - bool has_read_header = true; - - for (; !m_abort;) - { - if (!has_read_header) - { - // normal_web_server 调用是从 http_proxy_get - // 跳转过来的, 该函数已经读取了请求头, 所以第1次不需 - // 要再次读取请求头, 即 has_read_header 为 true. - // 当 keepalive 时,需要读取请求头, 此时 has_read_header - // 为 false, 则在此读取和解析后续的 http 请求头. - parser.emplace(); - parser->body_limit(1024 * 512); // 512k - m_local_buffer.consume(m_local_buffer.size()); - - co_await http::async_read_header( - m_local_socket, - m_local_buffer, - *parser, - net_awaitable[ec]); - if (ec) - { - XLOG_DBG << "connection id: " - << m_connection_id - << (keep_alive ? 
", keepalive" : "") - << ", web async_read_header: " - << ec.message(); - co_return; - } - - req = parser->release(); - } - - if (req[http::field::expect] == "100-continue") - { - http::response res; - res.version(11); - res.result(http::status::method_not_allowed); - - co_await http::async_write( - m_local_socket, - res, - net_awaitable[ec]); - if (ec) - { - XLOG_DBG << "connection id: " - << m_connection_id - << ", web expect async_write: " - << ec.message(); - } - co_return; - } - - has_read_header = false; - keep_alive = req.keep_alive(); - - if (beast::websocket::is_upgrade(req)) - { - auto fake_page = fmt::vformat(fake_404_content_fmt, - fmt::make_format_args(server_date_string())); - - co_await net::async_write( - m_local_socket, - net::buffer(fake_page), - net::transfer_all(), - net_awaitable[ec]); - - co_return; - } - - std::string target = req.target(); - boost::smatch what; - http_context http_ctx{ {}, req, target, make_real_target_path(req.target()) }; - - #define BEGIN_HTTP_ROUTE() if (false) {} - #define ON_HTTP_ROUTE(exp, func) \ - else if (boost::regex_match( \ - target, what, boost::regex{ exp })) { \ - for (auto i = 1; i < static_cast(what.size()); i++) \ - http_ctx.command_.emplace_back(what[i]); \ - co_await func(http_ctx); \ - } - #define END_HTTP_ROUTE() else { \ - co_await default_http_route( \ - req, \ - fake_400_content, \ - http::status::bad_request ); } - - BEGIN_HTTP_ROUTE() - ON_HTTP_ROUTE(R"(^(.*)?\/$)", on_http_dir) - ON_HTTP_ROUTE(R"(^(.*)?(\/\?q=json.*)$)", on_http_json) - ON_HTTP_ROUTE(R"(^(?!.*\/$).*$)", on_http_get) - END_HTTP_ROUTE() - - if (!keep_alive) break; - continue; - } - - co_await m_local_socket.lowest_layer().async_wait( - net::socket_base::wait_read, net_awaitable[ec]); - - co_return; - } - - inline fs::path path_cat( - const std::wstring& doc, const std::wstring& target) - { - size_t start_pos = 0; - for (auto& c : target) - { - if (!(c == L'/' || c == '\\')) - break; - - start_pos++; - } - - std::wstring_view sv; - 
std::wstring slash = L"/"; - - if (start_pos < target.size()) - sv = std::wstring_view(target.c_str() + start_pos); -#ifdef WIN32 - slash = L"\\"; - if (doc.back() == L'/' || - doc.back() == L'\\') - slash = L""; - return fs::path(doc + slash + std::wstring(sv)); -#else - if (doc.back() == L'/') - slash = L""; - return fs::path( - boost::nowide::narrow(doc + slash + std::wstring(sv))); -#endif // WIN32 - }; - - template - inline std::string make_unc_path(const Path& path) - { - auto ret = path.string(); - -#ifdef WIN32 - if (ret.size() > MAX_PATH) - { - boost::replace_all(ret, "/", "\\"); - return "\\\\?\\" + ret; - } -#endif - - return ret; - } - - inline std::wstring make_target_path(const std::string& target) - { - std::string url = "http://example.com"; - if (target.starts_with("/")) - url += target; - else - url += "/" + target; - - auto result = urls::parse_uri(url); - if (result.has_error()) - return boost::nowide::widen(target); - - return boost::nowide::widen(result->path()); - } - - inline std::string make_real_target_path(const std::string& target) - { - auto target_path = make_target_path(target); - auto doc_path = boost::nowide::widen(m_option.doc_directory_); - -#ifdef WIN32 - auto ret = make_unc_path(path_cat(doc_path, target_path)); -#else - auto ret = path_cat(doc_path, target_path).string(); -#endif - - return ret; - } - - inline std::tuple file_last_wirte_time(const fs::path& file) - { - static auto loc_time = [](auto t) -> struct tm* - { - using time_type = std::decay_t; - if constexpr (std::is_same_v) - { - auto sctp = std::chrono::time_point_cast< - std::chrono::system_clock::duration>(t - - std::filesystem::file_time_type::clock::now() + - std::chrono::system_clock::now()); - auto time = std::chrono::system_clock::to_time_t(sctp); - return std::localtime(&time); - } - else if constexpr (std::is_same_v) - { - return std::localtime(&t); - } - else - { - static_assert(!std::is_same_v, "time type required!"); - } - }; - - 
boost::system::error_code ec; - std::string time_string; - fs::path unc_path; - - auto ftime = fs::last_write_time(file, ec); - if (ec) - { - #ifdef WIN32 - if (file.string().size() > MAX_PATH) - { - unc_path = make_unc_path(file); - ftime = fs::last_write_time(unc_path, ec); - } - #endif - } - - if (!ec) - { - auto tm = loc_time(ftime); - - char tmbuf[64] = { 0 }; - std::strftime(tmbuf, - sizeof(tmbuf), - "%m-%d-%Y %H:%M", - tm); - - time_string = tmbuf; - } - - return { time_string, unc_path }; - } - - inline std::vector - format_path_list(const std::string& path, boost::system::error_code& ec) - { - fs::directory_iterator end; - fs::directory_iterator it(path, ec); - if (ec) - { - XLOG_DBG << "connection id: " - << m_connection_id - << ", format_path_list read dir: " - << path - << ", error: " - << ec.message(); - return {}; - } - - std::vector path_list; - std::vector file_list; - - for (; it != end && !m_abort; it++) - { - const auto& item = it->path(); - - auto [ftime, unc_path] = file_last_wirte_time(item); - std::wstring time_string = boost::nowide::widen(ftime); - - std::wstring rpath; - - if (fs::is_directory(unc_path.empty() ? item : unc_path, ec)) - { - auto leaf = boost::nowide::narrow(item.filename().wstring()); - leaf = leaf + "/"; - rpath = boost::nowide::widen(leaf); - int width = 50 - static_cast(rpath.size()); - width = width < 0 ? 0 : width; - std::wstring space(width, L' '); - auto show_path = rpath; - if (show_path.size() > 50) { - show_path = show_path.substr(0, 47); - show_path += L"..>"; - } - auto str = fmt::format(body_fmt, - rpath, - show_path, - space, - time_string, - L"-"); - - path_list.push_back(str); - } - else - { - auto leaf = boost::nowide::narrow(item.filename().wstring()); - rpath = boost::nowide::widen(leaf); - int width = 50 - (int)rpath.size(); - width = width < 0 ? 
0 : width; - std::wstring space(width, L' '); - std::wstring filesize; - if (unc_path.empty()) - unc_path = item; - auto sz = static_cast(fs::file_size( - unc_path, ec)); - if (ec) - sz = 0; - filesize = boost::nowide::widen( - strutil::add_suffix(sz)); - auto show_path = rpath; - if (show_path.size() > 50) { - show_path = show_path.substr(0, 47); - show_path += L"..>"; - } - auto str = fmt::format(body_fmt, - rpath, - show_path, - space, - time_string, - filesize); - - file_list.push_back(str); - } - } - - ec = {}; - - path_list.insert(path_list.end(), - file_list.begin(), file_list.end()); - - return path_list; - } - - inline std::string file_hash(const fs::path& p, boost::system::error_code& ec) - { - ec = {}; - - std::ifstream file(p.string(), std::ios::binary); - if (!file) - { - ec = boost::system::error_code(errno, - boost::system::generic_category()); - return {}; - } - - boost::uuids::detail::sha1 sha1; - const auto buf_size = 1024 * 1024 * 4; - std::unique_ptr bufs((char*)std::malloc(buf_size), &std::free); - - while (file.read(bufs.get(), buf_size) || file.gcount()) - sha1.process_bytes(bufs.get(), file.gcount()); - - boost::uuids::detail::sha1::digest_type hash; - sha1.get_digest(hash); - - std::stringstream ss; - for (auto const& c : hash) - ss << std::hex << std::setfill('0') << std::setw(2) << static_cast(c); - - return ss.str(); - } - - template - inline auto async_hash_file(const fs::path& path, CompletionToken&& token) - { - auto self = shared_from_this(); - - return net::async_initiate( - [this, self, path] - (auto&& handler) mutable - { - std::thread( - [this, self, path, handler = std::move(handler)]() mutable - { - boost::system::error_code ec; - - auto hash = file_hash(path, ec); - - auto executor = net::get_associated_executor(handler); - net::post(executor, [this, self, - ec = std::move(ec), - hash = std::move(hash), - handler = std::move(handler)]() mutable - { - handler(ec, hash); - }); - } - ).detach(); - }, token); - } - - inline 
net::awaitable on_http_json(const http_context& hctx) - { - boost::system::error_code ec; - auto& request = hctx.request_; - - auto target = make_real_target_path(hctx.command_[0]); - - fs::directory_iterator end; - fs::directory_iterator it(target, ec); - if (ec) - { - string_response res{ http::status::found, request.version() }; - res.set(http::field::server, version_string); - res.set(http::field::date, server_date_string()); - res.set(http::field::location, "/"); - res.keep_alive(request.keep_alive()); - res.prepare_payload(); - - http::serializer sr(res); - co_await http::async_write( - m_local_socket, - sr, - net_awaitable[ec]); - if (ec) - XLOG_WARN << "connection id: " - << m_connection_id - << ", http_dir write location err: " - << ec.message(); - - co_return; - } - - bool hash = false; - - urls::params_view qp(hctx.command_[1]); - if (qp.find("hash") != qp.end()) - hash = true; - - boost::json::array path_list; - - for (; it != end && !m_abort; it++) +#ifdef WIN32 + if (ret.size() > MAX_PATH) { - const auto& item = it->path(); - boost::json::object obj; - - auto [ftime, unc_path] = file_last_wirte_time(item); - obj["last_write_time"] = ftime; - - if (fs::is_directory(unc_path.empty() ? 
item : unc_path, ec)) - { - auto leaf = boost::nowide::narrow(item.filename().wstring()); - obj["filename"] = leaf; - obj["is_dir"] = true; - } - else - { - auto leaf = boost::nowide::narrow(item.filename().wstring()); - obj["filename"] = leaf; - obj["is_dir"] = false; - if (unc_path.empty()) - unc_path = item; - auto sz = fs::file_size(unc_path, ec); - if (ec) - sz = 0; - obj["filesize"] = sz; - if (hash) - { - auto ret = co_await - async_hash_file(unc_path, net_awaitable[ec]); - if (ec) - ret = ""; - obj["hash"] = ret; - } - } - - path_list.push_back(obj); + boost::replace_all(ret, "/", "\\"); + return "\\\\?\\" + ret; } +#endif - auto body = boost::json::serialize(path_list); - - string_response res{ http::status::ok, request.version() }; - res.set(http::field::server, version_string); - res.set(http::field::date, server_date_string()); - res.set(http::field::content_type, "application/json"); - res.keep_alive(request.keep_alive()); - res.body() = body; - res.prepare_payload(); - - http::serializer sr(res); - co_await http::async_write( - m_local_socket, - sr, - net_awaitable[ec]); - if (ec) - XLOG_WARN << "connection id: " - << m_connection_id - << ", http dir write body err: " - << ec.message(); - - co_return; + return ret; } - inline net::awaitable on_http_dir(const http_context& hctx) + inline std::string make_target_path(std::string_view target) { - using namespace std::literals; - - boost::system::error_code ec; - auto& request = hctx.request_; - - // 查找目录下是否存在 index.html 或 index.htm 文件, 如果存在则返回该文件. - // 否则返回目录下的文件列表. - auto index_html = fs::path(hctx.target_path_) / "index.html"; - fs::exists(index_html, ec) ? 
index_html = index_html : - index_html = fs::path(hctx.target_path_) / "index.htm"; - - if (fs::exists(index_html, ec)) - { - std::ifstream file(index_html.string(), std::ios::binary); - if (file) - { - std::string content( - (std::istreambuf_iterator(file)), - std::istreambuf_iterator()); - - string_response res{ http::status::ok, request.version() }; - res.set(http::field::server, version_string); - res.set(http::field::date, server_date_string()); - auto ext = strutil::to_lower(index_html.extension().string()); - if (global_mimes.count(ext)) - res.set(http::field::content_type, global_mimes.at(ext)); - else - res.set(http::field::content_type, "text/plain"); - res.keep_alive(request.keep_alive()); - res.body() = content; - res.prepare_payload(); - - http::serializer sr(res); - co_await http::async_write( - m_local_socket, - sr, - net_awaitable[ec]); - if (ec) - XLOG_WARN << "connection id: " - << m_connection_id - << ", http dir write index err: " - << ec.message(); - - co_return; - } - } - - auto path_list = format_path_list(hctx.target_path_, ec); - if (ec) + std::string url = "http://example.com"; + if (target.starts_with("/")) + url += target; + else { - string_response res{ http::status::found, request.version() }; - res.set(http::field::server, version_string); - res.set(http::field::date, server_date_string()); - res.set(http::field::location, "/"); - res.keep_alive(request.keep_alive()); - res.prepare_payload(); - - http::serializer sr(res); - co_await http::async_write( - m_local_socket, - sr, - net_awaitable[ec]); - if (ec) - XLOG_WARN << "connection id: " - << m_connection_id - << ", http_dir write location err: " - << ec.message(); - - co_return; + url += "/"; + url += target; } - auto target_path = make_target_path(hctx.target_); - std::wstring head = fmt::format(head_fmt, - target_path, - target_path); - - std::wstring body = fmt::format(body_fmt, - L"../", - L"../", - L"", - L"", - L""); - - for (auto& s : path_list) - body += s; - body = head + 
body + tail_fmt; - - string_response res{ http::status::ok, request.version() }; - res.set(http::field::server, version_string); - res.set(http::field::date, server_date_string()); - res.keep_alive(request.keep_alive()); - res.body() = boost::nowide::narrow(body); - res.prepare_payload(); - - http::serializer sr(res); - co_await http::async_write( - m_local_socket, - sr, - net_awaitable[ec]); - if (ec) - XLOG_WARN << "connection id: " - << m_connection_id - << ", http dir write body err: " - << ec.message(); + auto result = urls::parse_uri(url); + if (result.has_error()) + return std::string(target);//boost::nowide::widen(target); - co_return; + return result->path();//boost::nowide::widen(result->path()); } - inline net::awaitable on_http_get(const http_context& hctx) + inline std::string make_real_target_path(std::string_view target) { - boost::system::error_code ec; - - const auto& request = hctx.request_; - const fs::path& path = hctx.target_path_; - - if (!fs::exists(path, ec)) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", http " - << hctx.target_ - << " file not exists"; - - auto fake_page = fmt::vformat(fake_404_content_fmt, - fmt::make_format_args(server_date_string())); - - co_await net::async_write( - m_local_socket, - net::buffer(fake_page), - net::transfer_all(), - net_awaitable[ec]); - - co_return; - } - - if (fs::is_directory(path, ec)) - { - XLOG_DBG << "connection id: " - << m_connection_id - << ", http " - << hctx.target_ - << " is directory"; - - std::string url = "http://"; - if (is_crytpo_stream()) - url = "https://"; - url += request[http::field::host]; - urls::url u(url); - std::string target = hctx.target_ + "/"; - u.set_path(target); - - co_await location_http_route(request, u.buffer()); - - co_return; - } - - size_t content_length = fs::file_size(path, ec); - if (ec) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", http " - << hctx.target_ - << " file size error: " - << ec.message(); - - co_await 
default_http_route( - request, fake_400_content, http::status::bad_request); - - co_return; - } - - std::fstream file(path.string(), - std::ios_base::binary | - std::ios_base::in); - - std::string user_agent; - if (request.count(http::field::user_agent)) - user_agent = std::string(request[http::field::user_agent]); - - std::string referer; - if (request.count(http::field::referer)) - referer = std::string(request[http::field::referer]); - - XLOG_DBG << "connection id: " - << m_connection_id - << ", http file: " - << hctx.target_ - << ", size: " - << content_length - << (request.count("Range") ? - ", range: " + std::string(request["Range"]) - : std::string()) - << (!user_agent.empty() ? - ", user_agent: " + user_agent - : std::string()) - << (!referer.empty() ? - ", referer: " + referer - : std::string()); - - http::status st = http::status::ok; - auto range = parser_http_ranges(request["Range"]); - - // 只支持一个 range 的请求, 不支持多个 range 的请求. - if (range.size() == 1) - { - st = http::status::partial_content; - auto& r = range.front(); - - // 起始位置为 -1, 表示从文件末尾开始读取, 例如 Range: -500 - // 则表示读取文件末尾的 500 字节. - if (r.first == -1) - { - // 如果第二个参数也为 -1, 则表示请求有问题, 返回 416. - if (r.second < 0) - { - co_await default_http_route(request, - fake_416_content, - http::status::range_not_satisfiable); - co_return; - } - else if (r.second >= 0) - { - // 计算起始位置和结束位置, 例如 Range: -5 - // 则表示读取文件末尾的 5 字节. - // content_length - r.second 表示起始位置. - // content_length - 1 表示结束位置. - // 例如文件长度为 10 字节, 则起始位置为 5, - // 结束位置为 9(数据总长度为[0-9]), 一共 5 字节. - r.first = content_length - r.second; - r.second = content_length - 1; - } - } - else if (r.second == -1) - { - // 起始位置为正数, 表示从文件头开始读取, 例如 Range: 500 - // 则表示读取文件头的 500 字节. 
- if (r.first < 0) - { - co_await default_http_route(request, - fake_416_content, - http::status::range_not_satisfiable); - co_return; - } - else - { - r.second = content_length - 1; - } - } - - file.seekg(r.first, std::ios_base::beg); - } - - buffer_response res{ st, request.version() }; - - res.set(http::field::server, version_string); - res.set(http::field::date, server_date_string()); - - auto ext = strutil::to_lower(fs::path(path).extension().string()); + auto target_path = make_target_path(target); + auto doc_path = m_option.doc_directory_; - if (global_mimes.count(ext)) - res.set(http::field::content_type, global_mimes.at(ext)); - else - res.set(http::field::content_type, "text/plain"); +#ifdef WIN32 + auto ret = make_unc_path(path_cat(doc_path, target_path)); +#else + auto ret = path_cat(doc_path, target_path).string(); +#endif - if (st == http::status::ok) - res.set(http::field::accept_ranges, "bytes"); + return ret; + } - if (st == http::status::partial_content) + inline std::tuple file_last_wirte_time(const fs::path& file) + { + static auto loc_time = [](auto t) -> struct tm* { - const auto& r = range.front(); - - if (r.second < r.first && r.second >= 0) + using time_type = std::decay_t; + if constexpr (std::is_same_v) { - co_await default_http_route(request, - fake_416_content, - http::status::range_not_satisfiable); - co_return; + auto sctp = std::chrono::time_point_cast< + std::chrono::system_clock::duration>(t - + std::filesystem::file_time_type::clock::now() + + std::chrono::system_clock::now()); + auto time = std::chrono::system_clock::to_time_t(sctp); + return std::localtime(&time); } - - std::string content_range = fmt::format( - "bytes {}-{}/{}", - r.first, - r.second, - content_length); - - content_length = r.second - r.first + 1; - res.set(http::field::content_range, content_range); - } - - res.keep_alive(hctx.request_.keep_alive()); - res.content_length(content_length); - - response_serializer sr(res); - - res.body().data = nullptr; - 
res.body().more = false; - - co_await http::async_write_header( - m_local_socket, - sr, - net_awaitable[ec]); - if (ec) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", http async_write_header: " - << ec.message(); - - co_return; - } - - auto buf_size = 5 * 1024 * 1024; - if (m_option.tcp_rate_limit_ > 0 && m_option.tcp_rate_limit_ < buf_size) - buf_size = m_option.tcp_rate_limit_; - - std::unique_ptr bufs((char*)std::malloc(buf_size), &std::free); - char* buf = bufs.get(); - std::streamsize total = 0; - - stream_rate_limit(m_local_socket, m_option.tcp_rate_limit_); - - do - { - auto bytes_transferred = fileop::read(file, std::span(buf, buf_size)); - bytes_transferred = std::min( - bytes_transferred, - content_length - total - ); - if (bytes_transferred == 0 || - total >= (std::streamsize)content_length) + else if constexpr (std::is_same_v) { - res.body().data = nullptr; - res.body().more = false; + return std::localtime(&t); } else { - res.body().data = buf; - res.body().size = bytes_transferred; - res.body().more = true; - } - - stream_expires_after(m_local_socket, std::chrono::seconds(m_option.tcp_timeout_)); - - co_await http::async_write( - m_local_socket, - sr, - net_awaitable[ec]); - total += bytes_transferred; - if (ec == http::error::need_buffer) - { - ec = {}; - continue; - } - if (ec) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", http async_write: " - << ec.message() - << ", already write: " - << total; - co_return; + static_assert(!std::is_same_v, "time type required!"); } - } while (!sr.is_done()); - - XLOG_DBG << "connection id: " - << m_connection_id - << ", http request: " - << hctx.target_ - << ", completed"; - - co_return; - } - - inline std::string server_date_string() - { - auto time = std::time(nullptr); - auto gmt = gmtime((const time_t*)&time); - - std::string str(64, '\0'); - auto ret = strftime((char*)str.data(), 64, "%a, %d %b %Y %H:%M:%S GMT", gmt); - str.resize(ret); - - return str; - } + }; - inline 
net::awaitable default_http_route( - const string_request& request, std::string response, http::status status) - { boost::system::error_code ec; + std::string time_string; + fs::path unc_path; - string_response res{ status, request.version() }; - res.set(http::field::server, version_string); - res.set(http::field::date, server_date_string()); - res.set(http::field::content_type, "text/html"); - - res.keep_alive(true); - res.body() = response; - res.prepare_payload(); - - http::serializer sr(res); - co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + auto ftime = fs::last_write_time(file, ec); if (ec) { - XLOG_WARN << "connection id: " - << m_connection_id - << ", default http route err: " - << ec.message(); + #ifdef WIN32 + if (file.string().size() > MAX_PATH) + { + unc_path = make_unc_path(file); + ftime = fs::last_write_time(unc_path, ec); + } + #endif } - co_return; - } - - inline net::awaitable location_http_route( - const string_request& request, const std::string& path) - { - boost::system::error_code ec; - - string_response res{ http::status::moved_permanently, request.version() }; - res.set(http::field::server, version_string); - res.set(http::field::date, server_date_string()); - res.set(http::field::content_type, "text/html"); - res.set(http::field::location, path); + if (!ec) + { + auto tm = loc_time(ftime); - res.keep_alive(true); - res.body() = fake_302_content; - res.prepare_payload(); + char tmbuf[64] = { 0 }; + std::strftime(tmbuf, + sizeof(tmbuf), + "%m-%d-%Y %H:%M", + tm); - http::serializer sr(res); - co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); - if (ec) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", location http route err: " - << ec.message(); + time_string = tmbuf; } - co_return; + return { time_string, unc_path }; } - inline net::awaitable forbidden_http_route(const string_request& request) - { - boost::system::error_code ec; - - string_response res{ http::status::forbidden, 
request.version() }; - res.set(http::field::server, version_string); - res.set(http::field::date, server_date_string()); - res.set(http::field::content_type, "text/html"); + std::pmr::vector format_path_list(const std::string& path, + boost::system::error_code& ec, pmr_alloc_t alloc); - res.keep_alive(true); - res.body() = fake_403_content; - res.prepare_payload(); + net::awaitable on_http_json(const http_context& hctx); - http::serializer sr(res); - co_await http::async_write( - m_local_socket, sr, net_awaitable[ec]); - if (ec) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", forbidden http route err: " - << ec.message(); - } + net::awaitable on_http_dir(const http_context& hctx); - co_return; - } + net::awaitable on_http_get(const http_context& hctx); - inline net::awaitable unauthorized_http_route(const string_request& request) - { - boost::system::error_code ec; + std::pmr::string server_date_string(pmr_alloc_t alloc = pmr_alloc_t{}); - string_response res{ http::status::unauthorized, request.version() }; - res.set(http::field::server, version_string); - res.set(http::field::date, server_date_string()); - res.set(http::field::content_type, "text/html; charset=UTF-8"); - res.set(http::field::www_authenticate, "Basic realm=\"proxy\""); + net::awaitable default_http_route( + const string_request& request, std::string response, http::status status); - res.keep_alive(true); - res.body() = fake_401_content; - res.prepare_payload(); + net::awaitable location_http_route(const string_request& request, const std::string& path); - http::serializer sr(res); - co_await http::async_write( - m_local_socket, sr, net_awaitable[ec]); - if (ec) - { - XLOG_WARN << "connection id: " - << m_connection_id - << ", unauthorized http route err: " - << ec.message(); - } + net::awaitable forbidden_http_route(const string_request& request); - co_return; - } + net::awaitable unauthorized_http_route(const string_request& request); inline void user_rate_limit_config(const 
std::string& user) { @@ -4637,6 +2876,8 @@ R"x*x*x( init_ssl_context(); + boost::nowide::nowide_filesystem(); + boost::system::error_code ec; if (fs::exists(m_option.ipip_db_, ec)) @@ -4695,7 +2936,7 @@ R"x*x*x( { pem_file result{ filepath, pem_type::none }; - std::ifstream file(filepath); + boost::nowide::ifstream file(filepath); if (!file.is_open()) return result; diff --git a/proxy/include/proxy/strutil.hpp b/proxy/include/proxy/strutil.hpp index 58c0333f20..66e1bf4e16 100644 --- a/proxy/include/proxy/strutil.hpp +++ b/proxy/include/proxy/strutil.hpp @@ -29,6 +29,8 @@ #include +#include "ctre.hpp" + //! The strutil namespace namespace strutil { @@ -438,6 +440,48 @@ namespace strutil return tokens; } + /** + * @brief Splits input std::string str according to input std::string delim. + * Taken from: https://stackoverflow.com/a/46931770/1892346. + * @param str - std::string that will be split. + * @param delim - the delimiter. + * @param output - the insert iterator to insert splited token + * @return splitted tokens count. + */ + template + inline std::size_t split(std::string_view str, std::string_view delim, InsertIterator output) + { + size_t pos_start = 0, pos_end, delim_len = delim.length(); + std::string_view token; + std::size_t split_count = 0; + + while ((pos_end = str.find(delim, pos_start)) != std::string::npos) + { + token = str.substr(pos_start, pos_end - pos_start); + pos_start = pos_end + delim_len; + output = token; + split_count ++; + } + + output = str.substr(pos_start); + split_count ++; + return split_count; + } + + inline std::string_view remove_spaces(std::string_view str) + { + auto start_pos = 0; + auto end_pos = str.size(); + + while ( std::isspace( str[start_pos] ) ) + start_pos ++; + + while ( std::isspace( str[end_pos-1] ) ) + end_pos --; + + return str.substr(start_pos, end_pos); + } + /** * @brief Splits input string using regex as a delimiter. * @param src - std::string that will be split. 
diff --git a/proxy/src/proxy_server.cpp b/proxy/src/proxy_server.cpp new file mode 100644 index 0000000000..234f35105a --- /dev/null +++ b/proxy/src/proxy_server.cpp @@ -0,0 +1,1792 @@ + + +#include "proxy/proxy_server.hpp" + +#include +#include +#include "ctre.hpp" + +namespace proxy +{ + static const char fake_404_content[] = +R"x*x*x(404 Not Found + +

404 Not Found

+
+
nginx/1.20.2
+ +)x*x*x"; + + static const char fake_502_content[] = +R"xx( +502 Bad Gateway + +

502 Bad Gateway

+
nginx/1.26.2
+ +)xx"; + + inline constexpr auto head_fmt = + R"(Index of {}

Index of {}


)"; + inline constexpr auto tail_fmt = + "

"; + inline constexpr auto body_fmt = + // "{}{} {} {}\r\n"; + "{}{}{}\r\n"; + + + inline std::string file_hash(const fs::path& p, boost::system::error_code& ec) + { + ec = {}; + + boost::nowide::ifstream file(p.string(), std::ios::binary); + if (!file) + { + ec = boost::system::error_code(errno, + boost::system::generic_category()); + return {}; + } + + boost::uuids::detail::sha1 sha1; + const auto buf_size = 1024 * 1024 * 4; + std::unique_ptr bufs((char*)std::malloc(buf_size), &std::free); + + while (file.read(bufs.get(), buf_size) || file.gcount()) + sha1.process_bytes(bufs.get(), file.gcount()); + + boost::uuids::detail::sha1::digest_type hash; + sha1.get_digest(hash); + + std::stringstream ss; + for (auto const& c : hash) + ss << std::hex << std::setfill('0') << std::setw(2) << static_cast(c); + + return ss.str(); + } + + template + inline auto async_hash_file(const fs::path& path, CompletionToken&& token) + { + return net::async_initiate( + [path](auto&& handler) mutable + { + std::thread( + [path, handler = std::move(handler)]() mutable + { + boost::system::error_code ec; + + auto hash = file_hash(path, ec); + + auto executor = net::get_associated_executor(handler); + net::post(executor, [ + ec = std::move(ec), + hash = std::move(hash), + handler = std::move(handler)]() mutable + { + handler(ec, hash); + }); + } + ).detach(); + }, token); + } + + net::awaitable proxy_session::connect_bridge_proxy(tcp::socket& remote_socket, std::string target_host, + uint16_t target_port, boost::system::error_code& ec) + { + auto executor = co_await net::this_coro::executor; + + tcp::resolver resolver{executor}; + + auto proxy_host = std::string(m_bridge_proxy->host()); + std::string proxy_port; + if (m_bridge_proxy->port_number() == 0) + { + proxy_port = std::to_string(urls::default_port(m_bridge_proxy->scheme_id())); + } + else + { + proxy_port = std::to_string(m_bridge_proxy->port_number()); + } + if (proxy_port.empty()) + { + proxy_port = m_bridge_proxy->scheme(); + } + 
+ XLOG_DBG << "connection id: " << m_connection_id << ", connect to next proxy: " << proxy_host << ":" + << proxy_port; + + tcp::resolver::results_type targets; + + if (!detect_hostname(proxy_host)) + { + net::ip::tcp::endpoint endp(net::ip::address::from_string(proxy_host), + m_bridge_proxy->port_number() + ? m_bridge_proxy->port_number() + : urls::default_port(m_bridge_proxy->scheme_id())); + + targets = tcp::resolver::results_type::create(endp, proxy_host, m_bridge_proxy->scheme()); + } + else + { + targets = co_await resolver.async_resolve(proxy_host, proxy_port, net_awaitable[ec]); + + if (ec) + { + XLOG_FWARN("connection id: {}," + " resolver to next proxy {}:{} error: {}", + m_connection_id, std::string(m_bridge_proxy->host()), std::string(m_bridge_proxy->port()), + ec.message()); + + co_return false; + } + } + + if (m_option.happyeyeballs_) + { + co_await asio_util::async_connect(remote_socket, targets, [this](const auto& ec, auto& stream, auto& endp) + { return check_condition(ec, stream, endp); }, net_awaitable[ec]); + } + else + { + for (auto endpoint : targets) + { + ec = boost::asio::error::host_not_found; + + if (m_option.connect_v4_only_) + { + if (endpoint.endpoint().address().is_v6()) + { + continue; + } + } + else if (m_option.connect_v6_only_) + { + if (endpoint.endpoint().address().is_v4()) + { + continue; + } + } + + boost::system::error_code ignore_ec; + remote_socket.close(ignore_ec); + + if (m_bind_interface) + { + tcp::endpoint bind_endpoint(*m_bind_interface, 0); + + remote_socket.open(bind_endpoint.protocol(), ec); + if (ec) + { + break; + } + + remote_socket.bind(bind_endpoint, ec); + if (ec) + { + break; + } + } + + co_await remote_socket.async_connect(endpoint, net_awaitable[ec]); + if (!ec) + { + break; + } + } + } + + if (ec) + { + XLOG_FWARN("connection id: {}," + " connect to next proxy {}:{} error: {}", + m_connection_id, std::string(m_bridge_proxy->host()), std::string(m_bridge_proxy->port()), + ec.message()); + + co_return 
false; + } + + XLOG_DBG << "connection id: " << m_connection_id << ", connect to next proxy: " << proxy_host << ":" + << proxy_port << " success"; + + // 如果启用了 noise, 则在向上游代理服务器发起 tcp 连接成功后, 发送 noise + // 数据以及接收 noise 数据. + if (m_option.scramble_) + { + if (!co_await noise_handshake(remote_socket, m_outin_key, m_outout_key)) + { + co_return false; + } + + XLOG_DBG << "connection id: " << m_connection_id << ", with upstream noise completed"; + } + + // 使用ssl加密与下一级代理通信. + if (m_option.proxy_pass_use_ssl_) + { + // 设置 ssl cert 证书目录. + if (fs::exists(m_option.ssl_cacert_path_)) + { + m_ssl_cli_context.add_verify_path(m_option.ssl_cacert_path_, ec); + if (ec) + { + XLOG_FWARN("connection id: {}, " + "load cert path: {}, " + "error: {}", + m_connection_id, m_option.ssl_cacert_path_, ec.message()); + + co_return false; + } + } + } + + auto scheme = m_bridge_proxy->scheme(); + + auto instantiate_stream = [this, &scheme, &proxy_host, &remote_socket, + &ec]() mutable -> net::awaitable + { + ec = {}; + + XLOG_DBG << "connection id: " << m_connection_id << ", connect to next proxy: " << proxy_host + << " instantiate stream"; + + if (m_option.proxy_pass_use_ssl_ || scheme == "https") + { + m_ssl_cli_context.set_verify_mode(net::ssl::verify_peer); + auto cert = default_root_certificates(); + m_ssl_cli_context.add_certificate_authority(net::buffer(cert.data(), cert.size()), ec); + if (ec) + { + XLOG_FWARN("connection id: {}," + " add_certificate_authority error: {}", + m_connection_id, ec.message()); + } + + m_ssl_cli_context.use_tmp_dh(net::buffer(default_dh_param()), ec); + + m_ssl_cli_context.set_verify_callback(net::ssl::rfc2818_verification(proxy_host), ec); + if (ec) + { + XLOG_FWARN("connection id: {}," + " set_verify_callback error: {}", + m_connection_id, ec.message()); + } + + // 生成 ssl socket 对象. + auto sock_stream = init_proxy_stream(std::move(remote_socket), m_ssl_cli_context); + + // get origin ssl stream type. 
+ ssl_stream& ssl_socket = boost::variant2::get(sock_stream); + + if (m_option.scramble_) + { + auto& next_layer = ssl_socket.next_layer(); + + using NextLayerType = std::decay_t; + + if constexpr (!std::same_as) + { + next_layer.set_scramble_key(m_outout_key); + + next_layer.set_unscramble_key(m_outin_key); + } + } + + std::string sni = m_option.proxy_ssl_name_.empty() ? proxy_host : m_option.proxy_ssl_name_; + + // Set SNI Hostname. + if (!SSL_set_tlsext_host_name(ssl_socket.native_handle(), sni.c_str())) + { + XLOG_FWARN("connection id: {}," + " SSL_set_tlsext_host_name error: {}", + m_connection_id, ::ERR_get_error()); + } + + XLOG_DBG << "connection id: " << m_connection_id << ", do async ssl handshake..."; + + // do async handshake. + co_await ssl_socket.async_handshake(net::ssl::stream_base::client, net_awaitable[ec]); + if (ec) + { + XLOG_FWARN("connection id: {}," + " ssl client protocol handshake error: {}", + m_connection_id, ec.message()); + } + + XLOG_FDBG("connection id: {}, ssl handshake: {}", m_connection_id, proxy_host); + + co_return sock_stream; + } + + auto sock_stream = init_proxy_stream(std::move(remote_socket)); + + auto& sock = boost::variant2::get(sock_stream); + + if (m_option.scramble_) + { + using NextLayerType = std::decay_t; + + if constexpr (!std::same_as) + { + sock.set_scramble_key(m_outout_key); + + sock.set_unscramble_key(m_outin_key); + } + } + + co_return sock_stream; + }; + + m_remote_socket = std::move(co_await instantiate_stream()); + + XLOG_DBG << "connection id: " << m_connection_id << ", connect to next proxy: " << proxy_host << ":" + << proxy_port << " start upstream handshake with " << std::string(scheme); + + if (scheme.starts_with("socks")) + { + socks_client_option opt; + + opt.target_host = target_host; + opt.target_port = target_port; + opt.proxy_hostname = true; + opt.username = std::string(m_bridge_proxy->user()); + opt.password = std::string(m_bridge_proxy->password()); + + if (scheme == "socks4") + { + 
opt.version = socks4_version; + } + else if (scheme == "socks4a") + { + opt.version = socks4a_version; + } + + co_await async_socks_handshake(m_remote_socket, opt, net_awaitable[ec]); + } + else if (scheme.starts_with("http")) + { + http_proxy_client_option opt; + + opt.target_host = target_host; + opt.target_port = target_port; + opt.username = std::string(m_bridge_proxy->user()); + opt.password = std::string(m_bridge_proxy->password()); + + co_await async_http_proxy_handshake(m_remote_socket, opt, net_awaitable[ec]); + } + + if (ec) + { + XLOG_FWARN("connection id: {}" + ", {} connect to next host {}:{} error: {}", + m_connection_id, std::string(scheme), target_host, target_port, ec.message()); + + co_return false; + } + + co_return true; + } + + net::awaitable proxy_session::on_http_json(const http_context& hctx) + { + boost::system::error_code ec; + auto& request = hctx.request_; + + auto target = make_real_target_path(hctx.command_[1]); + + std::array pre_alloc_buf; + std::pmr::monotonic_buffer_resource mbr(pre_alloc_buf.data(), pre_alloc_buf.size()); + std::pmr::polymorphic_allocator alloc(&mbr); + + fs::directory_iterator end; + fs::directory_iterator it(target, ec); + if (ec) + { + string_response res{ + std::piecewise_construct, + std::make_tuple(alloc), + std::make_tuple(http::status::found, request.version(), alloc) + }; + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(alloc)); + res.set(http::field::location, "/"); + res.keep_alive(request.keep_alive()); + res.prepare_payload(); + + string_response_serializer sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", http_dir write location err: " << ec.message(); + } + + co_return; + } + + bool hash = false; + + urls::params_view qp(hctx.command_[3]); + if (qp.find("hash") != qp.end()) + { + hash = true; + } + + boost::json::array path_list; + + for (; it != end && 
!m_abort; it++) + { + const auto& item = it->path(); + boost::json::object obj; + + auto [ftime, unc_path] = file_last_wirte_time(item); + obj["last_write_time"] = ftime; + + if (fs::is_directory(unc_path.empty() ? item : unc_path, ec)) + { + obj["filename"] = item.filename().string(); + obj["is_dir"] = true; + } + else + { + obj["filename"] = item.filename().string(); + obj["is_dir"] = false; + if (unc_path.empty()) + { + unc_path = item; + } + auto sz = fs::file_size(unc_path, ec); + if (ec) + { + sz = 0; + } + obj["filesize"] = sz; + if (hash) + { + auto ret = co_await async_hash_file(unc_path, net_awaitable[ec]); + if (ec) + { + ret = ""; + } + obj["hash"] = ret; + } + } + + path_list.push_back(obj); + } + + auto body = boost::json::serialize(path_list); + + string_response res{ + std::piecewise_construct, + std::make_tuple(alloc), + std::make_tuple(http::status::ok, request.version(), alloc) + }; + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(alloc)); + res.set(http::field::content_type, "application/json"); + res.keep_alive(request.keep_alive()); + res.body() = body; + res.prepare_payload(); + + string_response_serializer sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", http dir write body err: " << ec.message(); + } + + co_return; + } + + net::awaitable proxy_session::on_http_dir(const http_context& hctx) + { + using namespace std::literals; + + boost::system::error_code ec; + auto& request = hctx.request_; + + std::array pre_alloc_buf; + std::pmr::monotonic_buffer_resource mbr(pre_alloc_buf.data(), pre_alloc_buf.size()); + std::pmr::polymorphic_allocator alloc(&mbr); + + // 查找目录下是否存在 index.html 或 index.htm 文件, 如果存在则返回该文件. + // 否则返回目录下的文件列表. + auto index_html = fs::path(hctx.target_path_) / "index.html"; + fs::exists(index_html, ec) ? 
index_html = index_html : index_html = fs::path(hctx.target_path_) / "index.htm"; + + if (fs::exists(index_html, ec)) + { + boost::nowide::ifstream file(index_html.string(), std::ios::binary); + if (file) + { + std::pmr::string content((std::istreambuf_iterator(file)), std::istreambuf_iterator(), + alloc); + + string_response res{ + std::piecewise_construct, + std::make_tuple(content, alloc), + std::make_tuple(http::status::ok, request.version(), alloc) + }; + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(alloc)); + auto ext = strutil::to_lower(index_html.extension().string()); + if (global_mimes.count(ext)) + { + res.set(http::field::content_type, global_mimes.at(ext)); + } + else + { + res.set(http::field::content_type, "text/plain"); + } + res.keep_alive(request.keep_alive()); + res.prepare_payload(); + + string_response_serializer sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", http dir write index err: " << ec.message(); + } + + co_return; + } + } + + auto path_list = format_path_list(hctx.target_path_, ec, alloc); + + assert(path_list.get_allocator() == alloc); + + if (ec) + { + string_response res{ + std::piecewise_construct, + std::make_tuple(alloc), + std::make_tuple(http::status::found, request.version(), alloc) + }; + + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(alloc)); + res.set(http::field::location, "/"); + res.keep_alive(request.keep_alive()); + res.prepare_payload(); + + string_response_serializer sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", http_dir write location err: " << ec.message(); + } + + co_return; + } + + auto target_path = make_target_path(hctx.target_); + std::pmr::string autoindex_page{alloc}; + autoindex_page.reserve(4096); + 
fmt::format_to(std::back_inserter(autoindex_page), head_fmt, target_path, target_path); + fmt::format_to(std::back_inserter(autoindex_page), body_fmt, "../", "../", "", ""); + + for (const auto& s : path_list) + { + autoindex_page += s; + } + + autoindex_page += tail_fmt; + + string_response res{ + std::piecewise_construct, + std::make_tuple(std::move(autoindex_page), alloc), + std::make_tuple(http::status::ok, request.version(), alloc) + }; + + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(alloc)); + res.keep_alive(request.keep_alive()); + res.prepare_payload(); + + string_response_serializer sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", http dir write body err: " << ec.message(); + } + + co_return; + } + + net::awaitable proxy_session::on_http_get(const http_context& hctx) + { + boost::system::error_code ec; + + const auto& request = hctx.request_; + fs::path path = std::string_view{hctx.target_path_}; + + if (!fs::exists(path, ec)) + { + XLOG_WARN << "connection id: " << m_connection_id << ", http " << hctx.target_ << " file not exists"; + + std::pmr::string fake_page{hctx.alloc}; + + span_response res{ + std::piecewise_construct, + std::make_tuple(boost::span{fake_404_content, sizeof fake_404_content - 1}), + std::make_tuple(http::status::not_found, request.version(), hctx.alloc) + }; + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(hctx.alloc)); + res.keep_alive(request.keep_alive()); + res.prepare_payload(); + + span_response_serializer sr(res); + + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + + co_return; + } + + if (fs::is_directory(path, ec)) + { + XLOG_DBG << "connection id: " << m_connection_id << ", http " << hctx.target_ << " is directory"; + + std::pmr::string url = {"http://", hctx.alloc}; + if (is_crytpo_stream()) + { + url = 
"https://"; + } + url += request[http::field::host]; + urls::url u(url); + std::pmr::string target{hctx.target_ , hctx.alloc}; + target += "/"; + u.set_path(target); + + co_await location_http_route(request, u.buffer()); + + co_return; + } + + size_t content_length = fs::file_size(path, ec); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", http " << hctx.target_ + << " file size error: " << ec.message(); + + co_await default_http_route(request, fake_400_content, http::status::bad_request); + + co_return; + } +#if defined (BOOST_ASIO_HAS_FILE) +# if defined(_WIN32) + net::stream_file file(co_await net::this_coro::executor); + file.assign(::CreateFileW(path.wstring().c_str(), GENERIC_READ, FILE_SHARE_READ, 0, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED|FILE_FLAG_SEQUENTIAL_SCAN, 0), ec); +# else + net::stream_file file(co_await net::this_coro::executor, path.string(), net::stream_file::read_only); +# endif +#else // BOOST_ASIO_HAS_FILE + boost::nowide::fstream file(path.string(), std::ios_base::binary | std::ios_base::in); +#endif //BOOST_ASIO_HAS_FILE + + std::pmr::string user_agent { hctx.alloc }; + if (request.count(http::field::user_agent)) + { + user_agent = request[http::field::user_agent]; + } + + std::pmr::string referer { hctx.alloc }; + if (request.count(http::field::referer)) + { + referer = request[http::field::referer]; + } + + XLOG_DBG << "connection id: " << m_connection_id << ", http file: " << hctx.target_ + << ", size: " << content_length + << (request.count("Range") ? ", range: " + std::pmr::string(request["Range"], hctx.alloc) : std::pmr::string(hctx.alloc)) + << (!user_agent.empty() ? ", user_agent: " + user_agent : std::pmr::string(hctx.alloc)) + << (!referer.empty() ? ", referer: " + referer : std::pmr::string(hctx.alloc)); + + http::status st = http::status::ok; + auto range = parser_http_ranges(request["Range"]); + + // 只支持一个 range 的请求, 不支持多个 range 的请求. 
+ if (range.size() == 1) + { + st = http::status::partial_content; + auto& r = range.front(); + + // 起始位置为 -1, 表示从文件末尾开始读取, 例如 Range: -500 + // 则表示读取文件末尾的 500 字节. + if (r.first == -1) + { + // 如果第二个参数也为 -1, 则表示请求有问题, 返回 416. + if (r.second < 0) + { + co_await default_http_route(request, fake_416_content, http::status::range_not_satisfiable); + co_return; + } + else if (r.second >= 0) + { + // 计算起始位置和结束位置, 例如 Range: -5 + // 则表示读取文件末尾的 5 字节. + // content_length - r.second 表示起始位置. + // content_length - 1 表示结束位置. + // 例如文件长度为 10 字节, 则起始位置为 5, + // 结束位置为 9(数据总长度为[0-9]), 一共 5 字节. + r.first = content_length - r.second; + r.second = content_length - 1; + } + } + else if (r.second == -1) + { + // 起始位置为正数, 表示从文件头开始读取, 例如 Range: 500 + // 则表示读取文件头的 500 字节. + if (r.first < 0) + { + co_await default_http_route(request, fake_416_content, http::status::range_not_satisfiable); + co_return; + } + else + { + r.second = content_length - 1; + + if (r.first == content_length) + { + std::pmr::string content_range{hctx.alloc}; + fmt::format_to(std::back_inserter(content_range), "bytes */{}", r.second, r.second, content_length); + + span_response res{ + std::piecewise_construct, + std::make_tuple(boost::span{fake_416_content, sizeof (fake_416_content) - 1}), + std::make_tuple(http::status::range_not_satisfiable, request.version(), hctx.alloc) + }; + + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(hctx.alloc)); + res.set(http::field::content_type, "text/html; charset=UTF-8"); + res.set(http::field::content_range, content_range); + + res.keep_alive(hctx.request_.keep_alive()); + res.prepare_payload(); + + span_response_serializer sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + co_return; + } + } + } +#if defined (BOOST_ASIO_HAS_FILE) + file.seek(r.first, net::stream_file::seek_set); +#else + file.seekg(r.first, std::ios_base::beg); +#endif + } + + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << 
", open target: " << path << " failed: " << ec.message(); + // FILE OPEN FAILED + // 返回 502 + st = http::status::internal_server_error; + + span_response res{ + std::piecewise_construct, + std::make_tuple(boost::span{fake_502_content, sizeof (fake_502_content) - 1}), + std::make_tuple(http::status::found, request.version(), hctx.alloc) + }; + + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(hctx.alloc)); + res.set(http::field::content_type, "text/html; charset=utf-8"); + res.keep_alive(true); + res.prepare_payload(); + + span_response_serializer sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", send 502 err: " << ec.message(); + } + + co_return; + } + + custom_body_response res{ + std::piecewise_construct, + std::make_tuple(), + std::make_tuple(st, request.version(), hctx.alloc) + }; + + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(hctx.alloc)); + auto ext = strutil::to_lower(fs::path(path).extension().string()); + if (global_mimes.count(ext)) + res.set(http::field::content_type, global_mimes.at(ext)); + else + res.set(http::field::content_type, "text/plain"); + + if (st == http::status::ok) + { + res.set(http::field::accept_ranges, "bytes"); + } + + if (st == http::status::partial_content) + { + const auto& r = range.front(); + + if (r.second < r.first && r.second >= 0) + { + co_await default_http_route(request, fake_416_content, http::status::range_not_satisfiable); + co_return; + } + + std::pmr::string content_range{hctx.alloc}; + fmt::format_to(std::back_inserter(content_range), "bytes {}-{}/{}", r.first, r.second, content_length); + + content_length = r.second - r.first + 1; + res.set(http::field::content_range, content_range); + } + + res.keep_alive(hctx.request_.keep_alive()); + res.content_length(content_length); + + custom_body_response_serializer sr(res); + + 
co_await http::async_write_header(m_local_socket, sr, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", http async_write_header: " << ec.message(); + + co_return; + } + + auto buf_size = 64 * 1024; + if (m_option.tcp_rate_limit_ > 0 && m_option.tcp_rate_limit_ < buf_size) + { + buf_size = m_option.tcp_rate_limit_; + } + + std::streamsize total = 0; + stream_rate_limit(m_local_socket, m_option.tcp_rate_limit_); +#if defined (BOOST_ASIO_HAS_FILE) + total = co_await transfer(file, m_local_socket, content_length); +#else + std::unique_ptr bufs((char*)std::malloc(buf_size), &std::free); + char* buf = bufs.get(); + + do + { + auto remain_to_read = std::min(buf_size, content_length - total); + auto bytes_transferred = fileop::read(file, std::span(buf, remain_to_read)); + if (bytes_transferred == 0 || total >= (std::streamsize)content_length) + { + break; + } + + bytes_transferred = std::min(bytes_transferred, content_length - total); + + stream_expires_after(m_local_socket, std::chrono::seconds(m_option.tcp_timeout_)); + + co_await net::async_write(m_local_socket, net::buffer(buf, bytes_transferred), net::transfer_all(), net_awaitable[ec]); + total += bytes_transferred; + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", http async_write: " << ec.message() + << ", already write: " << total; + co_return; + } + } + while (total < content_length); +#endif + if (ec) + { + m_local_socket.close(ec); + + XLOG_WARN << "connection id: " << m_connection_id << ", http async_write: " << ec.message() + << ", already write: " << total; + } + + XLOG_DBG << "connection id: " << m_connection_id << ", http request: " << hctx.target_ << ", completed, size: " << total; + + co_return; + } + + template + struct route_op + { + boost::asio::awaitable operator()(auto* _proxy_session, auto target, auto& http_ctx, auto alloc) const + { + if (auto result = ctre::match( target ) ) + { + boost::hana::for_each(std::make_index_sequence(), 
[&](auto element) + { + // 将 正则匹配到的 () 子串,给依次 push 到 http_ctx.command_ 这个容器里. + http_ctx.command_.push_back(result.template get()); + }); + co_await (_proxy_session->*func)(http_ctx); + co_return true; + } + co_return false; + } + }; + + template + boost::asio::awaitable routes(proxy_session* _proxy_session, auto& target, auto& http_ctx, auto alloc) + { + // 依次等待 route_op 执行,因为是 || 所以如果一个成功了,剩下的就不等了. + ( (co_await RouteOPs(_proxy_session, target, http_ctx, alloc)) || ...); + } + + net::awaitable proxy_session::normal_web_server(string_request& req, pmr_alloc_t alloc) + { + boost::system::error_code ec; + + bool keep_alive = false; + bool has_read_header = true; + + for (; !m_abort;) + { + std::optional parser; + if (!has_read_header) + { + // normal_web_server 调用是从 http_proxy_get + // 跳转过来的, 该函数已经读取了请求头, 所以第1次不需 + // 要再次读取请求头, 即 has_read_header 为 true. + // 当 keepalive 时,需要读取请求头, 此时 has_read_header + // 为 false, 则在此读取和解析后续的 http 请求头. + parser.emplace(std::piecewise_construct, std::make_tuple(alloc), std::make_tuple(alloc)); + parser->body_limit(1024 * 512); // 512k + m_local_buffer.consume(m_local_buffer.size()); + + co_await http::async_read_header(m_local_socket, m_local_buffer, *parser, net_awaitable[ec]); + if (ec) + { + XLOG_DBG << "connection id: " << m_connection_id << (keep_alive ? 
", keepalive" : "") + << ", web async_read_header: " << ec.message(); + co_return; + } + + req = parser->release(); + } + + if (req[http::field::expect] == "100-continue") + { + http::response res; + res.version(11); + res.result(http::status::method_not_allowed); + + co_await http::async_write(m_local_socket, res, net_awaitable[ec]); + if (ec) + { + XLOG_DBG << "connection id: " << m_connection_id << ", web expect async_write: " << ec.message(); + } + co_return; + } + + has_read_header = false; + keep_alive = req.keep_alive(); + + if (beast::websocket::is_upgrade(req)) + { + std::pmr::string fake_page{alloc}; + + fmt::vformat_to(std::back_inserter(fake_page), fake_404_content_fmt, fmt::make_format_args(server_date_string(alloc))); + + co_await net::async_write(m_local_socket, net::buffer(fake_page), net::transfer_all(), + net_awaitable[ec]); + + co_return; + } + + std::pmr::string target{req.target(), alloc}; + std::string_view target_pv{target}; + boost::match_results< + std::pmr::string::const_iterator, + std::pmr::polymorphic_allocator> + > what{alloc}; + + http_context http_ctx{ + alloc, + std::pmr::vector{alloc}, + req, + req.target(), + make_real_target_path(req.target()) + }; + + co_await routes< + route_op{}, + route_op{}, + route_op{} + >(this, target_pv, http_ctx, alloc); + + if (!keep_alive) + { + break; + } + continue; + } + + co_await m_local_socket.lowest_layer().async_wait(net::socket_base::wait_read, net_awaitable[ec]); + + co_return; + } + + int proxy_session::http_authorization(std::string_view pa) + { + if (m_option.auth_users_.empty()) + { + return PROXY_AUTH_SUCCESS; + } + + if (pa.empty()) + { + return PROXY_AUTH_NONE; + } + + auto pos = pa.find(' '); + if (pos == std::string::npos) + { + return PROXY_AUTH_ILLEGAL; + } + + auto type = pa.substr(0, pos); + auto auth = pa.substr(pos + 1); + + if (type != "Basic") + { + return PROXY_AUTH_ILLEGAL; + } + + char buff[1024]; + std::pmr::monotonic_buffer_resource mbr(buff, sizeof buff); + pmr_alloc_t 
alloc(&mbr); + + std::pmr::string userinfo(beast::detail::base64::decoded_size(auth.size()), 0, alloc); + auto [len, _] = beast::detail::base64::decode((char*)userinfo.data(), auth.data(), auth.size()); + userinfo.resize(len); + + pos = userinfo.find(':'); + + std::pmr::string uname{userinfo.substr(0, pos), alloc}; + std::pmr::string passwd{userinfo.substr(pos + 1), alloc}; + + bool verify_passed = m_option.auth_users_.empty(); + + for (auto [user, pwd] : m_option.auth_users_) + { + if (uname == user && passwd == pwd) + { + verify_passed = true; + user_rate_limit_config(user); + break; + } + } + + auto endp = m_local_socket.remote_endpoint(); + auto client = endp.address().to_string(); + client += ":" + std::to_string(endp.port()); + + if (!verify_passed) + { + return PROXY_AUTH_FAILED; + } + + return PROXY_AUTH_SUCCESS; + } + + net::awaitable proxy_session::http_proxy_get() + { + boost::system::error_code ec; + bool keep_alive = false; + bool first = true; + + while (!m_abort) + { + std::array pre_alloc_buf; + std::pmr::monotonic_buffer_resource mbr(pre_alloc_buf.data(), pre_alloc_buf.size()); + pmr_alloc_t alloc(&mbr); + std::optional parser; + parser.emplace(std::piecewise_construct, std::make_tuple(alloc), std::make_tuple(alloc)); + + parser->body_limit(1024 * 1024 * 10); + if (!first) + { + m_local_buffer.consume(m_local_buffer.size()); + } + + // 读取 http 请求头. + co_await http::async_read(m_local_socket, m_local_buffer, *parser, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << (keep_alive ? 
", keepalive" : "") + << ", http_proxy_get request async_read: " << ec.message(); + + co_return !first; + } + + auto req = parser->release(); + auto mth = std::pmr::string(req.method_string(), alloc); + auto target_view = std::pmr::string(req.target(), alloc); + auto pa = std::pmr::string(req[http::field::proxy_authorization], alloc); + + keep_alive = req.keep_alive(); + + XLOG_DBG << "connection id: " << m_connection_id << ", method: " << mth << ", target: " << target_view + << (pa.empty() ? std::pmr::string(alloc) : ", proxy_authorization: " + pa); + + // 判定是否为 GET url 代理模式. + bool get_url_proxy = false; + if (boost::istarts_with(target_view, "https://") || boost::istarts_with(target_view, "http://")) + { + get_url_proxy = true; + } + + // http 代理认证, 如果请求的 rarget 不是 http url 或认证 + // 失败, 则按正常 web 请求处理. + auto auth = http_authorization(pa); + if (auth != PROXY_AUTH_SUCCESS || !get_url_proxy) + { + auto expect_url = urls::parse_absolute_uri(target_view); + + if (!expect_url.has_error()) + { + XLOG_WARN << "connection id: " << m_connection_id << ", proxy err: " << pauth_error_message(auth); + + co_return !first; + } + + // 如果 doc 目录为空, 则不允许访问目录 + // 这里直接返回错误页面. + if (m_option.doc_directory_.empty()) + { + co_return !first; + } + + // htpasswd 表示需要用户认证. + if (m_option.htpasswd_) + { + // 处理 http 认证, 如果客户没有传递认证信息, 则返回 401. + // 如果用户认证信息没有设置, 则直接返回 401. + auto auth = req[http::field::authorization]; + if (auth.empty() || m_option.auth_users_.empty()) + { + XLOG_WARN << "connection id: " << m_connection_id + << ", auth error: " << (auth.empty() ? "no auth" : "no user"); + + co_await unauthorized_http_route(req); + co_return true; + } + + auto auth_result = http_authorization(auth); + if (auth_result != PROXY_AUTH_SUCCESS) + { + XLOG_WARN << "connection id: " << m_connection_id + << ", auth error: " << pauth_error_message(auth_result); + + co_await unauthorized_http_route(req); + co_return true; + } + } + + // 如果不允许目录索引, 检查请求的是否为文件, 如果是具体文件则按文 + // 件请求处理, 否则返回 403. 
+ if (!m_option.autoindex_) + { + auto path = make_real_target_path(req.target()); + + if (!fs::is_directory(path, ec)) + { + co_await normal_web_server(req, alloc); + co_return true; + } + + // 如果不允许目录索引, 则直接返回 403 forbidden. + co_await forbidden_http_route(req); + + co_return true; + } + + // 按正常 http 目录请求来处理. + co_await normal_web_server(req, alloc); + co_return true; + } + + const auto authority_pos = target_view.find_first_of("//") + 2; + + std::string host; + + const auto scheme_id = urls::string_to_scheme(target_view.substr(0, authority_pos - 3)); + uint16_t port = urls::default_port(scheme_id); + + auto host_pos = authority_pos; + auto host_end = std::string::npos; + + auto port_start = std::string::npos; + + for (auto pos = authority_pos; pos < target_view.size(); pos++) + { + const auto& c = target_view[pos]; + if (c == '@') + { + host_pos = pos + 1; + + host_end = std::string::npos; + port_start = std::string::npos; + } + else if (c == ':') + { + host_end = pos; + port_start = pos + 1; + } + else if (c == '/' || (pos + 1 == target_view.size())) + { + if (host_end == std::string::npos) + { + host_end = pos; + } + host = target_view.substr(host_pos, host_end - host_pos); + + if (port_start != std::string::npos) + { + port = (uint16_t)std::atoi(target_view.substr(port_start, pos - port_start).c_str()); + } + + break; + } + } + + if (!m_remote_socket.is_open()) + { + // 连接到目标主机. + co_await start_connect_host(host, port ? port : 80, ec, true); + if (ec) + { + XLOG_FWARN("connection id: {}," + " connect to target {}:{} error: {}", + m_connection_id, host, port, ec.message()); + + co_return !first; + } + } + + // 处理代理请求头. 
+ const auto path_pos = target_view.find_first_of("/", authority_pos); + if (path_pos == std::string_view::npos) + { + req.target("/"); + } + else + { + req.target(std::string(target_view.substr(path_pos))); + } + + req.set(http::field::host, host); + + if (req.find(http::field::connection) == req.end() && req.find(http::field::proxy_connection) != req.end()) + { + req.set(http::field::connection, req[http::field::proxy_connection]); + } + + req.erase(http::field::proxy_authorization); + req.erase(http::field::proxy_connection); + + co_await http::async_write(m_remote_socket, req, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id + << ", http_proxy_get request async_write: " << ec.message(); + co_return !first; + } + + m_local_buffer.consume(m_local_buffer.size()); + beast::flat_buffer buf; + + response_parser _parser{std::piecewise_construct, std::make_tuple(alloc), std::make_tuple(alloc)}; + _parser.body_limit(1024 * 1024 * 10); + + auto bytes = co_await http::async_read(m_remote_socket, buf, _parser, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id + << ", http_proxy_get response async_read: " << ec.message(); + co_return !first; + } + + co_await http::async_write(m_local_socket, _parser.release(), net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id + << ", http_proxy_get response async_write: " << ec.message(); + co_return !first; + } + + XLOG_DBG << "connection id: " << m_connection_id << ", transfer completed" + << ", remote to local: " << bytes; + + first = false; + if (!keep_alive) + { + break; + } + } + + co_return true; + } + net::awaitable proxy_session::http_proxy_connect() + { + http::request req; + boost::system::error_code ec; + + // 读取 http 请求头. 
+ co_await http::async_read(m_local_socket, m_local_buffer, req, net_awaitable[ec]); + if (ec) + { + XLOG_ERR << "connection id: " << m_connection_id << ", http_proxy_connect async_read: " << ec.message(); + + co_return false; + } + + auto mth = std::string(req.method_string()); + auto target_view = std::string(req.target()); + auto pa = std::string(req[http::field::proxy_authorization]); + + XLOG_DBG << "connection id: " << m_connection_id << ", method: " << mth << ", target: " << target_view + << (pa.empty() ? std::string() : ", proxy_authorization: " + pa); + + // http 代理认证. + auto auth = http_authorization(pa); + if (auth != PROXY_AUTH_SUCCESS) + { + XLOG_WARN << "connection id: " << m_connection_id << ", proxy err: " << pauth_error_message(auth); + + auto fake_page = fmt::vformat(fake_407_content_fmt, fmt::make_format_args(server_date_string())); + + co_await net::async_write(m_local_socket, net::buffer(fake_page), net::transfer_all(), net_awaitable[ec]); + + co_return true; + } + + auto pos = target_view.find(':'); + if (pos == std::string::npos) + { + XLOG_ERR << "connection id: " << m_connection_id << ", illegal target: " << target_view; + co_return false; + } + + std::string host(target_view.substr(0, pos)); + std::string port(target_view.substr(pos + 1)); + + co_await start_connect_host(host, static_cast(std::atol(port.c_str())), ec, true); + if (ec) + { + XLOG_FWARN("connection id: {}," + " connect to target {}:{} error: {}", + m_connection_id, host, port, ec.message()); + co_return false; + } + + http::response res{http::status::ok, req.version()}; + res.reason("Connection established"); + + co_await http::async_write(m_local_socket, res, net_awaitable[ec]); + if (ec) + { + XLOG_FWARN("connection id: {}," + " async write response {}:{} error: {}", + m_connection_id, host, port, ec.message()); + co_return false; + } + + auto [l2r_transferred, r2l_transferred] = co_await ( + transfer(m_local_socket, m_remote_socket) && + transfer(m_remote_socket, 
m_local_socket) + ); + + XLOG_DBG << "connection id: " << m_connection_id << ", transfer completed" + << ", local to remote: " << l2r_transferred << ", remote to local: " << r2l_transferred; + + co_return true; + } + + net::awaitable proxy_session::socks_auth() + { + // +----+------+----------+------+----------+ + // |VER | ULEN | UNAME | PLEN | PASSWD | + // +----+------+----------+------+----------+ + // | 1 | 1 | 1 to 255 | 1 | 1 to 255 | + // +----+------+----------+------+----------+ + // [ ] + + boost::system::error_code ec; + m_local_buffer.consume(m_local_buffer.size()); + auto bytes = + co_await net::async_read(m_local_socket, m_local_buffer, net::transfer_exactly(2), net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id + << ", read client username/passwd error: " << ec.message(); + co_return false; + } + + auto p = net::buffer_cast(m_local_buffer.data()); + int auth_version = read(p); + if (auth_version != 1) + { + XLOG_WARN << "connection id: " << m_connection_id << ", socks negotiation, unsupported socks5 protocol"; + co_return false; + } + int name_length = read(p); + if (name_length <= 0 || name_length > 255) + { + XLOG_WARN << "connection id: " << m_connection_id << ", socks negotiation, invalid name length"; + co_return false; + } + name_length += 1; + + // +----+------+----------+------+----------+ + // |VER | ULEN | UNAME | PLEN | PASSWD | + // +----+------+----------+------+----------+ + // | 1 | 1 | 1 to 255 | 1 | 1 to 255 | + // +----+------+----------+------+----------+ + // [ ] + m_local_buffer.consume(m_local_buffer.size()); + bytes = co_await net::async_read(m_local_socket, m_local_buffer, net::transfer_exactly(name_length), + net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", read client username error: " << ec.message(); + co_return false; + } + + std::string uname; + + p = net::buffer_cast(m_local_buffer.data()); + for (size_t i = 0; i < bytes - 1; i++) + { + 
uname.push_back(read(p)); + } + + int passwd_len = read(p); + if (passwd_len <= 0 || passwd_len > 255) + { + XLOG_WARN << "connection id: " << m_connection_id << ", socks negotiation, invalid passwd length"; + co_return false; + } + + // +----+------+----------+------+----------+ + // |VER | ULEN | UNAME | PLEN | PASSWD | + // +----+------+----------+------+----------+ + // | 1 | 1 | 1 to 255 | 1 | 1 to 255 | + // +----+------+----------+------+----------+ + // [ ] + m_local_buffer.consume(m_local_buffer.size()); + bytes = co_await net::async_read(m_local_socket, m_local_buffer, net::transfer_exactly(passwd_len), + net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", read client passwd error: " << ec.message(); + co_return false; + } + + std::string passwd; + + p = net::buffer_cast(m_local_buffer.data()); + for (size_t i = 0; i < bytes; i++) + { + passwd.push_back(read(p)); + } + + // SOCKS5验证用户和密码. + auto endp = m_local_socket.remote_endpoint(); + auto client = endp.address().to_string(); + client += ":" + std::to_string(endp.port()); + + // 用户认证逻辑. + bool verify_passed = m_option.auth_users_.empty(); + + for (auto [user, pwd] : m_option.auth_users_) + { + if (uname == user && passwd == pwd) + { + verify_passed = true; + user_rate_limit_config(user); + break; + } + } + + XLOG_DBG << "connection id: " << m_connection_id << ", auth: " << uname << ", passwd: " << passwd + << ", client: " << client; + + net::streambuf wbuf; + auto wp = net::buffer_cast(wbuf.prepare(16)); + write(0x01, wp); // version 只能是1. + if (verify_passed) + { + write(0x00, wp); // 认证通过返回0x00, 其它值为失败. + } + else + { + write(0x01, wp); // 认证返回0x01为失败. + } + + // 返回认证状态. 
+ // +----+--------+ + // |VER | STATUS | + // +----+--------+ + // | 1 | 1 | + // +----+--------+ + wbuf.commit(2); + co_await net::async_write(m_local_socket, wbuf, net::transfer_exactly(2), net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", server write status error: " << ec.message(); + co_return false; + } + + co_return verify_passed; + } + + std::pmr::vector proxy_session::format_path_list( + const std::string& path, boost::system::error_code& ec, pmr_alloc_t alloc) + { + fs::directory_iterator end; + fs::directory_iterator it(path, ec); + if (ec) + { + XLOG_DBG << "connection id: " << m_connection_id << ", format_path_list read dir: " << path + << ", error: " << ec.message(); + return {}; + } + + std::pmr::vector path_list{alloc}; + std::pmr::vector file_list{alloc}; + + for (; it != end && !m_abort; it++) + { + const auto& item = it->path(); + + auto [time_string, unc_path] = file_last_wirte_time(item); + // std::wstring time_string = boost::nowide::widen(ftime); + + std::pmr::string rpath{alloc}; + + if (fs::is_directory(unc_path.empty() ? 
item : unc_path, ec)) + { + rpath = item.filename().string(); + rpath += "/"; + + auto show_path = rpath; + if (show_path.size() > 50) + { + show_path = show_path.substr(0, 47); + show_path += "..>"; + } + std::pmr::string str(alloc); + fmt::format_to(std::back_inserter(str), body_fmt, rpath, show_path, time_string, "-"); + + path_list.push_back(std::move(str)); + } + else + { + rpath = item.filename().string(); + std::string filesize; + if (unc_path.empty()) + { + unc_path = item; + } + auto sz = static_cast(fs::file_size(unc_path, ec)); + if (ec) + { + sz = 0; + } + filesize = strutil::add_suffix(sz); + auto show_path = rpath; + if (show_path.size() > 50) + { + show_path = show_path.substr(0, 47); + show_path += "..>"; + } + std::pmr::string str(alloc); + fmt::format_to(std::back_inserter(str), body_fmt, rpath, show_path, time_string, filesize); + + file_list.push_back(std::move(str)); + } + } + + ec = {}; + + path_list.insert(path_list.end(), file_list.begin(), file_list.end()); + + return path_list; + } + + std::pmr::string proxy_session::server_date_string(pmr_alloc_t alloc) + { + auto time = std::time(nullptr); + auto gmt = gmtime((const time_t*)&time); + + std::pmr::string str(64, '\0', alloc); + auto ret = strftime((char*)str.data(), 64, "%a, %d %b %Y %H:%M:%S GMT", gmt); + str.resize(ret); + + return str; + } + + fs::path proxy_session::path_cat(std::string_view doc, std::string_view target) + { + size_t start_pos = 0; + for (auto& c : target) + { + if (!(c == '/' || c == '\\')) + { + break; + } + + start_pos++; + } + + std::array pre_alloc_buf; + std::pmr::monotonic_buffer_resource mbr(pre_alloc_buf.data(), pre_alloc_buf.size()); + std::pmr::polymorphic_allocator alloc(&mbr); + + std::string_view sv; + std::pmr::string slash{"/", alloc}; + + if (start_pos < target.size()) + { + sv = target.substr(start_pos); + } +#ifdef WIN32 + slash = "\\"; + if (doc.back() == '/' || doc.back() == '\\') + { + slash = ""; + } + auto filename = std::pmr::string(doc, alloc) 
+ slash + std::pmr::string(sv, alloc); + return fs::path(std::string_view(filename)); +#else + if (doc.back() == '/') + { + slash = ""; + } + return fs::path(std::pmr::string(doc, alloc) + slash + std::pmr::string(sv, alloc)); +#endif // WIN32 + } + + net::awaitable proxy_session::default_http_route(const string_request& request, std::string response, + http::status status) + { + boost::system::error_code ec; + + std::array pre_alloc_buf; + std::pmr::monotonic_buffer_resource mbr(pre_alloc_buf.data(), pre_alloc_buf.size()); + pmr_alloc_t alloc(&mbr); + + string_response res{std::piecewise_construct, std::make_tuple(alloc), + std::make_tuple(status, request.version(), alloc)}; + + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(alloc)); + res.set(http::field::content_type, "text/html"); + + res.keep_alive(true); + res.body() = response; + res.prepare_payload(); + + string_response_serializer sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", default http route err: " << ec.message(); + } + + co_return; + } + + net::awaitable proxy_session::location_http_route(const string_request& request, const std::string& path) + { + boost::system::error_code ec; + + std::array pre_alloc_buf; + std::pmr::monotonic_buffer_resource mbr(pre_alloc_buf.data(), pre_alloc_buf.size()); + pmr_alloc_t alloc(&mbr); + + string_response res{std::piecewise_construct, std::make_tuple(alloc), + std::make_tuple(http::status::moved_permanently, request.version(), alloc)}; + + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(alloc)); + res.set(http::field::content_type, "text/html"); + res.set(http::field::location, path); + + res.keep_alive(true); + res.body() = fake_302_content; + res.prepare_payload(); + + string_response_serializer sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); 
+ if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", location http route err: " << ec.message(); + } + + co_return; + } + + net::awaitable proxy_session::forbidden_http_route(const string_request& request) + { + boost::system::error_code ec; + + std::array pre_alloc_buf; + std::pmr::monotonic_buffer_resource mbr(pre_alloc_buf.data(), pre_alloc_buf.size()); + pmr_alloc_t alloc(&mbr); + + string_response res{std::piecewise_construct, std::make_tuple(alloc), + std::make_tuple(http::status::forbidden, request.version(), alloc)}; + + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(alloc)); + res.set(http::field::content_type, "text/html"); + + res.keep_alive(true); + res.body() = fake_403_content; + res.prepare_payload(); + + http::serializer> sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", forbidden http route err: " << ec.message(); + } + + co_return; + } + + net::awaitable proxy_session::unauthorized_http_route(const string_request& request) + { + boost::system::error_code ec; + + std::array pre_alloc_buf; + std::pmr::monotonic_buffer_resource mbr(pre_alloc_buf.data(), pre_alloc_buf.size()); + pmr_alloc_t alloc(&mbr); + + string_response res{std::piecewise_construct, std::make_tuple(alloc), + std::make_tuple(http::status::unauthorized, request.version(), alloc)}; + + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(alloc)); + res.set(http::field::content_type, "text/html; charset=UTF-8"); + res.set(http::field::www_authenticate, "Basic realm=\"proxy\""); + + res.keep_alive(true); + res.body() = fake_401_content; + res.prepare_payload(); + + http::serializer> sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", unauthorized http route err: " << ec.message(); + } + 
+ co_return; + } +} // namespace proxy diff --git a/server/proxy_server/main.cpp b/server/proxy_server/main.cpp index f22feec5e3..8b1edf5ecb 100644 --- a/server/proxy_server/main.cpp +++ b/server/proxy_server/main.cpp @@ -11,6 +11,8 @@ #include +#include + #include #include #include @@ -307,6 +309,8 @@ namespace std int main(int argc, char** argv) { + boost::nowide::args a(argc,argv); // Fix arguments - make them UTF-8 + platform_init(); std::string config; @@ -404,10 +408,19 @@ and/or open issues at https://github.com/Jackarain/proxy)" po::notify(vm); } - if (disable_logs || log_dir.empty()) - xlogger::toggle_write_logging(false); + if (disable_logs && log_dir.empty()) + { + xlogger::turnoff_logging(); + } else - xlogger::init_logging(log_dir); + { + if (log_dir.empty()) + xlogger::toggle_write_logging(false); + else + xlogger::init_logging(log_dir); + if (disable_logs) + xlogger::toggle_console_logging(false); + } print_args(argc, argv, vm); diff --git a/third_party/compile-time-regular-expressions/.conan/test_package/CMakeLists.txt b/third_party/compile-time-regular-expressions/.conan/test_package/CMakeLists.txt new file mode 100644 index 0000000000..c64b54d8b4 --- /dev/null +++ b/third_party/compile-time-regular-expressions/.conan/test_package/CMakeLists.txt @@ -0,0 +1,11 @@ +project(test_package CXX) +cmake_minimum_required(VERSION 2.8 FATAL_ERROR) + +set(CMAKE_VERBOSE_MAKEFILE TRUE) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") + +include(${CMAKE_BINARY_DIR}/conanbuildinfo.cmake) +conan_basic_setup() + +add_executable(${PROJECT_NAME} test_package.cpp) +target_link_libraries(${PROJECT_NAME} ${CONAN_LIBS}) diff --git a/third_party/compile-time-regular-expressions/.conan/test_package/conanfile.py b/third_party/compile-time-regular-expressions/.conan/test_package/conanfile.py new file mode 100644 index 0000000000..d4a52663f9 --- /dev/null +++ b/third_party/compile-time-regular-expressions/.conan/test_package/conanfile.py @@ -0,0 +1,20 @@ +#!/usr/bin/env 
python +# -*- coding: utf-8 -*- + +from conans import ConanFile, CMake, tools, RunEnvironment +import os + + +class TestPackageConan(ConanFile): + settings = "os", "compiler", "build_type", "arch" + generators = "cmake" + + def build(self): + cmake = CMake(self) + cmake.configure() + cmake.build() + + def test(self): + assert os.path.exists(os.path.join(self.deps_cpp_info["CTRE"].rootpath, "licenses", "LICENSE")) + bin_path = os.path.join("bin", "test_package") + self.run(bin_path, run_environment=True) diff --git a/third_party/compile-time-regular-expressions/.conan/test_package/test_package.cpp b/third_party/compile-time-regular-expressions/.conan/test_package/test_package.cpp new file mode 100644 index 0000000000..b1af18f0f2 --- /dev/null +++ b/third_party/compile-time-regular-expressions/.conan/test_package/test_package.cpp @@ -0,0 +1,32 @@ +#include +#include +#include +#include + +#include + +using namespace std::string_view_literals; +using namespace ctre::literals; + +struct date { std::string_view year; std::string_view month; std::string_view day; }; + +static constexpr ctll::fixed_string pattern = "^([0-9]{4})/([0-9]{1,2}+)/([0-9]{1,2}+)$"; + +constexpr std::optional extract_date(std::string_view s) noexcept { + if (auto [whole, year, month, day] = ctre::match(s); whole + ) { + return date{year.to_view(), month.to_view(), day.to_view()}; + } else { + return std::nullopt; + } +} + +int main() { + + assert(extract_date("2018/08/27"sv).has_value()); + assert(extract_date("2018/08/27"sv)->year == "2018"sv); + assert(extract_date("2018/08/27"sv)->month == "08"sv); + assert(extract_date("2018/08/27"sv)->day == "27"sv); + + return EXIT_SUCCESS; +} diff --git a/third_party/compile-time-regular-expressions/.github/workflows/tests.yml b/third_party/compile-time-regular-expressions/.github/workflows/tests.yml new file mode 100644 index 0000000000..08bd127f20 --- /dev/null +++ b/third_party/compile-time-regular-expressions/.github/workflows/tests.yml @@ -0,0 +1,81 
@@ +name: Tests +on: [push, pull_request] +jobs: + appleclang: + strategy: + matrix: + macos: [12, 13] + standard: [17, 20] + fail-fast: false + name: "AppleClang (MacOS ${{ matrix.macos }}, C++${{ matrix.standard }})" + runs-on: macos-${{ matrix.macos }} + steps: + - uses: actions/checkout@v2 + - run: c++ -v + - run: make CXX=c++ CXX_STANDARD=2a + if: ${{ matrix.standard == '20' }} + - run: make CXX=c++ CXX_STANDARD=17 + if: ${{ matrix.standard == '17' }} + gcc: + strategy: + matrix: + gcc: [8, 9, 10, 11, 13] + standard: [17, 20] + fail-fast: false + name: "GCC ${{ matrix.gcc }} (C++${{ matrix.standard }})" + runs-on: ubuntu-20.04 + steps: + - name: "Install GCC" + uses: egor-tensin/setup-gcc@v1 + with: + version: ${{ matrix.gcc }} + - uses: actions/checkout@v2 + - run: c++ -v + - run: make CXX=c++ CXX_STANDARD=2a + if: ${{ matrix.standard == '20' }} + - run: make CXX=c++ CXX_STANDARD=17 + if: ${{ matrix.gcc < '9' && matrix.standard == '17' }} + - run: make CXX=c++ CXX_STANDARD=17 CXXFLAGS=-DCTRE_ENABLE_LITERALS PEDANTIC="" + if: ${{ matrix.gcc >= '9' && matrix.standard == '17' }} + clang: + strategy: + matrix: + clang: [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + stdlib: ["libc++", "libstdc++"] + standard: [17, 20] + fail-fast: false + name: "Clang ${{ matrix.clang }} (C++${{ matrix.standard }}, ${{ matrix.stdlib }})" + runs-on: ubuntu-20.04 + steps: + - name: "Install Clang" + uses: egor-tensin/setup-clang@v1 + with: + version: ${{ matrix.clang }} + - name: "Install libc++" + if: ${{ matrix.stdlib == 'libc++' }} + run: sudo apt-get install libc++abi-${{ matrix.clang }}-dev libc++1-${{ matrix.clang }} libc++-${{ matrix.clang }}-dev + - uses: actions/checkout@v2 + - run: c++ -v + - run: make CXX=c++ CXX_STANDARD=2a CXXFLAGS=-stdlib=${{ matrix.stdlib }} + if: ${{ matrix.standard == '20' }} + - run: make CXX=c++ CXX_STANDARD=17 CXXFLAGS=-stdlib=${{ matrix.stdlib }} + if: ${{ matrix.standard == '17' }} + msvc: + strategy: + matrix: + version: [14.29, ""] + fail-fast: 
false + name: "MSVC ${{ matrix.version }} (C++20)" + runs-on: windows-2022 + steps: + - name: Add MSVC ${{ matrix.version }} to PATH + uses: ilammy/msvc-dev-cmd@v1 + with: + toolset: ${{ matrix.version }} + - name: "Install Ninja & CMake" + run: choco install ninja cmake + - uses: actions/checkout@v2 + - name: "Configure" + run: cmake . -G Ninja -B build -DCTRE_BUILD_TESTS=ON -DCTRE_CXX_STANDARD=20 + - name: "Build" + run: cmake --build build --target ctre-test --verbose diff --git a/third_party/compile-time-regular-expressions/.gitignore b/third_party/compile-time-regular-expressions/.gitignore new file mode 100644 index 0000000000..a644be0f03 --- /dev/null +++ b/third_party/compile-time-regular-expressions/.gitignore @@ -0,0 +1,22 @@ +*.o +*.d +**/*.tmp +test +result +tests/benchmark-exec/* +!tests/benchmark-exec/Makefile +!tests/benchmark-exec/.gitignore +!tests/benchmark-exec/.tm_properties +!tests/benchmark-exec/*.cpp +!tests/benchmark-exec/*.hpp +!tests/benchmark-exec/*.js +*.pyc +.conan/test_package/build +mtent12.txt +*.zip +tests/benchmark-range/* +!tests/benchmark-range/*.cpp +!tests/benchmark-range/*.hpp +build +cmake-build-*/* +.idea/* \ No newline at end of file diff --git a/third_party/compile-time-regular-expressions/.gitmodules b/third_party/compile-time-regular-expressions/.gitmodules new file mode 100644 index 0000000000..e69de29bb2 diff --git a/third_party/compile-time-regular-expressions/.tm_properties b/third_party/compile-time-regular-expressions/.tm_properties new file mode 100644 index 0000000000..b08b62add8 --- /dev/null +++ b/third_party/compile-time-regular-expressions/.tm_properties @@ -0,0 +1,3 @@ +exclude = "{$exclude,**/*.dSYM,**/*.d,**/*.o,test,*.tmp}" +include = "{$include,.gitignore,.github,.travis*,.conan}" +excludeInFolderSearch = "{$excludeInFolderSearch,./ctre.hpp}" diff --git a/third_party/compile-time-regular-expressions/.travis.yml b/third_party/compile-time-regular-expressions/.travis.yml new file mode 100644 index 
0000000000..3c280679d7 --- /dev/null +++ b/third_party/compile-time-regular-expressions/.travis.yml @@ -0,0 +1,113 @@ +language: cpp +dist: focal +os: linux + +jobs: + include: + - os: linux + language: python + python: "3.6" + services: + - docker + env: + - COMPILER=g++-8 + - CONAN_GCC_VERSIONS=8 + - CONAN_DOCKER_IMAGE=lasote/conangcc8 + install: + - pip install -U conan conan-package-tools + script: + - python .conan/build.py + + - os: linux + compiler: gcc + env: + - COMPILER=g++-8 + - CXX_STANDARD=17 + addons: + apt: + packages: ['g++-8'] + + - os: linux + compiler: gcc + env: + - COMPILER=g++-8 + - CXX_STANDARD=2a + addons: + apt: + packages: ['g++-8'] + + - os: linux + compiler: clang + env: + - COMPILER=clang++-6.0 + - CXX_STANDARD=17 + addons: + apt: + sources: ['ubuntu-toolchain-r-test', 'llvm-toolchain-trusty-6.0'] + packages: ['g++-8', 'clang-6.0'] + + - os: linux + compiler: clang + env: + - COMPILER=clang++-6.0 + - CXX_STANDARD=2a + addons: + apt: + sources: ['ubuntu-toolchain-r-test', 'llvm-toolchain-trusty-6.0'] + packages: ['g++-8', 'clang-6.0'] + +# FIXME: don't use GCC10 in 17 mode for tests as they are depending on operator"" +# - os: linux +# compiler: gcc +# env: +# - COMPILER=g++-10 +# - CXX_STANDARD=17 +# addons: +# apt: +# packages: ['g++-10'] + + - os: linux + compiler: gcc + env: + - COMPILER=g++-10 + - CXX_STANDARD=20 + addons: + apt: + packages: ['g++-10'] + + - os: osx + osx_image: xcode10 + env: + - CXX_STANDARD=17 + + - os: osx + osx_image: xcode10 + env: + - CXX_STANDARD=2a + + - os: osx + osx_image: xcode11 + env: + - CXX_STANDARD=17 + + - os: osx + osx_image: xcode11 + env: + - CXX_STANDARD=2a + + - os: osx + osx_image: xcode12 + env: + - CXX_STANDARD=17 + + - os: osx + osx_image: xcode12 + env: + - CXX_STANDARD=2a + +install: + - if [[ "${COMPILER}" != "" ]]; then export CXX=${COMPILER}; fi + - uname -a + - $CXX --version +script: + - make CXX_STANDARD=$CXX_STANDARD diff --git 
a/third_party/compile-time-regular-expressions/CMakeLists.txt b/third_party/compile-time-regular-expressions/CMakeLists.txt new file mode 100644 index 0000000000..9e5a80e9a5 --- /dev/null +++ b/third_party/compile-time-regular-expressions/CMakeLists.txt @@ -0,0 +1,196 @@ +cmake_minimum_required(VERSION 3.14...3.29) + +if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.29.20240416") + set(CMAKE_EXPERIMENTAL_CXX_IMPORT_STD "0e5b6991-d74f-4b3d-a41c-cf096e0b2508") + set(CMAKE_CXX_MODULE_STD 1) +endif() + +# When updating to a newer version of CMake, see if we can use the following +project(ctre + HOMEPAGE_URL "https://compile-time.re" + VERSION 3.9.0 + LANGUAGES CXX) +set(PROJECT_DESCRIPTION "Fast compile-time regular expressions with support for matching/searching/capturing during compile-time or runtime.") + +include(CMakePackageConfigHelpers) +include(CMakeDependentOption) +include(GNUInstallDirs) +include(CTest) + +find_program(CTRE_DPKG_BUILDPACKAGE_FOUND dpkg-buildpackage) +find_program(CTRE_RPMBUILD_FOUND rpmbuild) + +cmake_dependent_option(CTRE_BUILD_TESTS "Build ctre Tests" ON + "BUILD_TESTING;CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR" OFF) +cmake_dependent_option(CTRE_BUILD_PACKAGE "Build ctre Packages" ON + "CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR" OFF) +cmake_dependent_option(CTRE_BUILD_PACKAGE_DEB + "Create DEB Package (${PROJECT_NAME})" ON + "CTRE_BUILD_PACKAGE;CTRE_DPKG_BUILDPACKAGE_FOUND" OFF) +cmake_dependent_option(CTRE_BUILD_PACKAGE_RPM + "Create RPM Package (${PROJECT_NAME})" ON + "CTRE_BUILD_PACKAGE;CTRE_RPMBUILD_FOUND" OFF) + +option(CTRE_MODULE "build C++ module" OFF) + +if(CTRE_MODULE) + if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.29.20240416") + add_library(${PROJECT_NAME}) + + target_sources(${PROJECT_NAME} PUBLIC FILE_SET CXX_MODULES TYPE CXX_MODULES FILES ctre.cppm) + target_sources(${PROJECT_NAME} PUBLIC FILE_SET HEADERS TYPE HEADERS + BASE_DIRS + "${CMAKE_CURRENT_SOURCE_DIR}/include" + FILES + include/ctll.hpp + include/ctre/functions.hpp + 
include/ctre/utility.hpp + include/ctre/utf8.hpp + include/ctre/evaluation.hpp + include/ctre/starts_with_anchor.hpp + include/ctre/pcre_actions.hpp + include/ctre/rotate.hpp + include/ctre/iterators.hpp + include/ctre/literals.hpp + include/ctre/return_type.hpp + include/ctre/find_captures.hpp + include/ctre/id.hpp + include/ctre/atoms_characters.hpp + include/ctre/actions/mode.inc.hpp + include/ctre/actions/characters.inc.hpp + include/ctre/actions/class.inc.hpp + include/ctre/actions/look.inc.hpp + include/ctre/actions/sequence.inc.hpp + include/ctre/actions/fusion.inc.hpp + include/ctre/actions/asserts.inc.hpp + include/ctre/actions/capture.inc.hpp + include/ctre/actions/named_class.inc.hpp + include/ctre/actions/backreference.inc.hpp + include/ctre/actions/options.inc.hpp + include/ctre/actions/atomic_group.inc.hpp + include/ctre/actions/set.inc.hpp + include/ctre/actions/hexdec.inc.hpp + include/ctre/actions/repeat.inc.hpp + include/ctre/actions/properties.inc.hpp + include/ctre/actions/boundaries.inc.hpp + include/ctre/operators.hpp + include/ctre/pcre.hpp + include/ctre/atoms_unicode.hpp + include/ctre/range.hpp + include/ctre/wrapper.hpp + include/ctre/first.hpp + include/ctre/flags_and_modes.hpp + include/ctre/atoms.hpp + include/unicode-db.hpp + include/unicode-db/unicode_interface.hpp + include/unicode-db/unicode-db.hpp + include/ctll/parser.hpp + include/ctll/actions.hpp + include/ctll/fixed_string.hpp + include/ctll/list.hpp + include/ctll/utilities.hpp + include/ctll/grammars.hpp + include/ctre.hpp + include/ctre-unicode.hpp + ) + + # we are using `import std;` + if (NOT DEFINED CTRE_CXX_STANDARD OR CTRE_CXX_STANDARD LESS 23) + set(CTRE_CXX_STANDARD 23) + endif() + + target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_${CTRE_CXX_STANDARD}) + + install(TARGETS ${PROJECT_NAME} EXPORT ${PROJECT_NAME}-targets + FILE_SET CXX_MODULES DESTINATION "${CMAKE_INSTALL_LIBDIR}/cxx/${PROJECT_NAME}" + FILE_SET HEADERS DESTINATION "include") + else() + 
message(FATAL_ERROR "unsupported cmake for c++ modules") + endif() +else() + add_library(${PROJECT_NAME} INTERFACE) + + target_include_directories(${PROJECT_NAME} INTERFACE + $ + $) + + if (NOT CTRE_CXX_STANDARD) + set(CTRE_CXX_STANDARD 20) + endif() + + target_compile_features(${PROJECT_NAME} INTERFACE cxx_std_${CTRE_CXX_STANDARD}) + set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_SCAN_FOR_MODULES 0) + + install(TARGETS ${PROJECT_NAME} EXPORT ${PROJECT_NAME}-targets) + install(DIRECTORY include/ DESTINATION include + FILES_MATCHING PATTERN *.hpp) +endif() + +add_library(${PROJECT_NAME}::${PROJECT_NAME} ALIAS ${PROJECT_NAME}) + +if (NOT EXISTS "${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config.cmake.in") + file(WRITE ${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config.cmake.in [[ + @PACKAGE_INIT@ + include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@-targets.cmake") + ]]) +endif() + +configure_package_config_file( + "${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config.cmake.in" + "${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config.cmake" + INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" + NO_SET_AND_CHECK_MACRO + NO_CHECK_REQUIRED_COMPONENTS_MACRO) + +write_basic_package_version_file(ctre-config-version.cmake + VERSION ${PROJECT_VERSION} + COMPATIBILITY SameMajorVersion) + +install(EXPORT ${PROJECT_NAME}-targets + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" + NAMESPACE ${PROJECT_NAME}::) +install( + FILES + "${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake" + "${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config.cmake" + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}) + +if(CTRE_BUILD_TESTS) + add_subdirectory(tests) +endif() + +if (NOT CTRE_BUILD_PACKAGE) + return() +endif() + +list(APPEND source-generators TBZ2 TGZ TXZ ZIP) + +if (CTRE_BUILD_PACKAGE_DEB) + list(APPEND binary-generators "DEB") +endif() + +if (CTRE_BUILD_PACKAGE_RPM) + list(APPEND binary-generators "RPM") +endif() + +set(CPACK_SOURCE_GENERATOR ${source-generators}) 
+set(CPACK_GENERATOR ${binary-generators}) + +set(CPACK_PACKAGE_FILE_NAME "${PROJECT_NAME}-${PROJECT_VERSION}") +set(CPACK_SOURCE_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}") + +set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Hana Dusíková") +set(CPACK_DEBIAN_PACKAGE_DESCRIPTION "${PROJECT_DESCRIPTION}") +set(CPACK_DEBIAN_PACKAGE_NAME "lib${PROJECT_NAME}-dev") + +set(CPACK_RPM_PACKAGE_NAME "lib${PROJECT_NAME}-devel") + +set(PKG_CONFIG_FILE_NAME "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.pc") +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/packaging/pkgconfig.pc.in" "${PKG_CONFIG_FILE_NAME}" @ONLY) +install(FILES "${PKG_CONFIG_FILE_NAME}" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig" +) + +list(APPEND CPACK_SOURCE_IGNORE_FILES /.git/ /build/ .gitignore .DS_Store) + +include(CPack) diff --git a/third_party/compile-time-regular-expressions/LICENSE b/third_party/compile-time-regular-expressions/LICENSE new file mode 100644 index 0000000000..bd8b243dfa --- /dev/null +++ b/third_party/compile-time-regular-expressions/LICENSE @@ -0,0 +1,218 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. 
diff --git a/third_party/compile-time-regular-expressions/Makefile b/third_party/compile-time-regular-expressions/Makefile new file mode 100644 index 0000000000..a48a30a031 --- /dev/null +++ b/third_party/compile-time-regular-expressions/Makefile @@ -0,0 +1,92 @@ +.PHONY: default all clean grammar compare single-header single-header/ctre.hpp single-header/ctre-unicode.hpp single-header/unicode-db.hpp + +default: all + +TARGETS := $(wildcard tests/benchmark-exec/*.cpp) +IGNORE := $(wildcard tests/benchmark/*.cpp) $(wildcard tests/benchmark-exec/*.cpp) + +DESATOMAT := /bin/false + +CXX_STANDARD := 20 + +PYTHON := python3.9 + +PEDANTIC:=-pedantic + +override CXXFLAGS := $(CXXFLAGS) -std=c++$(CXX_STANDARD) -Iinclude -O3 $(PEDANTIC) -Wall -Wextra -Werror -Wconversion +LDFLAGS := + +TESTS := $(wildcard tests/*.cpp) $(wildcard tests/benchmark/*.cpp) +TRUE_TARGETS := $(TARGETS:%.cpp=%) +override TRUE_TARGETS := $(filter-out $(IGNORE:%.cpp=%), $(TRUE_TARGETS)) +OBJECTS := $(TARGETS:%.cpp=%.o) $(TESTS:%.cpp=%.o) +override OBJECTS := $(filter-out $(IGNORE:%.cpp=%.o),$(OBJECTS)) +DEPEDENCY_FILES := $(OBJECTS:%.o=%.d) + +all: $(TRUE_TARGETS) $(OBJECTS) + +list: + echo $(SUPPORTED_CPP20) + +$(TRUE_TARGETS): %: %.o + $(CXX) $< $(LDFLAGS) -o $@ + +$(OBJECTS): %.o: %.cpp + $(CXX) $(CXXFLAGS) -MMD -c $< -o $@ + +-include $(DEPEDENCY_FILES) + +benchmark: + @$(MAKE) clean + @$(MAKE) IGNORE="" + +benchmark-clean: + @$(MAKE) IGNORE="" clean + +clean: + rm -f $(TRUE_TARGETS) $(OBJECTS) $(DEPEDENCY_FILES) mtent12.txt mtent12.zip + +grammar: include/ctre/pcre.hpp + +regrammar: + @rm -f include/ctre/pcre.hpp + @$(MAKE) grammar + +include/ctre/pcre.hpp: include/ctre/pcre.gram + @echo "LL1q $<" + @$(DESATOMAT) --ll --q --input=include/ctre/pcre.gram --output=include/ctre/ --generator=cpp_ctll_v2 --cfg:fname=pcre.hpp --cfg:namespace=ctre --cfg:guard=CTRE__PCRE__HPP --cfg:grammar_name=pcre + +mtent12.zip: + curl -s http://www.gutenberg.org/files/3200/old/mtent12.zip -o mtent12.zip + 
+mtent12.txt: mtent12.zip + unzip -o mtent12.zip + touch mtent12.txt + +single-header: single-header/ctre.hpp single-header/ctre-unicode.hpp single-header/unicode-db.hpp + +single-header/unicode-db.hpp: include/unicode-db/unicode-db.hpp + cp $+ $@ + +single-header/ctre.hpp: + ${PYTHON} -m quom include/ctre.hpp ctre.hpp.tmp + echo "/*" > single-header/ctre.hpp + cat LICENSE >> single-header/ctre.hpp + echo "*/" >> single-header/ctre.hpp + cat ctre.hpp.tmp >> single-header/ctre.hpp + rm ctre.hpp.tmp + +single-header/ctre-unicode.hpp: + ${PYTHON} -m quom include/ctre-unicode.hpp ctre-unicode.hpp.tmp + echo "/*" > single-header/ctre-unicode.hpp + cat LICENSE >> single-header/ctre-unicode.hpp + echo "*/" >> single-header/ctre-unicode.hpp + cat ctre-unicode.hpp.tmp >> single-header/ctre-unicode.hpp + rm ctre-unicode.hpp.tmp + +REPEAT:=10 + +compare: mtent12.txt + $(CXX) $(CXXFLAGS) -MMD -march=native -DPATTERN="\"(${PATTERN})\"" -c tests/benchmark-range/measurement.cpp -o tests/benchmark-range/measurement.o + $(CXX) tests/benchmark-range/measurement.o -lboost_regex -lpcre2-8 -lre2 -o tests/benchmark-range/measurement + tests/benchmark-range/measurement all mtent12.txt ${REPEAT} diff --git a/third_party/compile-time-regular-expressions/NOTES.md b/third_party/compile-time-regular-expressions/NOTES.md new file mode 100644 index 0000000000..b01a62ca7f --- /dev/null +++ b/third_party/compile-time-regular-expressions/NOTES.md @@ -0,0 +1,24 @@ +# Unsupported PCRE constructs + +* `\0dd` `\ddd` `\0{dd...}` octal numbers +* `\Q...\E` quoting +* `\cx` control characters +* `\C` data unit +* `\h` `\H` horizontal character classes +* `\v` `\V` vertical character classes +* `\p{xx}` `\P{xx}` character properties +* `\X` unicode grapheme cluster +* boundaries other than `^$` +* atomic groups +* comments +* options/modes +* subroutines +* conditional patterns +* callouts +* match point reset `\K` + + +# Other unsupported "things" +* `[[.hyphen.]]` named characters +* `[[=M=]]` whatever 
this is + diff --git a/third_party/compile-time-regular-expressions/README.md b/third_party/compile-time-regular-expressions/README.md new file mode 100644 index 0000000000..e609ef97ce --- /dev/null +++ b/third_party/compile-time-regular-expressions/README.md @@ -0,0 +1,295 @@ +# Compile time regular expressions v3 + +[![Build Status](https://travis-ci.org/hanickadot/compile-time-regular-expressions.svg?branch=master)](https://travis-ci.org/hanickadot/compile-time-regular-expressions) + +Fast compile-time regular expressions with support for matching/searching/capturing during compile-time or runtime. + +You can use the single header version from directory `single-header`. This header can be regenerated with `make single-header`. If you are using cmake, you can add this directory as subdirectory and link to target `ctre`. + +More info at [compile-time.re](https://compile-time.re/) + +## What this library can do + +```c++ +ctre::match<"REGEX">(subject); // C++20 +"REGEX"_ctre.match(subject); // C++17 + N3599 extension +``` + +* Matching +* Searching (`search` or `starts_with`) +* Capturing content (named captures are supported too, but only with syntax `(?<name>...)`) +* Back-Reference (\g{N} syntax, and \1...\9 syntax too) +* Multiline support (with `multi_` functions) +* Unicode properties and UTF-8 support + +The library is implementing most of the PCRE syntax with a few exceptions: + +* callouts +* comments +* conditional patterns +* control characters (`\cX`) +* match point reset (`\K`) +* named characters +* octal numbers +* options / modes +* subroutines +* unicode grapheme cluster (`\X`) + +More documentation on [pcre.org](https://www.pcre.org/current/doc/html/pcre2syntax.html). + +### Unknown character escape behaviour + +Not every escaped character is automatically inserted as itself: the library treats escaped characters as having special meaning, so an unknown escaped character is a syntax error.
+ +Explicitly allowed character escapes which insert only the character are: + +```\-\"\<\>``` + +## Basic API + +This is an approximate API specification from a user perspective (omitting `constexpr` and `noexcept` which are everywhere, and using C++20 syntax even though the API is C++17 compatible): +```c++ +// look if whole input matches the regex: +template auto ctre::match(auto Range &&) -> regex_results; +template auto ctre::match(auto First &&, auto Last &&) -> regex_results; + +// look if input contains match somewhere inside of itself: +template auto ctre::search(auto Range &&) -> regex_results; +template auto ctre::search(auto First &&, auto Last &&) -> regex_results; + +// check if input starts with match (but doesn't need to match everything): +template auto ctre::starts_with(auto Range &&) -> regex_results; +template auto ctre::starts_with(auto First &&, auto Last &&) -> regex_results; + +// result type is deconstructible into structured bindings +template <...> struct regex_results { + operator bool() const; // if it's a match + auto to_view() const -> std::string_view; // also view() + auto to_string() const -> std::string; // also str() + operator std::string_view() const; // also supports all char variants + explicit operator std::string() const; + + // also size(), begin(), end(), data() + + size_t count() const; // number of captures + template const captured_content & get() const; // provide specific capture, whole regex_results is implicit capture 0 +}; +``` + +### Range outputting API + +```c++ +// search for regex in input and return each occurrence, ignoring rest: +template auto ctre::range(auto Range &&) -> range of regex_result; +template auto ctre::range(auto First &&, auto Last &&) -> range of regex_result; + +// return range of each match, stopping at something which can't be matched +template auto ctre::tokenize(auto Range &&) -> range of regex_result; +template auto ctre::tokenize(auto First &&, auto Last &&) -> range of regex_result; + +//
return parts of the input split by the regex, returning it as part of content of the implicit zero capture (other captures are not changed, you can use it to access how the values were split): +template auto ctre::split(auto Range &&) -> range of regex_result; +template auto ctre::split(auto First &&, auto Last &&) -> range of regex_result; +``` + +### Functors + +All the functions (`ctre::match`, `ctre::search`, `ctre::starts_with`, `ctre::range`, `ctre::tokenize`, `ctre::split`) are functors and can be used without parentheses: + +```c++ +auto matcher = ctre::match<"regex">; +if (matcher(input)) ... +``` + +### Possible subjects (inputs) + +* `std::string`-like objects (`std::string_view` or your own string if it's providing `begin`/`end` functions with forward iterators) +* pairs of forward iterators + +### Unicode support + +To enable you need to include: +* `<ctre-unicode.hpp>` +* or `<ctre.hpp>` and `<unicode-db.hpp>` + +Otherwise you will get missing symbols if you try to use the unicode support without enabling it. + +## Supported compilers + +* clang 7.0+ (template UDL, C++17 syntax) +* xcode clang 10.0+ (template UDL, C++17 syntax) +* clang 12.0+ (C++17 syntax, C++20 cNTTP syntax) +* gcc 8.0+ (template UDL, C++17 syntax) +* gcc 9.0+ (C++17 & C++20 cNTTP syntax) +* MSVC 14.29+ (Visual Studio 16.11+) (C++20) + +### Template UDL syntax + +The compiler must support extension N3599, for example as GNU extension in gcc (not in GCC 9.1+) and clang. + +```c++ +constexpr auto match(std::string_view sv) noexcept { + using namespace ctre::literals; + return "h.*"_ctre.match(sv); +} +``` + +If you need extension N3599 in GCC 9.1+, you can't use -pedantic. Also, you need to define macro `CTRE_ENABLE_LITERALS`. + +### C++17 syntax + +You can provide a pattern as a `constexpr ctll::fixed_string` variable.
+ +```c++ +static constexpr auto pattern = ctll::fixed_string{ "h.*" }; + +constexpr auto match(std::string_view sv) noexcept { + return ctre::match(sv); +} +``` + +(this is tested in MSVC 15.8.8) + +### C++20 syntax + +Currently, the only compiler which supports cNTTP syntax `ctre::match(subject)` is GCC 9+. + +```c++ +constexpr auto match(std::string_view sv) noexcept { + return ctre::match<"h.*">(sv); +} +``` + +## Examples + +### Extracting number from input + +```c++ +std::optional extract_number(std::string_view s) noexcept { + if (auto m = ctre::match<"[a-z]+([0-9]+)">(s)) { + return m.get<1>().to_view(); + } else { + return std::nullopt; + } +} +``` + +[link to compiler explorer](https://gcc.godbolt.org/z/5U67_e) + +### Extracting values from date + +```c++ +struct date { std::string_view year; std::string_view month; std::string_view day; }; + +std::optional extract_date(std::string_view s) noexcept { + using namespace ctre::literals; + if (auto [whole, year, month, day] = ctre::match<"(\\d{4})/(\\d{1,2})/(\\d{1,2})">(s); whole) { + return date{year, month, day}; + } else { + return std::nullopt; + } +} + +//static_assert(extract_date("2018/08/27"sv).has_value()); +//static_assert((*extract_date("2018/08/27"sv)).year == "2018"sv); +//static_assert((*extract_date("2018/08/27"sv)).month == "08"sv); +//static_assert((*extract_date("2018/08/27"sv)).day == "27"sv); +``` + +[link to compiler explorer](https://gcc.godbolt.org/z/x64CVp) + +### Using captures + +```c++ +auto result = ctre::match<"(?\\d{4})/(?\\d{1,2})/(?\\d{1,2})">(s); +return date{result.get<"year">(), result.get<"month">, result.get<"day">}; + +// or in C++ emulation, but the object must have a linkage +static constexpr ctll::fixed_string year = "year"; +static constexpr ctll::fixed_string month = "month"; +static constexpr ctll::fixed_string day = "day"; +return date{result.get(), result.get, result.get}; + +// or use numbered access +// capture 0 is the whole match +return date{result.get<1>(), 
result.get<2>, result.get<3>}; +``` + +### Lexer + +```c++ +enum class type { + unknown, identifier, number +}; + +struct lex_item { + type t; + std::string_view c; +}; + +std::optional lexer(std::string_view v) noexcept { + if (auto [m,id,num] = ctre::match<"([a-z]+)|([0-9]+)">(v); m) { + if (id) { + return lex_item{type::identifier, id}; + } else if (num) { + return lex_item{type::number, num}; + } + } + return std::nullopt; +} +``` + +[link to compiler explorer](https://gcc.godbolt.org/z/PKTiCC) + +### Range over input + +This support is preliminary, probably the API will be changed. + +```c++ +auto input = "123,456,768"sv; + +for (auto match: ctre::range<"([0-9]+),?">(input)) { + std::cout << std::string_view{match.get<0>()} << "\n"; +} +``` + +### Unicode + +```c++ +#include +#include +// needed if you want to output to the terminal +std::string_view cast_from_unicode(std::u8string_view input) noexcept { + return std::string_view(reinterpret_cast(input.data()), input.size()); +} +int main() +{ + using namespace std::literals; + std::u8string_view original = u8"Tu es un génie"sv; + + for (auto match : ctre::range<"\\p{Letter}+">(original)) + std::cout << cast_from_unicode(match) << std::endl; + return 0; +} +``` + +[link to compiler explorer](https://godbolt.org/z/erTshe6sz) + + +## Installing ctre using vcpkg + +You can download and install ctre using the [vcpkg](https://github.com/Microsoft/vcpkg) dependency manager: + +```bash +git clone https://github.com/Microsoft/vcpkg.git +cd vcpkg +./bootstrap-vcpkg.sh +./vcpkg integrate install +./vcpkg install ctre +``` + +The ctre port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository. + +## Running tests (for developers) + +Just run `make` in root of this project. 
diff --git a/third_party/compile-time-regular-expressions/clang-bench.txt b/third_party/compile-time-regular-expressions/clang-bench.txt new file mode 100644 index 0000000000..264827a293 --- /dev/null +++ b/third_party/compile-time-regular-expressions/clang-bench.txt @@ -0,0 +1,20 @@ +//--------- ABCD|DEFGH|EFGHI|A{4,} +egrep 0m49.353s +CTRE 0m10.093s +PCRE 0m12.515s +std::regex 21m9.309s +//--------- [0-9a-fA-F]{8,16} +egrep 0m32.256s +CTRE 0m14.197s +PCRE 0m17.832s +std::regex 2m34.505s +//--------- ^([0-9]{4,16})?[aA] +egrep 0m12.880s +CTRE 0m7.596s +PCRE 0m6.590s +std::regex 7m54.793s +//--------- ([aAbB]{4,}|[xXyY]{4,}|[1234]{4,})0 +egrep 1m56.412s +CTRE 0m59.864s +PCRE 0m43.486s +std::regex 27m35.004s diff --git a/third_party/compile-time-regular-expressions/conanfile.py b/third_party/compile-time-regular-expressions/conanfile.py new file mode 100755 index 0000000000..9edf262948 --- /dev/null +++ b/third_party/compile-time-regular-expressions/conanfile.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +from conans import ConanFile + + +class CtreConan(ConanFile): + name = "CTRE" + license = "Apache 2.0 with LLVM Exception" + url = "https://github.com/hanickadot/compile-time-regular-expressions" + author = "Hana Dusíková (ctre@hanicka.net)" + description = "Compile Time Regular Expression for C++17/20" + homepage = "https://github.com/hanickadot/compile-time-regular-expressions" + no_copy_source = True + scm = { + "type": "git", + "url": "auto", + "revision": "auto" + } + + def package(self): + self.copy("LICENSE", "licenses") + self.copy("*.hpp") + + def package_id(self): + self.info.header_only() + diff --git a/third_party/compile-time-regular-expressions/ctre.cppm b/third_party/compile-time-regular-expressions/ctre.cppm new file mode 100644 index 0000000000..6526e5050e --- /dev/null +++ b/third_party/compile-time-regular-expressions/ctre.cppm @@ -0,0 +1,26 @@ +module; + +#ifdef _MSVC_LANG +#pragma warning( disable : 5202 ) +#endif + 
+import std; + +export module ctre; + +#define CTRE_IN_A_MODULE +#define CTLL_IN_A_MODULE +#define UNICODE_DB_IN_A_MODULE + +using std::int16_t; +using std::int32_t; +using std::int64_t; +using std::int8_t; +using std::size_t; +using std::uint16_t; +using std::uint32_t; +using std::uint64_t; +using std::uint8_t; + +#include "ctre.hpp" +#include "unicode-db.hpp" diff --git a/third_party/compile-time-regular-expressions/docs/api.rst b/third_party/compile-time-regular-expressions/docs/api.rst new file mode 100644 index 0000000000..8dbecc9cf4 --- /dev/null +++ b/third_party/compile-time-regular-expressions/docs/api.rst @@ -0,0 +1,100 @@ +API +=== + +.. class:: ctll::fixed_string + + A compile-time fixed string. + + Example: :: + + static constexpr auto pattern = ctll::fixed_string{ "h.*" }; + + constexpr auto match(std::string_view sv) noexcept { + return ctre::match(sv); + } + +.. class:: template ctre::regex_results + + .. type:: char_type = typename std::iterator_traits::value_type + + The character type used by the ``Iterator``. + + .. function:: template constexpr captured_content::storage get() + template constexpr captured_content::storage get() + template constexpr captured_content::storage get() + + Returns the capture specified by ``Id`` or ``Name``. ID ``0`` is the full match, ID ``1`` is the first capture group, ID ``2`` is the second, etc. + Named groups are specified using ``(?)``. + + Example: :: + + if (auto m = ctre::match<"(?[a-z]+)([0-9]+)">("abc123")) { + m.get<"chars">(); //abc + m.get<2>(); //123 + } + + .. function:: constexpr size_t size() + + Returns the number of captures in this result object. + + .. function:: constexpr operator bool() const noexcept + + Returns whether the match was successful. + + .. function:: constexpr operator std::basic_string_view() const noexcept + constexpr std::basic_string_view to_view() const noexcept + constexpr std::basic_string_view view() const noexcept + + Converts the match to a string view. + + .. 
function:: constexpr explicit operator std::basic_string() const noexcept + constexpr std::basic_string to_string() const noexcept + constexpr std::basic_string str() const noexcept + + Converts the match to a string. + +.. class:: template captured_content + + .. class:: template storage + + .. function:: constexpr auto begin() const noexcept + constexpr auto end() const noexcept + + Returns the begin or end iterator for the captured content. + + .. function:: constexpr operator bool() const noexcept + + Returns whether the match was successful. + + .. function:: constexpr auto size() const noexcept + + Returns the number of characters in the capture. + + .. function:: constexpr operator std::basic_string_view() const noexcept + constexpr std::basic_string_view to_view() const noexcept + constexpr std::basic_string_view view() const noexcept + + Converts the capture to a string view. + + .. function:: constexpr explicit operator std::basic_string() const noexcept + constexpr std::basic_string to_string() const noexcept + constexpr std::basic_string str() const noexcept + + Converts the capture to a string. + + .. function:: constexpr static size_t get_id() noexcept + + Returns ``Id`` + +.. function:: template constexpr ctre::regex_results match(Args&&... args) + template constexpr ctre::regex_results match(Args&&... args) + + Matches ``RE`` against the whole input. + ``Args...`` must be either a string-like object with ``begin`` and ``end`` member functions, or a pair of forward iterators. + +.. function:: template constexpr ctre::regex_results search(Args&&... args) + template constexpr ctre::regex_results search(Args&&... args) + + Searches for a match somewhere within the input. + ``Args...`` must be either a string-like object with ``begin`` and ``end`` member functions, or a pair of forward iterators.
+ diff --git a/third_party/compile-time-regular-expressions/docs/conf.py b/third_party/compile-time-regular-expressions/docs/conf.py new file mode 100644 index 0000000000..b9bbc25717 --- /dev/null +++ b/third_party/compile-time-regular-expressions/docs/conf.py @@ -0,0 +1,54 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# http://www.sphinx-doc.org/en/master/config + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- + +project = 'ctre' +copyright = '2019, Hana Dusikova' +author = 'Hana Dusikova' +master_doc = 'index' +primary_domain = 'cpp' +highlight_language = 'cpp' + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. 
+# +html_theme = 'sphinx_rtd_theme' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] diff --git a/third_party/compile-time-regular-expressions/docs/examples.rst b/third_party/compile-time-regular-expressions/docs/examples.rst new file mode 100644 index 0000000000..5281bebef7 --- /dev/null +++ b/third_party/compile-time-regular-expressions/docs/examples.rst @@ -0,0 +1,77 @@ +Examples +======== + +Extracting a number from input +------------------------------ +:: + + std::optional extract_number(std::string_view s) noexcept { + if (auto m = ctre::match<"[a-z]+([0-9]+)">(s)) { + return m.get<1>().to_view(); + } else { + return std::nullopt; + } + } + +`link to compiler explorer `_ + +Extracting values from date +--------------------------- +:: + + + struct date { std::string_view year; std::string_view month; std::string_view day; }; + std::optional extract_date(std::string_view s) noexcept { + using namespace ctre::literals; + if (auto [whole, year, month, day] = ctre::match<"(\\d{4})/(\\d{1,2})/(\\d{1,2})">(s); whole) { + return date{year, month, day}; + } else { + return std::nullopt; + } + } + + //static_assert(extract_date("2018/08/27"sv).has_value()); + //static_assert((*extract_date("2018/08/27"sv)).year == "2018"sv); + //static_assert((*extract_date("2018/08/27"sv)).month == "08"sv); + //static_assert((*extract_date("2018/08/27"sv)).day == "27"sv); + +`link to compiler explorer `_ + +Lexer +----- +:: + + enum class type { + unknown, identifier, number + }; + + struct lex_item { + type t; + std::string_view c; + }; + + std::optional lexer(std::string_view v) noexcept { + if (auto [m,id,num] = ctre::match<"([a-z]+)|([0-9]+)">(v); m) { + if (id) { + return lex_item{type::identifier, id}; + } else if (num) { + return lex_item{type::number, num}; + } 
+ } + return std::nullopt; + } + +`link to compiler explorer `_ + +Range over input +---------------- + +This support is preliminary and probably the API will be changed. + +:: + + auto input = "123,456,768"sv; + + for (auto match: ctre::range<"([0-9]+),?">(input)) { + std::cout << std::string_view{match.get<0>()} << "\n"; + } \ No newline at end of file diff --git a/third_party/compile-time-regular-expressions/docs/index.rst b/third_party/compile-time-regular-expressions/docs/index.rst new file mode 100644 index 0000000000..67c7b3ef28 --- /dev/null +++ b/third_party/compile-time-regular-expressions/docs/index.rst @@ -0,0 +1,66 @@ +ctre +==== + +A compile-time (almost) PCRE-compatible regular expression matcher for C++. + +Overview +======== + +Fast compile-time regular expressions with support for matching/searching/capturing at compile-time or runtime. :: + + ctre::match<"REGEX">(subject); // C++20 + "REGEX"_ctre.match(subject); // C++17 + N3599 extension + +.. toctree:: + :maxdepth: 2 + + api + examples + regex_syntax + +Supported compilers +=================== + +- clang 6.0+ (template UDL, C++17 syntax) +- xcode clang 10.0+ (template UDL, C++17 syntax) +- gcc 7.4+ (template UDL, C++17 syntax) +- gcc 9.0+ (C++17 & C++20 cNTTP syntax) +- MSVC 15.8.8+ (C++17 syntax only) + +Basic Usage +=========== + +Template UDL syntax +------------------- + +Compiler must support N3599 extension, as GNU extension in gcc (not in GCC 9.1+) and clang. :: + + constexpr auto match(std::string_view sv) noexcept { + using namespace ctre::literals; + return "h.*"_ctre.match(sv); + } + +If you need N3599 extension in GCC 9.1+ you can't use -pedantic mode and you must define the macro ``CTRE_ENABLE_LITERALS``. + +C++17 syntax +------------ + +You can provide pattern as a constexpr ``ctll::fixed_string`` variable.
:: + + static constexpr auto pattern = ctll::fixed_string{ "h.*" }; + + constexpr auto match(std::string_view sv) noexcept { + return ctre::match(sv); + } + +(this is tested in MSVC 15.8.8) + +C++20 syntax +------------ + +Currently only compiler which supports cNTTP syntax ``ctre::match(subject)`` is GCC 9+. :: + + constexpr auto match(std::string_view sv) noexcept { + return ctre::match<"h.*">(sv); + } + diff --git a/third_party/compile-time-regular-expressions/docs/regex_syntax.rst b/third_party/compile-time-regular-expressions/docs/regex_syntax.rst new file mode 100644 index 0000000000..9af97adf5d --- /dev/null +++ b/third_party/compile-time-regular-expressions/docs/regex_syntax.rst @@ -0,0 +1,18 @@ +Regex Syntax +============ + +The library supports most of the `PCRE `_ syntax with a few exceptions: + +- callouts +- comments +- conditional patterns +- control characters (\\cX) +- horizontal / vertical character classes (\\h\\H\\v\\V) +- match point reset (\\K) +- named characters +- octal numbers +- options / modes +- subroutines +- unicode grapheme cluster (\\X) + +TODO more detailed regex information \ No newline at end of file diff --git a/third_party/compile-time-regular-expressions/future.cpp b/third_party/compile-time-regular-expressions/future.cpp new file mode 100644 index 0000000000..07931f6ec7 --- /dev/null +++ b/third_party/compile-time-regular-expressions/future.cpp @@ -0,0 +1,15 @@ +#include +#include +#include + +std::string match(std::string_view sv) { + if (auto match = ctre::match<"[a-z]+">(sv); match) { + return match.to_string(); + } else { + return "not_match"; + } +} + +int main() { + std::cout << match("hello") << "\n"; +} \ No newline at end of file diff --git a/third_party/compile-time-regular-expressions/gcc-bench.txt b/third_party/compile-time-regular-expressions/gcc-bench.txt new file mode 100644 index 0000000000..a943b97d71 --- /dev/null +++ b/third_party/compile-time-regular-expressions/gcc-bench.txt @@ -0,0 +1,20 @@ +//--------- 
ABCD|DEFGH|EFGHI|A{4,} +egrep 0m50.036s +CTRE 0m3.982s +PCRE 0m8.621s +std::regex 0m55.058s +//--------- [0-9a-fA-F]{8,16} +egrep 0m32.361s +CTRE 0m4.291s +PCRE 0m13.958s +std::regex 0m18.179s +//--------- ^([0-9]{4,16})?[aA] +egrep 0m12.819s +CTRE 0m2.844s +PCRE 0m2.614s +std::regex 0m22.876s +//--------- ([aAbB]{4,}|[xXyY]{4,}|[1234]{4,})0 +egrep 1m45.696s +CTRE 0m7.623s +PCRE 0m39.808s +std::regex 1m2.799s diff --git a/third_party/compile-time-regular-expressions/include/ctll.hpp b/third_party/compile-time-regular-expressions/include/ctll.hpp new file mode 100644 index 0000000000..2fe40e3419 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctll.hpp @@ -0,0 +1,6 @@ +#ifndef CTRE_V2__CTLL__HPP +#define CTRE_V2__CTLL__HPP + +#include "ctll/parser.hpp" + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctll/actions.hpp b/third_party/compile-time-regular-expressions/include/ctll/actions.hpp new file mode 100644 index 0000000000..a6a569df80 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctll/actions.hpp @@ -0,0 +1,29 @@ +#ifndef CTLL__ACTIONS__HPP +#define CTLL__ACTIONS__HPP + +namespace ctll { + struct empty_subject { }; + + struct empty_actions { + // dummy operator so using Actions::operator() later will not give error + template static constexpr auto apply(Action, InputSymbol, Subject subject) { + return subject; + } + }; + + template struct identity: public Actions { + using Actions::apply; + // allow empty_subject to exists + template constexpr static auto apply(Action, term, empty_subject) -> empty_subject { return {}; } + template constexpr static auto apply(Action, epsilon, empty_subject) -> empty_subject { return {}; } + }; + + template struct ignore_unknown: public Actions { + using Actions::apply; + // allow flow thru unknown actions + template constexpr static auto apply(Action, term, Subject) -> Subject { return {}; } + template constexpr static auto apply(Action, epsilon, 
Subject) -> Subject { return {}; } + }; +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctll/fixed_string.hpp b/third_party/compile-time-regular-expressions/include/ctll/fixed_string.hpp new file mode 100644 index 0000000000..5c7666b6f5 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctll/fixed_string.hpp @@ -0,0 +1,223 @@ +#ifndef CTLL__FIXED_STRING__GPP +#define CTLL__FIXED_STRING__GPP + +#ifndef CTLL_IN_A_MODULE +#include +#include +#include +#include +#include +#endif + +#include "utilities.hpp" + +namespace ctll { + +struct length_value_t { + uint32_t value; + uint8_t length; +}; + +constexpr length_value_t length_and_value_of_utf8_code_point(uint8_t first_unit) noexcept { + if ((first_unit & 0b1000'0000) == 0b0000'0000) return {static_cast(first_unit), 1}; + else if ((first_unit & 0b1110'0000) == 0b1100'0000) return {static_cast(first_unit & 0b0001'1111), 2}; + else if ((first_unit & 0b1111'0000) == 0b1110'0000) return {static_cast(first_unit & 0b0000'1111), 3}; + else if ((first_unit & 0b1111'1000) == 0b1111'0000) return {static_cast(first_unit & 0b0000'0111), 4}; + else if ((first_unit & 0b1111'1100) == 0b1111'1000) return {static_cast(first_unit & 0b0000'0011), 5}; + else if ((first_unit & 0b1111'1100) == 0b1111'1100) return {static_cast(first_unit & 0b0000'0001), 6}; + else return {0, 0}; +} + +constexpr char32_t value_of_trailing_utf8_code_point(uint8_t unit, bool & correct) noexcept { + if ((unit & 0b1100'0000) == 0b1000'0000) return unit & 0b0011'1111; + else { + correct = false; + return 0; + } +} + +constexpr length_value_t length_and_value_of_utf16_code_point(uint16_t first_unit) noexcept { + if ((first_unit & 0b1111110000000000) == 0b1101'1000'0000'0000) return {static_cast(first_unit & 0b0000001111111111), 2}; + else return {first_unit, 1}; +} + +struct construct_from_pointer_t { }; + +constexpr auto construct_from_pointer = construct_from_pointer_t{}; + +CTLL_EXPORT template struct 
fixed_string { + char32_t content[N] = {}; + size_t real_size{0}; + bool correct_flag{true}; + + template constexpr fixed_string(construct_from_pointer_t, const T * input) noexcept { + if constexpr (std::is_same_v) { + #ifdef CTRE_STRING_IS_UTF8 + size_t out{0}; + for (size_t i{0}; i < N; ++i) { + length_value_t info = length_and_value_of_utf8_code_point(input[i]); + switch (info.length) { + case 6: + if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); + [[fallthrough]]; + case 5: + if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); + [[fallthrough]]; + case 4: + if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); + [[fallthrough]]; + case 3: + if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); + [[fallthrough]]; + case 2: + if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); + [[fallthrough]]; + case 1: + content[out++] = static_cast(info.value); + real_size++; + break; + default: + correct_flag = false; + return; + } + } + #else + for (size_t i{0}; i < N; ++i) { + content[i] = static_cast(input[i]); + real_size++; + } + #endif + #if __cpp_char8_t + } else if constexpr (std::is_same_v) { + size_t out{0}; + for (size_t i{0}; i < N; ++i) { + length_value_t info = length_and_value_of_utf8_code_point(input[i]); + switch (info.length) { + case 6: + if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); + [[fallthrough]]; + case 5: + if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); + [[fallthrough]]; + case 4: + if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); + [[fallthrough]]; + case 3: + if (++i < N) info.value = (info.value << 6) | 
value_of_trailing_utf8_code_point(input[i], correct_flag); + [[fallthrough]]; + case 2: + if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); + [[fallthrough]]; + case 1: + content[out++] = static_cast(info.value); + real_size++; + break; + default: + correct_flag = false; + return; + } + } + #endif + } else if constexpr (std::is_same_v) { + size_t out{0}; + for (size_t i{0}; i < N; ++i) { + length_value_t info = length_and_value_of_utf16_code_point(input[i]); + if (info.length == 2) { + if (++i < N) { + if ((input[i] & 0b1111'1100'0000'0000) == 0b1101'1100'0000'0000) { + content[out++] = ((info.value << 10) | (input[i] & 0b0000'0011'1111'1111)) + 0x10000; + } else { + correct_flag = false; + break; + } + } + } else { + content[out++] = info.value; + } + } + real_size = out; + } else if constexpr (std::is_same_v || std::is_same_v) { + for (size_t i{0}; i < N; ++i) { + content[i] = static_cast(input[i]); + real_size++; + } + } + } + + template constexpr fixed_string(const std::array & in) noexcept: fixed_string{construct_from_pointer, in.data()} { } + template constexpr fixed_string(const T (&input)[N+1]) noexcept: fixed_string{construct_from_pointer, input} { } + + constexpr fixed_string(const fixed_string & other) noexcept { + for (size_t i{0}; i < N; ++i) { + content[i] = other.content[i]; + } + real_size = other.real_size; + correct_flag = other.correct_flag; + } + constexpr bool correct() const noexcept { + return correct_flag; + } + constexpr size_t size() const noexcept { + return real_size; + } + constexpr const char32_t * begin() const noexcept { + return content; + } + constexpr const char32_t * end() const noexcept { + return content + size(); + } + constexpr char32_t operator[](size_t i) const noexcept { + return content[i]; + } + template constexpr bool is_same_as(const fixed_string & rhs) const noexcept { + if (real_size != rhs.size()) return false; + for (size_t i{0}; i != real_size; ++i) { + if 
(content[i] != rhs[i]) return false; + } + return true; + } + constexpr operator std::basic_string_view() const noexcept { + return std::basic_string_view{content, size()}; + } +}; + +template <> class fixed_string<0> { + static constexpr char32_t empty[1] = {0}; +public: + template constexpr fixed_string(const T *) noexcept { + + } + constexpr fixed_string(std::initializer_list) noexcept { + + } + constexpr fixed_string(const fixed_string &) noexcept { + + } + constexpr bool correct() const noexcept { + return true; + } + constexpr size_t size() const noexcept { + return 0; + } + constexpr const char32_t * begin() const noexcept { + return empty; + } + constexpr const char32_t * end() const noexcept { + return empty + size(); + } + constexpr char32_t operator[](size_t) const noexcept { + return 0; + } + constexpr operator std::basic_string_view() const noexcept { + return std::basic_string_view{empty, 0}; + } +}; + +template fixed_string(const CharT (&)[N]) -> fixed_string; +template fixed_string(const std::array &) -> fixed_string; + +template fixed_string(fixed_string) -> fixed_string; + +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctll/grammars.hpp b/third_party/compile-time-regular-expressions/include/ctll/grammars.hpp new file mode 100644 index 0000000000..fd5184c129 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctll/grammars.hpp @@ -0,0 +1,123 @@ +#ifndef CTLL__GRAMMARS__HPP +#define CTLL__GRAMMARS__HPP + +namespace ctll { + +// terminal type representing symbol / character of any type +template struct term { + static constexpr auto value = v; +}; + +// epsilon = nothing on input tape +// also used as an command for parsing means "do nothing" +struct epsilon { + static constexpr auto value = '-'; +}; + +// empty_stack_symbol = nothing on stack +struct empty_stack_symbol {}; + +// push is alias to list +template using push = list; + +// accept/reject type for controlling output of LL1 machine 
+struct accept { constexpr explicit operator bool() noexcept { return true; } }; +struct reject { constexpr explicit operator bool() noexcept { return false; } }; + +// action type, every action item in grammar must inherit from +struct action { + struct action_tag { }; +}; + +// move one character forward and pop it from stack command +struct pop_input { + struct pop_input_tag { }; +}; + +// additional overloads for type list +template constexpr auto push_front(pop_input, list) -> list { return {}; } + +template constexpr auto push_front(epsilon, list) -> list { return {}; } + +template constexpr auto push_front(list, list) -> list { return {}; } + +template constexpr auto pop_front_and_push_front(T item, list l) { + return push_front(item, pop_front(l)); +} + +// SPECIAL matching types for nicer grammars + +// match any term +struct anything { + constexpr inline anything() noexcept { } + template constexpr anything(term) noexcept; +}; + +// match range of term A-B +template struct range { + constexpr inline range() noexcept { } + //template constexpr range(term) noexcept requires (A <= V) && (V <= B); + template > constexpr range(term) noexcept; +}; + +#ifdef __EDG__ +template struct contains { + static constexpr bool value = ((Set == V) || ... 
|| false); +}; +#endif + +// match terms defined in set +template struct set { + constexpr inline set() noexcept { } + #ifdef __EDG__ + template ::value>> constexpr set(term) noexcept; + #else + template > constexpr set(term) noexcept; + #endif +}; + +// match terms not defined in set +template struct neg_set { + constexpr inline neg_set() noexcept { } + + #ifdef __EDG__ + template ::value>> constexpr neg_set(term) noexcept; + #else + template > constexpr neg_set(term) noexcept; + #endif +}; + +// AUGMENTED grammar which completes user-defined grammar for all other cases +template struct augment_grammar: public Grammar { + // start nonterminal is defined in parent type + using typename Grammar::_start; + + // grammar rules are inherited from Grammar parent type + using Grammar::rule; + + // term on stack and on input means pop_input; + template static constexpr auto rule(term, term) -> ctll::pop_input; + + // if the type on stack (range, set, neg_set, anything) is constructible from the terminal => pop_input + template static constexpr auto rule(Expected, term) -> std::enable_if_t>, ctll::pop_input>; + + // empty stack and empty input means we are accepting + static constexpr auto rule(empty_stack_symbol, epsilon) -> ctll::accept; + + // not matching anything else => reject + static constexpr auto rule(...) 
-> ctll::reject; + + // start stack is just a list; + using start_stack = list; +}; + + + +} + + + + + +#endif + diff --git a/third_party/compile-time-regular-expressions/include/ctll/list.hpp b/third_party/compile-time-regular-expressions/include/ctll/list.hpp new file mode 100644 index 0000000000..a04ed51287 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctll/list.hpp @@ -0,0 +1,93 @@ +#ifndef CTLL__TYPE_STACK__HPP +#define CTLL__TYPE_STACK__HPP + +#include "utilities.hpp" + +namespace ctll { + +template struct list { }; + +struct _nothing { }; + +using empty_list = list<>; + +// calculate size of list content +template constexpr auto size(list) noexcept { return sizeof...(Ts); } + + +// check if the list is empty +template constexpr bool empty(list) noexcept { return false; } +constexpr bool empty(empty_list) { return true; } + + +// concat two lists together left to right +template constexpr auto concat(list, list) noexcept -> list { return {}; } + + +// push something to the front of a list +template constexpr auto push_front(T, list) noexcept -> list { return {}; } + + +// pop element from the front of a list +template constexpr auto pop_front(list) noexcept -> list { return {}; } +constexpr auto pop_front(empty_list) -> empty_list; + +// pop element from the front of a list and return new typelist too +template struct list_pop_pair { + Front front{}; + List list{}; + constexpr list_pop_pair() = default; +}; + +template constexpr auto pop_and_get_front(list, T = T()) noexcept -> list_pop_pair> { return {}; } +template constexpr auto pop_and_get_front(empty_list, T = T()) noexcept -> list_pop_pair { return {}; } + + +// return front of the list +template constexpr auto front(list, T = T()) noexcept -> Head { return {}; } +template constexpr auto front(empty_list, T = T()) noexcept -> T { return {}; } + +// rotate list +template struct rotate_item { + template friend constexpr auto operator+(list, rotate_item) noexcept -> list { return 
{}; } +}; + +template constexpr auto rotate(list) -> decltype((list<>{} + ... + rotate_item{})) { + return {}; +} + +// set operations +template struct item_matcher { + struct not_selected { + template friend constexpr auto operator+(list, not_selected) -> list; + }; + template struct wrapper { + template friend constexpr auto operator+(list, wrapper) -> list; + }; + + static constexpr auto check(T) { return std::true_type{}; } + static constexpr auto check(...) { return std::false_type{}; } + static constexpr auto select(T) { return not_selected{}; } + template static constexpr auto select(Y) { return wrapper{}; } +}; + +template constexpr bool exists_in(T, list) noexcept { + return (item_matcher::check(Ts{}) || ... || false); +} + +template constexpr auto add_item(T item, list l) noexcept { + if constexpr (exists_in(item, l)) { + return l; + } else { + return list{}; + } +} + +template constexpr auto remove_item(T, list) noexcept { + item_matcher matcher; + return decltype((list<>{} + ... 
+ matcher.select(Ts{}))){}; +} + +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctll/parser.hpp b/third_party/compile-time-regular-expressions/include/ctll/parser.hpp new file mode 100644 index 0000000000..d3751f9aa7 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctll/parser.hpp @@ -0,0 +1,192 @@ +#ifndef CTLL__PARSER__HPP +#define CTLL__PARSER__HPP + +#include "fixed_string.hpp" +#include "list.hpp" +#include "grammars.hpp" +#include "actions.hpp" + +#ifndef CTLL_IN_A_MODULE +#include +#endif + +namespace ctll { + + +enum class decision { + reject, + accept, + undecided +}; + +struct placeholder { }; + +template using index_placeholder = placeholder; + +#if CTLL_CNTTP_COMPILER_CHECK +template struct parser { // in c++20 +#else +template struct parser { +#endif + + #ifdef __GNUC__ // workaround to GCC bug + #if CTLL_CNTTP_COMPILER_CHECK + static constexpr auto _input = input; // c++20 mode + #else + static constexpr auto & _input = input; // c++17 mode + #endif + #else + static constexpr auto _input = input; // everyone else + #endif + + using Actions = ctll::conditional, identity>; + using grammar = augment_grammar; + + template struct results { + + static constexpr bool is_correct = Decision == decision::accept; + + constexpr inline CTLL_FORCE_INLINE operator bool() const noexcept { + return is_correct; + } + + #ifdef __GNUC__ // workaround to GCC bug + #if CTLL_CNTTP_COMPILER_CHECK + static constexpr auto _input = input; // c++20 mode + #else + static constexpr auto & _input = input; // c++17 mode + #endif + #else + static constexpr auto _input = input; // everyone else + #endif + + using output_type = Subject; + static constexpr size_t position = Pos; + + constexpr auto operator+(placeholder) const noexcept { + if constexpr (Decision == decision::undecided) { + // parse for current char (RPos) with previous stack and subject :) + return parser::template decide({}, {}); + } else { + // if there is 
decision already => just push it to the end of fold expression + return *this; + } + } + }; + + template static constexpr auto get_current_term() noexcept { + if constexpr (Pos < input.size()) { + constexpr auto value = input[Pos]; + if constexpr (value <= static_cast((std::numeric_limits::max)())) { + return term(value)>{}; + } else { + return term{}; + } + + } else { + // return epsilon if we are past the input + return epsilon{}; + } + } + template static constexpr auto get_previous_term() noexcept { + if constexpr (Pos == 0) { + // there is no previous character on input if we are on start + return epsilon{}; + } else if constexpr ((Pos-1) < input.size()) { + constexpr auto value = input[Pos-1]; + if constexpr (value <= static_cast((std::numeric_limits::max)())) { + return term(value)>{}; + } else { + return term{}; + } + } else { + return epsilon{}; + } + } + // if rule is accept => return true and subject + template + static constexpr auto move(ctll::accept, Terminal, Stack, Subject) noexcept { + return typename parser::template results(); + } + // if rule is reject => return false and subject + template + static constexpr auto move(ctll::reject, Terminal, Stack, Subject) noexcept { + return typename parser::template results(); + } + // if rule is pop_input => move to next character + template + static constexpr auto move(ctll::pop_input, Terminal, Stack, Subject) noexcept { + return typename parser::template results(); + } + // if rule is string => push it to the front of stack + template + static constexpr auto move(push string, Terminal, Stack stack, Subject subject) noexcept { + return decide(push_front(string, stack), subject); + } + // if rule is epsilon (empty string) => continue + template + static constexpr auto move(epsilon, Terminal, Stack stack, Subject subject) noexcept { + return decide(stack, subject); + } + // if rule is string with current character at the beginning (term) => move to next character + // and push string without the character 
(quick LL(1)) + template + static constexpr auto move(push, Content...>, term, Stack stack, Subject) noexcept { + constexpr auto local_input = input; + return typename parser::template results(), stack)), Subject, decision::undecided>(); + } + // if rule is string with any character at the beginning (compatible with current term) => move to next character + // and push string without the character (quick LL(1)) + template + static constexpr auto move(push, term, Stack stack, Subject) noexcept { + constexpr auto local_input = input; + return typename parser::template results(), stack)), Subject, decision::undecided>(); + } + // decide if we need to take action or move + template static constexpr auto decide(Stack previous_stack, Subject previous_subject) noexcept { + // each call means we pop something from stack + auto top_symbol = decltype(ctll::front(previous_stack, empty_stack_symbol()))(); + // gcc pedantic warning + [[maybe_unused]] auto stack = decltype(ctll::pop_front(previous_stack))(); + + // in case top_symbol is action type (apply it on previous subject and get new one) + if constexpr (std::is_base_of_v) { + auto subject = Actions::apply(top_symbol, get_previous_term(), previous_subject); + + // in case that semantic action is error => reject input + if constexpr (std::is_same_v) { + return typename parser::template results(); + } else { + return decide(stack, subject); + } + } else { + // all other cases are ordinary for LL(1) parser + auto current_term = get_current_term(); + auto rule = decltype(grammar::rule(top_symbol,current_term))(); + return move(rule, current_term, stack, previous_subject); + } + } + + // trampolines with fold expression + template static constexpr auto trampoline_decide(Subject, std::index_sequence) noexcept { + // parse everything for first char and then for next and next ...
+ // Pos+1 is needed as we want to finish calculation with epsilons on stack + auto v = (decide<0, typename grammar::start_stack, Subject>({}, {}) + ... + index_placeholder()); + return v; + } + + template static constexpr auto trampoline_decide(Subject subject = {}) noexcept { + // there will be no recursion, just sequence long as the input + return trampoline_decide(subject, std::make_index_sequence()); + } + + template using output = decltype(trampoline_decide()); + template static inline constexpr bool correct_with = trampoline_decide(); + +}; + +} // end of ctll namespace + + +#endif + diff --git a/third_party/compile-time-regular-expressions/include/ctll/utilities.hpp b/third_party/compile-time-regular-expressions/include/ctll/utilities.hpp new file mode 100644 index 0000000000..808e2ffe2a --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctll/utilities.hpp @@ -0,0 +1,67 @@ +#ifndef CTLL__UTILITIES__HPP +#define CTLL__UTILITIES__HPP + +#ifndef CTLL_IN_A_MODULE +#include +#endif + +#ifdef CTLL_IN_A_MODULE +#define CTLL_EXPORT export +#else +#define CTLL_EXPORT +#endif + +#if defined __cpp_nontype_template_parameter_class + #define CTLL_CNTTP_COMPILER_CHECK 1 +#elif defined __cpp_nontype_template_args +// compiler which defines correctly feature test macro (not you clang) + #if __cpp_nontype_template_args >= 201911L + #define CTLL_CNTTP_COMPILER_CHECK 1 + #elif __cpp_nontype_template_args >= 201411L +// appleclang 13+ + #if defined __apple_build_version__ + #if defined __clang_major__ && __clang_major__ >= 13 +// but only in c++20 and more + #if __cplusplus > 201703L + #define CTLL_CNTTP_COMPILER_CHECK 1 + #endif + #endif + #else +// clang 12+ + #if defined __clang_major__ && __clang_major__ >= 12 +// but only in c++20 and more + #if __cplusplus > 201703L + #define CTLL_CNTTP_COMPILER_CHECK 1 + #endif + #endif + #endif + #endif +#endif + +#ifndef CTLL_CNTTP_COMPILER_CHECK + #define CTLL_CNTTP_COMPILER_CHECK 0 +#endif + +#ifdef _MSC_VER 
+#define CTLL_FORCE_INLINE __forceinline +#else +#define CTLL_FORCE_INLINE __attribute__((always_inline)) +#endif + +namespace ctll { + +template struct conditional_helper; + +template <> struct conditional_helper { + template using type = A; +}; + +template <> struct conditional_helper { + template using type = B; +}; + +template using conditional = typename conditional_helper::template type; + +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre-unicode.hpp b/third_party/compile-time-regular-expressions/include/ctre-unicode.hpp new file mode 100644 index 0000000000..5c8ac17b5f --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre-unicode.hpp @@ -0,0 +1,7 @@ +#ifndef CTRE_V2__CTRE_UNICODE__HPP +#define CTRE_V2__CTRE_UNICODE__HPP + +#include "ctre.hpp" +#include "unicode-db.hpp" + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre.hpp b/third_party/compile-time-regular-expressions/include/ctre.hpp new file mode 100644 index 0000000000..fcc4b0d8e1 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre.hpp @@ -0,0 +1,10 @@ +#ifndef CTRE_V2__CTRE__HPP +#define CTRE_V2__CTRE__HPP + +#include "ctre/literals.hpp" +#include "ctre/functions.hpp" +#include "ctre/iterators.hpp" +#include "ctre/range.hpp" +#include "ctre/operators.hpp" + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/asserts.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/asserts.inc.hpp new file mode 100644 index 0000000000..8b8077c8d3 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/asserts.inc.hpp @@ -0,0 +1,29 @@ +#ifndef CTRE__ACTIONS__ASSERTS__HPP +#define CTRE__ACTIONS__ASSERTS__HPP + +// push_assert_begin +template static constexpr auto apply(pcre::push_assert_begin, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(assert_line_begin(), subject.stack), 
subject.parameters}; +} + +// push_assert_end +template static constexpr auto apply(pcre::push_assert_end, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(assert_line_end(), subject.stack), subject.parameters}; +} + +// push_assert_subject_begin +template static constexpr auto apply(pcre::push_assert_subject_begin, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(assert_subject_begin(), subject.stack), subject.parameters}; +} + +// push_assert_subject_end +template static constexpr auto apply(pcre::push_assert_subject_end, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(assert_subject_end(), subject.stack), subject.parameters}; +} + +// push_assert_subject_end_with_lineend +template static constexpr auto apply(pcre::push_assert_subject_end_with_lineend, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(assert_subject_end_line(), subject.stack), subject.parameters}; +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/atomic_group.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/atomic_group.inc.hpp new file mode 100644 index 0000000000..f615c8ac4d --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/atomic_group.inc.hpp @@ -0,0 +1,19 @@ +#ifndef CTRE__ACTIONS__ATOMIC_GROUP__HPP +#define CTRE__ACTIONS__ATOMIC_GROUP__HPP + +// atomic start +template static constexpr auto apply(pcre::start_atomic, ctll::term, pcre_context, pcre_parameters>) { + return pcre_context{ctll::list(), pcre_parameters()}; +} + +// atomic +template static constexpr auto apply(pcre::make_atomic, ctll::term, pcre_context, pcre_parameters>) { + return pcre_context{ctll::list, Ts...>(), pcre_parameters()}; +} + +// atomic sequence +template static constexpr auto apply(pcre::make_atomic, ctll::term, pcre_context, atomic_start, Ts...>, pcre_parameters>) {
+ return pcre_context{ctll::list, Ts...>(), pcre_parameters()}; +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/backreference.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/backreference.inc.hpp new file mode 100644 index 0000000000..2a7de4d38d --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/backreference.inc.hpp @@ -0,0 +1,30 @@ +#ifndef CTRE__ACTIONS__BACKREFERENCE__HPP +#define CTRE__ACTIONS__BACKREFERENCE__HPP + +// backreference with name +template static constexpr auto apply(pcre::make_back_reference, ctll::term, pcre_context, Ts...>, pcre_parameters>) { + return pcre_context{ctll::push_front(back_reference_with_name>(), ctll::list()), pcre_parameters()}; +} + +// with just a number +template static constexpr auto apply(pcre::make_back_reference, ctll::term, pcre_context, Ts...>, pcre_parameters>) { + // if we are looking outside of existing list of Ids ... reject input during parsing + if constexpr (Counter < Id) { + return ctll::reject{}; + } else { + return pcre_context{ctll::push_front(back_reference(), ctll::list()), pcre_parameters()}; + } +} + +// relative backreference +template static constexpr auto apply(pcre::make_relative_back_reference, ctll::term, [[maybe_unused]] pcre_context, Ts...>, pcre_parameters>) { + // if we are looking outside of existing list of Ids ... 
reject input during parsing + if constexpr (Counter < Id) { + return ctll::reject{}; + } else { + constexpr size_t absolute_id = (Counter + 1) - Id; + return pcre_context{ctll::push_front(back_reference(), ctll::list()), pcre_parameters()}; + } +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/boundaries.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/boundaries.inc.hpp new file mode 100644 index 0000000000..097bc9dbeb --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/boundaries.inc.hpp @@ -0,0 +1,14 @@ +#ifndef CTRE__ACTIONS__BOUNDARIES__HPP +#define CTRE__ACTIONS__BOUNDARIES__HPP + +// push_word_boundary +template static constexpr auto apply(pcre::push_word_boundary, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(boundary(), subject.stack), subject.parameters}; +} + +// push_not_word_boundary +template static constexpr auto apply(pcre::push_not_word_boundary, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(boundary>(), subject.stack), subject.parameters}; +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/capture.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/capture.inc.hpp new file mode 100644 index 0000000000..ebf5437a68 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/capture.inc.hpp @@ -0,0 +1,40 @@ +#ifndef CTRE__ACTIONS__CAPTURE__HPP +#define CTRE__ACTIONS__CAPTURE__HPP + +// prepare_capture +template static constexpr auto apply(pcre::prepare_capture, ctll::term, pcre_context, pcre_parameters>) { + return pcre_context{ctll::push_front(capture_id(), ctll::list()), pcre_parameters()}; +} + +// reset_capture +template static constexpr auto apply(pcre::reset_capture, ctll::term, pcre_context, Ts...>, pcre_parameters>) { + return pcre_context{ctll::list(), 
pcre_parameters()}; +} + +// capture +template static constexpr auto apply(pcre::make_capture, ctll::term, pcre_context, Ts...>, pcre_parameters>) { + return pcre_context{ctll::push_front(capture(), ctll::list()), pcre_parameters()}; +} +// capture (sequence) +template static constexpr auto apply(pcre::make_capture, ctll::term, pcre_context, capture_id, Ts...>, pcre_parameters>) { + return pcre_context{ctll::push_front(capture(), ctll::list()), pcre_parameters()}; +} +// push_name +template static constexpr auto apply(pcre::push_name, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(id(), subject.stack), subject.parameters}; +} +// push_name (concat) +template static constexpr auto apply(pcre::push_name, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(id(), ctll::list()), subject.parameters}; +} +// capture with name +template static constexpr auto apply(pcre::make_capture_with_name, ctll::term, pcre_context, capture_id, Ts...>, pcre_parameters>) { + return pcre_context{ctll::push_front(capture_with_name, A>(), ctll::list()), pcre_parameters()}; +} +// capture with name (sequence) +template static constexpr auto apply(pcre::make_capture_with_name, ctll::term, pcre_context, id, capture_id, Ts...>, pcre_parameters>) { + return pcre_context{ctll::push_front(capture_with_name, Content...>(), ctll::list()), pcre_parameters()}; +} + + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/characters.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/characters.inc.hpp new file mode 100644 index 0000000000..4aa7ffd7a2 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/characters.inc.hpp @@ -0,0 +1,41 @@ +#ifndef CTRE__ACTIONS__CHARACTERS__HPP +#define CTRE__ACTIONS__CHARACTERS__HPP + +// push character +template static constexpr auto apply(pcre::push_character, ctll::term, pcre_context, 
Parameters> subject) { + return pcre_context{ctll::push_front(character(), subject.stack), subject.parameters}; +} +// push_any_character +template static constexpr auto apply(pcre::push_character_anything, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(any(), subject.stack), subject.parameters}; +} +// character_alarm +template static constexpr auto apply(pcre::push_character_alarm, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(character<'\x07'>(), subject.stack), subject.parameters}; +} +// character_escape (ASCII ESC is 0x1B, not 0x14) +template static constexpr auto apply(pcre::push_character_escape, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(character<'\x1B'>(), subject.stack), subject.parameters}; +} +// character_formfeed +template static constexpr auto apply(pcre::push_character_formfeed, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(character<'\x0C'>(), subject.stack), subject.parameters}; +} +// push_character_newline +template static constexpr auto apply(pcre::push_character_newline, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(character<'\x0A'>(), subject.stack), subject.parameters}; +} +// push_character_null +template static constexpr auto apply(pcre::push_character_null, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(character<'\0'>(), subject.stack), subject.parameters}; +} +// push_character_return_carriage +template static constexpr auto apply(pcre::push_character_return_carriage, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(character<'\x0D'>(), subject.stack), subject.parameters}; +} +// push_character_tab +template static constexpr auto apply(pcre::push_character_tab, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(character<'\x09'>(),
subject.stack), subject.parameters}; +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/class.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/class.inc.hpp new file mode 100644 index 0000000000..c52c550b3d --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/class.inc.hpp @@ -0,0 +1,51 @@ +#ifndef CTRE__ACTIONS__CLASS__HPP +#define CTRE__ACTIONS__CLASS__HPP + +// class_digit +template static constexpr auto apply(pcre::class_digit, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::set(), subject.stack), subject.parameters}; +} +// class_non_digit +template static constexpr auto apply(pcre::class_nondigit, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::negative_set(), subject.stack), subject.parameters}; +} +// class_space +template static constexpr auto apply(pcre::class_space, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::set(), subject.stack), subject.parameters}; +} +// class_nonspace +template static constexpr auto apply(pcre::class_nonspace, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::negative_set(), subject.stack), subject.parameters}; +} + +// class_horizontal_space +template static constexpr auto apply(pcre::class_horizontal_space, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::set(), subject.stack), subject.parameters}; +} +// class_horizontal_nonspace +template static constexpr auto apply(pcre::class_non_horizontal_space, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::negative_set(), subject.stack), subject.parameters}; +} +// class_vertical_space +template static constexpr auto apply(pcre::class_vertical_space, ctll::term, pcre_context, Parameters> subject) { + return 
pcre_context{ctll::push_front(ctre::set(), subject.stack), subject.parameters}; +} +// class_vertical_nonspace +template static constexpr auto apply(pcre::class_non_vertical_space, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::negative_set(), subject.stack), subject.parameters}; +} + +// class_word +template static constexpr auto apply(pcre::class_word, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::set(), subject.stack), subject.parameters}; +} +// class_nonword +template static constexpr auto apply(pcre::class_nonword, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::negative_set(), subject.stack), subject.parameters}; +} +// class_nonnewline +template static constexpr auto apply(pcre::class_nonnewline, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::negative_set>(), subject.stack), subject.parameters}; +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/fusion.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/fusion.inc.hpp new file mode 100644 index 0000000000..bccb20bf41 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/fusion.inc.hpp @@ -0,0 +1,70 @@ +#ifndef CTRE__ACTIONS__FUSION__HPP +#define CTRE__ACTIONS__FUSION__HPP + +static constexpr size_t combine_max_repeat_length(size_t A, size_t B) { + if (A && B) return A+B; + else return 0; +} + +template static constexpr auto combine_repeat(repeat, repeat) { + return repeat(); +} + +template static constexpr auto combine_repeat(lazy_repeat, lazy_repeat) { + return lazy_repeat(); +} + +template static constexpr auto combine_repeat(possessive_repeat, possessive_repeat) { + [[maybe_unused]] constexpr bool first_is_unbounded = (MaxA == 0); + [[maybe_unused]] constexpr bool second_is_nonempty = (MinB > 0); + [[maybe_unused]] constexpr 
bool second_can_be_empty = (MinB == 0); + + if constexpr (first_is_unbounded && second_is_nonempty) { + // will always reject, but I keep the content, so I have some amount of captures + return sequence(); + } else if constexpr (first_is_unbounded) { + return possessive_repeat(); + } else if constexpr (second_can_be_empty) { + return possessive_repeat(); + } else { + return possessive_repeat(); + } +} + +// concat repeat sequences +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context, repeat, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(combine_repeat(repeat(), repeat()), ctll::list()), subject.parameters}; +} + +// concat lazy repeat sequences +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context, lazy_repeat, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(combine_repeat(lazy_repeat(), lazy_repeat()), ctll::list()), subject.parameters}; +} + +// concat possessive repeat seqeunces +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context, possessive_repeat, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(combine_repeat(possessive_repeat(), possessive_repeat()), ctll::list()), subject.parameters}; +} + +// concat repeat sequences into sequence +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context, As...>,repeat,Ts...>, Parameters> subject) { + using result = decltype(combine_repeat(repeat(), repeat())); + + return pcre_context{ctll::push_front(sequence(), ctll::list()), subject.parameters}; +} + +// concat lazy repeat sequences into sequence +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context, As...>,lazy_repeat,Ts...>, Parameters> subject) { + using result = decltype(combine_repeat(lazy_repeat(), lazy_repeat())); + + return pcre_context{ctll::push_front(sequence(), ctll::list()), subject.parameters}; +} + +// concat possessive repeat sequences into 
sequence +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context, As...>,possessive_repeat,Ts...>, Parameters> subject) { + using result = decltype(combine_repeat(possessive_repeat(), possessive_repeat())); + + return pcre_context{ctll::push_front(sequence(), ctll::list()), subject.parameters}; +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/hexdec.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/hexdec.inc.hpp new file mode 100644 index 0000000000..ae30b6ee85 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/hexdec.inc.hpp @@ -0,0 +1,29 @@ +#ifndef CTRE__ACTIONS__HEXDEC__HPP +#define CTRE__ACTIONS__HEXDEC__HPP + +// hexdec character support (seed) +template static constexpr auto apply(pcre::create_hexdec, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(number<0ull>(), subject.stack), subject.parameters}; +} +// hexdec character support (push value) +template static constexpr auto apply(pcre::push_hexdec, ctll::term, pcre_context, Ts...>, Parameters> subject) { + constexpr auto previous = N << 4ull; + if constexpr (V >= 'a' && V <= 'f') { + return pcre_context{ctll::push_front(number<(previous + (V - 'a' + 10))>(), ctll::list()), subject.parameters}; + } else if constexpr (V >= 'A' && V <= 'F') { + return pcre_context{ctll::push_front(number<(previous + (V - 'A' + 10))>(), ctll::list()), subject.parameters}; + } else { + return pcre_context{ctll::push_front(number<(previous + (V - '0'))>(), ctll::list()), subject.parameters}; + } +} +// hexdec character support (convert to character) +template static constexpr auto apply(pcre::finish_hexdec, ctll::term, pcre_context, Ts...>, Parameters> subject) { + constexpr size_t max_char = (std::numeric_limits::max)(); + if constexpr (N <= max_char) { + return pcre_context{ctll::push_front(character(), ctll::list()), subject.parameters}; + } else { + 
return pcre_context{ctll::push_front(character(), ctll::list()), subject.parameters}; + } +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/look.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/look.inc.hpp new file mode 100644 index 0000000000..8786a77500 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/look.inc.hpp @@ -0,0 +1,68 @@ +#ifndef CTRE__ACTIONS__LOOKAHEAD__HPP +#define CTRE__ACTIONS__LOOKAHEAD__HPP + +// lookahead positive start +template static constexpr auto apply(pcre::start_lookahead_positive, ctll::term, pcre_context, pcre_parameters>) { + return pcre_context{ctll::list>, Ts...>(), pcre_parameters()}; +} + +// lookahead positive end +template static constexpr auto apply(pcre::look_finish, ctll::term, pcre_context>, Ts...>, pcre_parameters>) { + return pcre_context{ctll::list, Ts...>(), pcre_parameters()}; +} + +// lookahead positive end (sequence) +template static constexpr auto apply(pcre::look_finish, ctll::term, pcre_context, look_start>, Ts...>, pcre_parameters>) { + return pcre_context{ctll::list, Ts...>(), pcre_parameters()}; +} + +// lookahead negative start +template static constexpr auto apply(pcre::start_lookahead_negative, ctll::term, pcre_context, pcre_parameters>) { + return pcre_context{ctll::list>, Ts...>(), pcre_parameters()}; +} + +// lookahead negative end +template static constexpr auto apply(pcre::look_finish, ctll::term, pcre_context>, Ts...>, pcre_parameters>) { + return pcre_context{ctll::list, Ts...>(), pcre_parameters()}; +} + +// lookahead negative end (sequence) +template static constexpr auto apply(pcre::look_finish, ctll::term, pcre_context, look_start>, Ts...>, pcre_parameters>) { + return pcre_context{ctll::list, Ts...>(), pcre_parameters()}; +} + +// LOOKBEHIND + +// lookbehind positive start +template static constexpr auto apply(pcre::start_lookbehind_positive, ctll::term, pcre_context, pcre_parameters>) { + 
return pcre_context{ctll::list>, Ts...>(), pcre_parameters()}; +} + +// lookbehind positive end +template static constexpr auto apply(pcre::look_finish, ctll::term, pcre_context>, Ts...>, pcre_parameters>) { + return pcre_context{ctll::list, Ts...>(), pcre_parameters()}; +} + +// lookbehind positive end (sequence) +template static constexpr auto apply(pcre::look_finish, ctll::term, pcre_context, look_start>, Ts...>, pcre_parameters>) { + using my_lookbehind = decltype(ctre::convert_to_basic_list(ctll::rotate(ctll::list{}))); + return pcre_context{ctll::list(), pcre_parameters()}; +} + +// lookbehind negative start +template static constexpr auto apply(pcre::start_lookbehind_negative, ctll::term, pcre_context, pcre_parameters>) { + return pcre_context{ctll::list>, Ts...>(), pcre_parameters()}; +} + +// lookbehind negative end +template static constexpr auto apply(pcre::look_finish, ctll::term, pcre_context>, Ts...>, pcre_parameters>) { + return pcre_context{ctll::list, Ts...>(), pcre_parameters()}; +} + +// lookbehind negative end (sequence) +template static constexpr auto apply(pcre::look_finish, ctll::term, pcre_context, look_start>, Ts...>, pcre_parameters>) { + using my_lookbehind = decltype(ctre::convert_to_basic_list(ctll::rotate(ctll::list{}))); + return pcre_context{ctll::list(), pcre_parameters()}; +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/mode.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/mode.inc.hpp new file mode 100644 index 0000000000..d4e612b081 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/mode.inc.hpp @@ -0,0 +1,32 @@ +#ifndef CTRE__ACTIONS__MODE__HPP +#define CTRE__ACTIONS__MODE__HPP + +// we need to reset counter and wrap Mode into mode_switch +template static constexpr auto apply_mode(Mode, ctll::list, Parameters) { + return pcre_context, Ts...>, Parameters>{}; +} + +template static constexpr auto apply_mode(Mode, ctll::list, 
Ts...>, pcre_parameters) { + return pcre_context, Ts...>, pcre_parameters>{}; +} + +// catch a semantic action into mode +template static constexpr auto apply(pcre::mode_case_insensitive mode, ctll::term,pcre_context, Parameters>) { + return apply_mode(mode, ctll::list{}, Parameters{}); +} + +template static constexpr auto apply(pcre::mode_case_sensitive mode, ctll::term,pcre_context, Parameters>) { + return apply_mode(mode, ctll::list{}, Parameters{}); +} + +template static constexpr auto apply(pcre::mode_singleline mode, ctll::term,pcre_context, Parameters>) { + return apply_mode(mode, ctll::list{}, Parameters{}); +} + +template static constexpr auto apply(pcre::mode_multiline mode, ctll::term,pcre_context, Parameters>) { + return apply_mode(mode, ctll::list{}, Parameters{}); +} + +// to properly reset capture + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/named_class.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/named_class.inc.hpp new file mode 100644 index 0000000000..0be2710967 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/named_class.inc.hpp @@ -0,0 +1,61 @@ +#ifndef CTRE__ACTIONS__NAMED_CLASS__HPP +#define CTRE__ACTIONS__NAMED_CLASS__HPP + +// class_named_alnum +template static constexpr auto apply(pcre::class_named_alnum, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::alphanum_chars(), subject.stack), subject.parameters}; +} +// class_named_alpha +template static constexpr auto apply(pcre::class_named_alpha, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::alpha_chars(), subject.stack), subject.parameters}; +} +// class_named_digit +template static constexpr auto apply(pcre::class_named_digit, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::digit_chars(), subject.stack), subject.parameters}; +} +// 
class_named_ascii +template static constexpr auto apply(pcre::class_named_ascii, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::ascii_chars(), subject.stack), subject.parameters}; +} +// class_named_blank +template static constexpr auto apply(pcre::class_named_blank, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::enumeration<' ','\t'>(), subject.stack), subject.parameters}; +} +// class_named_cntrl +template static constexpr auto apply(pcre::class_named_cntrl, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::set, ctre::character<'\x7F'>>(), subject.stack), subject.parameters}; +} +// class_named_graph +template static constexpr auto apply(pcre::class_named_graph, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::char_range<'\x21','\x7E'>(), subject.stack), subject.parameters}; +} +// class_named_lower +template static constexpr auto apply(pcre::class_named_lower, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::char_range<'a','z'>(), subject.stack), subject.parameters}; +} +// class_named_upper +template static constexpr auto apply(pcre::class_named_upper, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::char_range<'A','Z'>(), subject.stack), subject.parameters}; +} +// class_named_print +template static constexpr auto apply(pcre::class_named_print, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::char_range<'\x20','\x7E'>(), subject.stack), subject.parameters}; +} +// class_named_space +template static constexpr auto apply(pcre::class_named_space, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(space_chars(), subject.stack), subject.parameters}; +} +// class_named_word +template static constexpr auto 
apply(pcre::class_named_word, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(word_chars(), subject.stack), subject.parameters}; +} +// class_named_punct +template static constexpr auto apply(pcre::class_named_punct, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(punct_chars(), subject.stack), subject.parameters}; +} +// class_named_xdigit +template static constexpr auto apply(pcre::class_named_xdigit, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(xdigit_chars(), subject.stack), subject.parameters}; +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/options.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/options.inc.hpp new file mode 100644 index 0000000000..93ef48ac33 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/options.inc.hpp @@ -0,0 +1,55 @@ +#ifndef CTRE__ACTIONS__OPTIONS__HPP +#define CTRE__ACTIONS__OPTIONS__HPP + +// empty option for alternate +template static constexpr auto apply(pcre::push_empty, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(empty(), subject.stack), subject.parameters}; +} + +// empty option for empty regex +template static constexpr auto apply(pcre::push_empty, ctll::epsilon, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(empty(), subject.stack), subject.parameters}; +} + +// make_alternate (A|B) +template static constexpr auto apply(pcre::make_alternate, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(select(), ctll::list()), subject.parameters}; +} +// make_alternate (As..)|B => (As..|B) +template static constexpr auto apply(pcre::make_alternate, ctll::term, pcre_context, A, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(select(), ctll::list()), subject.parameters}; +} + + +// 
make_optional +template static constexpr auto apply(pcre::make_optional, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(optional(), ctll::list()), subject.parameters}; +} + +template static constexpr auto apply(pcre::make_optional, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(optional(), ctll::list()), subject.parameters}; +} + +// prevent from creating wrapped optionals +template static constexpr auto apply(pcre::make_optional, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(optional(), ctll::list()), subject.parameters}; +} + +// in case inner optional is lazy, result should be lazy too +template static constexpr auto apply(pcre::make_optional, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(lazy_optional(), ctll::list()), subject.parameters}; +} + +// make_lazy (optional) +template static constexpr auto apply(pcre::make_lazy, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(lazy_optional(), ctll::list()), subject.parameters}; +} + +// if you already got a lazy optional, make_lazy is no-op +template static constexpr auto apply(pcre::make_lazy, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(lazy_optional(), ctll::list()), subject.parameters}; +} + + + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/properties.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/properties.inc.hpp new file mode 100644 index 0000000000..05f7e80164 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/properties.inc.hpp @@ -0,0 +1,73 @@ +#ifndef CTRE__ACTIONS__PROPERTIES__HPP +#define CTRE__ACTIONS__PROPERTIES__HPP + +// push_property_name +template static constexpr auto apply(pcre::push_property_name, 
ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(property_name(), subject.stack), subject.parameters}; +} +// push_property_name (concat) +template static constexpr auto apply(pcre::push_property_name, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(property_name(), ctll::list()), subject.parameters}; +} + +// push_property_value +template static constexpr auto apply(pcre::push_property_value, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(property_value(), subject.stack), subject.parameters}; +} +// push_property_value (concat) +template static constexpr auto apply(pcre::push_property_value, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(property_value(), ctll::list()), subject.parameters}; +} + +// make_property +template static constexpr auto apply(pcre::make_property, ctll::term, [[maybe_unused]] pcre_context, Ts...>, Parameters> subject) { + //return ctll::reject{}; + constexpr char name[sizeof...(Name)]{static_cast(Name)...}; + constexpr auto p = uni::detail::binary_prop_from_string(get_string_view(name)); + + if constexpr (uni::detail::is_unknown(p)) { + return ctll::reject{}; + } else { + return pcre_context{ctll::push_front(make_binary_property

(), ctll::list()), subject.parameters}; + } +} + +// make_property +template static constexpr auto apply(pcre::make_property, ctll::term, [[maybe_unused]] pcre_context, property_name, Ts...>, Parameters> subject) { + //return ctll::reject{}; + constexpr auto prop = property_builder::template get(); + + if constexpr (std::is_same_v) { + return ctll::reject{}; + } else { + return pcre_context{ctll::push_front(prop, ctll::list()), subject.parameters}; + } +} + + +// make_property_negative +template static constexpr auto apply(pcre::make_property_negative, ctll::term, [[maybe_unused]] pcre_context, Ts...>, Parameters> subject) { + //return ctll::reject{}; + constexpr char name[sizeof...(Name)]{static_cast(Name)...}; + constexpr auto p = uni::detail::binary_prop_from_string(get_string_view(name)); + + if constexpr (uni::detail::is_unknown(p)) { + return ctll::reject{}; + } else { + return pcre_context{ctll::push_front(negate>(), ctll::list()), subject.parameters}; + } +} + +// make_property_negative +template static constexpr auto apply(pcre::make_property_negative, ctll::term, [[maybe_unused]] pcre_context, property_name, Ts...>, Parameters> subject) { + //return ctll::reject{}; + constexpr auto prop = property_builder::template get(); + + if constexpr (std::is_same_v) { + return ctll::reject{}; + } else { + return pcre_context{ctll::push_front(negate(), ctll::list()), subject.parameters}; + } +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/repeat.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/repeat.inc.hpp new file mode 100644 index 0000000000..98c2e0bc9d --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/repeat.inc.hpp @@ -0,0 +1,90 @@ +#ifndef CTRE__ACTIONS__REPEAT__HPP +#define CTRE__ACTIONS__REPEAT__HPP + +// repeat 1..N +template static constexpr auto apply(pcre::repeat_plus, ctll::term, pcre_context, Parameters> subject) { + return 
pcre_context{ctll::push_front(plus(), ctll::list()), subject.parameters}; +} +// repeat 1..N (sequence) +template static constexpr auto apply(pcre::repeat_plus, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(plus(), ctll::list()), subject.parameters}; +} + +// repeat 0..N +template static constexpr auto apply(pcre::repeat_star, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(star(), ctll::list()), subject.parameters}; +} +// repeat 0..N (sequence) +template static constexpr auto apply(pcre::repeat_star, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(star(), ctll::list()), subject.parameters}; +} + +// create_number (seed) +template static constexpr auto apply(pcre::create_number, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(number(V - '0')>(), subject.stack), subject.parameters}; +} +// push_number +template static constexpr auto apply(pcre::push_number, ctll::term, pcre_context, Ts...>, Parameters> subject) { + constexpr size_t previous = N * 10ull; + return pcre_context{ctll::push_front(number<(previous + (V - '0'))>(), ctll::list()), subject.parameters}; +} + +// repeat A..B +template static constexpr auto apply(pcre::repeat_ab, ctll::term, pcre_context, number, Subject, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(repeat(), ctll::list()), subject.parameters}; +} +// repeat A..B (sequence) +template static constexpr auto apply(pcre::repeat_ab, ctll::term, pcre_context, number, sequence, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(repeat(), ctll::list()), subject.parameters}; +} + +// repeat_exactly +template static constexpr auto apply(pcre::repeat_exactly, ctll::term, pcre_context, Subject, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(repeat(), ctll::list()), subject.parameters}; +} +// repeat_exactly A..B 
(sequence) +template static constexpr auto apply(pcre::repeat_exactly, ctll::term, pcre_context, sequence, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(repeat(), ctll::list()), subject.parameters}; +} + +// repeat_at_least (A+) +template static constexpr auto apply(pcre::repeat_at_least, ctll::term, pcre_context, Subject, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(repeat(), ctll::list()), subject.parameters}; +} +// repeat_at_least (A+) (sequence) +template static constexpr auto apply(pcre::repeat_at_least, ctll::term, pcre_context, sequence, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(repeat(), ctll::list()), subject.parameters}; +} + +// make_lazy (plus) +template static constexpr auto apply(pcre::make_lazy, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(lazy_plus(), ctll::list()), subject.parameters}; +} + +// make_lazy (star) +template static constexpr auto apply(pcre::make_lazy, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(lazy_star(), ctll::list()), subject.parameters}; +} + +// make_lazy (repeat) +template static constexpr auto apply(pcre::make_lazy, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(lazy_repeat(), ctll::list()), subject.parameters}; +} + +// make_possessive (plus) +template static constexpr auto apply(pcre::make_possessive, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(possessive_plus(), ctll::list()), subject.parameters}; +} + +// make_possessive (star) +template static constexpr auto apply(pcre::make_possessive, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(possessive_star(), ctll::list()), subject.parameters}; +} + +// make_possessive (repeat) +template static constexpr auto apply(pcre::make_possessive, ctll::term, 
pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(possessive_repeat(), ctll::list()), subject.parameters}; +} + + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/sequence.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/sequence.inc.hpp new file mode 100644 index 0000000000..f590432e20 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/sequence.inc.hpp @@ -0,0 +1,32 @@ +#ifndef CTRE__ACTIONS__SEQUENCE__HPP +#define CTRE__ACTIONS__SEQUENCE__HPP + +// make_sequence +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(sequence(), ctll::list()), subject.parameters}; +} +// make_sequence (concat) +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context,A,Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(sequence(), ctll::list()), subject.parameters}; +} + +// make_sequence (make string) +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context,character,Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(string(), ctll::list()), subject.parameters}; +} +// make_sequence (concat string) +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context,character,Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(string(), ctll::list()), subject.parameters}; +} + +// make_sequence (make string in front of different items) +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context,Sq...>,character,Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(sequence,Sq...>(), ctll::list()), subject.parameters}; +} +// make_sequence (concat string in front of different items) +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context,Sq...>,character,Ts...>, 
Parameters> subject) { + return pcre_context{ctll::push_front(sequence,Sq...>(), ctll::list()), subject.parameters}; +} + + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/set.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/set.inc.hpp new file mode 100644 index 0000000000..a5a08d14d9 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/set.inc.hpp @@ -0,0 +1,66 @@ +#ifndef CTRE__ACTIONS__SET__HPP +#define CTRE__ACTIONS__SET__HPP + +// UTILITY +// add into set if not exists +template