diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000000..494e6e2cf1 --- /dev/null +++ b/.clang-format @@ -0,0 +1,242 @@ +--- +BasedOnStyle: Microsoft +AccessModifierOffset: -4 +AlignAfterOpenBracket: Align +AlignArrayOfStructures: Left +AlignConsecutiveAssignments: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: true +AlignConsecutiveBitFields: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveDeclarations: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveMacros: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveShortCaseStatements: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCaseColons: false +AlignEscapedNewlines: Right +AlignOperands: Align +AlignTrailingComments: + Kind: Always + OverEmptyLines: 0 +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowBreakBeforeNoexceptSpecifier: Never +AllowShortBlocksOnASingleLine: Empty +AllowShortCaseLabelsOnASingleLine: false +AllowShortCompoundRequirementOnASingleLine: true +AllowShortEnumsOnASingleLine: false +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AttributeMacros: + - __capability +BinPackArguments: true +BinPackParameters: true +BitFieldColonSpacing: Both +BraceWrapping: + AfterCaseLabel: false + AfterClass: true + AfterControlStatement: Always + AfterEnum: true + AfterFunction: true + AfterNamespace: true + 
AfterObjCDeclaration: true + AfterStruct: true + AfterUnion: false + AfterExternBlock: true + BeforeCatch: true + BeforeElse: true + BeforeLambdaBody: true + BeforeWhile: true + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakAdjacentStringLiterals: true +BreakAfterAttributes: Leave +BreakAfterJavaFieldAnnotations: false +BreakArrays: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Custom +BreakBeforeConceptDeclarations: Always +BreakBeforeInlineASMColon: OnlyMultiline +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: BeforeColon +BreakInheritanceList: BeforeColon +BreakStringLiterals: true +ColumnLimit: 120 +CommentPragmas: "^ IWYU pragma:" +CompactNamespaces: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Preserve +IncludeCategories: + - Regex: ^"(llvm|llvm-c|clang|clang-c)/ + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: ^(<|"(gtest|gmock|isl|json)/) + Priority: 3 + SortPriority: 0 + CaseSensitive: false + - Regex: .* + Priority: 1 + SortPriority: 0 + CaseSensitive: false +IncludeIsMainRegex: (Test)?$ +IncludeIsMainSourceRegex: "" +IndentAccessModifiers: false +IndentCaseBlocks: true +IndentCaseLabels: false +IndentExternBlock: NoIndent +IndentGotoLabels: true +IndentPPDirectives: None +IndentRequiresClause: true +IndentWidth: 4 +IndentWrappedFunctionNames: true +InsertBraces: true +InsertNewlineAtEOF: true +InsertTrailingCommas: None +IntegerLiteralSeparator: + Binary: 0 + BinaryMinDigits: 0 + Decimal: 0 + DecimalMinDigits: 0 + Hex: 0 + HexMinDigits: 0 +JavaScriptQuotes: Leave +JavaScriptWrapImports: true 
+LambdaBodyIndentation: OuterScope +Language: Cpp +LineEnding: DeriveLF +MacroBlockBegin: "" +MacroBlockEnd: "" +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: All +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PPIndentWidth: -1 +PackConstructorInitializers: BinPack +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakScopeResolution: 500 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 20 +PenaltyExcessCharacter: 1000000 +PenaltyIndentedWhitespace: 0 +PenaltyReturnTypeOnItsOwnLine: 1000 +PointerAlignment: Left +QualifierAlignment: Leave +ReferenceAlignment: Pointer +ReflowComments: true +RemoveBracesLLVM: false +RemoveParentheses: Leave +RemoveSemicolon: false +RequiresClausePosition: WithPreceding +RequiresExpressionIndentation: OuterScope +SeparateDefinitionBlocks: Leave +ShortNamespaceLines: 1 +SkipMacroDefinitionBody: false +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: LexicographicNumeric +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: false +SpaceAroundPointerQualifiers: Default +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeJsonColon: false +SpaceBeforeParens: ControlStatements +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterForeachMacros: true + AfterFunctionDeclarationName: false + AfterFunctionDefinitionName: false + AfterIfMacros: true + AfterOverloadedOperator: false + AfterPlacementOperator: true + AfterRequiresInClause: true + AfterRequiresInExpression: true + BeforeNonEmptyParentheses: true +SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false 
+SpaceInEmptyBlock: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: Never +SpacesInContainerLiterals: true +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParens: Never +SpacesInParensOptions: + InConditionalStatements: false + InCStyleCasts: false + InEmptyParentheses: false + Other: false +SpacesInSquareBrackets: false +Standard: Latest +StatementAttributeLikeMacros: + - Q_EMIT +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 4 +UseTab: ForContinuationAndIndentation +VerilogBreakBetweenInstancePorts: true +WhitespaceSensitiveMacros: + - BOOST_PP_STRINGIZE + - CF_SWIFT_NAME + - NS_SWIFT_NAME + - PP_STRINGIZE + - STRINGIZE +NamespaceMacros: + - Q_NAMESPACE diff --git a/CMakeLists.txt b/CMakeLists.txt index b07339c4cc..c48f4b3cbf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -67,7 +67,8 @@ option(ENABLE_USE_WOLFSSL "Build with wolfssl support" OFF) option(ENABLE_MOLD "Build using mold" OFF) option(ENABLE_LLD "Build using lld" OFF) -option(ENABLE_STATIC_LINK_TO_GCC "Build static link to gcc" ON) +option(ENABLE_STATIC "Build static binary" OFF) +option(ENABLE_STATIC_LINK_TO_GCC "Build static link to gcc" OFF) option(ENABLE_TCMALLOC_STATIC "Build with Tcmalloc support" OFF) option(ENABLE_JEMALLOC_STATIC "Build with Jemalloc support" OFF) @@ -190,6 +191,8 @@ if (MSVC) set(CMAKE_CXX_STANDARD_REQUIRED ON) message(STATUS "Set default cxx standard: C++17") endif() + + add_compile_options(/utf-8) endif() ################################################################################ @@ -240,10 +243,13 @@ if (NOT MSVC) message(STATUS "clang supports compiler-rt, use it") add_link_options(-rtlib=compiler-rt -unwindlib=libunwind) endif() - else() + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libstdc++ -static-libgcc") + endif() + elseif(ENABLE_STATIC_LINK_TO_GCC) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libstdc++ -static-libgcc") endif() elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if 
(ENABLE_STATIC) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static") + endif() if (MINGW) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static -static-libstdc++") else() @@ -273,14 +279,14 @@ if (NOT MSVC) if (ENABLE_MOLD) if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") if (MOLD_LINKER) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=mold") endif() endif() endif() if (ENABLE_LLD) if (LLD_LINKER) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=lld") endif() endif() endif(NOT MSVC) @@ -410,6 +416,8 @@ if (ENABLE_USE_IO_URING) message(STATUS "Linux using io_uring...") add_definitions(-DBOOST_ASIO_HAS_IO_URING -DBOOST_ASIO_DISABLE_EPOLL) link_libraries(${IOUring_LIBRARIES}) + else() + message(FATAL_ERROR "io_uring librariy not found") endif() endif() @@ -480,6 +488,12 @@ add_subdirectory(third_party/fmt) set(FMT_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/third_party/fmt/include) set(FMT_LIBRARIES fmt) +set(CTRE_BUILD_TESTS OFF) +set(CTRE_BUILD_PACKAGE OFF) +add_subdirectory(third_party/compile-time-regular-expressions) + +link_libraries(ctre) inline void user_rate_limit_config(const std::string& user) { @@ -4637,6 +2876,8 @@ R"x*x*x( init_ssl_context(); + boost::nowide::nowide_filesystem(); + boost::system::error_code ec; if (fs::exists(m_option.ipip_db_, ec)) @@ -4695,7 +2936,7 @@ R"x*x*x( { pem_file result{ filepath, pem_type::none }; + boost::nowide::ifstream file(filepath); if (!file.is_open()) return result; The strutil namespace namespace strutil { @@ -438,6 +440,48 @@ namespace strutil return tokens; } + /** + * @brief Splits input std::string str according to input std::string delim. + * Taken from: https://stackoverflow.com/a/46931770/1892346. + * @param str - std::string that will be split. + * @param delim - the delimiter. + * @param output - the insert iterator to insert splited token + * @return splitted tokens count. 
+ */ + template + inline std::size_t split(std::string_view str, std::string_view delim, InsertIterator output) + { + size_t pos_start = 0, pos_end, delim_len = delim.length(); + std::string_view token; + std::size_t split_count = 0; + + while ((pos_end = str.find(delim, pos_start)) != std::string::npos) + { + token = str.substr(pos_start, pos_end - pos_start); + pos_start = pos_end + delim_len; + output = token; + split_count ++; + } + + output = str.substr(pos_start); + split_count ++; + return split_count; + } + + inline std::string_view remove_spaces(std::string_view str) + { + auto start_pos = 0; + auto end_pos = str.size(); + + while ( std::isspace( str[start_pos] ) ) + start_pos ++; + + while ( std::isspace( str[end_pos-1] ) ) + end_pos --; + + return str.substr(start_pos, end_pos); + } + /** * @brief Splits input string using regex as a delimiter. * @param src - std::string that will be split. diff --git a/proxy/src/proxy_server.cpp b/proxy/src/proxy_server.cpp new file mode 100644 index 0000000000..234f35105a --- /dev/null +++ b/proxy/src/proxy_server.cpp @@ -0,0 +1,1792 @@ + + +#include "proxy/proxy_server.hpp" + +#include +#include +#include "ctre.hpp" + +namespace proxy +{ + static const char fake_404_content[] = +R"x*x*x(404 Not Found + +

404 Not Found

+ +)x*x*x"; + + static const char fake_502_content[] = +R"xx( +502 Bad Gateway + +

502 Bad Gateway

+ +)xx"; + + inline constexpr auto head_fmt = + R"(Index of {}

Index of {}

)"; + inline constexpr auto tail_fmt = + "

"; + inline constexpr auto body_fmt = + // "{}{} {} {}\r\n"; + "{}{}{}\r\n"; + + + inline std::string file_hash(const fs::path& p, boost::system::error_code& ec) + { + ec = {}; + + boost::nowide::ifstream file(p.string(), std::ios::binary); + if (!file) + { + ec = boost::system::error_code(errno, + boost::system::generic_category()); + return {}; + } + + boost::uuids::detail::sha1 sha1; + const auto buf_size = 1024 * 1024 * 4; + std::unique_ptr bufs((char*)std::malloc(buf_size), &std::free); + + while (file.read(bufs.get(), buf_size) || file.gcount()) + sha1.process_bytes(bufs.get(), file.gcount()); + + boost::uuids::detail::sha1::digest_type hash; + sha1.get_digest(hash); + + std::stringstream ss; + for (auto const& c : hash) + ss << std::hex << std::setfill('0') << std::setw(2) << static_cast(c); + + return ss.str(); + } + + template + inline auto async_hash_file(const fs::path& path, CompletionToken&& token) + { + return net::async_initiate( + [path](auto&& handler) mutable + { + std::thread( + [path, handler = std::move(handler)]() mutable + { + boost::system::error_code ec; + + auto hash = file_hash(path, ec); + + auto executor = net::get_associated_executor(handler); + net::post(executor, [ + ec = std::move(ec), + hash = std::move(hash), + handler = std::move(handler)]() mutable + { + handler(ec, hash); + }); + } + ).detach(); + }, token); + } + + net::awaitable proxy_session::connect_bridge_proxy(tcp::socket& remote_socket, std::string target_host, + uint16_t target_port, boost::system::error_code& ec) + { + auto executor = co_await net::this_coro::executor; + + tcp::resolver resolver{executor}; + + auto proxy_host = std::string(m_bridge_proxy->host()); + std::string proxy_port; + if (m_bridge_proxy->port_number() == 0) + { + proxy_port = std::to_string(urls::default_port(m_bridge_proxy->scheme_id())); + } + else + { + proxy_port = std::to_string(m_bridge_proxy->port_number()); + } + if (proxy_port.empty()) + { + proxy_port = m_bridge_proxy->scheme(); + } + 
+ XLOG_DBG << "connection id: " << m_connection_id << ", connect to next proxy: " << proxy_host << ":" + << proxy_port; + + tcp::resolver::results_type targets; + + if (!detect_hostname(proxy_host)) + { + net::ip::tcp::endpoint endp(net::ip::address::from_string(proxy_host), + m_bridge_proxy->port_number() + ? m_bridge_proxy->port_number() + : urls::default_port(m_bridge_proxy->scheme_id())); + + targets = tcp::resolver::results_type::create(endp, proxy_host, m_bridge_proxy->scheme()); + } + else + { + targets = co_await resolver.async_resolve(proxy_host, proxy_port, net_awaitable[ec]); + + if (ec) + { + XLOG_FWARN("connection id: {}," + " resolver to next proxy {}:{} error: {}", + m_connection_id, std::string(m_bridge_proxy->host()), std::string(m_bridge_proxy->port()), + ec.message()); + + co_return false; + } + } + + if (m_option.happyeyeballs_) + { + co_await asio_util::async_connect(remote_socket, targets, [this](const auto& ec, auto& stream, auto& endp) + { return check_condition(ec, stream, endp); }, net_awaitable[ec]); + } + else + { + for (auto endpoint : targets) + { + ec = boost::asio::error::host_not_found; + + if (m_option.connect_v4_only_) + { + if (endpoint.endpoint().address().is_v6()) + { + continue; + } + } + else if (m_option.connect_v6_only_) + { + if (endpoint.endpoint().address().is_v4()) + { + continue; + } + } + + boost::system::error_code ignore_ec; + remote_socket.close(ignore_ec); + + if (m_bind_interface) + { + tcp::endpoint bind_endpoint(*m_bind_interface, 0); + + remote_socket.open(bind_endpoint.protocol(), ec); + if (ec) + { + break; + } + + remote_socket.bind(bind_endpoint, ec); + if (ec) + { + break; + } + } + + co_await remote_socket.async_connect(endpoint, net_awaitable[ec]); + if (!ec) + { + break; + } + } + } + + if (ec) + { + XLOG_FWARN("connection id: {}," + " connect to next proxy {}:{} error: {}", + m_connection_id, std::string(m_bridge_proxy->host()), std::string(m_bridge_proxy->port()), + ec.message()); + + co_return 
false; + } + + XLOG_DBG << "connection id: " << m_connection_id << ", connect to next proxy: " << proxy_host << ":" + << proxy_port << " success"; + + // 如果启用了 noise, 则在向上游代理服务器发起 tcp 连接成功后, 发送 noise + // 数据以及接收 noise 数据. + if (m_option.scramble_) + { + if (!co_await noise_handshake(remote_socket, m_outin_key, m_outout_key)) + { + co_return false; + } + + XLOG_DBG << "connection id: " << m_connection_id << ", with upstream noise completed"; + } + + // 使用ssl加密与下一级代理通信. + if (m_option.proxy_pass_use_ssl_) + { + // 设置 ssl cert 证书目录. + if (fs::exists(m_option.ssl_cacert_path_)) + { + m_ssl_cli_context.add_verify_path(m_option.ssl_cacert_path_, ec); + if (ec) + { + XLOG_FWARN("connection id: {}, " + "load cert path: {}, " + "error: {}", + m_connection_id, m_option.ssl_cacert_path_, ec.message()); + + co_return false; + } + } + } + + auto scheme = m_bridge_proxy->scheme(); + + auto instantiate_stream = [this, &scheme, &proxy_host, &remote_socket, + &ec]() mutable -> net::awaitable + { + ec = {}; + + XLOG_DBG << "connection id: " << m_connection_id << ", connect to next proxy: " << proxy_host + << " instantiate stream"; + + if (m_option.proxy_pass_use_ssl_ || scheme == "https") + { + m_ssl_cli_context.set_verify_mode(net::ssl::verify_peer); + auto cert = default_root_certificates(); + m_ssl_cli_context.add_certificate_authority(net::buffer(cert.data(), cert.size()), ec); + if (ec) + { + XLOG_FWARN("connection id: {}," + " add_certificate_authority error: {}", + m_connection_id, ec.message()); + } + + m_ssl_cli_context.use_tmp_dh(net::buffer(default_dh_param()), ec); + + m_ssl_cli_context.set_verify_callback(net::ssl::rfc2818_verification(proxy_host), ec); + if (ec) + { + XLOG_FWARN("connection id: {}," + " set_verify_callback error: {}", + m_connection_id, ec.message()); + } + + // 生成 ssl socket 对象. + auto sock_stream = init_proxy_stream(std::move(remote_socket), m_ssl_cli_context); + + // get origin ssl stream type. 
+ ssl_stream& ssl_socket = boost::variant2::get(sock_stream); + + if (m_option.scramble_) + { + auto& next_layer = ssl_socket.next_layer(); + + using NextLayerType = std::decay_t; + + if constexpr (!std::same_as) + { + next_layer.set_scramble_key(m_outout_key); + + next_layer.set_unscramble_key(m_outin_key); + } + } + + std::string sni = m_option.proxy_ssl_name_.empty() ? proxy_host : m_option.proxy_ssl_name_; + + // Set SNI Hostname. + if (!SSL_set_tlsext_host_name(ssl_socket.native_handle(), sni.c_str())) + { + XLOG_FWARN("connection id: {}," + " SSL_set_tlsext_host_name error: {}", + m_connection_id, ::ERR_get_error()); + } + + XLOG_DBG << "connection id: " << m_connection_id << ", do async ssl handshake..."; + + // do async handshake. + co_await ssl_socket.async_handshake(net::ssl::stream_base::client, net_awaitable[ec]); + if (ec) + { + XLOG_FWARN("connection id: {}," + " ssl client protocol handshake error: {}", + m_connection_id, ec.message()); + } + + XLOG_FDBG("connection id: {}, ssl handshake: {}", m_connection_id, proxy_host); + + co_return sock_stream; + } + + auto sock_stream = init_proxy_stream(std::move(remote_socket)); + + auto& sock = boost::variant2::get(sock_stream); + + if (m_option.scramble_) + { + using NextLayerType = std::decay_t; + + if constexpr (!std::same_as) + { + sock.set_scramble_key(m_outout_key); + + sock.set_unscramble_key(m_outin_key); + } + } + + co_return sock_stream; + }; + + m_remote_socket = std::move(co_await instantiate_stream()); + + XLOG_DBG << "connection id: " << m_connection_id << ", connect to next proxy: " << proxy_host << ":" + << proxy_port << " start upstream handshake with " << std::string(scheme); + + if (scheme.starts_with("socks")) + { + socks_client_option opt; + + opt.target_host = target_host; + opt.target_port = target_port; + opt.proxy_hostname = true; + opt.username = std::string(m_bridge_proxy->user()); + opt.password = std::string(m_bridge_proxy->password()); + + if (scheme == "socks4") + { + 
opt.version = socks4_version; + } + else if (scheme == "socks4a") + { + opt.version = socks4a_version; + } + + co_await async_socks_handshake(m_remote_socket, opt, net_awaitable[ec]); + } + else if (scheme.starts_with("http")) + { + http_proxy_client_option opt; + + opt.target_host = target_host; + opt.target_port = target_port; + opt.username = std::string(m_bridge_proxy->user()); + opt.password = std::string(m_bridge_proxy->password()); + + co_await async_http_proxy_handshake(m_remote_socket, opt, net_awaitable[ec]); + } + + if (ec) + { + XLOG_FWARN("connection id: {}" + ", {} connect to next host {}:{} error: {}", + m_connection_id, std::string(scheme), target_host, target_port, ec.message()); + + co_return false; + } + + co_return true; + } + + net::awaitable proxy_session::on_http_json(const http_context& hctx) + { + boost::system::error_code ec; + auto& request = hctx.request_; + + auto target = make_real_target_path(hctx.command_[1]); + + std::array pre_alloc_buf; + std::pmr::monotonic_buffer_resource mbr(pre_alloc_buf.data(), pre_alloc_buf.size()); + std::pmr::polymorphic_allocator alloc(&mbr); + + fs::directory_iterator end; + fs::directory_iterator it(target, ec); + if (ec) + { + string_response res{ + std::piecewise_construct, + std::make_tuple(alloc), + std::make_tuple(http::status::found, request.version(), alloc) + }; + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(alloc)); + res.set(http::field::location, "/"); + res.keep_alive(request.keep_alive()); + res.prepare_payload(); + + string_response_serializer sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", http_dir write location err: " << ec.message(); + } + + co_return; + } + + bool hash = false; + + urls::params_view qp(hctx.command_[3]); + if (qp.find("hash") != qp.end()) + { + hash = true; + } + + boost::json::array path_list; + + for (; it != end && 
!m_abort; it++) + { + const auto& item = it->path(); + boost::json::object obj; + + auto [ftime, unc_path] = file_last_wirte_time(item); + obj["last_write_time"] = ftime; + + if (fs::is_directory(unc_path.empty() ? item : unc_path, ec)) + { + obj["filename"] = item.filename().string(); + obj["is_dir"] = true; + } + else + { + obj["filename"] = item.filename().string(); + obj["is_dir"] = false; + if (unc_path.empty()) + { + unc_path = item; + } + auto sz = fs::file_size(unc_path, ec); + if (ec) + { + sz = 0; + } + obj["filesize"] = sz; + if (hash) + { + auto ret = co_await async_hash_file(unc_path, net_awaitable[ec]); + if (ec) + { + ret = ""; + } + obj["hash"] = ret; + } + } + + path_list.push_back(obj); + } + + auto body = boost::json::serialize(path_list); + + string_response res{ + std::piecewise_construct, + std::make_tuple(alloc), + std::make_tuple(http::status::ok, request.version(), alloc) + }; + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(alloc)); + res.set(http::field::content_type, "application/json"); + res.keep_alive(request.keep_alive()); + res.body() = body; + res.prepare_payload(); + + string_response_serializer sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", http dir write body err: " << ec.message(); + } + + co_return; + } + + net::awaitable proxy_session::on_http_dir(const http_context& hctx) + { + using namespace std::literals; + + boost::system::error_code ec; + auto& request = hctx.request_; + + std::array pre_alloc_buf; + std::pmr::monotonic_buffer_resource mbr(pre_alloc_buf.data(), pre_alloc_buf.size()); + std::pmr::polymorphic_allocator alloc(&mbr); + + // 查找目录下是否存在 index.html 或 index.htm 文件, 如果存在则返回该文件. + // 否则返回目录下的文件列表. + auto index_html = fs::path(hctx.target_path_) / "index.html"; + fs::exists(index_html, ec) ? 
index_html = index_html : index_html = fs::path(hctx.target_path_) / "index.htm"; + + if (fs::exists(index_html, ec)) + { + boost::nowide::ifstream file(index_html.string(), std::ios::binary); + if (file) + { + std::pmr::string content((std::istreambuf_iterator(file)), std::istreambuf_iterator(), + alloc); + + string_response res{ + std::piecewise_construct, + std::make_tuple(content, alloc), + std::make_tuple(http::status::ok, request.version(), alloc) + }; + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(alloc)); + auto ext = strutil::to_lower(index_html.extension().string()); + if (global_mimes.count(ext)) + { + res.set(http::field::content_type, global_mimes.at(ext)); + } + else + { + res.set(http::field::content_type, "text/plain"); + } + res.keep_alive(request.keep_alive()); + res.prepare_payload(); + + string_response_serializer sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", http dir write index err: " << ec.message(); + } + + co_return; + } + } + + auto path_list = format_path_list(hctx.target_path_, ec, alloc); + + assert(path_list.get_allocator() == alloc); + + if (ec) + { + string_response res{ + std::piecewise_construct, + std::make_tuple(alloc), + std::make_tuple(http::status::found, request.version(), alloc) + }; + + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(alloc)); + res.set(http::field::location, "/"); + res.keep_alive(request.keep_alive()); + res.prepare_payload(); + + string_response_serializer sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", http_dir write location err: " << ec.message(); + } + + co_return; + } + + auto target_path = make_target_path(hctx.target_); + std::pmr::string autoindex_page{alloc}; + autoindex_page.reserve(4096); + 
fmt::format_to(std::back_inserter(autoindex_page), head_fmt, target_path, target_path); + fmt::format_to(std::back_inserter(autoindex_page), body_fmt, "../", "../", "", ""); + + for (const auto& s : path_list) + { + autoindex_page += s; + } + + autoindex_page += tail_fmt; + + string_response res{ + std::piecewise_construct, + std::make_tuple(std::move(autoindex_page), alloc), + std::make_tuple(http::status::ok, request.version(), alloc) + }; + + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(alloc)); + res.keep_alive(request.keep_alive()); + res.prepare_payload(); + + string_response_serializer sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", http dir write body err: " << ec.message(); + } + + co_return; + } + + net::awaitable proxy_session::on_http_get(const http_context& hctx) + { + boost::system::error_code ec; + + const auto& request = hctx.request_; + fs::path path = std::string_view{hctx.target_path_}; + + if (!fs::exists(path, ec)) + { + XLOG_WARN << "connection id: " << m_connection_id << ", http " << hctx.target_ << " file not exists"; + + std::pmr::string fake_page{hctx.alloc}; + + span_response res{ + std::piecewise_construct, + std::make_tuple(boost::span{fake_404_content, sizeof fake_404_content - 1}), + std::make_tuple(http::status::not_found, request.version(), hctx.alloc) + }; + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(hctx.alloc)); + res.keep_alive(request.keep_alive()); + res.prepare_payload(); + + span_response_serializer sr(res); + + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + + co_return; + } + + if (fs::is_directory(path, ec)) + { + XLOG_DBG << "connection id: " << m_connection_id << ", http " << hctx.target_ << " is directory"; + + std::pmr::string url = {"http://", hctx.alloc}; + if (is_crytpo_stream()) + { + url = 
"https://"; + } + url += request[http::field::host]; + urls::url u(url); + std::pmr::string target{hctx.target_ , hctx.alloc}; + target += "/"; + u.set_path(target); + + co_await location_http_route(request, u.buffer()); + + co_return; + } + + size_t content_length = fs::file_size(path, ec); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", http " << hctx.target_ + << " file size error: " << ec.message(); + + co_await default_http_route(request, fake_400_content, http::status::bad_request); + + co_return; + } +#if defined (BOOST_ASIO_HAS_FILE) +# if defined(_WIN32) + net::stream_file file(co_await net::this_coro::executor); + file.assign(::CreateFileW(path.wstring().c_str(), GENERIC_READ, FILE_SHARE_READ, 0, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED|FILE_FLAG_SEQUENTIAL_SCAN, 0), ec); +# else + net::stream_file file(co_await net::this_coro::executor, path.string(), net::stream_file::read_only); +# endif +#else // BOOST_ASIO_HAS_FILE + boost::nowide::fstream file(path.string(), std::ios_base::binary | std::ios_base::in); +#endif //BOOST_ASIO_HAS_FILE + + std::pmr::string user_agent { hctx.alloc }; + if (request.count(http::field::user_agent)) + { + user_agent = request[http::field::user_agent]; + } + + std::pmr::string referer { hctx.alloc }; + if (request.count(http::field::referer)) + { + referer = request[http::field::referer]; + } + + XLOG_DBG << "connection id: " << m_connection_id << ", http file: " << hctx.target_ + << ", size: " << content_length + << (request.count("Range") ? ", range: " + std::pmr::string(request["Range"], hctx.alloc) : std::pmr::string(hctx.alloc)) + << (!user_agent.empty() ? ", user_agent: " + user_agent : std::pmr::string(hctx.alloc)) + << (!referer.empty() ? ", referer: " + referer : std::pmr::string(hctx.alloc)); + + http::status st = http::status::ok; + auto range = parser_http_ranges(request["Range"]); + + // 只支持一个 range 的请求, 不支持多个 range 的请求. 
+ if (range.size() == 1) + { + st = http::status::partial_content; + auto& r = range.front(); + + // 起始位置为 -1, 表示从文件末尾开始读取, 例如 Range: -500 + // 则表示读取文件末尾的 500 字节. + if (r.first == -1) + { + // 如果第二个参数也为 -1, 则表示请求有问题, 返回 416. + if (r.second < 0) + { + co_await default_http_route(request, fake_416_content, http::status::range_not_satisfiable); + co_return; + } + else if (r.second >= 0) + { + // 计算起始位置和结束位置, 例如 Range: -5 + // 则表示读取文件末尾的 5 字节. + // content_length - r.second 表示起始位置. + // content_length - 1 表示结束位置. + // 例如文件长度为 10 字节, 则起始位置为 5, + // 结束位置为 9(数据总长度为[0-9]), 一共 5 字节. + r.first = content_length - r.second; + r.second = content_length - 1; + } + } + else if (r.second == -1) + { + // 起始位置为正数, 表示从文件头开始读取, 例如 Range: 500 + // 则表示读取文件头的 500 字节. + if (r.first < 0) + { + co_await default_http_route(request, fake_416_content, http::status::range_not_satisfiable); + co_return; + } + else + { + r.second = content_length - 1; + + if (r.first == content_length) + { + std::pmr::string content_range{hctx.alloc}; + fmt::format_to(std::back_inserter(content_range), "bytes */{}", r.second, r.second, content_length); + + span_response res{ + std::piecewise_construct, + std::make_tuple(boost::span{fake_416_content, sizeof (fake_416_content) - 1}), + std::make_tuple(http::status::range_not_satisfiable, request.version(), hctx.alloc) + }; + + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(hctx.alloc)); + res.set(http::field::content_type, "text/html; charset=UTF-8"); + res.set(http::field::content_range, content_range); + + res.keep_alive(hctx.request_.keep_alive()); + res.prepare_payload(); + + span_response_serializer sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + co_return; + } + } + } +#if defined (BOOST_ASIO_HAS_FILE) + file.seek(r.first, net::stream_file::seek_set); +#else + file.seekg(r.first, std::ios_base::beg); +#endif + } + + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << 
", open target: " << path << " failed: " << ec.message(); + // FILE OPEN FAILED + // 返回 502 + st = http::status::internal_server_error; + + span_response res{ + std::piecewise_construct, + std::make_tuple(boost::span{fake_502_content, sizeof (fake_502_content) - 1}), + std::make_tuple(http::status::found, request.version(), hctx.alloc) + }; + + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(hctx.alloc)); + res.set(http::field::content_type, "text/html; charset=utf-8"); + res.keep_alive(true); + res.prepare_payload(); + + span_response_serializer sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", send 502 err: " << ec.message(); + } + + co_return; + } + + custom_body_response res{ + std::piecewise_construct, + std::make_tuple(), + std::make_tuple(st, request.version(), hctx.alloc) + }; + + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(hctx.alloc)); + auto ext = strutil::to_lower(fs::path(path).extension().string()); + if (global_mimes.count(ext)) + res.set(http::field::content_type, global_mimes.at(ext)); + else + res.set(http::field::content_type, "text/plain"); + + if (st == http::status::ok) + { + res.set(http::field::accept_ranges, "bytes"); + } + + if (st == http::status::partial_content) + { + const auto& r = range.front(); + + if (r.second < r.first && r.second >= 0) + { + co_await default_http_route(request, fake_416_content, http::status::range_not_satisfiable); + co_return; + } + + std::pmr::string content_range{hctx.alloc}; + fmt::format_to(std::back_inserter(content_range), "bytes {}-{}/{}", r.first, r.second, content_length); + + content_length = r.second - r.first + 1; + res.set(http::field::content_range, content_range); + } + + res.keep_alive(hctx.request_.keep_alive()); + res.content_length(content_length); + + custom_body_response_serializer sr(res); + + 
co_await http::async_write_header(m_local_socket, sr, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", http async_write_header: " << ec.message(); + + co_return; + } + + auto buf_size = 64 * 1024; + if (m_option.tcp_rate_limit_ > 0 && m_option.tcp_rate_limit_ < buf_size) + { + buf_size = m_option.tcp_rate_limit_; + } + + std::streamsize total = 0; + stream_rate_limit(m_local_socket, m_option.tcp_rate_limit_); +#if defined (BOOST_ASIO_HAS_FILE) + total = co_await transfer(file, m_local_socket, content_length); +#else + std::unique_ptr bufs((char*)std::malloc(buf_size), &std::free); + char* buf = bufs.get(); + + do + { + auto remain_to_read = std::min(buf_size, content_length - total); + auto bytes_transferred = fileop::read(file, std::span(buf, remain_to_read)); + if (bytes_transferred == 0 || total >= (std::streamsize)content_length) + { + break; + } + + bytes_transferred = std::min(bytes_transferred, content_length - total); + + stream_expires_after(m_local_socket, std::chrono::seconds(m_option.tcp_timeout_)); + + co_await net::async_write(m_local_socket, net::buffer(buf, bytes_transferred), net::transfer_all(), net_awaitable[ec]); + total += bytes_transferred; + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", http async_write: " << ec.message() + << ", already write: " << total; + co_return; + } + } + while (total < content_length); +#endif + if (ec) + { + m_local_socket.close(ec); + + XLOG_WARN << "connection id: " << m_connection_id << ", http async_write: " << ec.message() + << ", already write: " << total; + } + + XLOG_DBG << "connection id: " << m_connection_id << ", http request: " << hctx.target_ << ", completed, size: " << total; + + co_return; + } + + template + struct route_op + { + boost::asio::awaitable operator()(auto* _proxy_session, auto target, auto& http_ctx, auto alloc) const + { + if (auto result = ctre::match( target ) ) + { + boost::hana::for_each(std::make_index_sequence(), 
[&](auto element) + { + // 将 正则匹配到的 () 子串,给依次 push 到 http_ctx.command_ 这个容器里. + http_ctx.command_.push_back(result.template get()); + }); + co_await (_proxy_session->*func)(http_ctx); + co_return true; + } + co_return false; + } + }; + + template + boost::asio::awaitable routes(proxy_session* _proxy_session, auto& target, auto& http_ctx, auto alloc) + { + // 依次等待 route_op 执行,因为是 || 所以如果一个成功了,剩下的就不等了. + ( (co_await RouteOPs(_proxy_session, target, http_ctx, alloc)) || ...); + } + + net::awaitable proxy_session::normal_web_server(string_request& req, pmr_alloc_t alloc) + { + boost::system::error_code ec; + + bool keep_alive = false; + bool has_read_header = true; + + for (; !m_abort;) + { + std::optional parser; + if (!has_read_header) + { + // normal_web_server 调用是从 http_proxy_get + // 跳转过来的, 该函数已经读取了请求头, 所以第1次不需 + // 要再次读取请求头, 即 has_read_header 为 true. + // 当 keepalive 时,需要读取请求头, 此时 has_read_header + // 为 false, 则在此读取和解析后续的 http 请求头. + parser.emplace(std::piecewise_construct, std::make_tuple(alloc), std::make_tuple(alloc)); + parser->body_limit(1024 * 512); // 512k + m_local_buffer.consume(m_local_buffer.size()); + + co_await http::async_read_header(m_local_socket, m_local_buffer, *parser, net_awaitable[ec]); + if (ec) + { + XLOG_DBG << "connection id: " << m_connection_id << (keep_alive ? 
", keepalive" : "") + << ", web async_read_header: " << ec.message(); + co_return; + } + + req = parser->release(); + } + + if (req[http::field::expect] == "100-continue") + { + http::response res; + res.version(11); + res.result(http::status::method_not_allowed); + + co_await http::async_write(m_local_socket, res, net_awaitable[ec]); + if (ec) + { + XLOG_DBG << "connection id: " << m_connection_id << ", web expect async_write: " << ec.message(); + } + co_return; + } + + has_read_header = false; + keep_alive = req.keep_alive(); + + if (beast::websocket::is_upgrade(req)) + { + std::pmr::string fake_page{alloc}; + + fmt::vformat_to(std::back_inserter(fake_page), fake_404_content_fmt, fmt::make_format_args(server_date_string(alloc))); + + co_await net::async_write(m_local_socket, net::buffer(fake_page), net::transfer_all(), + net_awaitable[ec]); + + co_return; + } + + std::pmr::string target{req.target(), alloc}; + std::string_view target_pv{target}; + boost::match_results< + std::pmr::string::const_iterator, + std::pmr::polymorphic_allocator> + > what{alloc}; + + http_context http_ctx{ + alloc, + std::pmr::vector{alloc}, + req, + req.target(), + make_real_target_path(req.target()) + }; + + co_await routes< + route_op{}, + route_op{}, + route_op{} + >(this, target_pv, http_ctx, alloc); + + if (!keep_alive) + { + break; + } + continue; + } + + co_await m_local_socket.lowest_layer().async_wait(net::socket_base::wait_read, net_awaitable[ec]); + + co_return; + } + + int proxy_session::http_authorization(std::string_view pa) + { + if (m_option.auth_users_.empty()) + { + return PROXY_AUTH_SUCCESS; + } + + if (pa.empty()) + { + return PROXY_AUTH_NONE; + } + + auto pos = pa.find(' '); + if (pos == std::string::npos) + { + return PROXY_AUTH_ILLEGAL; + } + + auto type = pa.substr(0, pos); + auto auth = pa.substr(pos + 1); + + if (type != "Basic") + { + return PROXY_AUTH_ILLEGAL; + } + + char buff[1024]; + std::pmr::monotonic_buffer_resource mbr(buff, sizeof buff); + pmr_alloc_t 
alloc(&mbr); + + std::pmr::string userinfo(beast::detail::base64::decoded_size(auth.size()), 0, alloc); + auto [len, _] = beast::detail::base64::decode((char*)userinfo.data(), auth.data(), auth.size()); + userinfo.resize(len); + + pos = userinfo.find(':'); + + std::pmr::string uname{userinfo.substr(0, pos), alloc}; + std::pmr::string passwd{userinfo.substr(pos + 1), alloc}; + + bool verify_passed = m_option.auth_users_.empty(); + + for (auto [user, pwd] : m_option.auth_users_) + { + if (uname == user && passwd == pwd) + { + verify_passed = true; + user_rate_limit_config(user); + break; + } + } + + auto endp = m_local_socket.remote_endpoint(); + auto client = endp.address().to_string(); + client += ":" + std::to_string(endp.port()); + + if (!verify_passed) + { + return PROXY_AUTH_FAILED; + } + + return PROXY_AUTH_SUCCESS; + } + + net::awaitable proxy_session::http_proxy_get() + { + boost::system::error_code ec; + bool keep_alive = false; + bool first = true; + + while (!m_abort) + { + std::array pre_alloc_buf; + std::pmr::monotonic_buffer_resource mbr(pre_alloc_buf.data(), pre_alloc_buf.size()); + pmr_alloc_t alloc(&mbr); + std::optional parser; + parser.emplace(std::piecewise_construct, std::make_tuple(alloc), std::make_tuple(alloc)); + + parser->body_limit(1024 * 1024 * 10); + if (!first) + { + m_local_buffer.consume(m_local_buffer.size()); + } + + // 读取 http 请求头. + co_await http::async_read(m_local_socket, m_local_buffer, *parser, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << (keep_alive ? 
", keepalive" : "") + << ", http_proxy_get request async_read: " << ec.message(); + + co_return !first; + } + + auto req = parser->release(); + auto mth = std::pmr::string(req.method_string(), alloc); + auto target_view = std::pmr::string(req.target(), alloc); + auto pa = std::pmr::string(req[http::field::proxy_authorization], alloc); + + keep_alive = req.keep_alive(); + + XLOG_DBG << "connection id: " << m_connection_id << ", method: " << mth << ", target: " << target_view + << (pa.empty() ? std::pmr::string(alloc) : ", proxy_authorization: " + pa); + + // 判定是否为 GET url 代理模式. + bool get_url_proxy = false; + if (boost::istarts_with(target_view, "https://") || boost::istarts_with(target_view, "http://")) + { + get_url_proxy = true; + } + + // http 代理认证, 如果请求的 rarget 不是 http url 或认证 + // 失败, 则按正常 web 请求处理. + auto auth = http_authorization(pa); + if (auth != PROXY_AUTH_SUCCESS || !get_url_proxy) + { + auto expect_url = urls::parse_absolute_uri(target_view); + + if (!expect_url.has_error()) + { + XLOG_WARN << "connection id: " << m_connection_id << ", proxy err: " << pauth_error_message(auth); + + co_return !first; + } + + // 如果 doc 目录为空, 则不允许访问目录 + // 这里直接返回错误页面. + if (m_option.doc_directory_.empty()) + { + co_return !first; + } + + // htpasswd 表示需要用户认证. + if (m_option.htpasswd_) + { + // 处理 http 认证, 如果客户没有传递认证信息, 则返回 401. + // 如果用户认证信息没有设置, 则直接返回 401. + auto auth = req[http::field::authorization]; + if (auth.empty() || m_option.auth_users_.empty()) + { + XLOG_WARN << "connection id: " << m_connection_id + << ", auth error: " << (auth.empty() ? "no auth" : "no user"); + + co_await unauthorized_http_route(req); + co_return true; + } + + auto auth_result = http_authorization(auth); + if (auth_result != PROXY_AUTH_SUCCESS) + { + XLOG_WARN << "connection id: " << m_connection_id + << ", auth error: " << pauth_error_message(auth_result); + + co_await unauthorized_http_route(req); + co_return true; + } + } + + // 如果不允许目录索引, 检查请求的是否为文件, 如果是具体文件则按文 + // 件请求处理, 否则返回 403. 
+ if (!m_option.autoindex_) + { + auto path = make_real_target_path(req.target()); + + if (!fs::is_directory(path, ec)) + { + co_await normal_web_server(req, alloc); + co_return true; + } + + // 如果不允许目录索引, 则直接返回 403 forbidden. + co_await forbidden_http_route(req); + + co_return true; + } + + // 按正常 http 目录请求来处理. + co_await normal_web_server(req, alloc); + co_return true; + } + + const auto authority_pos = target_view.find_first_of("//") + 2; + + std::string host; + + const auto scheme_id = urls::string_to_scheme(target_view.substr(0, authority_pos - 3)); + uint16_t port = urls::default_port(scheme_id); + + auto host_pos = authority_pos; + auto host_end = std::string::npos; + + auto port_start = std::string::npos; + + for (auto pos = authority_pos; pos < target_view.size(); pos++) + { + const auto& c = target_view[pos]; + if (c == '@') + { + host_pos = pos + 1; + + host_end = std::string::npos; + port_start = std::string::npos; + } + else if (c == ':') + { + host_end = pos; + port_start = pos + 1; + } + else if (c == '/' || (pos + 1 == target_view.size())) + { + if (host_end == std::string::npos) + { + host_end = pos; + } + host = target_view.substr(host_pos, host_end - host_pos); + + if (port_start != std::string::npos) + { + port = (uint16_t)std::atoi(target_view.substr(port_start, pos - port_start).c_str()); + } + + break; + } + } + + if (!m_remote_socket.is_open()) + { + // 连接到目标主机. + co_await start_connect_host(host, port ? port : 80, ec, true); + if (ec) + { + XLOG_FWARN("connection id: {}," + " connect to target {}:{} error: {}", + m_connection_id, host, port, ec.message()); + + co_return !first; + } + } + + // 处理代理请求头. 
+ const auto path_pos = target_view.find_first_of("/", authority_pos); + if (path_pos == std::string_view::npos) + { + req.target("/"); + } + else + { + req.target(std::string(target_view.substr(path_pos))); + } + + req.set(http::field::host, host); + + if (req.find(http::field::connection) == req.end() && req.find(http::field::proxy_connection) != req.end()) + { + req.set(http::field::connection, req[http::field::proxy_connection]); + } + + req.erase(http::field::proxy_authorization); + req.erase(http::field::proxy_connection); + + co_await http::async_write(m_remote_socket, req, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id + << ", http_proxy_get request async_write: " << ec.message(); + co_return !first; + } + + m_local_buffer.consume(m_local_buffer.size()); + beast::flat_buffer buf; + + response_parser _parser{std::piecewise_construct, std::make_tuple(alloc), std::make_tuple(alloc)}; + _parser.body_limit(1024 * 1024 * 10); + + auto bytes = co_await http::async_read(m_remote_socket, buf, _parser, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id + << ", http_proxy_get response async_read: " << ec.message(); + co_return !first; + } + + co_await http::async_write(m_local_socket, _parser.release(), net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id + << ", http_proxy_get response async_write: " << ec.message(); + co_return !first; + } + + XLOG_DBG << "connection id: " << m_connection_id << ", transfer completed" + << ", remote to local: " << bytes; + + first = false; + if (!keep_alive) + { + break; + } + } + + co_return true; + } + net::awaitable proxy_session::http_proxy_connect() + { + http::request req; + boost::system::error_code ec; + + // 读取 http 请求头. 
+ co_await http::async_read(m_local_socket, m_local_buffer, req, net_awaitable[ec]); + if (ec) + { + XLOG_ERR << "connection id: " << m_connection_id << ", http_proxy_connect async_read: " << ec.message(); + + co_return false; + } + + auto mth = std::string(req.method_string()); + auto target_view = std::string(req.target()); + auto pa = std::string(req[http::field::proxy_authorization]); + + XLOG_DBG << "connection id: " << m_connection_id << ", method: " << mth << ", target: " << target_view + << (pa.empty() ? std::string() : ", proxy_authorization: " + pa); + + // http 代理认证. + auto auth = http_authorization(pa); + if (auth != PROXY_AUTH_SUCCESS) + { + XLOG_WARN << "connection id: " << m_connection_id << ", proxy err: " << pauth_error_message(auth); + + auto fake_page = fmt::vformat(fake_407_content_fmt, fmt::make_format_args(server_date_string())); + + co_await net::async_write(m_local_socket, net::buffer(fake_page), net::transfer_all(), net_awaitable[ec]); + + co_return true; + } + + auto pos = target_view.find(':'); + if (pos == std::string::npos) + { + XLOG_ERR << "connection id: " << m_connection_id << ", illegal target: " << target_view; + co_return false; + } + + std::string host(target_view.substr(0, pos)); + std::string port(target_view.substr(pos + 1)); + + co_await start_connect_host(host, static_cast(std::atol(port.c_str())), ec, true); + if (ec) + { + XLOG_FWARN("connection id: {}," + " connect to target {}:{} error: {}", + m_connection_id, host, port, ec.message()); + co_return false; + } + + http::response res{http::status::ok, req.version()}; + res.reason("Connection established"); + + co_await http::async_write(m_local_socket, res, net_awaitable[ec]); + if (ec) + { + XLOG_FWARN("connection id: {}," + " async write response {}:{} error: {}", + m_connection_id, host, port, ec.message()); + co_return false; + } + + auto [l2r_transferred, r2l_transferred] = co_await ( + transfer(m_local_socket, m_remote_socket) && + transfer(m_remote_socket, 
m_local_socket) + ); + + XLOG_DBG << "connection id: " << m_connection_id << ", transfer completed" + << ", local to remote: " << l2r_transferred << ", remote to local: " << r2l_transferred; + + co_return true; + } + + net::awaitable proxy_session::socks_auth() + { + // +----+------+----------+------+----------+ + // |VER | ULEN | UNAME | PLEN | PASSWD | + // +----+------+----------+------+----------+ + // | 1 | 1 | 1 to 255 | 1 | 1 to 255 | + // +----+------+----------+------+----------+ + // [ ] + + boost::system::error_code ec; + m_local_buffer.consume(m_local_buffer.size()); + auto bytes = + co_await net::async_read(m_local_socket, m_local_buffer, net::transfer_exactly(2), net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id + << ", read client username/passwd error: " << ec.message(); + co_return false; + } + + auto p = net::buffer_cast(m_local_buffer.data()); + int auth_version = read(p); + if (auth_version != 1) + { + XLOG_WARN << "connection id: " << m_connection_id << ", socks negotiation, unsupported socks5 protocol"; + co_return false; + } + int name_length = read(p); + if (name_length <= 0 || name_length > 255) + { + XLOG_WARN << "connection id: " << m_connection_id << ", socks negotiation, invalid name length"; + co_return false; + } + name_length += 1; + + // +----+------+----------+------+----------+ + // |VER | ULEN | UNAME | PLEN | PASSWD | + // +----+------+----------+------+----------+ + // | 1 | 1 | 1 to 255 | 1 | 1 to 255 | + // +----+------+----------+------+----------+ + // [ ] + m_local_buffer.consume(m_local_buffer.size()); + bytes = co_await net::async_read(m_local_socket, m_local_buffer, net::transfer_exactly(name_length), + net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", read client username error: " << ec.message(); + co_return false; + } + + std::string uname; + + p = net::buffer_cast(m_local_buffer.data()); + for (size_t i = 0; i < bytes - 1; i++) + { + 
uname.push_back(read(p)); + } + + int passwd_len = read(p); + if (passwd_len <= 0 || passwd_len > 255) + { + XLOG_WARN << "connection id: " << m_connection_id << ", socks negotiation, invalid passwd length"; + co_return false; + } + + // +----+------+----------+------+----------+ + // |VER | ULEN | UNAME | PLEN | PASSWD | + // +----+------+----------+------+----------+ + // | 1 | 1 | 1 to 255 | 1 | 1 to 255 | + // +----+------+----------+------+----------+ + // [ ] + m_local_buffer.consume(m_local_buffer.size()); + bytes = co_await net::async_read(m_local_socket, m_local_buffer, net::transfer_exactly(passwd_len), + net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", read client passwd error: " << ec.message(); + co_return false; + } + + std::string passwd; + + p = net::buffer_cast(m_local_buffer.data()); + for (size_t i = 0; i < bytes; i++) + { + passwd.push_back(read(p)); + } + + // SOCKS5验证用户和密码. + auto endp = m_local_socket.remote_endpoint(); + auto client = endp.address().to_string(); + client += ":" + std::to_string(endp.port()); + + // 用户认证逻辑. + bool verify_passed = m_option.auth_users_.empty(); + + for (auto [user, pwd] : m_option.auth_users_) + { + if (uname == user && passwd == pwd) + { + verify_passed = true; + user_rate_limit_config(user); + break; + } + } + + XLOG_DBG << "connection id: " << m_connection_id << ", auth: " << uname << ", passwd: " << passwd + << ", client: " << client; + + net::streambuf wbuf; + auto wp = net::buffer_cast(wbuf.prepare(16)); + write(0x01, wp); // version 只能是1. + if (verify_passed) + { + write(0x00, wp); // 认证通过返回0x00, 其它值为失败. + } + else + { + write(0x01, wp); // 认证返回0x01为失败. + } + + // 返回认证状态. 
+ // +----+--------+ + // |VER | STATUS | + // +----+--------+ + // | 1 | 1 | + // +----+--------+ + wbuf.commit(2); + co_await net::async_write(m_local_socket, wbuf, net::transfer_exactly(2), net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", server write status error: " << ec.message(); + co_return false; + } + + co_return verify_passed; + } + + std::pmr::vector proxy_session::format_path_list( + const std::string& path, boost::system::error_code& ec, pmr_alloc_t alloc) + { + fs::directory_iterator end; + fs::directory_iterator it(path, ec); + if (ec) + { + XLOG_DBG << "connection id: " << m_connection_id << ", format_path_list read dir: " << path + << ", error: " << ec.message(); + return {}; + } + + std::pmr::vector path_list{alloc}; + std::pmr::vector file_list{alloc}; + + for (; it != end && !m_abort; it++) + { + const auto& item = it->path(); + + auto [time_string, unc_path] = file_last_wirte_time(item); + // std::wstring time_string = boost::nowide::widen(ftime); + + std::pmr::string rpath{alloc}; + + if (fs::is_directory(unc_path.empty() ? 
item : unc_path, ec)) + { + rpath = item.filename().string(); + rpath += "/"; + + auto show_path = rpath; + if (show_path.size() > 50) + { + show_path = show_path.substr(0, 47); + show_path += "..>"; + } + std::pmr::string str(alloc); + fmt::format_to(std::back_inserter(str), body_fmt, rpath, show_path, time_string, "-"); + + path_list.push_back(std::move(str)); + } + else + { + rpath = item.filename().string(); + std::string filesize; + if (unc_path.empty()) + { + unc_path = item; + } + auto sz = static_cast(fs::file_size(unc_path, ec)); + if (ec) + { + sz = 0; + } + filesize = strutil::add_suffix(sz); + auto show_path = rpath; + if (show_path.size() > 50) + { + show_path = show_path.substr(0, 47); + show_path += "..>"; + } + std::pmr::string str(alloc); + fmt::format_to(std::back_inserter(str), body_fmt, rpath, show_path, time_string, filesize); + + file_list.push_back(std::move(str)); + } + } + + ec = {}; + + path_list.insert(path_list.end(), file_list.begin(), file_list.end()); + + return path_list; + } + + std::pmr::string proxy_session::server_date_string(pmr_alloc_t alloc) + { + auto time = std::time(nullptr); + auto gmt = gmtime((const time_t*)&time); + + std::pmr::string str(64, '\0', alloc); + auto ret = strftime((char*)str.data(), 64, "%a, %d %b %Y %H:%M:%S GMT", gmt); + str.resize(ret); + + return str; + } + + fs::path proxy_session::path_cat(std::string_view doc, std::string_view target) + { + size_t start_pos = 0; + for (auto& c : target) + { + if (!(c == '/' || c == '\\')) + { + break; + } + + start_pos++; + } + + std::array pre_alloc_buf; + std::pmr::monotonic_buffer_resource mbr(pre_alloc_buf.data(), pre_alloc_buf.size()); + std::pmr::polymorphic_allocator alloc(&mbr); + + std::string_view sv; + std::pmr::string slash{"/", alloc}; + + if (start_pos < target.size()) + { + sv = target.substr(start_pos); + } +#ifdef WIN32 + slash = "\\"; + if (doc.back() == '/' || doc.back() == '\\') + { + slash = ""; + } + auto filename = std::pmr::string(doc, alloc) 
+ slash + std::pmr::string(sv, alloc); + return fs::path(std::string_view(filename)); +#else + if (doc.back() == '/') + { + slash = ""; + } + return fs::path(std::pmr::string(doc, alloc) + slash + std::pmr::string(sv, alloc)); +#endif // WIN32 + } + + net::awaitable proxy_session::default_http_route(const string_request& request, std::string response, + http::status status) + { + boost::system::error_code ec; + + std::array pre_alloc_buf; + std::pmr::monotonic_buffer_resource mbr(pre_alloc_buf.data(), pre_alloc_buf.size()); + pmr_alloc_t alloc(&mbr); + + string_response res{std::piecewise_construct, std::make_tuple(alloc), + std::make_tuple(status, request.version(), alloc)}; + + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(alloc)); + res.set(http::field::content_type, "text/html"); + + res.keep_alive(true); + res.body() = response; + res.prepare_payload(); + + string_response_serializer sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", default http route err: " << ec.message(); + } + + co_return; + } + + net::awaitable proxy_session::location_http_route(const string_request& request, const std::string& path) + { + boost::system::error_code ec; + + std::array pre_alloc_buf; + std::pmr::monotonic_buffer_resource mbr(pre_alloc_buf.data(), pre_alloc_buf.size()); + pmr_alloc_t alloc(&mbr); + + string_response res{std::piecewise_construct, std::make_tuple(alloc), + std::make_tuple(http::status::moved_permanently, request.version(), alloc)}; + + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(alloc)); + res.set(http::field::content_type, "text/html"); + res.set(http::field::location, path); + + res.keep_alive(true); + res.body() = fake_302_content; + res.prepare_payload(); + + string_response_serializer sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); 
+ if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", location http route err: " << ec.message(); + } + + co_return; + } + + net::awaitable proxy_session::forbidden_http_route(const string_request& request) + { + boost::system::error_code ec; + + std::array pre_alloc_buf; + std::pmr::monotonic_buffer_resource mbr(pre_alloc_buf.data(), pre_alloc_buf.size()); + pmr_alloc_t alloc(&mbr); + + string_response res{std::piecewise_construct, std::make_tuple(alloc), + std::make_tuple(http::status::forbidden, request.version(), alloc)}; + + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(alloc)); + res.set(http::field::content_type, "text/html"); + + res.keep_alive(true); + res.body() = fake_403_content; + res.prepare_payload(); + + http::serializer> sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", forbidden http route err: " << ec.message(); + } + + co_return; + } + + net::awaitable proxy_session::unauthorized_http_route(const string_request& request) + { + boost::system::error_code ec; + + std::array pre_alloc_buf; + std::pmr::monotonic_buffer_resource mbr(pre_alloc_buf.data(), pre_alloc_buf.size()); + pmr_alloc_t alloc(&mbr); + + string_response res{std::piecewise_construct, std::make_tuple(alloc), + std::make_tuple(http::status::unauthorized, request.version(), alloc)}; + + res.set(http::field::server, version_string); + res.set(http::field::date, server_date_string(alloc)); + res.set(http::field::content_type, "text/html; charset=UTF-8"); + res.set(http::field::www_authenticate, "Basic realm=\"proxy\""); + + res.keep_alive(true); + res.body() = fake_401_content; + res.prepare_payload(); + + http::serializer> sr(res); + co_await http::async_write(m_local_socket, sr, net_awaitable[ec]); + if (ec) + { + XLOG_WARN << "connection id: " << m_connection_id << ", unauthorized http route err: " << ec.message(); + } + 
+ co_return; + } +} // namespace proxy diff --git a/server/proxy_server/main.cpp b/server/proxy_server/main.cpp index f22feec5e3..8b1edf5ecb 100644 --- a/server/proxy_server/main.cpp +++ b/server/proxy_server/main.cpp @@ -11,6 +11,8 @@ #include +#include + #include #include #include @@ -307,6 +309,8 @@ namespace std int main(int argc, char** argv) { + boost::nowide::args a(argc,argv); // Fix arguments - make them UTF-8 + platform_init(); std::string config; @@ -404,10 +408,19 @@ and/or open issues at https://github.com/Jackarain/proxy)" po::notify(vm); } - if (disable_logs || log_dir.empty()) - xlogger::toggle_write_logging(false); + if (disable_logs && log_dir.empty()) + { + xlogger::turnoff_logging(); + } else - xlogger::init_logging(log_dir); + { + if (log_dir.empty()) + xlogger::toggle_write_logging(false); + else + xlogger::init_logging(log_dir); + if (disable_logs) + xlogger::toggle_console_logging(false); + } print_args(argc, argv, vm); diff --git a/third_party/compile-time-regular-expressions/.conan/test_package/CMakeLists.txt b/third_party/compile-time-regular-expressions/.conan/test_package/CMakeLists.txt new file mode 100644 index 0000000000..c64b54d8b4 --- /dev/null +++ b/third_party/compile-time-regular-expressions/.conan/test_package/CMakeLists.txt @@ -0,0 +1,11 @@ +project(test_package CXX) +cmake_minimum_required(VERSION 2.8 FATAL_ERROR) + +set(CMAKE_VERBOSE_MAKEFILE TRUE) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") + +include(${CMAKE_BINARY_DIR}/conanbuildinfo.cmake) +conan_basic_setup() + +add_executable(${PROJECT_NAME} test_package.cpp) +target_link_libraries(${PROJECT_NAME} ${CONAN_LIBS}) diff --git a/third_party/compile-time-regular-expressions/.conan/test_package/conanfile.py b/third_party/compile-time-regular-expressions/.conan/test_package/conanfile.py new file mode 100644 index 0000000000..d4a52663f9 --- /dev/null +++ b/third_party/compile-time-regular-expressions/.conan/test_package/conanfile.py @@ -0,0 +1,20 @@ +#!/usr/bin/env 
python +# -*- coding: utf-8 -*- + +from conans import ConanFile, CMake, tools, RunEnvironment +import os + + +class TestPackageConan(ConanFile): + settings = "os", "compiler", "build_type", "arch" + generators = "cmake" + + def build(self): + cmake = CMake(self) + cmake.configure() + cmake.build() + + def test(self): + assert os.path.exists(os.path.join(self.deps_cpp_info["CTRE"].rootpath, "licenses", "LICENSE")) + bin_path = os.path.join("bin", "test_package") + self.run(bin_path, run_environment=True) diff --git a/third_party/compile-time-regular-expressions/.conan/test_package/test_package.cpp b/third_party/compile-time-regular-expressions/.conan/test_package/test_package.cpp new file mode 100644 index 0000000000..b1af18f0f2 --- /dev/null +++ b/third_party/compile-time-regular-expressions/.conan/test_package/test_package.cpp @@ -0,0 +1,32 @@ +#include +#include +#include +#include + +#include + +using namespace std::string_view_literals; +using namespace ctre::literals; + +struct date { std::string_view year; std::string_view month; std::string_view day; }; + +static constexpr ctll::fixed_string pattern = "^([0-9]{4})/([0-9]{1,2}+)/([0-9]{1,2}+)$"; + +constexpr std::optional extract_date(std::string_view s) noexcept { + if (auto [whole, year, month, day] = ctre::match(s); whole + ) { + return date{year.to_view(), month.to_view(), day.to_view()}; + } else { + return std::nullopt; + } +} + +int main() { + + assert(extract_date("2018/08/27"sv).has_value()); + assert(extract_date("2018/08/27"sv)->year == "2018"sv); + assert(extract_date("2018/08/27"sv)->month == "08"sv); + assert(extract_date("2018/08/27"sv)->day == "27"sv); + + return EXIT_SUCCESS; +} diff --git a/third_party/compile-time-regular-expressions/.github/workflows/tests.yml b/third_party/compile-time-regular-expressions/.github/workflows/tests.yml new file mode 100644 index 0000000000..08bd127f20 --- /dev/null +++ b/third_party/compile-time-regular-expressions/.github/workflows/tests.yml @@ -0,0 +1,81 
@@ +name: Tests +on: [push, pull_request] +jobs: + appleclang: + strategy: + matrix: + macos: [12, 13] + standard: [17, 20] + fail-fast: false + name: "AppleClang (MacOS ${{ matrix.macos }}, C++${{ matrix.standard }})" + runs-on: macos-${{ matrix.macos }} + steps: + - uses: actions/checkout@v2 + - run: c++ -v + - run: make CXX=c++ CXX_STANDARD=2a + if: ${{ matrix.standard == '20' }} + - run: make CXX=c++ CXX_STANDARD=17 + if: ${{ matrix.standard == '17' }} + gcc: + strategy: + matrix: + gcc: [8, 9, 10, 11, 13] + standard: [17, 20] + fail-fast: false + name: "GCC ${{ matrix.gcc }} (C++${{ matrix.standard }})" + runs-on: ubuntu-20.04 + steps: + - name: "Install GCC" + uses: egor-tensin/setup-gcc@v1 + with: + version: ${{ matrix.gcc }} + - uses: actions/checkout@v2 + - run: c++ -v + - run: make CXX=c++ CXX_STANDARD=2a + if: ${{ matrix.standard == '20' }} + - run: make CXX=c++ CXX_STANDARD=17 + if: ${{ matrix.gcc < '9' && matrix.standard == '17' }} + - run: make CXX=c++ CXX_STANDARD=17 CXXFLAGS=-DCTRE_ENABLE_LITERALS PEDANTIC="" + if: ${{ matrix.gcc >= '9' && matrix.standard == '17' }} + clang: + strategy: + matrix: + clang: [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + stdlib: ["libc++", "libstdc++"] + standard: [17, 20] + fail-fast: false + name: "Clang ${{ matrix.clang }} (C++${{ matrix.standard }}, ${{ matrix.stdlib }})" + runs-on: ubuntu-20.04 + steps: + - name: "Install Clang" + uses: egor-tensin/setup-clang@v1 + with: + version: ${{ matrix.clang }} + - name: "Install libc++" + if: ${{ matrix.stdlib == 'libc++' }} + run: sudo apt-get install libc++abi-${{ matrix.clang }}-dev libc++1-${{ matrix.clang }} libc++-${{ matrix.clang }}-dev + - uses: actions/checkout@v2 + - run: c++ -v + - run: make CXX=c++ CXX_STANDARD=2a CXXFLAGS=-stdlib=${{ matrix.stdlib }} + if: ${{ matrix.standard == '20' }} + - run: make CXX=c++ CXX_STANDARD=17 CXXFLAGS=-stdlib=${{ matrix.stdlib }} + if: ${{ matrix.standard == '17' }} + msvc: + strategy: + matrix: + version: [14.29, ""] + fail-fast: 
false + name: "MSVC ${{ matrix.version }} (C++20)" + runs-on: windows-2022 + steps: + - name: Add MSVC ${{ matrix.version }} to PATH + uses: ilammy/msvc-dev-cmd@v1 + with: + toolset: ${{ matrix.version }} + - name: "Install Ninja & CMake" + run: choco install ninja cmake + - uses: actions/checkout@v2 + - name: "Configure" + run: cmake . -G Ninja -B build -DCTRE_BUILD_TESTS=ON -DCTRE_CXX_STANDARD=20 + - name: "Build" + run: cmake --build build --target ctre-test --verbose diff --git a/third_party/compile-time-regular-expressions/.gitignore b/third_party/compile-time-regular-expressions/.gitignore new file mode 100644 index 0000000000..a644be0f03 --- /dev/null +++ b/third_party/compile-time-regular-expressions/.gitignore @@ -0,0 +1,22 @@ +*.o +*.d +**/*.tmp +test +result +tests/benchmark-exec/* +!tests/benchmark-exec/Makefile +!tests/benchmark-exec/.gitignore +!tests/benchmark-exec/.tm_properties +!tests/benchmark-exec/*.cpp +!tests/benchmark-exec/*.hpp +!tests/benchmark-exec/*.js +*.pyc +.conan/test_package/build +mtent12.txt +*.zip +tests/benchmark-range/* +!tests/benchmark-range/*.cpp +!tests/benchmark-range/*.hpp +build +cmake-build-*/* +.idea/* \ No newline at end of file diff --git a/third_party/compile-time-regular-expressions/.gitmodules b/third_party/compile-time-regular-expressions/.gitmodules new file mode 100644 index 0000000000..e69de29bb2 diff --git a/third_party/compile-time-regular-expressions/.tm_properties b/third_party/compile-time-regular-expressions/.tm_properties new file mode 100644 index 0000000000..b08b62add8 --- /dev/null +++ b/third_party/compile-time-regular-expressions/.tm_properties @@ -0,0 +1,3 @@ +exclude = "{$exclude,**/*.dSYM,**/*.d,**/*.o,test,*.tmp}" +include = "{$include,.gitignore,.github,.travis*,.conan}" +excludeInFolderSearch = "{$excludeInFolderSearch,./ctre.hpp}" diff --git a/third_party/compile-time-regular-expressions/.travis.yml b/third_party/compile-time-regular-expressions/.travis.yml new file mode 100644 index 
0000000000..3c280679d7 --- /dev/null +++ b/third_party/compile-time-regular-expressions/.travis.yml @@ -0,0 +1,113 @@ +language: cpp +dist: focal +os: linux + +jobs: + include: + - os: linux + language: python + python: "3.6" + services: + - docker + env: + - COMPILER=g++-8 + - CONAN_GCC_VERSIONS=8 + - CONAN_DOCKER_IMAGE=lasote/conangcc8 + install: + - pip install -U conan conan-package-tools + script: + - python .conan/build.py + + - os: linux + compiler: gcc + env: + - COMPILER=g++-8 + - CXX_STANDARD=17 + addons: + apt: + packages: ['g++-8'] + + - os: linux + compiler: gcc + env: + - COMPILER=g++-8 + - CXX_STANDARD=2a + addons: + apt: + packages: ['g++-8'] + + - os: linux + compiler: clang + env: + - COMPILER=clang++-6.0 + - CXX_STANDARD=17 + addons: + apt: + sources: ['ubuntu-toolchain-r-test', 'llvm-toolchain-trusty-6.0'] + packages: ['g++-8', 'clang-6.0'] + + - os: linux + compiler: clang + env: + - COMPILER=clang++-6.0 + - CXX_STANDARD=2a + addons: + apt: + sources: ['ubuntu-toolchain-r-test', 'llvm-toolchain-trusty-6.0'] + packages: ['g++-8', 'clang-6.0'] + +# FIXME: don't use GCC10 in 17 mode for tests as they are depending on operator"" +# - os: linux +# compiler: gcc +# env: +# - COMPILER=g++-10 +# - CXX_STANDARD=17 +# addons: +# apt: +# packages: ['g++-10'] + + - os: linux + compiler: gcc + env: + - COMPILER=g++-10 + - CXX_STANDARD=20 + addons: + apt: + packages: ['g++-10'] + + - os: osx + osx_image: xcode10 + env: + - CXX_STANDARD=17 + + - os: osx + osx_image: xcode10 + env: + - CXX_STANDARD=2a + + - os: osx + osx_image: xcode11 + env: + - CXX_STANDARD=17 + + - os: osx + osx_image: xcode11 + env: + - CXX_STANDARD=2a + + - os: osx + osx_image: xcode12 + env: + - CXX_STANDARD=17 + + - os: osx + osx_image: xcode12 + env: + - CXX_STANDARD=2a + +install: + - if [[ "${COMPILER}" != "" ]]; then export CXX=${COMPILER}; fi + - uname -a + - $CXX --version +script: + - make CXX_STANDARD=$CXX_STANDARD diff --git 
a/third_party/compile-time-regular-expressions/CMakeLists.txt b/third_party/compile-time-regular-expressions/CMakeLists.txt new file mode 100644 index 0000000000..9e5a80e9a5 --- /dev/null +++ b/third_party/compile-time-regular-expressions/CMakeLists.txt @@ -0,0 +1,196 @@ +cmake_minimum_required(VERSION 3.14...3.29) + +if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.29.20240416") + set(CMAKE_EXPERIMENTAL_CXX_IMPORT_STD "0e5b6991-d74f-4b3d-a41c-cf096e0b2508") + set(CMAKE_CXX_MODULE_STD 1) +endif() + +# When updating to a newer version of CMake, see if we can use the following +project(ctre + HOMEPAGE_URL "https://compile-time.re" + VERSION 3.9.0 + LANGUAGES CXX) +set(PROJECT_DESCRIPTION "Fast compile-time regular expressions with support for matching/searching/capturing during compile-time or runtime.") + +include(CMakePackageConfigHelpers) +include(CMakeDependentOption) +include(GNUInstallDirs) +include(CTest) + +find_program(CTRE_DPKG_BUILDPACKAGE_FOUND dpkg-buildpackage) +find_program(CTRE_RPMBUILD_FOUND rpmbuild) + +cmake_dependent_option(CTRE_BUILD_TESTS "Build ctre Tests" ON + "BUILD_TESTING;CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR" OFF) +cmake_dependent_option(CTRE_BUILD_PACKAGE "Build ctre Packages" ON + "CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR" OFF) +cmake_dependent_option(CTRE_BUILD_PACKAGE_DEB + "Create DEB Package (${PROJECT_NAME})" ON + "CTRE_BUILD_PACKAGE;CTRE_DPKG_BUILDPACKAGE_FOUND" OFF) +cmake_dependent_option(CTRE_BUILD_PACKAGE_RPM + "Create RPM Package (${PROJECT_NAME})" ON + "CTRE_BUILD_PACKAGE;CTRE_RPMBUILD_FOUND" OFF) + +option(CTRE_MODULE "build C++ module" OFF) + +if(CTRE_MODULE) + if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.29.20240416") + add_library(${PROJECT_NAME}) + + target_sources(${PROJECT_NAME} PUBLIC FILE_SET CXX_MODULES TYPE CXX_MODULES FILES ctre.cppm) + target_sources(${PROJECT_NAME} PUBLIC FILE_SET HEADERS TYPE HEADERS + BASE_DIRS + "${CMAKE_CURRENT_SOURCE_DIR}/include" + FILES + include/ctll.hpp + include/ctre/functions.hpp + 
include/ctre/utility.hpp + include/ctre/utf8.hpp + include/ctre/evaluation.hpp + include/ctre/starts_with_anchor.hpp + include/ctre/pcre_actions.hpp + include/ctre/rotate.hpp + include/ctre/iterators.hpp + include/ctre/literals.hpp + include/ctre/return_type.hpp + include/ctre/find_captures.hpp + include/ctre/id.hpp + include/ctre/atoms_characters.hpp + include/ctre/actions/mode.inc.hpp + include/ctre/actions/characters.inc.hpp + include/ctre/actions/class.inc.hpp + include/ctre/actions/look.inc.hpp + include/ctre/actions/sequence.inc.hpp + include/ctre/actions/fusion.inc.hpp + include/ctre/actions/asserts.inc.hpp + include/ctre/actions/capture.inc.hpp + include/ctre/actions/named_class.inc.hpp + include/ctre/actions/backreference.inc.hpp + include/ctre/actions/options.inc.hpp + include/ctre/actions/atomic_group.inc.hpp + include/ctre/actions/set.inc.hpp + include/ctre/actions/hexdec.inc.hpp + include/ctre/actions/repeat.inc.hpp + include/ctre/actions/properties.inc.hpp + include/ctre/actions/boundaries.inc.hpp + include/ctre/operators.hpp + include/ctre/pcre.hpp + include/ctre/atoms_unicode.hpp + include/ctre/range.hpp + include/ctre/wrapper.hpp + include/ctre/first.hpp + include/ctre/flags_and_modes.hpp + include/ctre/atoms.hpp + include/unicode-db.hpp + include/unicode-db/unicode_interface.hpp + include/unicode-db/unicode-db.hpp + include/ctll/parser.hpp + include/ctll/actions.hpp + include/ctll/fixed_string.hpp + include/ctll/list.hpp + include/ctll/utilities.hpp + include/ctll/grammars.hpp + include/ctre.hpp + include/ctre-unicode.hpp + ) + + # we are using `import std;` + if (NOT DEFINED CTRE_CXX_STANDARD OR CTRE_CXX_STANDARD LESS 23) + set(CTRE_CXX_STANDARD 23) + endif() + + target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_${CTRE_CXX_STANDARD}) + + install(TARGETS ${PROJECT_NAME} EXPORT ${PROJECT_NAME}-targets + FILE_SET CXX_MODULES DESTINATION "${CMAKE_INSTALL_LIBDIR}/cxx/${PROJECT_NAME}" + FILE_SET HEADERS DESTINATION "include") + else() + 
message(FATAL_ERROR "unsupported cmake for c++ modules") + endif() +else() + add_library(${PROJECT_NAME} INTERFACE) + + target_include_directories(${PROJECT_NAME} INTERFACE + $ + $) + + if (NOT CTRE_CXX_STANDARD) + set(CTRE_CXX_STANDARD 20) + endif() + + target_compile_features(${PROJECT_NAME} INTERFACE cxx_std_${CTRE_CXX_STANDARD}) + set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_SCAN_FOR_MODULES 0) + + install(TARGETS ${PROJECT_NAME} EXPORT ${PROJECT_NAME}-targets) + install(DIRECTORY include/ DESTINATION include + FILES_MATCHING PATTERN *.hpp) +endif() + +add_library(${PROJECT_NAME}::${PROJECT_NAME} ALIAS ${PROJECT_NAME}) + +if (NOT EXISTS "${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config.cmake.in") + file(WRITE ${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config.cmake.in [[ + @PACKAGE_INIT@ + include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@-targets.cmake") + ]]) +endif() + +configure_package_config_file( + "${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config.cmake.in" + "${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config.cmake" + INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" + NO_SET_AND_CHECK_MACRO + NO_CHECK_REQUIRED_COMPONENTS_MACRO) + +write_basic_package_version_file(ctre-config-version.cmake + VERSION ${PROJECT_VERSION} + COMPATIBILITY SameMajorVersion) + +install(EXPORT ${PROJECT_NAME}-targets + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" + NAMESPACE ${PROJECT_NAME}::) +install( + FILES + "${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake" + "${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config.cmake" + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}) + +if(CTRE_BUILD_TESTS) + add_subdirectory(tests) +endif() + +if (NOT CTRE_BUILD_PACKAGE) + return() +endif() + +list(APPEND source-generators TBZ2 TGZ TXZ ZIP) + +if (CTRE_BUILD_PACKAGE_DEB) + list(APPEND binary-generators "DEB") +endif() + +if (CTRE_BUILD_PACKAGE_RPM) + list(APPEND binary-generators "RPM") +endif() + +set(CPACK_SOURCE_GENERATOR ${source-generators}) 
+set(CPACK_GENERATOR ${binary-generators}) + +set(CPACK_PACKAGE_FILE_NAME "${PROJECT_NAME}-${PROJECT_VERSION}") +set(CPACK_SOURCE_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}") + +set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Hana Dusíková") +set(CPACK_DEBIAN_PACKAGE_DESCRIPTION "${PROJECT_DESCRIPTION}") +set(CPACK_DEBIAN_PACKAGE_NAME "lib${PROJECT_NAME}-dev") + +set(CPACK_RPM_PACKAGE_NAME "lib${PROJECT_NAME}-devel") + +set(PKG_CONFIG_FILE_NAME "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.pc") +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/packaging/pkgconfig.pc.in" "${PKG_CONFIG_FILE_NAME}" @ONLY) +install(FILES "${PKG_CONFIG_FILE_NAME}" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig" +) + +list(APPEND CPACK_SOURCE_IGNORE_FILES /.git/ /build/ .gitignore .DS_Store) + +include(CPack) diff --git a/third_party/compile-time-regular-expressions/LICENSE b/third_party/compile-time-regular-expressions/LICENSE new file mode 100644 index 0000000000..bd8b243dfa --- /dev/null +++ b/third_party/compile-time-regular-expressions/LICENSE @@ -0,0 +1,218 @@ + Apache License + Version 2.0, January 2004 + +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. 
diff --git a/third_party/compile-time-regular-expressions/Makefile b/third_party/compile-time-regular-expressions/Makefile new file mode 100644 index 0000000000..a48a30a031 --- /dev/null +++ b/third_party/compile-time-regular-expressions/Makefile @@ -0,0 +1,92 @@ +.PHONY: default all clean grammar compare single-header single-header/ctre.hpp single-header/ctre-unicode.hpp single-header/unicode-db.hpp + +default: all + +TARGETS := $(wildcard tests/benchmark-exec/*.cpp) +IGNORE := $(wildcard tests/benchmark/*.cpp) $(wildcard tests/benchmark-exec/*.cpp) + +DESATOMAT := /bin/false + +CXX_STANDARD := 20 + +PYTHON := python3.9 + +PEDANTIC:=-pedantic + +override CXXFLAGS := $(CXXFLAGS) -std=c++$(CXX_STANDARD) -Iinclude -O3 $(PEDANTIC) -Wall -Wextra -Werror -Wconversion +LDFLAGS := + +TESTS := $(wildcard tests/*.cpp) $(wildcard tests/benchmark/*.cpp) +TRUE_TARGETS := $(TARGETS:%.cpp=%) +override TRUE_TARGETS := $(filter-out $(IGNORE:%.cpp=%), $(TRUE_TARGETS)) +OBJECTS := $(TARGETS:%.cpp=%.o) $(TESTS:%.cpp=%.o) +override OBJECTS := $(filter-out $(IGNORE:%.cpp=%.o),$(OBJECTS)) +DEPEDENCY_FILES := $(OBJECTS:%.o=%.d) + +all: $(TRUE_TARGETS) $(OBJECTS) + +list: + echo $(SUPPORTED_CPP20) + +$(TRUE_TARGETS): %: %.o + $(CXX) $< $(LDFLAGS) -o $@ + +$(OBJECTS): %.o: %.cpp + $(CXX) $(CXXFLAGS) -MMD -c $< -o $@ + +-include $(DEPEDENCY_FILES) + +benchmark: + @$(MAKE) clean + @$(MAKE) IGNORE="" + +benchmark-clean: + @$(MAKE) IGNORE="" clean + +clean: + rm -f $(TRUE_TARGETS) $(OBJECTS) $(DEPEDENCY_FILES) mtent12.txt mtent12.zip + +grammar: include/ctre/pcre.hpp + +regrammar: + @rm -f include/ctre/pcre.hpp + @$(MAKE) grammar + +include/ctre/pcre.hpp: include/ctre/pcre.gram + @echo "LL1q $<" + @$(DESATOMAT) --ll --q --input=include/ctre/pcre.gram --output=include/ctre/ --generator=cpp_ctll_v2 --cfg:fname=pcre.hpp --cfg:namespace=ctre --cfg:guard=CTRE__PCRE__HPP --cfg:grammar_name=pcre + +mtent12.zip: + curl -s http://www.gutenberg.org/files/3200/old/mtent12.zip -o mtent12.zip + 
+mtent12.txt: mtent12.zip + unzip -o mtent12.zip + touch mtent12.txt + +single-header: single-header/ctre.hpp single-header/ctre-unicode.hpp single-header/unicode-db.hpp + +single-header/unicode-db.hpp: include/unicode-db/unicode-db.hpp + cp $+ $@ + +single-header/ctre.hpp: + ${PYTHON} -m quom include/ctre.hpp ctre.hpp.tmp + echo "/*" > single-header/ctre.hpp + cat LICENSE >> single-header/ctre.hpp + echo "*/" >> single-header/ctre.hpp + cat ctre.hpp.tmp >> single-header/ctre.hpp + rm ctre.hpp.tmp + +single-header/ctre-unicode.hpp: + ${PYTHON} -m quom include/ctre-unicode.hpp ctre-unicode.hpp.tmp + echo "/*" > single-header/ctre-unicode.hpp + cat LICENSE >> single-header/ctre-unicode.hpp + echo "*/" >> single-header/ctre-unicode.hpp + cat ctre-unicode.hpp.tmp >> single-header/ctre-unicode.hpp + rm ctre-unicode.hpp.tmp + +REPEAT:=10 + +compare: mtent12.txt + $(CXX) $(CXXFLAGS) -MMD -march=native -DPATTERN="\"(${PATTERN})\"" -c tests/benchmark-range/measurement.cpp -o tests/benchmark-range/measurement.o + $(CXX) tests/benchmark-range/measurement.o -lboost_regex -lpcre2-8 -lre2 -o tests/benchmark-range/measurement + tests/benchmark-range/measurement all mtent12.txt ${REPEAT} diff --git a/third_party/compile-time-regular-expressions/NOTES.md b/third_party/compile-time-regular-expressions/NOTES.md new file mode 100644 index 0000000000..b01a62ca7f --- /dev/null +++ b/third_party/compile-time-regular-expressions/NOTES.md @@ -0,0 +1,24 @@ +# Unsupported PCRE constructs + +* `\0dd` `\ddd` `\0{dd...}` octal numbers +* `\Q...\E` quoting +* `\cx` control characters +* `\C` data unit +* `\h` `\H` horizontal character classes +* `\v` `\V` vertical character classes +* `\p{xx}` `\P{xx}` character properties +* `\X` unicode grapheme cluster +* boundaries other than `^$` +* atomic groups +* comments +* options/modes +* subroutines +* conditional patterns +* callouts +* match point reset `\K` + + +# Other unsupported "things" +* `[[.hyphen.]]` named characters +* `[[=M=]]` whatever 
this is + diff --git a/third_party/compile-time-regular-expressions/README.md b/third_party/compile-time-regular-expressions/README.md new file mode 100644 index 0000000000..e609ef97ce --- /dev/null +++ b/third_party/compile-time-regular-expressions/README.md @@ -0,0 +1,295 @@ +# Compile time regular expressions v3 + +[![Build Status](https://travis-ci.org/hanickadot/compile-time-regular-expressions.svg?branch=master)](https://travis-ci.org/hanickadot/compile-time-regular-expressions) + +Fast compile-time regular expressions with support for matching/searching/capturing during compile-time or runtime. + +You can use the single header version from directory `single-header`. This header can be regenerated with `make single-header`. If you are using cmake, you can add this directory as subdirectory and link to target `ctre`. ID ``0`` is the full match, ID ``1`` is the first capture group, ID ``2`` is the second, etc. + Named groups are specified using ``(?<name>)``. + + Example: :: + + if (auto m = ctre::match<"(?<chars>[a-z]+)([0-9]+)">("abc123")) { + m.get<"chars">(); //abc + m.get<2>(); //123 + } + + .. function:: constexpr size_t size() + + Returns the number of captures in this result object. + + .. function:: constexpr operator bool() const noexcept + + Returns whether the match was successful. + + .. function:: constexpr operator std::basic_string_view() const noexcept + constexpr std::basic_string_view to_view() const noexcept + constexpr std::basic_string_view view() const noexcept + + Converts the match to a string view. + + .. function:: constexpr explicit operator std::basic_string() const noexcept + constexpr std::basic_string to_string() const noexcept + constexpr std::basic_string str() const noexcept + + Converts the match to a string. + +.. class:: template captured_content + + .. class:: template storage + + ..
function:: constexpr auto begin() const noexcept + constexpr auto end() const noexcept + + Returns the begin or end iterator for the captured content. + + .. function:: constexpr operator bool() const noexcept + + Returns whether the match was successful. + + .. function:: constexpr auto size() const noexcept + + Returns the number of characters in the capture. + + .. function:: constexpr operator std::basic_string_view() const noexcept + constexpr std::basic_string_view to_view() const noexcept + constexpr std::basic_string_view view() const noexcept + + Converts the capture to a string view. + + .. function:: constexpr explicit operator std::basic_string() const noexcept + constexpr std::basic_string to_string() const noexcept + constexpr std::basic_string str() const noexcept + + Converts the capture to a string. + + .. function:: constexpr static size_t get_id() noexcept + + Returns ``Id`` + +.. function:: template constexpr ctre::regex_results match(Args&&... args) + template constexpr ctre::regex_results match(Args&&... args) + + Matches ``RE`` against the whole input. + ``Args...`` must be either a string-like object with ``begin`` and ``end`` member functions, or a pair of forward iterators. + +.. function:: template constexpr ctre::regex_results search(Args&&... args) + template constexpr ctre::regex_results search(Args&&... args) + + Searches for a match somewhere within the input. + ``Args...`` must be either a string-like object with ``begin`` and ``end`` member functions, or a pair of forward iterators. + diff --git a/third_party/compile-time-regular-expressions/docs/conf.py b/third_party/compile-time-regular-expressions/docs/conf.py new file mode 100644 index 0000000000..b9bbc25717 --- /dev/null +++ b/third_party/compile-time-regular-expressions/docs/conf.py @@ -0,0 +1,54 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options.
For a full +# list see the documentation: +# http://www.sphinx-doc.org/en/master/config + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- + +project = 'ctre' +copyright = '2019, Hana Dusikova' +author = 'Hana Dusikova' +master_doc = 'index' +primary_domain = 'cpp' +highlight_language = 'cpp' + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". 
+html_static_path = ['_static'] diff --git a/third_party/compile-time-regular-expressions/docs/examples.rst b/third_party/compile-time-regular-expressions/docs/examples.rst new file mode 100644 index 0000000000..5281bebef7 --- /dev/null +++ b/third_party/compile-time-regular-expressions/docs/examples.rst @@ -0,0 +1,77 @@ +Examples +======== + +Extracting a number from input +------------------------------ +:: + + std::optional extract_number(std::string_view s) noexcept { + if (auto m = ctre::match<"[a-z]+([0-9]+)">(s)) { + return m.get<1>().to_view(); + } else { + return std::nullopt; + } + } + +`link to compiler explorer `_ + +Extracting values from date +--------------------------- +:: + + + struct date { std::string_view year; std::string_view month; std::string_view day; }; + std::optional extract_date(std::string_view s) noexcept { + using namespace ctre::literals; + if (auto [whole, year, month, day] = ctre::match<"(\\d{4})/(\\d{1,2})/(\\d{1,2})">(s); whole) { + return date{year, month, day}; + } else { + return std::nullopt; + } + } + + //static_assert(extract_date("2018/08/27"sv).has_value()); + //static_assert((*extract_date("2018/08/27"sv)).year == "2018"sv); + //static_assert((*extract_date("2018/08/27"sv)).month == "08"sv); + //static_assert((*extract_date("2018/08/27"sv)).day == "27"sv); + +`link to compiler explorer `_ + +Lexer +----- +:: + + enum class type { + unknown, identifier, number + }; + + struct lex_item { + type t; + std::string_view c; + }; + + std::optional lexer(std::string_view v) noexcept { + if (auto [m,id,num] = ctre::match<"([a-z]+)|([0-9]+)">(v); m) { + if (id) { + return lex_item{type::identifier, id}; + } else if (num) { + return lex_item{type::number, num}; + } + } + return std::nullopt; + } + +`link to compiler explorer `_ + +Range over input +---------------- + +This support is preliminary and probably the API will be changed. 
+ +:: + + auto input = "123,456,768"sv; + + for (auto match: ctre::range<"([0-9]+),?">(input)) { + std::cout << std::string_view{match.get<0>()} << "\n"; + } \ No newline at end of file diff --git a/third_party/compile-time-regular-expressions/docs/index.rst b/third_party/compile-time-regular-expressions/docs/index.rst new file mode 100644 index 0000000000..67c7b3ef28 --- /dev/null +++ b/third_party/compile-time-regular-expressions/docs/index.rst @@ -0,0 +1,66 @@ +ctre +==== + +A compile-time (almost) PCRE-compatible regular expression matcher for C++. + +Overview +======== + +Fast compile-time regular expressions with support for matching/searching/capturing at compile-time or runtime. :: + + ctre::match<"REGEX">(subject); // C++20 + "REGEX"_ctre.match(subject); // C++17 + N3599 extension + +.. toctree:: + :maxdepth: 2 + + api + examples + regex_syntax + +Supported compilers +=================== + +- clang 6.0+ (template UDL, C++17 syntax) +- xcode clang 10.0+ (template UDL, C++17 syntax) +- gcc 7.4+ (template UDL, C++17 syntax) +- gcc 9.0+ (C++17 & C++20 cNTTP syntax) +- MSVC 15.8.8+ (C++17 syntax only) + +Basic Usage +=========== + +Template UDL syntax +------------------- + +The compiler must support the N3599 extension, available as a GNU extension in gcc (not in GCC 9.1+) and clang. :: + + constexpr auto match(std::string_view sv) noexcept { + using namespace ctre::literals; + return "h.*"_ctre.match(sv); + } + +If you need the N3599 extension in GCC 9.1+, you can't use -pedantic mode and must define the macro ``CTRE_ENABLE_LITERALS``. + +C++17 syntax +------------ + +You can provide the pattern as a constexpr ``ctll::fixed_string`` variable. :: + + static constexpr auto pattern = ctll::fixed_string{ "h.*" }; + + constexpr auto match(std::string_view sv) noexcept { + return ctre::match<pattern>(sv); + } + +(this is tested in MSVC 15.8.8) + +C++20 syntax +------------ + +Currently the only compiler which supports cNTTP syntax ``ctre::match<PATTERN>(subject)`` is GCC 9+.
:: + + constexpr auto match(std::string_view sv) noexcept { + return ctre::match<"h.*">(sv); + } + diff --git a/third_party/compile-time-regular-expressions/docs/regex_syntax.rst b/third_party/compile-time-regular-expressions/docs/regex_syntax.rst new file mode 100644 index 0000000000..9af97adf5d --- /dev/null +++ b/third_party/compile-time-regular-expressions/docs/regex_syntax.rst @@ -0,0 +1,18 @@ +Regex Syntax +============ + +The library supports most of the `PCRE `_ syntax with a few exceptions: + +- callouts +- comments +- conditional patterns +- control characters (\\cX) +- horizontal / vertical character classes (\\h\\H\\v\\V) +- match point reset (\\K) +- named characters +- octal numbers +- options / modes +- subroutines +- unicode grapheme cluster (\\X) + +TODO more detailed regex information \ No newline at end of file diff --git a/third_party/compile-time-regular-expressions/future.cpp b/third_party/compile-time-regular-expressions/future.cpp new file mode 100644 index 0000000000..07931f6ec7 --- /dev/null +++ b/third_party/compile-time-regular-expressions/future.cpp @@ -0,0 +1,15 @@ +#include +#include +#include + +std::string match(std::string_view sv) { + if (auto match = ctre::match<"[a-z]+">(sv); match) { + return match.to_string(); + } else { + return "not_match"; + } +} + +int main() { + std::cout << match("hello") << "\n"; +} \ No newline at end of file diff --git a/third_party/compile-time-regular-expressions/gcc-bench.txt b/third_party/compile-time-regular-expressions/gcc-bench.txt new file mode 100644 index 0000000000..a943b97d71 --- /dev/null +++ b/third_party/compile-time-regular-expressions/gcc-bench.txt @@ -0,0 +1,20 @@ +//--------- ABCD|DEFGH|EFGHI|A{4,} +egrep 0m50.036s +CTRE 0m3.982s +PCRE 0m8.621s +std::regex 0m55.058s +//--------- [0-9a-fA-F]{8,16} +egrep 0m32.361s +CTRE 0m4.291s +PCRE 0m13.958s +std::regex 0m18.179s +//--------- ^([0-9]{4,16})?[aA] +egrep 0m12.819s +CTRE 0m2.844s +PCRE 0m2.614s +std::regex 0m22.876s +//--------- 
([aAbB]{4,}|[xXyY]{4,}|[1234]{4,})0 +egrep 1m45.696s +CTRE 0m7.623s +PCRE 0m39.808s +std::regex 1m2.799s diff --git a/third_party/compile-time-regular-expressions/include/ctll.hpp b/third_party/compile-time-regular-expressions/include/ctll.hpp new file mode 100644 index 0000000000..2fe40e3419 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctll.hpp @@ -0,0 +1,6 @@ +#ifndef CTRE_V2__CTLL__HPP +#define CTRE_V2__CTLL__HPP + +#include "ctll/parser.hpp" + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctll/actions.hpp b/third_party/compile-time-regular-expressions/include/ctll/actions.hpp new file mode 100644 index 0000000000..a6a569df80 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctll/actions.hpp @@ -0,0 +1,29 @@ +#ifndef CTLL__ACTIONS__HPP +#define CTLL__ACTIONS__HPP + +namespace ctll { + struct empty_subject { }; + + struct empty_actions { + // dummy operator so using Actions::operator() later will not give error + template static constexpr auto apply(Action, InputSymbol, Subject subject) { + return subject; + } + }; + + template struct identity: public Actions { + using Actions::apply; + // allow empty_subject to exists + template constexpr static auto apply(Action, term, empty_subject) -> empty_subject { return {}; } + template constexpr static auto apply(Action, epsilon, empty_subject) -> empty_subject { return {}; } + }; + + template struct ignore_unknown: public Actions { + using Actions::apply; + // allow flow thru unknown actions + template constexpr static auto apply(Action, term, Subject) -> Subject { return {}; } + template constexpr static auto apply(Action, epsilon, Subject) -> Subject { return {}; } + }; +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctll/fixed_string.hpp b/third_party/compile-time-regular-expressions/include/ctll/fixed_string.hpp new file mode 100644 index 0000000000..5c7666b6f5 --- /dev/null +++ 
b/third_party/compile-time-regular-expressions/include/ctll/fixed_string.hpp @@ -0,0 +1,223 @@ +#ifndef CTLL__FIXED_STRING__GPP +#define CTLL__FIXED_STRING__GPP + +#ifndef CTLL_IN_A_MODULE +#include +#include +#include +#include +#include +#endif + +#include "utilities.hpp" + +namespace ctll { + +struct length_value_t { + uint32_t value; + uint8_t length; +}; + +constexpr length_value_t length_and_value_of_utf8_code_point(uint8_t first_unit) noexcept { + if ((first_unit & 0b1000'0000) == 0b0000'0000) return {static_cast(first_unit), 1}; + else if ((first_unit & 0b1110'0000) == 0b1100'0000) return {static_cast(first_unit & 0b0001'1111), 2}; + else if ((first_unit & 0b1111'0000) == 0b1110'0000) return {static_cast(first_unit & 0b0000'1111), 3}; + else if ((first_unit & 0b1111'1000) == 0b1111'0000) return {static_cast(first_unit & 0b0000'0111), 4}; + else if ((first_unit & 0b1111'1100) == 0b1111'1000) return {static_cast(first_unit & 0b0000'0011), 5}; + else if ((first_unit & 0b1111'1100) == 0b1111'1100) return {static_cast(first_unit & 0b0000'0001), 6}; + else return {0, 0}; +} + +constexpr char32_t value_of_trailing_utf8_code_point(uint8_t unit, bool & correct) noexcept { + if ((unit & 0b1100'0000) == 0b1000'0000) return unit & 0b0011'1111; + else { + correct = false; + return 0; + } +} + +constexpr length_value_t length_and_value_of_utf16_code_point(uint16_t first_unit) noexcept { + if ((first_unit & 0b1111110000000000) == 0b1101'1000'0000'0000) return {static_cast(first_unit & 0b0000001111111111), 2}; + else return {first_unit, 1}; +} + +struct construct_from_pointer_t { }; + +constexpr auto construct_from_pointer = construct_from_pointer_t{}; + +CTLL_EXPORT template struct fixed_string { + char32_t content[N] = {}; + size_t real_size{0}; + bool correct_flag{true}; + + template constexpr fixed_string(construct_from_pointer_t, const T * input) noexcept { + if constexpr (std::is_same_v) { + #ifdef CTRE_STRING_IS_UTF8 + size_t out{0}; + for (size_t i{0}; i < N; ++i) 
{ + length_value_t info = length_and_value_of_utf8_code_point(input[i]); + switch (info.length) { + case 6: + if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); + [[fallthrough]]; + case 5: + if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); + [[fallthrough]]; + case 4: + if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); + [[fallthrough]]; + case 3: + if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); + [[fallthrough]]; + case 2: + if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); + [[fallthrough]]; + case 1: + content[out++] = static_cast(info.value); + real_size++; + break; + default: + correct_flag = false; + return; + } + } + #else + for (size_t i{0}; i < N; ++i) { + content[i] = static_cast(input[i]); + real_size++; + } + #endif + #if __cpp_char8_t + } else if constexpr (std::is_same_v) { + size_t out{0}; + for (size_t i{0}; i < N; ++i) { + length_value_t info = length_and_value_of_utf8_code_point(input[i]); + switch (info.length) { + case 6: + if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); + [[fallthrough]]; + case 5: + if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); + [[fallthrough]]; + case 4: + if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); + [[fallthrough]]; + case 3: + if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); + [[fallthrough]]; + case 2: + if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); + [[fallthrough]]; + case 1: + content[out++] = static_cast(info.value); + real_size++; + break; + default: + 
correct_flag = false; + return; + } + } + #endif + } else if constexpr (std::is_same_v) { + size_t out{0}; + for (size_t i{0}; i < N; ++i) { + length_value_t info = length_and_value_of_utf16_code_point(input[i]); + if (info.length == 2) { + if (++i < N) { + if ((input[i] & 0b1111'1100'0000'0000) == 0b1101'1100'0000'0000) { + content[out++] = ((info.value << 10) | (input[i] & 0b0000'0011'1111'1111)) + 0x10000; + } else { + correct_flag = false; + break; + } + } + } else { + content[out++] = info.value; + } + } + real_size = out; + } else if constexpr (std::is_same_v || std::is_same_v) { + for (size_t i{0}; i < N; ++i) { + content[i] = static_cast(input[i]); + real_size++; + } + } + } + + template constexpr fixed_string(const std::array & in) noexcept: fixed_string{construct_from_pointer, in.data()} { } + template constexpr fixed_string(const T (&input)[N+1]) noexcept: fixed_string{construct_from_pointer, input} { } + + constexpr fixed_string(const fixed_string & other) noexcept { + for (size_t i{0}; i < N; ++i) { + content[i] = other.content[i]; + } + real_size = other.real_size; + correct_flag = other.correct_flag; + } + constexpr bool correct() const noexcept { + return correct_flag; + } + constexpr size_t size() const noexcept { + return real_size; + } + constexpr const char32_t * begin() const noexcept { + return content; + } + constexpr const char32_t * end() const noexcept { + return content + size(); + } + constexpr char32_t operator[](size_t i) const noexcept { + return content[i]; + } + template constexpr bool is_same_as(const fixed_string & rhs) const noexcept { + if (real_size != rhs.size()) return false; + for (size_t i{0}; i != real_size; ++i) { + if (content[i] != rhs[i]) return false; + } + return true; + } + constexpr operator std::basic_string_view() const noexcept { + return std::basic_string_view{content, size()}; + } +}; + +template <> class fixed_string<0> { + static constexpr char32_t empty[1] = {0}; +public: + template constexpr 
fixed_string(const T *) noexcept { + + } + constexpr fixed_string(std::initializer_list) noexcept { + + } + constexpr fixed_string(const fixed_string &) noexcept { + + } + constexpr bool correct() const noexcept { + return true; + } + constexpr size_t size() const noexcept { + return 0; + } + constexpr const char32_t * begin() const noexcept { + return empty; + } + constexpr const char32_t * end() const noexcept { + return empty + size(); + } + constexpr char32_t operator[](size_t) const noexcept { + return 0; + } + constexpr operator std::basic_string_view() const noexcept { + return std::basic_string_view{empty, 0}; + } +}; + +template fixed_string(const CharT (&)[N]) -> fixed_string; +template fixed_string(const std::array &) -> fixed_string; + +template fixed_string(fixed_string) -> fixed_string; + +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctll/grammars.hpp b/third_party/compile-time-regular-expressions/include/ctll/grammars.hpp new file mode 100644 index 0000000000..fd5184c129 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctll/grammars.hpp @@ -0,0 +1,123 @@ +#ifndef CTLL__GRAMMARS__HPP +#define CTLL__GRAMMARS__HPP + +namespace ctll { + +// terminal type representing symbol / character of any type +template struct term { + static constexpr auto value = v; +}; + +// epsilon = nothing on input tape +// also used as an command for parsing means "do nothing" +struct epsilon { + static constexpr auto value = '-'; +}; + +// empty_stack_symbol = nothing on stack +struct empty_stack_symbol {}; + +// push is alias to list +template using push = list; + +// accept/reject type for controlling output of LL1 machine +struct accept { constexpr explicit operator bool() noexcept { return true; } }; +struct reject { constexpr explicit operator bool() noexcept { return false; } }; + +// action type, every action item in grammar must inherit from +struct action { + struct action_tag { }; +}; + +// move one character 
forward and pop it from stack command +struct pop_input { + struct pop_input_tag { }; +}; + +// additional overloads for type list +template constexpr auto push_front(pop_input, list) -> list { return {}; } + +template constexpr auto push_front(epsilon, list) -> list { return {}; } + +template constexpr auto push_front(list, list) -> list { return {}; } + +template constexpr auto pop_front_and_push_front(T item, list l) { + return push_front(item, pop_front(l)); +} + +// SPECIAL matching types for nicer grammars + +// match any term +struct anything { + constexpr inline anything() noexcept { } + template constexpr anything(term) noexcept; +}; + +// match range of term A-B +template struct range { + constexpr inline range() noexcept { } + //template constexpr range(term) noexcept requires (A <= V) && (V <= B); + template > constexpr range(term) noexcept; +}; + +#ifdef __EDG__ +template struct contains { + static constexpr bool value = ((Set == V) || ... || false); +}; +#endif + +// match terms defined in set +template struct set { + constexpr inline set() noexcept { } + #ifdef __EDG__ + template ::value>> constexpr set(term) noexcept; + #else + template > constexpr set(term) noexcept; + #endif +}; + +// match terms not defined in set +template struct neg_set { + constexpr inline neg_set() noexcept { } + + #ifdef __EDG__ + template ::value>> constexpr neg_set(term) noexcept; + #else + template > constexpr neg_set(term) noexcept; + #endif +}; + +// AUGMENTED grammar which completes user-defined grammar for all other cases +template struct augment_grammar: public Grammar { + // start nonterminal is defined in parent type + using typename Grammar::_start; + + // grammar rules are inherited from Grammar parent type + using Grammar::rule; + + // term on stack and on input means pop_input; + template static constexpr auto rule(term, term) -> ctll::pop_input; + + // if the type on stack (range, set, neg_set, anything) is constructible from the terminal => pop_input + 
template static constexpr auto rule(Expected, term) -> std::enable_if_t>, ctll::pop_input>; + + // empty stack and empty input means we are accepting + static constexpr auto rule(empty_stack_symbol, epsilon) -> ctll::accept; + + // not matching anything else => reject + static constexpr auto rule(...) -> ctll::reject; + + // start stack is just a list; + using start_stack = list; +}; + + + +} + + + + + +#endif + diff --git a/third_party/compile-time-regular-expressions/include/ctll/list.hpp b/third_party/compile-time-regular-expressions/include/ctll/list.hpp new file mode 100644 index 0000000000..a04ed51287 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctll/list.hpp @@ -0,0 +1,93 @@ +#ifndef CTLL__TYPE_STACK__HPP +#define CTLL__TYPE_STACK__HPP + +#include "utilities.hpp" + +namespace ctll { + +template struct list { }; + +struct _nothing { }; + +using empty_list = list<>; + +// calculate size of list content +template constexpr auto size(list) noexcept { return sizeof...(Ts); } + + +// check if the list is empty +template constexpr bool empty(list) noexcept { return false; } +constexpr bool empty(empty_list) { return true; } + + +// concat two lists together left to right +template constexpr auto concat(list, list) noexcept -> list { return {}; } + + +// push something to the front of a list +template constexpr auto push_front(T, list) noexcept -> list { return {}; } + + +// pop element from the front of a list +template constexpr auto pop_front(list) noexcept -> list { return {}; } +constexpr auto pop_front(empty_list) -> empty_list; + +// pop element from the front of a list and return new typelist too +template struct list_pop_pair { + Front front{}; + List list{}; + constexpr list_pop_pair() = default; +}; + +template constexpr auto pop_and_get_front(list, T = T()) noexcept -> list_pop_pair> { return {}; } +template constexpr auto pop_and_get_front(empty_list, T = T()) noexcept -> list_pop_pair { return {}; } + + +// return front of 
the list +template constexpr auto front(list, T = T()) noexcept -> Head { return {}; } +template constexpr auto front(empty_list, T = T()) noexcept -> T { return {}; } + +// rotate list +template struct rotate_item { + template friend constexpr auto operator+(list, rotate_item) noexcept -> list { return {}; } +}; + +template constexpr auto rotate(list) -> decltype((list<>{} + ... + rotate_item{})) { + return {}; +} + +// set operations +template struct item_matcher { + struct not_selected { + template friend constexpr auto operator+(list, not_selected) -> list; + }; + template struct wrapper { + template friend constexpr auto operator+(list, wrapper) -> list; + }; + + static constexpr auto check(T) { return std::true_type{}; } + static constexpr auto check(...) { return std::false_type{}; } + static constexpr auto select(T) { return not_selected{}; } + template static constexpr auto select(Y) { return wrapper{}; } +}; + +template constexpr bool exists_in(T, list) noexcept { + return (item_matcher::check(Ts{}) || ... || false); +} + +template constexpr auto add_item(T item, list l) noexcept { + if constexpr (exists_in(item, l)) { + return l; + } else { + return list{}; + } +} + +template constexpr auto remove_item(T, list) noexcept { + item_matcher matcher; + return decltype((list<>{} + ... 
+ matcher.select(Ts{}))){}; +} + +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctll/parser.hpp b/third_party/compile-time-regular-expressions/include/ctll/parser.hpp new file mode 100644 index 0000000000..d3751f9aa7 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctll/parser.hpp @@ -0,0 +1,192 @@ +#ifndef CTLL__PARSER__HPP +#define CTLL__PARSER__HPP + +#include "fixed_string.hpp" +#include "list.hpp" +#include "grammars.hpp" +#include "actions.hpp" + +#ifndef CTLL_IN_A_MODULE +#include +#endif + +namespace ctll { + + +enum class decision { + reject, + accept, + undecided +}; + +struct placeholder { }; + +template using index_placeholder = placeholder; + +#if CTLL_CNTTP_COMPILER_CHECK +template struct parser { // in c++20 +#else +template struct parser { +#endif + + #ifdef __GNUC__ // workaround to GCC bug + #if CTLL_CNTTP_COMPILER_CHECK + static constexpr auto _input = input; // c++20 mode + #else + static constexpr auto & _input = input; // c++17 mode + #endif + #else + static constexpr auto _input = input; // everyone else + #endif + + using Actions = ctll::conditional, identity>; + using grammar = augment_grammar; + + template struct results { + + static constexpr bool is_correct = Decision == decision::accept; + + constexpr inline CTLL_FORCE_INLINE operator bool() const noexcept { + return is_correct; + } + + #ifdef __GNUC__ // workaround to GCC bug + #if CTLL_CNTTP_COMPILER_CHECK + static constexpr auto _input = input; // c++20 mode + #else + static constexpr auto & _input = input; // c++17 mode + #endif + #else + static constexpr auto _input = input; // everyone else + #endif + + using output_type = Subject; + static constexpr size_t position = Pos; + + constexpr auto operator+(placeholder) const noexcept { + if constexpr (Decision == decision::undecided) { + // parse for current char (RPos) with previous stack and subject :) + return parser::template decide({}, {}); + } else { + // if there is 
decision already => just push it to the end of fold expression + return *this; + } + } + }; + + template static constexpr auto get_current_term() noexcept { + if constexpr (Pos < input.size()) { + constexpr auto value = input[Pos]; + if constexpr (value <= static_cast((std::numeric_limits::max)())) { + return term(value)>{}; + } else { + return term{}; + } + + } else { + // return epsilon if we are past the input + return epsilon{}; + } + } + template static constexpr auto get_previous_term() noexcept { + if constexpr (Pos == 0) { + // there is no previous character on input if we are on start + return epsilon{}; + } else if constexpr ((Pos-1) < input.size()) { + constexpr auto value = input[Pos-1]; + if constexpr (value <= static_cast((std::numeric_limits::max)())) { + return term(value)>{}; + } else { + return term{}; + } + } else { + return epsilon{}; + } + } + // if rule is accept => return true and subject + template + static constexpr auto move(ctll::accept, Terminal, Stack, Subject) noexcept { + return typename parser::template results(); + } + // if rule is reject => return false and subject + template + static constexpr auto move(ctll::reject, Terminal, Stack, Subject) noexcept { + return typename parser::template results(); + } + // if rule is pop_input => move to next character + template + static constexpr auto move(ctll::pop_input, Terminal, Stack, Subject) noexcept { + return typename parser::template results(); + } + // if rule is string => push it to the front of stack + template + static constexpr auto move(push string, Terminal, Stack stack, Subject subject) noexcept { + return decide(push_front(string, stack), subject); + } + // if rule is epsilon (empty string) => continue + template + static constexpr auto move(epsilon, Terminal, Stack stack, Subject subject) noexcept { + return decide(stack, subject); + } + // if rule is string with current character at the beginning (term) => move to next character + // and push string without the character 
(quick LL(1)) + template + static constexpr auto move(push, Content...>, term, Stack stack, Subject) noexcept { + constexpr auto local_input = input; + return typename parser::template results(), stack)), Subject, decision::undecided>(); + } + // if rule is string with any character at the beginning (compatible with current term) => move to next character + // and push string without the character (quick LL(1)) + template + static constexpr auto move(push, term, Stack stack, Subject) noexcept { + constexpr auto local_input = input; + return typename parser::template results(), stack)), Subject, decision::undecided>(); + } + // decide if we need to take action or move + template static constexpr auto decide(Stack previous_stack, Subject previous_subject) noexcept { + // each call means we pop something from stack + auto top_symbol = decltype(ctll::front(previous_stack, empty_stack_symbol()))(); + // gcc pedantic warning + [[maybe_unused]] auto stack = decltype(ctll::pop_front(previous_stack))(); + + // in case top_symbol is action type (apply it on previous subject and get new one) + if constexpr (std::is_base_of_v) { + auto subject = Actions::apply(top_symbol, get_previous_term(), previous_subject); + + // in case that semantic action is error => reject input + if constexpr (std::is_same_v) { + return typename parser::template results(); + } else { + return decide(stack, subject); + } + } else { + // all other cases are ordinary for LL(1) parser + auto current_term = get_current_term(); + auto rule = decltype(grammar::rule(top_symbol,current_term))(); + return move(rule, current_term, stack, previous_subject); + } + } + + // trampolines with folded expression + template static constexpr auto trampoline_decide(Subject, std::index_sequence) noexcept { + // parse everything for first char and than for next and next ... 
+ // Pos+1 is needed as we want to finish calculation with epsilons on stack + auto v = (decide<0, typename grammar::start_stack, Subject>({}, {}) + ... + index_placeholder()); + return v; + } + + template static constexpr auto trampoline_decide(Subject subject = {}) noexcept { + // there will be no recursion, just sequence long as the input + return trampoline_decide(subject, std::make_index_sequence()); + } + + template using output = decltype(trampoline_decide()); + template static inline constexpr bool correct_with = trampoline_decide(); + +}; + +} // end of ctll namespace + + +#endif + diff --git a/third_party/compile-time-regular-expressions/include/ctll/utilities.hpp b/third_party/compile-time-regular-expressions/include/ctll/utilities.hpp new file mode 100644 index 0000000000..808e2ffe2a --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctll/utilities.hpp @@ -0,0 +1,67 @@ +#ifndef CTLL__UTILITIES__HPP +#define CTLL__UTILITIES__HPP + +#ifndef CTLL_IN_A_MODULE +#include +#endif + +#ifdef CTLL_IN_A_MODULE +#define CTLL_EXPORT export +#else +#define CTLL_EXPORT +#endif + +#if defined __cpp_nontype_template_parameter_class + #define CTLL_CNTTP_COMPILER_CHECK 1 +#elif defined __cpp_nontype_template_args +// compiler which defines correctly feature test macro (not you clang) + #if __cpp_nontype_template_args >= 201911L + #define CTLL_CNTTP_COMPILER_CHECK 1 + #elif __cpp_nontype_template_args >= 201411L +// appleclang 13+ + #if defined __apple_build_version__ + #if defined __clang_major__ && __clang_major__ >= 13 +// but only in c++20 and more + #if __cplusplus > 201703L + #define CTLL_CNTTP_COMPILER_CHECK 1 + #endif + #endif + #else +// clang 12+ + #if defined __clang_major__ && __clang_major__ >= 12 +// but only in c++20 and more + #if __cplusplus > 201703L + #define CTLL_CNTTP_COMPILER_CHECK 1 + #endif + #endif + #endif + #endif +#endif + +#ifndef CTLL_CNTTP_COMPILER_CHECK + #define CTLL_CNTTP_COMPILER_CHECK 0 +#endif + +#ifdef _MSC_VER 
+#define CTLL_FORCE_INLINE __forceinline +#else +#define CTLL_FORCE_INLINE __attribute__((always_inline)) +#endif + +namespace ctll { + +template struct conditional_helper; + +template <> struct conditional_helper { + template using type = A; +}; + +template <> struct conditional_helper { + template using type = B; +}; + +template using conditional = typename conditional_helper::template type; + +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre-unicode.hpp b/third_party/compile-time-regular-expressions/include/ctre-unicode.hpp new file mode 100644 index 0000000000..5c8ac17b5f --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre-unicode.hpp @@ -0,0 +1,7 @@ +#ifndef CTRE_V2__CTRE_UNICODE__HPP +#define CTRE_V2__CTRE_UNICODE__HPP + +#include "ctre.hpp" +#include "unicode-db.hpp" + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre.hpp b/third_party/compile-time-regular-expressions/include/ctre.hpp new file mode 100644 index 0000000000..fcc4b0d8e1 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre.hpp @@ -0,0 +1,10 @@ +#ifndef CTRE_V2__CTRE__HPP +#define CTRE_V2__CTRE__HPP + +#include "ctre/literals.hpp" +#include "ctre/functions.hpp" +#include "ctre/iterators.hpp" +#include "ctre/range.hpp" +#include "ctre/operators.hpp" + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/asserts.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/asserts.inc.hpp new file mode 100644 index 0000000000..8b8077c8d3 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/asserts.inc.hpp @@ -0,0 +1,29 @@ +#ifndef CTRE__ACTIONS__ASSERTS__HPP +#define CTRE__ACTIONS__ASSERTS__HPP + +// push_assert_begin +template static constexpr auto apply(pcre::push_assert_begin, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(assert_line_begin(), subject.stack), 
subject.parameters}; +} + +// push_assert_end +template static constexpr auto apply(pcre::push_assert_end, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(assert_line_end(), subject.stack), subject.parameters}; +} + +// push_assert_begin +template static constexpr auto apply(pcre::push_assert_subject_begin, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(assert_subject_begin(), subject.stack), subject.parameters}; +} + +// push_assert_subject_end +template static constexpr auto apply(pcre::push_assert_subject_end, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(assert_subject_end(), subject.stack), subject.parameters}; +} + +// push_assert_subject_end_with_lineend +template static constexpr auto apply(pcre::push_assert_subject_end_with_lineend, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(assert_subject_end_line(), subject.stack), subject.parameters}; +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/atomic_group.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/atomic_group.inc.hpp new file mode 100644 index 0000000000..f615c8ac4d --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/atomic_group.inc.hpp @@ -0,0 +1,19 @@ +#ifndef CTRE__ACTIONS__ATOMIC_GROUP__HPP +#define CTRE__ACTIONS__ATOMIC_GROUP__HPP + +// atomic start +template static constexpr auto apply(pcre::start_atomic, ctll::term, pcre_context, pcre_parameters>) { + return pcre_context{ctll::list(), pcre_parameters()}; +} + +// atomic +template static constexpr auto apply(pcre::make_atomic, ctll::term, pcre_context, pcre_parameters>) { + return pcre_context{ctll::list, Ts...>(), pcre_parameters()}; +} + +// atomic sequence +template static constexpr auto apply(pcre::make_atomic, ctll::term, pcre_context, atomic_start, Ts...>, pcre_parameters>) { 
+ return pcre_context{ctll::list, Ts...>(), pcre_parameters()}; +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/backreference.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/backreference.inc.hpp new file mode 100644 index 0000000000..2a7de4d38d --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/backreference.inc.hpp @@ -0,0 +1,30 @@ +#ifndef CTRE__ACTIONS__BACKREFERENCE__HPP +#define CTRE__ACTIONS__BACKREFERENCE__HPP + +// backreference with name +template static constexpr auto apply(pcre::make_back_reference, ctll::term, pcre_context, Ts...>, pcre_parameters>) { + return pcre_context{ctll::push_front(back_reference_with_name>(), ctll::list()), pcre_parameters()}; +} + +// with just a number +template static constexpr auto apply(pcre::make_back_reference, ctll::term, pcre_context, Ts...>, pcre_parameters>) { + // if we are looking outside of existing list of Ids ... reject input during parsing + if constexpr (Counter < Id) { + return ctll::reject{}; + } else { + return pcre_context{ctll::push_front(back_reference(), ctll::list()), pcre_parameters()}; + } +} + +// relative backreference +template static constexpr auto apply(pcre::make_relative_back_reference, ctll::term, [[maybe_unused]] pcre_context, Ts...>, pcre_parameters>) { + // if we are looking outside of existing list of Ids ... 
reject input during parsing + if constexpr (Counter < Id) { + return ctll::reject{}; + } else { + constexpr size_t absolute_id = (Counter + 1) - Id; + return pcre_context{ctll::push_front(back_reference(), ctll::list()), pcre_parameters()}; + } +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/boundaries.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/boundaries.inc.hpp new file mode 100644 index 0000000000..097bc9dbeb --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/boundaries.inc.hpp @@ -0,0 +1,14 @@ +#ifndef CTRE__ACTIONS__BOUNDARIES__HPP +#define CTRE__ACTIONS__BOUNDARIES__HPP + +// push_word_boundary +template static constexpr auto apply(pcre::push_word_boundary, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(boundary(), subject.stack), subject.parameters}; +} + +// push_not_word_boundary +template static constexpr auto apply(pcre::push_not_word_boundary, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(boundary>(), subject.stack), subject.parameters}; +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/capture.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/capture.inc.hpp new file mode 100644 index 0000000000..ebf5437a68 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/capture.inc.hpp @@ -0,0 +1,40 @@ +#ifndef CTRE__ACTIONS__CAPTURE__HPP +#define CTRE__ACTIONS__CAPTURE__HPP + +// prepare_capture +template static constexpr auto apply(pcre::prepare_capture, ctll::term, pcre_context, pcre_parameters>) { + return pcre_context{ctll::push_front(capture_id(), ctll::list()), pcre_parameters()}; +} + +// reset_capture +template static constexpr auto apply(pcre::reset_capture, ctll::term, pcre_context, Ts...>, pcre_parameters>) { + return pcre_context{ctll::list(), 
pcre_parameters()}; +} + +// capture +template static constexpr auto apply(pcre::make_capture, ctll::term, pcre_context, Ts...>, pcre_parameters>) { + return pcre_context{ctll::push_front(capture(), ctll::list()), pcre_parameters()}; +} +// capture (sequence) +template static constexpr auto apply(pcre::make_capture, ctll::term, pcre_context, capture_id, Ts...>, pcre_parameters>) { + return pcre_context{ctll::push_front(capture(), ctll::list()), pcre_parameters()}; +} +// push_name +template static constexpr auto apply(pcre::push_name, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(id(), subject.stack), subject.parameters}; +} +// push_name (concat) +template static constexpr auto apply(pcre::push_name, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(id(), ctll::list()), subject.parameters}; +} +// capture with name +template static constexpr auto apply(pcre::make_capture_with_name, ctll::term, pcre_context, capture_id, Ts...>, pcre_parameters>) { + return pcre_context{ctll::push_front(capture_with_name, A>(), ctll::list()), pcre_parameters()}; +} +// capture with name (sequence) +template static constexpr auto apply(pcre::make_capture_with_name, ctll::term, pcre_context, id, capture_id, Ts...>, pcre_parameters>) { + return pcre_context{ctll::push_front(capture_with_name, Content...>(), ctll::list()), pcre_parameters()}; +} + + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/characters.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/characters.inc.hpp new file mode 100644 index 0000000000..4aa7ffd7a2 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/characters.inc.hpp @@ -0,0 +1,41 @@ +#ifndef CTRE__ACTIONS__CHARACTERS__HPP +#define CTRE__ACTIONS__CHARACTERS__HPP + +// push character +template static constexpr auto apply(pcre::push_character, ctll::term, pcre_context, 
Parameters> subject) { + return pcre_context{ctll::push_front(character(), subject.stack), subject.parameters}; +} +// push_any_character +template static constexpr auto apply(pcre::push_character_anything, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(any(), subject.stack), subject.parameters}; +} +// character_alarm +template static constexpr auto apply(pcre::push_character_alarm, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(character<'\x07'>(), subject.stack), subject.parameters}; +} +// character_escape +template static constexpr auto apply(pcre::push_character_escape, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(character<'\x14'>(), subject.stack), subject.parameters}; +} +// character_formfeed +template static constexpr auto apply(pcre::push_character_formfeed, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(character<'\x0C'>(), subject.stack), subject.parameters}; +} +// push_character_newline +template static constexpr auto apply(pcre::push_character_newline, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(character<'\x0A'>(), subject.stack), subject.parameters}; +} +// push_character_null +template static constexpr auto apply(pcre::push_character_null, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(character<'\0'>(), subject.stack), subject.parameters}; +} +// push_character_return_carriage +template static constexpr auto apply(pcre::push_character_return_carriage, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(character<'\x0D'>(), subject.stack), subject.parameters}; +} +// push_character_tab +template static constexpr auto apply(pcre::push_character_tab, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(character<'\x09'>(), 
subject.stack), subject.parameters}; +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/class.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/class.inc.hpp new file mode 100644 index 0000000000..c52c550b3d --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/class.inc.hpp @@ -0,0 +1,51 @@ +#ifndef CTRE__ACTIONS__CLASS__HPP +#define CTRE__ACTIONS__CLASS__HPP + +// class_digit +template static constexpr auto apply(pcre::class_digit, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::set(), subject.stack), subject.parameters}; +} +// class_non_digit +template static constexpr auto apply(pcre::class_nondigit, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::negative_set(), subject.stack), subject.parameters}; +} +// class_space +template static constexpr auto apply(pcre::class_space, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::set(), subject.stack), subject.parameters}; +} +// class_nonspace +template static constexpr auto apply(pcre::class_nonspace, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::negative_set(), subject.stack), subject.parameters}; +} + +// class_horizontal_space +template static constexpr auto apply(pcre::class_horizontal_space, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::set(), subject.stack), subject.parameters}; +} +// class_horizontal_nonspace +template static constexpr auto apply(pcre::class_non_horizontal_space, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::negative_set(), subject.stack), subject.parameters}; +} +// class_vertical_space +template static constexpr auto apply(pcre::class_vertical_space, ctll::term, pcre_context, Parameters> subject) { + return 
pcre_context{ctll::push_front(ctre::set(), subject.stack), subject.parameters}; +} +// class_vertical_nonspace +template static constexpr auto apply(pcre::class_non_vertical_space, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::negative_set(), subject.stack), subject.parameters}; +} + +// class_word +template static constexpr auto apply(pcre::class_word, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::set(), subject.stack), subject.parameters}; +} +// class_nonword +template static constexpr auto apply(pcre::class_nonword, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::negative_set(), subject.stack), subject.parameters}; +} +// class_nonnewline +template static constexpr auto apply(pcre::class_nonnewline, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::negative_set>(), subject.stack), subject.parameters}; +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/fusion.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/fusion.inc.hpp new file mode 100644 index 0000000000..bccb20bf41 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/fusion.inc.hpp @@ -0,0 +1,70 @@ +#ifndef CTRE__ACTIONS__FUSION__HPP +#define CTRE__ACTIONS__FUSION__HPP + +static constexpr size_t combine_max_repeat_length(size_t A, size_t B) { + if (A && B) return A+B; + else return 0; +} + +template static constexpr auto combine_repeat(repeat, repeat) { + return repeat(); +} + +template static constexpr auto combine_repeat(lazy_repeat, lazy_repeat) { + return lazy_repeat(); +} + +template static constexpr auto combine_repeat(possessive_repeat, possessive_repeat) { + [[maybe_unused]] constexpr bool first_is_unbounded = (MaxA == 0); + [[maybe_unused]] constexpr bool second_is_nonempty = (MinB > 0); + [[maybe_unused]] constexpr 
bool second_can_be_empty = (MinB == 0); + + if constexpr (first_is_unbounded && second_is_nonempty) { + // will always reject, but I keep the content, so I have some amount of captures + return sequence(); + } else if constexpr (first_is_unbounded) { + return possessive_repeat(); + } else if constexpr (second_can_be_empty) { + return possessive_repeat(); + } else { + return possessive_repeat(); + } +} + +// concat repeat sequences +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context, repeat, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(combine_repeat(repeat(), repeat()), ctll::list()), subject.parameters}; +} + +// concat lazy repeat sequences +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context, lazy_repeat, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(combine_repeat(lazy_repeat(), lazy_repeat()), ctll::list()), subject.parameters}; +} + +// concat possessive repeat seqeunces +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context, possessive_repeat, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(combine_repeat(possessive_repeat(), possessive_repeat()), ctll::list()), subject.parameters}; +} + +// concat repeat sequences into sequence +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context, As...>,repeat,Ts...>, Parameters> subject) { + using result = decltype(combine_repeat(repeat(), repeat())); + + return pcre_context{ctll::push_front(sequence(), ctll::list()), subject.parameters}; +} + +// concat lazy repeat sequences into sequence +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context, As...>,lazy_repeat,Ts...>, Parameters> subject) { + using result = decltype(combine_repeat(lazy_repeat(), lazy_repeat())); + + return pcre_context{ctll::push_front(sequence(), ctll::list()), subject.parameters}; +} + +// concat possessive repeat sequences into 
sequence +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context, As...>,possessive_repeat,Ts...>, Parameters> subject) { + using result = decltype(combine_repeat(possessive_repeat(), possessive_repeat())); + + return pcre_context{ctll::push_front(sequence(), ctll::list()), subject.parameters}; +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/hexdec.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/hexdec.inc.hpp new file mode 100644 index 0000000000..ae30b6ee85 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/hexdec.inc.hpp @@ -0,0 +1,29 @@ +#ifndef CTRE__ACTIONS__HEXDEC__HPP +#define CTRE__ACTIONS__HEXDEC__HPP + +// hexdec character support (seed) +template static constexpr auto apply(pcre::create_hexdec, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(number<0ull>(), subject.stack), subject.parameters}; +} +// hexdec character support (push value) +template static constexpr auto apply(pcre::push_hexdec, ctll::term, pcre_context, Ts...>, Parameters> subject) { + constexpr auto previous = N << 4ull; + if constexpr (V >= 'a' && V <= 'f') { + return pcre_context{ctll::push_front(number<(previous + (V - 'a' + 10))>(), ctll::list()), subject.parameters}; + } else if constexpr (V >= 'A' && V <= 'F') { + return pcre_context{ctll::push_front(number<(previous + (V - 'A' + 10))>(), ctll::list()), subject.parameters}; + } else { + return pcre_context{ctll::push_front(number<(previous + (V - '0'))>(), ctll::list()), subject.parameters}; + } +} +// hexdec character support (convert to character) +template static constexpr auto apply(pcre::finish_hexdec, ctll::term, pcre_context, Ts...>, Parameters> subject) { + constexpr size_t max_char = (std::numeric_limits::max)(); + if constexpr (N <= max_char) { + return pcre_context{ctll::push_front(character(), ctll::list()), subject.parameters}; + } else { + 
return pcre_context{ctll::push_front(character(), ctll::list()), subject.parameters}; + } +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/look.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/look.inc.hpp new file mode 100644 index 0000000000..8786a77500 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/look.inc.hpp @@ -0,0 +1,68 @@ +#ifndef CTRE__ACTIONS__LOOKAHEAD__HPP +#define CTRE__ACTIONS__LOOKAHEAD__HPP + +// lookahead positive start +template static constexpr auto apply(pcre::start_lookahead_positive, ctll::term, pcre_context, pcre_parameters>) { + return pcre_context{ctll::list>, Ts...>(), pcre_parameters()}; +} + +// lookahead positive end +template static constexpr auto apply(pcre::look_finish, ctll::term, pcre_context>, Ts...>, pcre_parameters>) { + return pcre_context{ctll::list, Ts...>(), pcre_parameters()}; +} + +// lookahead positive end (sequence) +template static constexpr auto apply(pcre::look_finish, ctll::term, pcre_context, look_start>, Ts...>, pcre_parameters>) { + return pcre_context{ctll::list, Ts...>(), pcre_parameters()}; +} + +// lookahead negative start +template static constexpr auto apply(pcre::start_lookahead_negative, ctll::term, pcre_context, pcre_parameters>) { + return pcre_context{ctll::list>, Ts...>(), pcre_parameters()}; +} + +// lookahead negative end +template static constexpr auto apply(pcre::look_finish, ctll::term, pcre_context>, Ts...>, pcre_parameters>) { + return pcre_context{ctll::list, Ts...>(), pcre_parameters()}; +} + +// lookahead negative end (sequence) +template static constexpr auto apply(pcre::look_finish, ctll::term, pcre_context, look_start>, Ts...>, pcre_parameters>) { + return pcre_context{ctll::list, Ts...>(), pcre_parameters()}; +} + +// LOOKBEHIND + +// lookbehind positive start +template static constexpr auto apply(pcre::start_lookbehind_positive, ctll::term, pcre_context, pcre_parameters>) { + 
return pcre_context{ctll::list>, Ts...>(), pcre_parameters()}; +} + +// lookbehind positive end +template static constexpr auto apply(pcre::look_finish, ctll::term, pcre_context>, Ts...>, pcre_parameters>) { + return pcre_context{ctll::list, Ts...>(), pcre_parameters()}; +} + +// lookbehind positive end (sequence) +template static constexpr auto apply(pcre::look_finish, ctll::term, pcre_context, look_start>, Ts...>, pcre_parameters>) { + using my_lookbehind = decltype(ctre::convert_to_basic_list(ctll::rotate(ctll::list{}))); + return pcre_context{ctll::list(), pcre_parameters()}; +} + +// lookbehind negative start +template static constexpr auto apply(pcre::start_lookbehind_negative, ctll::term, pcre_context, pcre_parameters>) { + return pcre_context{ctll::list>, Ts...>(), pcre_parameters()}; +} + +// lookbehind negative end +template static constexpr auto apply(pcre::look_finish, ctll::term, pcre_context>, Ts...>, pcre_parameters>) { + return pcre_context{ctll::list, Ts...>(), pcre_parameters()}; +} + +// lookbehind negative end (sequence) +template static constexpr auto apply(pcre::look_finish, ctll::term, pcre_context, look_start>, Ts...>, pcre_parameters>) { + using my_lookbehind = decltype(ctre::convert_to_basic_list(ctll::rotate(ctll::list{}))); + return pcre_context{ctll::list(), pcre_parameters()}; +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/mode.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/mode.inc.hpp new file mode 100644 index 0000000000..d4e612b081 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/mode.inc.hpp @@ -0,0 +1,32 @@ +#ifndef CTRE__ACTIONS__MODE__HPP +#define CTRE__ACTIONS__MODE__HPP + +// we need to reset counter and wrap Mode into mode_switch +template static constexpr auto apply_mode(Mode, ctll::list, Parameters) { + return pcre_context, Ts...>, Parameters>{}; +} + +template static constexpr auto apply_mode(Mode, ctll::list, 
Ts...>, pcre_parameters) { + return pcre_context, Ts...>, pcre_parameters>{}; +} + +// catch a semantic action into mode +template static constexpr auto apply(pcre::mode_case_insensitive mode, ctll::term,pcre_context, Parameters>) { + return apply_mode(mode, ctll::list{}, Parameters{}); +} + +template static constexpr auto apply(pcre::mode_case_sensitive mode, ctll::term,pcre_context, Parameters>) { + return apply_mode(mode, ctll::list{}, Parameters{}); +} + +template static constexpr auto apply(pcre::mode_singleline mode, ctll::term,pcre_context, Parameters>) { + return apply_mode(mode, ctll::list{}, Parameters{}); +} + +template static constexpr auto apply(pcre::mode_multiline mode, ctll::term,pcre_context, Parameters>) { + return apply_mode(mode, ctll::list{}, Parameters{}); +} + +// to properly reset capture + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/named_class.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/named_class.inc.hpp new file mode 100644 index 0000000000..0be2710967 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/named_class.inc.hpp @@ -0,0 +1,61 @@ +#ifndef CTRE__ACTIONS__NAMED_CLASS__HPP +#define CTRE__ACTIONS__NAMED_CLASS__HPP + +// class_named_alnum +template static constexpr auto apply(pcre::class_named_alnum, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::alphanum_chars(), subject.stack), subject.parameters}; +} +// class_named_alpha +template static constexpr auto apply(pcre::class_named_alpha, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::alpha_chars(), subject.stack), subject.parameters}; +} +// class_named_digit +template static constexpr auto apply(pcre::class_named_digit, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::digit_chars(), subject.stack), subject.parameters}; +} +// 
class_named_ascii +template static constexpr auto apply(pcre::class_named_ascii, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::ascii_chars(), subject.stack), subject.parameters}; +} +// class_named_blank +template static constexpr auto apply(pcre::class_named_blank, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::enumeration<' ','\t'>(), subject.stack), subject.parameters}; +} +// class_named_cntrl +template static constexpr auto apply(pcre::class_named_cntrl, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::set, ctre::character<'\x7F'>>(), subject.stack), subject.parameters}; +} +// class_named_graph +template static constexpr auto apply(pcre::class_named_graph, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::char_range<'\x21','\x7E'>(), subject.stack), subject.parameters}; +} +// class_named_lower +template static constexpr auto apply(pcre::class_named_lower, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::char_range<'a','z'>(), subject.stack), subject.parameters}; +} +// class_named_upper +template static constexpr auto apply(pcre::class_named_upper, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::char_range<'A','Z'>(), subject.stack), subject.parameters}; +} +// class_named_print +template static constexpr auto apply(pcre::class_named_print, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(ctre::char_range<'\x20','\x7E'>(), subject.stack), subject.parameters}; +} +// class_named_space +template static constexpr auto apply(pcre::class_named_space, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(space_chars(), subject.stack), subject.parameters}; +} +// class_named_word +template static constexpr auto 
apply(pcre::class_named_word, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(word_chars(), subject.stack), subject.parameters}; +} +// class_named_punct +template static constexpr auto apply(pcre::class_named_punct, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(punct_chars(), subject.stack), subject.parameters}; +} +// class_named_xdigit +template static constexpr auto apply(pcre::class_named_xdigit, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(xdigit_chars(), subject.stack), subject.parameters}; +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/options.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/options.inc.hpp new file mode 100644 index 0000000000..93ef48ac33 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/options.inc.hpp @@ -0,0 +1,55 @@ +#ifndef CTRE__ACTIONS__OPTIONS__HPP +#define CTRE__ACTIONS__OPTIONS__HPP + +// empty option for alternate +template static constexpr auto apply(pcre::push_empty, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(empty(), subject.stack), subject.parameters}; +} + +// empty option for empty regex +template static constexpr auto apply(pcre::push_empty, ctll::epsilon, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(empty(), subject.stack), subject.parameters}; +} + +// make_alternate (A|B) +template static constexpr auto apply(pcre::make_alternate, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(select(), ctll::list()), subject.parameters}; +} +// make_alternate (As..)|B => (As..|B) +template static constexpr auto apply(pcre::make_alternate, ctll::term, pcre_context, A, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(select(), ctll::list()), subject.parameters}; +} + + +// 
make_optional +template static constexpr auto apply(pcre::make_optional, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(optional(), ctll::list()), subject.parameters}; +} + +template static constexpr auto apply(pcre::make_optional, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(optional(), ctll::list()), subject.parameters}; +} + +// prevent from creating wrapped optionals +template static constexpr auto apply(pcre::make_optional, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(optional(), ctll::list()), subject.parameters}; +} + +// in case inner optional is lazy, result should be lazy too +template static constexpr auto apply(pcre::make_optional, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(lazy_optional(), ctll::list()), subject.parameters}; +} + +// make_lazy (optional) +template static constexpr auto apply(pcre::make_lazy, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(lazy_optional(), ctll::list()), subject.parameters}; +} + +// if you already got a lazy optional, make_lazy is no-op +template static constexpr auto apply(pcre::make_lazy, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(lazy_optional(), ctll::list()), subject.parameters}; +} + + + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/properties.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/properties.inc.hpp new file mode 100644 index 0000000000..05f7e80164 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/properties.inc.hpp @@ -0,0 +1,73 @@ +#ifndef CTRE__ACTIONS__PROPERTIES__HPP +#define CTRE__ACTIONS__PROPERTIES__HPP + +// push_property_name +template static constexpr auto apply(pcre::push_property_name, 
ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(property_name(), subject.stack), subject.parameters}; +} +// push_property_name (concat) +template static constexpr auto apply(pcre::push_property_name, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(property_name(), ctll::list()), subject.parameters}; +} + +// push_property_value +template static constexpr auto apply(pcre::push_property_value, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(property_value(), subject.stack), subject.parameters}; +} +// push_property_value (concat) +template static constexpr auto apply(pcre::push_property_value, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(property_value(), ctll::list()), subject.parameters}; +} + +// make_property +template static constexpr auto apply(pcre::make_property, ctll::term, [[maybe_unused]] pcre_context, Ts...>, Parameters> subject) { + //return ctll::reject{}; + constexpr char name[sizeof...(Name)]{static_cast(Name)...}; + constexpr auto p = uni::detail::binary_prop_from_string(get_string_view(name)); + + if constexpr (uni::detail::is_unknown(p)) { + return ctll::reject{}; + } else { + return pcre_context{ctll::push_front(make_binary_property

(), ctll::list()), subject.parameters}; + } +} + +// make_property +template static constexpr auto apply(pcre::make_property, ctll::term, [[maybe_unused]] pcre_context, property_name, Ts...>, Parameters> subject) { + //return ctll::reject{}; + constexpr auto prop = property_builder::template get(); + + if constexpr (std::is_same_v) { + return ctll::reject{}; + } else { + return pcre_context{ctll::push_front(prop, ctll::list()), subject.parameters}; + } +} + + +// make_property_negative +template static constexpr auto apply(pcre::make_property_negative, ctll::term, [[maybe_unused]] pcre_context, Ts...>, Parameters> subject) { + //return ctll::reject{}; + constexpr char name[sizeof...(Name)]{static_cast(Name)...}; + constexpr auto p = uni::detail::binary_prop_from_string(get_string_view(name)); + + if constexpr (uni::detail::is_unknown(p)) { + return ctll::reject{}; + } else { + return pcre_context{ctll::push_front(negate>(), ctll::list()), subject.parameters}; + } +} + +// make_property_negative +template static constexpr auto apply(pcre::make_property_negative, ctll::term, [[maybe_unused]] pcre_context, property_name, Ts...>, Parameters> subject) { + //return ctll::reject{}; + constexpr auto prop = property_builder::template get(); + + if constexpr (std::is_same_v) { + return ctll::reject{}; + } else { + return pcre_context{ctll::push_front(negate(), ctll::list()), subject.parameters}; + } +} + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/repeat.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/repeat.inc.hpp new file mode 100644 index 0000000000..98c2e0bc9d --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/repeat.inc.hpp @@ -0,0 +1,90 @@ +#ifndef CTRE__ACTIONS__REPEAT__HPP +#define CTRE__ACTIONS__REPEAT__HPP + +// repeat 1..N +template static constexpr auto apply(pcre::repeat_plus, ctll::term, pcre_context, Parameters> subject) { + return 
pcre_context{ctll::push_front(plus(), ctll::list()), subject.parameters}; +} +// repeat 1..N (sequence) +template static constexpr auto apply(pcre::repeat_plus, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(plus(), ctll::list()), subject.parameters}; +} + +// repeat 0..N +template static constexpr auto apply(pcre::repeat_star, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(star(), ctll::list()), subject.parameters}; +} +// repeat 0..N (sequence) +template static constexpr auto apply(pcre::repeat_star, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(star(), ctll::list()), subject.parameters}; +} + +// create_number (seed) +template static constexpr auto apply(pcre::create_number, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(number(V - '0')>(), subject.stack), subject.parameters}; +} +// push_number +template static constexpr auto apply(pcre::push_number, ctll::term, pcre_context, Ts...>, Parameters> subject) { + constexpr size_t previous = N * 10ull; + return pcre_context{ctll::push_front(number<(previous + (V - '0'))>(), ctll::list()), subject.parameters}; +} + +// repeat A..B +template static constexpr auto apply(pcre::repeat_ab, ctll::term, pcre_context, number, Subject, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(repeat(), ctll::list()), subject.parameters}; +} +// repeat A..B (sequence) +template static constexpr auto apply(pcre::repeat_ab, ctll::term, pcre_context, number, sequence, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(repeat(), ctll::list()), subject.parameters}; +} + +// repeat_exactly +template static constexpr auto apply(pcre::repeat_exactly, ctll::term, pcre_context, Subject, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(repeat(), ctll::list()), subject.parameters}; +} +// repeat_exactly A..B 
(sequence) +template static constexpr auto apply(pcre::repeat_exactly, ctll::term, pcre_context, sequence, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(repeat(), ctll::list()), subject.parameters}; +} + +// repeat_at_least (A+) +template static constexpr auto apply(pcre::repeat_at_least, ctll::term, pcre_context, Subject, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(repeat(), ctll::list()), subject.parameters}; +} +// repeat_at_least (A+) (sequence) +template static constexpr auto apply(pcre::repeat_at_least, ctll::term, pcre_context, sequence, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(repeat(), ctll::list()), subject.parameters}; +} + +// make_lazy (plus) +template static constexpr auto apply(pcre::make_lazy, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(lazy_plus(), ctll::list()), subject.parameters}; +} + +// make_lazy (star) +template static constexpr auto apply(pcre::make_lazy, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(lazy_star(), ctll::list()), subject.parameters}; +} + +// make_lazy (repeat) +template static constexpr auto apply(pcre::make_lazy, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(lazy_repeat(), ctll::list()), subject.parameters}; +} + +// make_possessive (plus) +template static constexpr auto apply(pcre::make_possessive, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(possessive_plus(), ctll::list()), subject.parameters}; +} + +// make_possessive (star) +template static constexpr auto apply(pcre::make_possessive, ctll::term, pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(possessive_star(), ctll::list()), subject.parameters}; +} + +// make_possessive (repeat) +template static constexpr auto apply(pcre::make_possessive, ctll::term, 
pcre_context, Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(possessive_repeat(), ctll::list()), subject.parameters}; +} + + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/sequence.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/sequence.inc.hpp new file mode 100644 index 0000000000..f590432e20 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/sequence.inc.hpp @@ -0,0 +1,32 @@ +#ifndef CTRE__ACTIONS__SEQUENCE__HPP +#define CTRE__ACTIONS__SEQUENCE__HPP + +// make_sequence +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context, Parameters> subject) { + return pcre_context{ctll::push_front(sequence(), ctll::list()), subject.parameters}; +} +// make_sequence (concat) +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context,A,Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(sequence(), ctll::list()), subject.parameters}; +} + +// make_sequence (make string) +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context,character,Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(string(), ctll::list()), subject.parameters}; +} +// make_sequence (concat string) +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context,character,Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(string(), ctll::list()), subject.parameters}; +} + +// make_sequence (make string in front of different items) +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context,Sq...>,character,Ts...>, Parameters> subject) { + return pcre_context{ctll::push_front(sequence,Sq...>(), ctll::list()), subject.parameters}; +} +// make_sequence (concat string in front of different items) +template static constexpr auto apply(pcre::make_sequence, ctll::term, pcre_context,Sq...>,character,Ts...>, 
Parameters> subject) { + return pcre_context{ctll::push_front(sequence,Sq...>(), ctll::list()), subject.parameters}; +} + + +#endif diff --git a/third_party/compile-time-regular-expressions/include/ctre/actions/set.inc.hpp b/third_party/compile-time-regular-expressions/include/ctre/actions/set.inc.hpp new file mode 100644 index 0000000000..a5a08d14d9 --- /dev/null +++ b/third_party/compile-time-regular-expressions/include/ctre/actions/set.inc.hpp @@ -0,0 +1,66 @@ +#ifndef CTRE__ACTIONS__SET__HPP +#define CTRE__ACTIONS__SET__HPP + +// UTILITY +// add into set if not exists +template