diff --git a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp index 1c8182337460..1c245d9c38e1 100644 --- a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp +++ b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp @@ -14,12 +14,15 @@ #include #include #include +#include #include #include LOG_SETUP(".persistence.filestor.handler.impl"); using document::BucketSpace; +using vespalib::xml_attribute_escaped; +using vespalib::xml_content_escaped; namespace storage { @@ -1338,8 +1341,8 @@ FileStorHandlerImpl::Stripe::dumpQueueHtml(std::ostream & os) const const PriorityIdx& idx = bmi::get<1>(*_queue); for (const auto & entry : idx) { - os << "
  • " << entry._command->toString() << " (priority: " - << (int)entry._command->getPriority() << ")
  • \n"; + os << "
  • " << xml_content_escaped(entry._command->toString()) << " (priority: " + << static_cast(entry._command->getPriority()) << ")
  • \n"; } } @@ -1379,8 +1382,9 @@ FileStorHandlerImpl::Stripe::dumpQueue(std::ostream & os) const const PriorityIdx& idx = bmi::get<1>(*_queue); for (const auto & entry : idx) { - os << entry._bucket.getBucketId() << ": " << entry._command->toString() << " (priority: " - << (int)entry._command->getPriority() << ")\n"; + os << entry._bucket.getBucketId() << ": " + << xml_content_escaped(entry._command->toString()) + << " (priority: " << static_cast(entry._command->getPriority()) << ")\n"; } } diff --git a/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp b/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp index 63fec9f037fe..62be96447a47 100644 --- a/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp +++ b/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -887,16 +888,17 @@ void FileStorManager::onFlush(bool downwards) void FileStorManager::reportHtmlStatus(std::ostream& out, const framework::HttpUrlPath& path) const { - bool showStatus = !path.hasAttribute("thread"); - bool verbose = path.hasAttribute("verbose"); + using vespalib::xml_attribute_escaped; - // Print menu + bool showStatus = !path.hasAttribute("thread"); + bool verbose = path.hasAttribute("verbose"); + // Print menu out << "[ Back to top" << " | Main filestor manager status page" << " | " << (verbose ? "Less verbose" : "More verbose") << "\n" << " ]

    \n"; diff --git a/storage/src/vespa/storage/storageserver/mergethrottler.cpp b/storage/src/vespa/storage/storageserver/mergethrottler.cpp index 28a76413149d..681d97299faa 100644 --- a/storage/src/vespa/storage/storageserver/mergethrottler.cpp +++ b/storage/src/vespa/storage/storageserver/mergethrottler.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -1313,6 +1314,8 @@ void MergeThrottler::reportHtmlStatus(std::ostream& out, const framework::HttpUrlPath&) const { + using vespalib::xml_content_escaped; + std::lock_guard lock(_stateLock); if (_use_dynamic_throttling) { out << "

    Dynamic throttle policy; window size min/max: [" @@ -1333,7 +1336,7 @@ MergeThrottler::reportHtmlStatus(std::ostream& out, if (!_merges.empty()) { out << "

      \n"; for (auto& m : _merges) { - out << "
    • " << m.second.getMergeCmdString(); + out << "
    • " << xml_content_escaped(m.second.getMergeCmdString()); if (m.second.isExecutingLocally()) { out << " ("; if (m.second.isInCycle()) { @@ -1364,7 +1367,7 @@ MergeThrottler::reportHtmlStatus(std::ostream& out, // The queue always owns its messages, thus this is safe out << "
    • Pri " << static_cast(qm._msg->getPriority()) - << ": " << *qm._msg; + << ": " << xml_content_escaped(qm._msg->toString()); out << "
    • \n"; } out << "\n"; diff --git a/storage/src/vespa/storage/storageserver/statemanager.cpp b/storage/src/vespa/storage/storageserver/statemanager.cpp index 60aebf5a5353..b1ea000e9bcf 100644 --- a/storage/src/vespa/storage/storageserver/statemanager.cpp +++ b/storage/src/vespa/storage/storageserver/statemanager.cpp @@ -9,9 +9,9 @@ #include #include #include -#include #include #include +#include #include #include @@ -99,19 +99,20 @@ void StateManager::reportHtmlStatus(std::ostream& out, const framework::HttpUrlPath& path) const { + using vespalib::xml_content_escaped; (void) path; { std::lock_guard lock(_stateLock); - const auto &baseLineClusterState = _systemState->getBaselineClusterState(); + const auto& baseLineClusterState = _systemState->getBaselineClusterState(); out << "

      Current system state

      \n" - << "" << baseLineClusterState->toString(true) << "\n" + << "" << xml_content_escaped(baseLineClusterState->toString(true)) << "\n" << "

      Current node state

      \n" << "" << baseLineClusterState->getNodeState(lib::Node( _component.getNodeType(), _component.getIndex()) ).toString(true) << "\n" << "

      Reported node state

      \n" - << "" << _nodeState->toString(true) << "\n" + << "" << xml_content_escaped(_nodeState->toString(true)) << "\n" << "

      Pending state requests

      \n" << _queuedStateRequests.size() << "\n" << "

      System state history

      \n" @@ -119,7 +120,7 @@ StateManager::reportHtmlStatus(std::ostream& out, << "Received at timeState\n"; for (auto it = _systemStateHistory.rbegin(); it != _systemStateHistory.rend(); ++it) { out << "" << it->first << "" - << *it->second->getBaselineClusterState() << "\n"; + << xml_content_escaped(it->second->getBaselineClusterState()->toString()) << "\n"; } out << "\n"; } diff --git a/storage/src/vespa/storage/visiting/visitor.cpp b/storage/src/vespa/storage/visiting/visitor.cpp index e8a217fc718d..91f304ad9a08 100644 --- a/storage/src/vespa/storage/visiting/visitor.cpp +++ b/storage/src/vespa/storage/visiting/visitor.cpp @@ -2,15 +2,15 @@ #include "visitor.h" #include "visitormetrics.h" +#include +#include +#include #include -#include #include +#include #include -#include -#include -#include -#include #include +#include #include #include #include @@ -932,10 +932,12 @@ Visitor::continueVisitor() void Visitor::getStatus(std::ostream& out, bool verbose) const { + using vespalib::xml_content_escaped; + out << "\n"; out << "\n"; - out << "\n"; + out << "\n"; out << "\n"; @@ -953,7 +955,7 @@ Visitor::getStatus(std::ostream& out, bool verbose) const << "\n"; out << "\n"; + << xml_content_escaped(_result.toString()) << "\n"; out << "\n"; @@ -973,28 +975,28 @@ Visitor::getStatus(std::ostream& out, bool verbose) const out << "\n"; out << "\n"; out << "\n"; out << "\n"; out << "\n"; out << "" << "" << "" - << "" + << "" << "\n"; } out << "
      PropertyValue
      Visitor id" << _visitorId << "
      Visitor name" << _id << "
      Visitor name" << xml_content_escaped(_id) << "
      Number of buckets to visit" << _buckets.size() << "
      Current status" - << _result << "
      Failed" << (failed() ? "true" : "false") << "
      Called completed visitor" << (_calledCompletedVisitor ? "true" : "false") << "
      Visiting fields" - << _visitorOptions._fieldSet + << xml_content_escaped(_visitorOptions._fieldSet) << "
      Visiting removes" << (_visitorOptions._visitRemoves ? "true" : "false") << "
      Control destination"; if (_controlDestination.get()) { - out << _controlDestination->toString(); + out << xml_content_escaped(_controlDestination->toString()); } else { out << "nil"; } out << "
      Data destination"; if (_dataDestination.get()) { - out << _dataDestination->toString(); + out << xml_content_escaped(_dataDestination->toString()); } else { out << "nil"; } out << "
      Document selection"; if (_documentSelection.get()) { - out << *_documentSelection; + out << xml_content_escaped(_documentSelection->toString()); } else { out << "nil"; } @@ -1052,7 +1054,7 @@ Visitor::getStatus(std::ostream& out, bool verbose) const for (auto& idAndMeta : _visitorTarget._messageMeta) { const VisitorTarget::MessageMeta& meta(idAndMeta.second); out << "Message #" << idAndMeta.first << " " - << meta.messageText << " "; + << xml_content_escaped(meta.messageText) << " "; if (meta.retryCount > 0) { out << "Retried " << meta.retryCount << " times. "; } diff --git a/storage/src/vespa/storage/visiting/visitormanager.cpp b/storage/src/vespa/storage/visiting/visitormanager.cpp index c548ef9e20c4..b305abae019f 100644 --- a/storage/src/vespa/storage/visiting/visitormanager.cpp +++ b/storage/src/vespa/storage/visiting/visitormanager.cpp @@ -7,11 +7,12 @@ #include "testvisitor.h" #include "recoveryvisitor.h" #include "reindexing_visitor.h" -#include #include #include -#include #include +#include +#include +#include #include #include @@ -557,6 +558,9 @@ void VisitorManager::reportHtmlStatus(std::ostream& out, const framework::HttpUrlPath& path) const { + using vespalib::xml_attribute_escaped; + using vespalib::xml_content_escaped; + bool showStatus = !path.hasAttribute("visitor"); bool verbose = path.hasAttribute("verbose"); bool showAll = path.hasAttribute("allvisitors"); @@ -568,7 +572,7 @@ VisitorManager::reportHtmlStatus(std::ostream& out, << " | Show all visitors" << " | " << (verbose ? "Less verbose" : "More verbose") << "\n" << " ]

      \n"; @@ -585,7 +589,8 @@ VisitorManager::reportHtmlStatus(std::ostream& out, out << " none"; } else { for (const auto& id_and_visitor : _visitorThread[i].second) { - out << " " << id_and_visitor.second << " (" << id_and_visitor.first << ")"; + out << " " << xml_content_escaped(id_and_visitor.second) + << " (" << id_and_visitor.first << ")"; } } out << "
      \n"; @@ -596,7 +601,8 @@ VisitorManager::reportHtmlStatus(std::ostream& out, for (const auto& enqueued : _visitorQueue) { auto& cmd = enqueued._command; assert(cmd); - out << "
    • " << cmd->getInstanceId() << " - " + out << "
    • " + << xml_content_escaped(cmd->getInstanceId()) << " - " << vespalib::count_ms(cmd->getQueueTimeout()) << ", remaining timeout " << vespalib::count_ms(enqueued._deadline - now) << " ms\n"; } @@ -619,7 +625,7 @@ VisitorManager::reportHtmlStatus(std::ostream& out, << "
    • " << entry.second.id << "" << entry.second.timestamp << "" << vespalib::count_ms(entry.second.timeout) << "" << entry.second.destination << "" << xml_content_escaped(entry.second.destination) << "
      \n"; @@ -632,13 +638,13 @@ VisitorManager::reportHtmlStatus(std::ostream& out, << ", variable = " << _maxVariableConcurrentVisitors << ", waiting visitors " << _visitorQueue.size() << "
      \n"; } - // Only one can access status at a time as _statusRequest only holds - // answers from one request at a time + // Only one can access status at a time as _statusRequest only holds + // answers from one request at a time std::unique_lock sync(_statusLock); - // Send all subrequests + // Send all sub-requests uint32_t parts = _visitorThread.size(); for (uint32_t i=0; i cmd(new RequestStatusPage(path)); + auto cmd = std::make_shared(path); std::ostringstream token; token << "Visitor thread " << i; cmd->setSortToken(token.str()); @@ -648,7 +654,7 @@ VisitorManager::reportHtmlStatus(std::ostream& out, _statusCond.wait(sync, [&]() { return (_statusRequest.size() >= parts);}); std::sort(_statusRequest.begin(), _statusRequest.end(), StatusReqSorter()); - // Create output + // Create output for (uint32_t i=0; i<_statusRequest.size(); ++i) { out << "

      " << _statusRequest[i]->getSortToken() << "

      \n" << _statusRequest[i]->getStatus() << "\n"; diff --git a/storage/src/vespa/storage/visiting/visitorthread.cpp b/storage/src/vespa/storage/visiting/visitorthread.cpp index cc3e709a8485..ba2a7584ff4d 100644 --- a/storage/src/vespa/storage/visiting/visitorthread.cpp +++ b/storage/src/vespa/storage/visiting/visitorthread.cpp @@ -2,18 +2,16 @@ #include "visitorthread.h" #include "messages.h" +#include #include #include #include +#include #include #include -#include #include #include -#include -#include #include -#include #include LOG_SETUP(".visitor.thread"); diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt index b2ce30112964..df1e10068287 100644 --- a/vespalib/CMakeLists.txt +++ b/vespalib/CMakeLists.txt @@ -189,6 +189,7 @@ vespa_define_module( src/tests/util/rcuvector src/tests/util/reusable_set src/tests/util/size_literals + src/tests/util/string_escape src/tests/valgrind src/tests/visit_ranges src/tests/invokeservice diff --git a/vespalib/src/tests/util/string_escape/CMakeLists.txt b/vespalib/src/tests/util/string_escape/CMakeLists.txt new file mode 100644 index 000000000000..98d4e7bd253e --- /dev/null +++ b/vespalib/src/tests/util/string_escape/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_util_string_escape_test_app TEST + SOURCES + string_escape_test.cpp + DEPENDS + vespalib + GTest::GTest +) +vespa_add_test(NAME vespalib_util_string_escape_test_app COMMAND vespalib_util_string_escape_test_app) diff --git a/vespalib/src/tests/util/string_escape/string_escape_test.cpp b/vespalib/src/tests/util/string_escape/string_escape_test.cpp new file mode 100644 index 000000000000..1ee2c08fbc32 --- /dev/null +++ b/vespalib/src/tests/util/string_escape/string_escape_test.cpp @@ -0,0 +1,44 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include + +using namespace vespalib; +using namespace ::testing; + +TEST(StringEscapeTest, xml_attribute_special_chars_are_escaped) { + // We always escape both " and ' since we don't know the quoting context of the enclosing attribute. + EXPECT_EQ(xml_attribute_escaped("<>&\"'"), "<>&"'"); +} + +TEST(StringEscapeTest, xml_attribute_regular_chars_are_not_escaped) { + // Far from exhaustive, but should catch obvious mess-ups. + EXPECT_EQ(xml_attribute_escaped("09azAZ.,()[]$!"), "09azAZ.,()[]$!"); +} + +TEST(StringEscapeTest, control_characters_are_escaped_in_attributes) { + EXPECT_EQ(xml_attribute_escaped("\n"), " "); + EXPECT_EQ(xml_attribute_escaped("\r"), " "); + EXPECT_EQ(xml_attribute_escaped(stringref("\x00", 1)), "�"); // Can't just invoke strlen with null byte :) + EXPECT_EQ(xml_attribute_escaped("\x1f"), ""); +} + +TEST(StringEscapeTest, xml_content_special_chars_are_escaped) { + EXPECT_EQ(xml_content_escaped("<>&"), "<>&"); +} + +TEST(StringEscapeTest, xml_content_regular_chars_are_not_escaped) { + EXPECT_EQ(xml_content_escaped("09azAZ.,()[]$!"), "09azAZ.,()[]$!"); + // Newlines are not escaped in content + EXPECT_EQ(xml_content_escaped("\n"), "\n"); + // Quotes are not escaped in content + EXPECT_EQ(xml_content_escaped("\"'"), "\"'"); +} + +TEST(StringEscapeTest, control_characters_are_escaped_in_content) { + EXPECT_EQ(xml_content_escaped("\r"), " "); + EXPECT_EQ(xml_content_escaped(stringref("\x00", 1)), "�"); + EXPECT_EQ(xml_content_escaped("\x1f"), ""); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/vespa/vespalib/util/CMakeLists.txt b/vespalib/src/vespa/vespalib/util/CMakeLists.txt index 056823379820..8cdc9444daa5 100644 --- a/vespalib/src/vespa/vespalib/util/CMakeLists.txt +++ b/vespalib/src/vespa/vespalib/util/CMakeLists.txt @@ -88,6 +88,7 @@ vespa_add_library(vespalib_vespalib_util OBJECT singleexecutor.cpp small_vector.cpp stash.cpp + string_escape.cpp string_hash.cpp stringfmt.cpp testclock.cpp diff --git a/vespalib/src/vespa/vespalib/util/string_escape.cpp b/vespalib/src/vespa/vespalib/util/string_escape.cpp new file mode 100644 index 000000000000..d1b38f84c3e2 --- /dev/null +++ b/vespalib/src/vespa/vespalib/util/string_escape.cpp @@ -0,0 +1,79 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "string_escape.h" +#include +#include +#include + +namespace vespalib { + +namespace { + +std::vector precompute_escaped_xml_chars() { + std::vector vec(256, false); + for (uint32_t i=0; i<32; ++i) { + vec[i] = true; + } + vec['\n'] = false; + vec['<'] = true; + vec['>'] = true; + vec['&'] = true; + return vec; +} + +std::vector escaped_xml_chars = precompute_escaped_xml_chars(); + +template +void do_write_xml_content_escaped(StreamT& out, vespalib::stringref str) { + for (const char s : str) { + if (escaped_xml_chars[static_cast(s)]) { + if (s == '<') out << "<"; + else if (s == '>') out << ">"; + else if (s == '&') out << "&"; + else { + out << "&#" << static_cast(s) << ";"; + } + } else { + out << s; + } + } +} + +} + +vespalib::string xml_attribute_escaped(vespalib::stringref str) { + vespalib::asciistream ost; + for (const char s : str) { + if (s == '"' || s == '\'' || s == '\n' + || escaped_xml_chars[static_cast(s)]) + { + if (s == '<') ost << "<"; + else if (s == '>') ost << ">"; + else if (s == '&') ost << "&"; + else if (s == '"') ost << """; + else if (s == '\'') ost << "'"; + else { + ost << "&#" << static_cast(s) << ";"; + } + } else { + ost << s; + } + } + return ost.str(); +} + +vespalib::string xml_content_escaped(vespalib::stringref str) { + vespalib::asciistream out; + do_write_xml_content_escaped(out, str); + return out.str(); +} + +void write_xml_content_escaped(vespalib::asciistream& out, vespalib::stringref str) { + do_write_xml_content_escaped(out, str); +} + +void write_xml_content_escaped(std::ostream& out, vespalib::stringref str) { + do_write_xml_content_escaped(out, str); +} + +} diff --git a/vespalib/src/vespa/vespalib/util/string_escape.h b/vespalib/src/vespa/vespalib/util/string_escape.h new file mode 100644 index 000000000000..3ad926dafc43 --- /dev/null +++ b/vespalib/src/vespa/vespalib/util/string_escape.h @@ -0,0 +1,26 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include + +namespace vespalib { + +/** + * Returns input string but where the following characters are escaped: + * - all control chars < char value 32 + * - <, >, &, " and ' + */ +[[nodiscard]] vespalib::string xml_attribute_escaped(vespalib::stringref s); + +/** + * Returns input string but where the following characters are escaped: + * - all control chars < char value 32, _except_ linebreak + * - <, > and & + */ +[[nodiscard]] vespalib::string xml_content_escaped(vespalib::stringref s); +void write_xml_content_escaped(vespalib::asciistream& out, vespalib::stringref s); +void write_xml_content_escaped(std::ostream& out, vespalib::stringref s); + +} diff --git a/vespalib/src/vespa/vespalib/util/xmlstream.cpp b/vespalib/src/vespa/vespalib/util/xmlstream.cpp index bdc09da127bb..108cc56a2f22 100644 --- a/vespalib/src/vespa/vespalib/util/xmlstream.cpp +++ b/vespalib/src/vespa/vespalib/util/xmlstream.cpp @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "xmlstream.hpp" +#include "string_escape.h" #include #include #include @@ -42,23 +43,10 @@ namespace { return vec; } - std::vector getEscapedXmlCharacters() { - std::vector vec(256, false); - for (uint32_t i=0; i<32; ++i) { - vec[i] = true; - } - vec['\n'] = false; - vec['<'] = true; - vec['>'] = true; - vec['&'] = true; - return vec; - } - std::vector legalIdentifierFirstChar( getLegalIdentifierFirstCharacters()); std::vector legalIdentifierChars = getLegalIdentifierCharacters(); std::vector binaryChars = getBinaryCharacters(); - std::vector escapedXmlChars = getEscapedXmlCharacters(); bool containsBinaryCharacters(const std::string& s) { for (int i=0, n=s.size(); i(s[i])]) - { - if (s[i] == '<') ost << "<"; - else if (s[i] == '>') ost << ">"; - else if (s[i] == '&') ost << "&"; - else if (s[i] == '"') ost << """; - else { - ost << "&#" << (int) s[i] << ";"; - } - } else { - ost << s[i]; - } - } - return ost.str(); - } - - void writeEscaped(std::ostream& out, const std::string& s) { - for (uint32_t i=0, n=s.size(); i(s[i])]) { - if (s[i] == '<') out << "<"; - else if (s[i] == '>') out << ">"; - else if (s[i] == '&') out << "&"; - else { - out << "&#" << (int) s[i] << ";"; - } - } else { - out << s[i]; - } - } - } - void writeBase64Encoded(std::ostream& out, const std::string& s) { out << vespalib::Base64::encode(&s[0], s.size()); } @@ -290,7 +243,7 @@ XmlOutputStream::flush(bool endTag) it != _cachedAttributes.end(); ++it) { _wrappedStream << ' ' << it->getName() << "=\"" - << xmlAttributeEscape(it->getValue()) << '"'; + << xml_attribute_escaped(it->getValue()) << '"'; } _cachedAttributes.clear(); if (_cachedContent.empty() && endTag) { @@ -325,7 +278,7 @@ XmlOutputStream::flush(bool endTag) } switch (_cachedContentType) { case XmlContent::ESCAPED: { - writeEscaped(_wrappedStream, it->getContent()); + write_xml_content_escaped(_wrappedStream, it->getContent()); break; } case XmlContent::BASE64: {