-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for deserializing and decoding v0.1.0 IR streams, but without log-level parsing and filtering. #30
Changes from 42 commits
fc08027
8d20583
4757bd4
12eeca6
d870b19
edda4b3
8a1e9a6
5f005e7
9581578
dc652ad
7d1fa0e
c675c61
5c23182
a4c2c07
b940296
1087f94
5e8d42a
570b8eb
59b163b
92e44b3
d170434
b110479
181206e
cac21ee
b6d837c
11b347b
6588d56
2bca1cc
94f31af
072c7bc
08e0bdd
95b8102
37e6f30
058ec78
4d2c076
49a72c3
12dfb9e
2df4df8
7edb19e
77e2b2e
320f1ea
81135fa
cca920a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
@@ -1,6 +1,5 @@ | ||||||||||
#include "StreamReader.hpp" | ||||||||||
|
||||||||||
#include <algorithm> | ||||||||||
#include <cstddef> | ||||||||||
#include <cstdint> | ||||||||||
#include <format> | ||||||||||
|
@@ -23,6 +22,7 @@ | |||||||||
#include <spdlog/spdlog.h> | ||||||||||
|
||||||||||
#include <clp_ffi_js/ClpFfiJsException.hpp> | ||||||||||
#include <clp_ffi_js/ir/StructuredIrStreamReader.hpp> | ||||||||||
#include <clp_ffi_js/ir/UnstructuredIrStreamReader.hpp> | ||||||||||
|
||||||||||
namespace { | ||||||||||
|
@@ -117,8 +117,12 @@ EMSCRIPTEN_BINDINGS(ClpStreamReader) { | |||||||||
// JS types used as inputs | ||||||||||
emscripten::register_type<clp_ffi_js::ir::DataArrayTsType>("Uint8Array"); | ||||||||||
emscripten::register_type<clp_ffi_js::ir::LogLevelFilterTsType>("number[] | null"); | ||||||||||
emscripten::register_type<clp_ffi_js::ir::ReaderOptions>("{timestampKey: string} | null"); | ||||||||||
|
||||||||||
// JS types used as outputs | ||||||||||
emscripten::enum_<clp_ffi_js::ir::StreamType>("IrStreamType") | ||||||||||
.value("STRUCTURED", clp_ffi_js::ir::StreamType::Structured) | ||||||||||
.value("UNSTRUCTURED", clp_ffi_js::ir::StreamType::Unstructured); | ||||||||||
emscripten::register_type<clp_ffi_js::ir::DecodedResultsTsType>( | ||||||||||
"Array<[string, number, number, number]>" | ||||||||||
); | ||||||||||
|
@@ -128,6 +132,7 @@ EMSCRIPTEN_BINDINGS(ClpStreamReader) { | |||||||||
&clp_ffi_js::ir::StreamReader::create, | ||||||||||
emscripten::return_value_policy::take_ownership() | ||||||||||
) | ||||||||||
.function("getIrStreamType", &clp_ffi_js::ir::StreamReader::get_ir_stream_type) | ||||||||||
.function( | ||||||||||
"getNumEventsBuffered", | ||||||||||
&clp_ffi_js::ir::StreamReader::get_num_events_buffered | ||||||||||
|
@@ -143,7 +148,8 @@ EMSCRIPTEN_BINDINGS(ClpStreamReader) { | |||||||||
} // namespace | ||||||||||
|
||||||||||
namespace clp_ffi_js::ir { | ||||||||||
auto StreamReader::create(DataArrayTsType const& data_array) -> std::unique_ptr<StreamReader> { | ||||||||||
auto StreamReader::create(DataArrayTsType const& data_array, ReaderOptions const& reader_options) | ||||||||||
-> std::unique_ptr<StreamReader> { | ||||||||||
Comment on lines
+151
to
+152
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion Potential API Breaking Change in The Apply this diff to add a default parameter value: -auto StreamReader::create(DataArrayTsType const& data_array, ReaderOptions const& reader_options)
+auto StreamReader::create(DataArrayTsType const& data_array, ReaderOptions const& reader_options = {}) 📝 Committable suggestion
Suggested change
|
||||||||||
auto const length{data_array["length"].as<size_t>()}; | ||||||||||
SPDLOG_INFO("StreamReader::create: got buffer of length={}", length); | ||||||||||
|
||||||||||
|
@@ -159,20 +165,30 @@ auto StreamReader::create(DataArrayTsType const& data_array) -> std::unique_ptr< | |||||||||
|
||||||||||
rewind_reader_and_validate_encoding_type(*zstd_decompressor); | ||||||||||
|
||||||||||
// Validate the stream's version | ||||||||||
// Validate the stream's version and decide which type of IR stream reader to create. | ||||||||||
auto pos = zstd_decompressor->get_pos(); | ||||||||||
auto const version{get_version(*zstd_decompressor)}; | ||||||||||
if (std::ranges::find(cUnstructuredIrVersions, version) == cUnstructuredIrVersions.end()) { | ||||||||||
throw ClpFfiJsException{ | ||||||||||
clp::ErrorCode::ErrorCode_Unsupported, | ||||||||||
__FILENAME__, | ||||||||||
__LINE__, | ||||||||||
std::format("Unable to create reader for IR stream with version {}.", version) | ||||||||||
}; | ||||||||||
} | ||||||||||
try { | ||||||||||
zstd_decompressor->seek_from_begin(pos); | ||||||||||
} catch (ZstdDecompressor::OperationFailed& e) { | ||||||||||
auto const version_validation_result{clp::ffi::ir_stream::validate_protocol_version(version) | ||||||||||
}; | ||||||||||
if (clp::ffi::ir_stream::IRProtocolErrorCode::Supported == version_validation_result) { | ||||||||||
zstd_decompressor->seek_from_begin(0); | ||||||||||
return std::make_unique<StructuredIrStreamReader>(StructuredIrStreamReader::create( | ||||||||||
std::move(zstd_decompressor), | ||||||||||
std::move(data_buffer), | ||||||||||
reader_options | ||||||||||
)); | ||||||||||
} | ||||||||||
if (clp::ffi::ir_stream::IRProtocolErrorCode::BackwardCompatible | ||||||||||
== version_validation_result) | ||||||||||
{ | ||||||||||
zstd_decompressor->seek_from_begin(pos); | ||||||||||
return std::make_unique<UnstructuredIrStreamReader>(UnstructuredIrStreamReader::create( | ||||||||||
std::move(zstd_decompressor), | ||||||||||
std::move(data_buffer) | ||||||||||
)); | ||||||||||
} | ||||||||||
} catch (ZstdDecompressor::OperationFailed const& e) { | ||||||||||
Comment on lines
+174
to
+191
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion Ensure consistent use of While |
||||||||||
throw ClpFfiJsException{ | ||||||||||
clp::ErrorCode::ErrorCode_Failure, | ||||||||||
__FILENAME__, | ||||||||||
|
@@ -181,8 +197,11 @@ auto StreamReader::create(DataArrayTsType const& data_array) -> std::unique_ptr< | |||||||||
}; | ||||||||||
} | ||||||||||
|
||||||||||
return std::make_unique<UnstructuredIrStreamReader>( | ||||||||||
UnstructuredIrStreamReader::create(std::move(zstd_decompressor), std::move(data_buffer)) | ||||||||||
); | ||||||||||
throw ClpFfiJsException{ | ||||||||||
clp::ErrorCode::ErrorCode_Unsupported, | ||||||||||
__FILENAME__, | ||||||||||
__LINE__, | ||||||||||
std::format("Unable to create reader for IR stream with version {}.", version) | ||||||||||
}; | ||||||||||
} | ||||||||||
} // namespace clp_ffi_js::ir |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,195 @@ | ||
#include "StructuredIrStreamReader.hpp" | ||
|
||
#include <cstddef> | ||
#include <format> | ||
#include <memory> | ||
#include <string> | ||
#include <string_view> | ||
#include <system_error> | ||
#include <utility> | ||
#include <vector> | ||
|
||
#include <clp/Array.hpp> | ||
#include <clp/ErrorCode.hpp> | ||
#include <clp/ffi/ir_stream/Deserializer.hpp> | ||
#include <clp/ffi/KeyValuePairLogEvent.hpp> | ||
#include <clp/ffi/Value.hpp> | ||
#include <clp/ir/types.hpp> | ||
#include <clp/TraceableException.hpp> | ||
#include <emscripten/em_asm.h> | ||
#include <emscripten/val.h> | ||
#include <spdlog/spdlog.h> | ||
|
||
#include <clp_ffi_js/ClpFfiJsException.hpp> | ||
#include <clp_ffi_js/constants.hpp> | ||
#include <clp_ffi_js/ir/StreamReader.hpp> | ||
#include <clp_ffi_js/ir/StreamReaderDataContext.hpp> | ||
|
||
namespace {
// JSON text used in place of a log event that fails to serialize to JSON.
constexpr std::string_view cEmptyJsonStr = "{}";

// Message logged whenever log-level filtering is requested from this reader.
constexpr std::string_view cLogLevelFilteringNotSupportedErrorMsg
        = "Log level filtering is not yet supported in this reader.";

// Name of the reader-options property that is read to obtain the timestamp key.
constexpr std::string_view cReaderOptionsTimestampKey = "timestampKey";
}  // namespace
|
||
namespace clp_ffi_js::ir { | ||
using clp::ir::four_byte_encoded_variable_t; | ||
junhaoliao marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
/**
 * Creates a reader for a structured IR stream.
 *
 * @param zstd_decompressor Decompressor the deserializer is created from; it is moved into the
 * returned reader's data context.
 * @param data_array Buffer that is moved into the returned reader's data context.
 * @param reader_options JS object whose `timestampKey` property is read as a string and handed to
 * the IR unit handler.
 * @return The newly created reader.
 * @throw ClpFfiJsException if `reader_options` is null or undefined, or if the deserializer cannot
 * be created.
 */
auto StructuredIrStreamReader::create(
        std::unique_ptr<ZstdDecompressor>&& zstd_decompressor,
        clp::Array<char> data_array,
        ReaderOptions const& reader_options
) -> StructuredIrStreamReader {
    // The JS-facing type of `reader_options` admits null; indexing a null/undefined value would
    // surface as an opaque JS error, so reject it up front with a descriptive exception.
    if (reader_options.isNull() || reader_options.isUndefined()) {
        throw ClpFfiJsException{
                clp::ErrorCode::ErrorCode_BadParam,
                __FILENAME__,
                __LINE__,
                std::format(
                        "Reader options with a `{}` property are required to read a structured IR"
                        " stream.",
                        cReaderOptionsTimestampKey
                )
        };
    }
    auto timestamp_key{reader_options[cReaderOptionsTimestampKey.data()].as<std::string>()};

    // The vector is shared with the IR unit handler so the reader can access the log events the
    // handler is given.
    auto deserialized_log_events{std::make_shared<std::vector<clp::ffi::KeyValuePairLogEvent>>()};
    auto result{StructuredIrDeserializer::create(
            *zstd_decompressor,
            IrUnitHandler{deserialized_log_events, std::move(timestamp_key)}
    )};
    if (result.has_error()) {
        auto const error_code{result.error()};
        throw ClpFfiJsException{
                clp::ErrorCode::ErrorCode_Failure,
                __FILENAME__,
                __LINE__,
                std::format(
                        "Failed to create deserializer: {} {}",
                        error_code.category().name(),
                        error_code.message()
                )
        };
    }
    StreamReaderDataContext<StructuredIrDeserializer> data_context{
            std::move(data_array),
            std::move(zstd_decompressor),
            std::move(result.value())
    };
    return StructuredIrStreamReader{std::move(data_context), std::move(deserialized_log_events)};
}
|
||
auto StructuredIrStreamReader::get_num_events_buffered() const -> size_t { | ||
return m_deserialized_log_events->size(); | ||
} | ||
|
||
/**
 * Log-level filtering is not implemented for this reader, so no filtered map exists.
 *
 * @return A JS `null` value, after logging an error.
 */
auto StructuredIrStreamReader::get_filtered_log_event_map() const -> FilteredLogEventMapTsType {
    SPDLOG_ERROR(cLogLevelFilteringNotSupportedErrorMsg);
    auto const null_value{emscripten::val::null()};
    return FilteredLogEventMapTsType{null_value};
}
|
||
/**
 * Accepts a log-level filter but applies nothing: a null filter is silently ignored, and any other
 * filter only results in an error being logged.
 *
 * @param log_level_filter The requested filter, or JS `null` for no filter.
 */
void StructuredIrStreamReader::filter_log_events(LogLevelFilterTsType const& log_level_filter) {
    if (false == log_level_filter.isNull()) {
        SPDLOG_ERROR(cLogLevelFilteringNotSupportedErrorMsg);
    }
}
Comment on lines
+77
to
+87
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion Consider throwing an exception for unsupported operations. Instead of logging errors and returning null values, consider throwing a auto StructuredIrStreamReader::get_filtered_log_event_map() const -> FilteredLogEventMapTsType {
- SPDLOG_ERROR(cLogLevelFilteringNotSupportedErrorMsg);
- return FilteredLogEventMapTsType{emscripten::val::null()};
+ throw ClpFfiJsException{
+ clp::ErrorCode::ErrorCode_NotSupported,
+ __FILENAME__,
+ __LINE__,
+ std::string{cLogLevelFilteringNotSupportedErrorMsg}
+ };
}
|
||
|
||
/**
 * Deserializes IR units from the stream until the deserializer reports a stopping error, records
 * the timestamp node ID found by the IR unit handler, and then releases the stream's data context.
 *
 * If the data context was already released, nothing is deserialized.
 *
 * @return The number of log events currently buffered.
 * @throw ClpFfiJsException if deserializing an IR unit fails with an error other than
 * `operation_not_permitted` or `result_out_of_range`.
 */
auto StructuredIrStreamReader::deserialize_stream() -> size_t {
    if (nullptr == m_stream_reader_data_context) {
        return m_deserialized_log_events->size();
    }

    constexpr size_t cDefaultNumReservedLogEvents{500'000};
    m_deserialized_log_events->reserve(cDefaultNumReservedLogEvents);

    auto& data_context{*m_stream_reader_data_context};
    auto& reader{data_context.get_reader()};
    for (;;) {
        auto result{data_context.get_deserializer().deserialize_next_ir_unit(reader)};
        if (result.has_error()) {
            auto const error{result.error()};
            // An incomplete stream is logged rather than thrown, so the log events buffered so far
            // are still returned to the caller.
            bool const is_incomplete_stream{std::errc::result_out_of_range == error};
            if (is_incomplete_stream) {
                SPDLOG_ERROR("File contains an incomplete IR stream");
            }
            // NOTE(review): `operation_not_permitted` presumably signals that the stream has
            // already ended -- confirm against the deserializer's documentation.
            if (is_incomplete_stream || std::errc::operation_not_permitted == error) {
                break;
            }
            throw ClpFfiJsException{
                    clp::ErrorCode::ErrorCode_Corrupt,
                    __FILENAME__,
                    __LINE__,
                    std::format(
                            "Failed to deserialize IR unit: {}:{}",
                            error.category().name(),
                            error.message()
                    )
            };
        }
    }

    // The timestamp node ID must be read before the data context (and its deserializer) is reset.
    m_timestamp_node_id
            = data_context.get_deserializer().get_ir_unit_handler().get_timestamp_node_id();
    m_stream_reader_data_context.reset(nullptr);
    return m_deserialized_log_events->size();
}
|
||
auto StructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const | ||
-> DecodedResultsTsType { | ||
if (use_filter) { | ||
SPDLOG_ERROR(cLogLevelFilteringNotSupportedErrorMsg); | ||
return DecodedResultsTsType{emscripten::val::null()}; | ||
} | ||
|
||
if (m_deserialized_log_events->size() < end_idx || begin_idx > end_idx) { | ||
return DecodedResultsTsType{emscripten::val::null()}; | ||
} | ||
|
||
auto const results{emscripten::val::array()}; | ||
|
||
for (size_t log_event_idx = begin_idx; log_event_idx < end_idx; ++log_event_idx) { | ||
auto const& log_event{m_deserialized_log_events->at(log_event_idx)}; | ||
|
||
auto const json_result{log_event.serialize_to_json()}; | ||
std::string json_str{cEmptyJsonStr}; | ||
if (false == json_result.has_value()) { | ||
auto error_code{json_result.error()}; | ||
SPDLOG_ERROR( | ||
"Failed to deserialize log event to JSON: {}:{}", | ||
error_code.category().name(), | ||
error_code.message() | ||
); | ||
} else { | ||
json_str = json_result.value().dump(); | ||
} | ||
|
||
auto const& id_value_pairs{log_event.get_node_id_value_pairs()}; | ||
clp::ffi::value_int_t timestamp{0}; | ||
if (m_timestamp_node_id.has_value()) { | ||
auto const& timestamp_pair{id_value_pairs.at(m_timestamp_node_id.value())}; | ||
if (timestamp_pair.has_value()) { | ||
if (timestamp_pair->is<clp::ffi::value_int_t>()) { | ||
timestamp = timestamp_pair.value().get_immutable_view<clp::ffi::value_int_t>(); | ||
} else { | ||
// TODO: Add support for parsing timestamp values of string type. | ||
SPDLOG_ERROR("Unable to parse timestamp for log_event_idx={}", log_event_idx); | ||
} | ||
} | ||
} | ||
|
||
EM_ASM( | ||
{ Emval.toValue($0).push([UTF8ToString($1), $2, $3, $4]); }, | ||
results.as_handle(), | ||
json_str.c_str(), | ||
timestamp, | ||
LogLevel::NONE, | ||
log_event_idx + 1 | ||
); | ||
} | ||
|
||
return DecodedResultsTsType(results); | ||
} | ||
|
||
/**
 * Constructs a reader from an already-created data context and the log-event buffer.
 *
 * @param stream_reader_data_context Data context that is moved into a heap allocation owned by
 * this reader.
 * @param deserialized_log_events Shared buffer of deserialized log events.
 */
StructuredIrStreamReader::StructuredIrStreamReader(
        StreamReaderDataContext<StructuredIrDeserializer>&& stream_reader_data_context,
        std::shared_ptr<std::vector<clp::ffi::KeyValuePairLogEvent>> deserialized_log_events
)
        : m_deserialized_log_events(std::move(deserialized_log_events)),
          m_stream_reader_data_context(
                  std::make_unique<StreamReaderDataContext<StructuredIrDeserializer>>(
                          std::move(stream_reader_data_context)
                  )
          ) {}
} // namespace clp_ffi_js::ir |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just double-checking, the TS type should remain `IrStreamType`, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right, I keep the `Ir` prefix intentionally.
To be precise, types registered via this method are not TS types. For every enum, emscripten creates an object, which is comparable with any transpiled enum value. For example, `Module.IrStreamType.STRUCTURED` / `Module.IrStreamType.UNSTRUCTURED` can be used to check against the return values of `reader.getIrStreamType()` in our case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Reference: https://www.github.com/emscripten-core/emscripten/issues/18585