From a942e3940c30eeba900c873f3bfb3f48d5b39ddb Mon Sep 17 00:00:00 2001 From: SimBe195 <37951951+SimBe195@users.noreply.github.com> Date: Wed, 27 Mar 2024 11:13:14 +0100 Subject: [PATCH] Fix word boundaries in ctm dumps of lattices with subsampling (#72) --- src/Flf/Traceback.cc | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Flf/Traceback.cc b/src/Flf/Traceback.cc index 704c3697..289ae697 100644 --- a/src/Flf/Traceback.cc +++ b/src/Flf/Traceback.cc @@ -60,6 +60,7 @@ class DumpTracebackNode : public FilterNode { static const Core::ParameterBool paramDumpPhonemeAlignment; static const Core::ParameterBool paramDumpSubwordAlignment; static const Core::ParameterBool paramFillEmptySegments; + static const Core::ParameterFloat paramFrameShiftTime; private: Core::Channel dump_; @@ -82,6 +83,7 @@ class DumpTracebackNode : public FilterNode { bool dumpPhonemeAlignment_; bool dumpSubwordAlignment_; bool fillEmptySegments_; + float frameShiftTime_; LatticeAlignmentBuilderRef alignmentBuilder_; protected: @@ -370,8 +372,8 @@ class DumpTracebackNode : public FilterNode { verify(sr->nArcs() == 1); const Arc& a = *sr->begin(); const Boundary &leftBoundary = boundaries.get(sr->id()), &rightBoundary = boundaries.get(a.target()); - f32 wordBegin = f32(leftBoundary.time()) / 100.00; - f32 wordEnd = f32(rightBoundary.time()) / 100.00; + f32 wordBegin = f32(leftBoundary.time()) * frameShiftTime_; + f32 wordEnd = f32(rightBoundary.time()) * frameShiftTime_; if (wordBegin < wordEnd) { if (lAlphabet || lpAlphabet) { std::string word; @@ -580,6 +582,7 @@ class DumpTracebackNode : public FilterNode { dumpPhonemeAlignment_ = paramDumpPhonemeAlignment(ctmConfig); dumpSubwordAlignment_ = paramDumpSubwordAlignment(ctmConfig); fillEmptySegments_ = paramFillEmptySegments(ctmConfig); + frameShiftTime_ = paramFrameShiftTime(ctmConfig); if (dumpPhonemeAlignment_ || dumpSubwordAlignment_) { createAlignmentBuilder(ctmConfig); } @@ -684,6 +687,10 @@ const Core::ParameterBool DumpTracebackNode::paramFillEmptySegments( "fill-empty-segments", "fill empty segments (can fix issues with sclite complaining about unsynchronized files if a segment is missing from the ctm file)", false); +const Core::ParameterFloat DumpTracebackNode::paramFrameShiftTime( + "frame-shift-time", + "shift-time of frames of the lattice time axis in seconds. Defaults to 0.01 = 10ms. Important for correct word boundaries when subsampling is used.", + 0.01); NodeRef createDumpTracebackNode(const std::string& name, const Core::Configuration& config) { return NodeRef(new DumpTracebackNode(name, config)); }