Skip to content

Commit

Permalink
ICASSP 2025 paper information
Browse files Browse the repository at this point in the history
  • Loading branch information
Marvin84 committed Jan 8, 2025
1 parent 6fdb181 commit 5cb7309
Show file tree
Hide file tree
Showing 9 changed files with 3,944 additions and 0 deletions.
9 changes: 9 additions & 0 deletions 2025-factored-fullsum-rightcontext/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
* Training uses the TensorFlow backend, RASR for data preparation, and an extended CUDA kernel
** config files for data preparation
** RETURNN config, containing both the network definition and the RASR settings for the alignment FSA
** a copy of the additional layer and CUDA code for running the factored loss

* Decoding uses the master branch of RASR
** recognition config


10 changes: 10 additions & 0 deletions 2025-factored-fullsum-rightcontext/decoding/feature.flow
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?xml version="1.0" ?>
<!-- Flow network used for decoding: cached features are read from disk, passed
     through the "tensorflow-forward" node, and its "posteriors" port becomes
     the network output "features". -->
<network name="network">
<!-- Single output port of this network. -->
<out name="features"/>
<!-- TASK selects which cache file is read; id is handed to both nodes below. -->
<param name="TASK"/>
<param name="id"/>
<!-- Reads the per-task feature cache (gt.cache.TASK; Gammatone features judging by the job name in the path). -->
<node filter="generic-cache" id="$(id)" name="cache" path="`cf /u/raissi/setups/librispeech/960h/work/i6_core/features/extraction/FeatureExtractionJob.Gammatone.oDJO8MMhhCEx/output/gt.cache.$(TASK)`"/>
<!-- Forwarding node; its tensor mapping and model loader are configured in recognition.config under feature-extraction.tf-fwd. -->
<node filter="tensorflow-forward" id="$(id)" name="tf-fwd"/>
<!-- Wiring: cache output feeds the "features" input of tf-fwd, and the tf-fwd "posteriors" output feeds the network output. -->
<link from="cache" to="tf-fwd:features"/>
<link from="tf-fwd:posteriors" to="network:features"/>
</network>
174 changes: 174 additions & 0 deletions 2025-factored-fullsum-rightcontext/decoding/recognition.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
# Global logging setup: every message channel listed here is routed to the
# shared "output-channel"; errors and warnings are additionally sent to stderr.
# The dot channel is set to nil.
[*]
configuration.channel = output-channel
dot.channel = nil
encoding = UTF-8
error.channel = output-channel, stderr
log.channel = output-channel
progress.channel = output-channel
real-time-factor.channel = output-channel
statistics.channel = output-channel
system-info.channel = output-channel
time.channel = output-channel
version.channel = output-channel
warning.channel = output-channel, stderr

# The shared channel writes to the file named by the LOGFILE variable
# (run.sh supplies it on the command line); append, compression and
# unbuffered mode are all switched off.
[*.output-channel]
append = no
compressed = no
file = $(LOGFILE)
unbuffered = no

# Thread counts for any component named "session".
# NOTE(review): presumably the TensorFlow session of the tf-fwd node - confirm.
[*.session]
inter-op-parallelism-threads = 1
intra-op-parallelism-threads = 2

# Corpus to decode. The segment list is chosen per task through the TASK
# variable (segments.TASK), so each task handles its own subset of the corpus.
[flf-lattice-tool.corpus]
capitalize-transcriptions = no
file = /u/raissi/setups/librispeech/960h/work/i6_core/audio/encoding/BlissChangeEncodingJob.vUdgDkgc97ZK/output/corpus.xml.gz
progress-indication = global
segments.file = /u/raissi/setups/librispeech/960h/work/i6_core/corpus/segments/SegmentCorpusJob.UKZC0F09VgYu/output/segments.$(TASK)
warn-about-unexpected-elements = yes

# Precomputed global cache, opened read-only. It comes from the same job
# output directory as the LM image referenced in the recognizer.lm section.
# NOTE(review): the backquoted "cf" command presumably resolves the path via a
# file cache manager - confirm it is available on the decoding host.
[flf-lattice-tool.global-cache]
file = `cf /u/raissi/setups/librispeech/960h/work/i6_core/recognition/advanced_tree_search/AdvancedTreeSearchLmImageAndGlobalCacheJob.Gbj19O0SC51m/output/global.cache`
read-only = yes

# Pronunciation lexicon (output of a lexicon merge job); pronunciation
# normalization is disabled.
[flf-lattice-tool.lexicon]
file = /u/raissi/setups/librispeech/960h/work/i6_core/lexicon/modification/MergeLexiconJob.z54fVoMlr0md/output/lexicon.xml.gz
normalize-pronunciation = no

# Processing network of the lattice tool. Following the "links" keys in this
# file, data flows: segment -> recognizer -> expand -> evaluator and
# archive-writer -> sink.
[flf-lattice-tool.network]
initial-nodes = segment

# Stores the lattices in flf format under lattice.cache.TASK (one archive per
# task) and forwards to port 1 of the sink.
[flf-lattice-tool.network.archive-writer]
format = flf
info = yes
links = sink:1
path = lattice.cache.$(TASK)
type = archive-writer

# Scoring node: word-error evaluation is enabled for both the single-best
# result and the best path contained in the lattice; forwards to sink port 0.
[flf-lattice-tool.network.evaluator]
best-in-lattice = yes
links = sink:0
single-best = yes
type = evaluator
word-errors = yes

# Edit-distance settings used by the evaluator.
[flf-lattice-tool.network.evaluator.edit-distance]
allow-broken-words = no
format = bliss

# Node of type expand-transits placed between the recognizer and the two
# output nodes (evaluator and archive-writer).
[flf-lattice-tool.network.expand]
links = evaluator archive-writer
type = expand-transits

# Recognizer node: advanced tree search, with confidence scores, the non-word
# closure filter and posterior pruning all disabled. Output goes to "expand".
[flf-lattice-tool.network.recognizer]
add-confidence-score = no
apply-non-word-closure-filter = no
apply-posterior-pruning = no
links = expand
search-type = advanced-tree-search
type = recognizer

# Allophone inventory: add-all is enabled and add-from-lexicon is disabled.
[flf-lattice-tool.network.recognizer.acoustic-model.allophones]
add-all = yes
add-from-lexicon = no

# HMM topology: one state per phone without state repetitions; the across-word
# model is enabled and early recombination is disabled.
[flf-lattice-tool.network.recognizer.acoustic-model.hmm]
across-word-model = yes
early-recombination = no
state-repetitions = 1
states-per-phone = 1

# Acoustic scores come from the "nn-precomputed-hybrid" feature scorer, so the
# mixture file is only a dummy. A (smoothed) prior file is applied with
# priori-scale 0.4, and the acoustic scale is 1.0.
[flf-lattice-tool.network.recognizer.acoustic-model.mixture-set]
feature-scorer-type = nn-precomputed-hybrid
file = /u/raissi/setups/librispeech/960h/work/i6_core/mm/mixtures/CreateDummyMixturesJob.WXJMtY5OeGyg/output/dummy.mix
normalize-mixture-weights = no
prior-file = /u/raissi/setups/librispeech/960h/work/i6_experiments/users/raissi/setups/common/helpers/priors/smoothen/SmoothenPriorsJob.oKBFK7dIraCk/output/priors.xml
priori-scale = 0.4
scale = 1.0

# State tying of type monophone-dense with word-end classes and without
# boundary classes.
# NOTE(review): this must match the label set of the network output - confirm
# against the training setup.
[flf-lattice-tool.network.recognizer.acoustic-model.state-tying]
type = monophone-dense
use-boundary-classes = no
use-word-end-classes = yes

# Transition penalties (TDPs), applied with a scale of 0.1. The tying type
# global-and-nonword goes together with the separate penalty sets below:
# "*" (default), "silence", and "nonword-0" / "nonword-1".
# NOTE(review): the nonword sets presumably apply to the phones listed in
# nonword-phones ([UNKNOWN]), and "infinity" presumably forbids a transition -
# confirm against the RASR documentation.
[flf-lattice-tool.network.recognizer.acoustic-model.tdp]
entry-m1.loop = infinity
entry-m2.loop = infinity
nonword-phones = [UNKNOWN]
scale = 0.1
tying-type = global-and-nonword

# Default penalties: forward and exit cost nothing, loops cost 5.0, skips are
# set to infinity.
[flf-lattice-tool.network.recognizer.acoustic-model.tdp.*]
exit = 0.0
forward = 0.0
loop = 5.0
skip = infinity

# Non-word penalties (set 0): higher loop (11.0) and exit (20.0) values than
# the default set.
[flf-lattice-tool.network.recognizer.acoustic-model.tdp.nonword-0]
exit = 20.0
forward = 0.0
loop = 11.0
skip = infinity

# Non-word penalties (set 1): same values as nonword-0.
[flf-lattice-tool.network.recognizer.acoustic-model.tdp.nonword-1]
exit = 20.0
forward = 0.0
loop = 11.0
skip = infinity

# Silence penalties: loop 13.0 and exit 15.0; skips are set to infinity.
[flf-lattice-tool.network.recognizer.acoustic-model.tdp.silence]
exit = 15.0
forward = 0.0
loop = 13.0
skip = infinity

# Feature extraction is described by the Flow network in feature.flow, which
# contains the "tf-fwd" node configured by the sections below.
[flf-lattice-tool.network.recognizer.feature-extraction]
file = feature.flow

# Input mapping: the "features" port of tf-fwd is fed into the graph tensor
# extern_data/placeholders/data/data, with sequence lengths going to the
# matching data_dim0_size tensor.
[flf-lattice-tool.network.recognizer.feature-extraction.tf-fwd.input-map.info-0]
param-name = features
seq-length-tensor-name = extern_data/placeholders/data/data_dim0_size
tensor-name = extern_data/placeholders/data/data

# Model loader of type "meta": a compiled meta graph plus the trained model
# files of epoch 492.
[flf-lattice-tool.network.recognizer.feature-extraction.tf-fwd.loader]
meta-graph-file = /u/raissi/setups/librispeech/960h/work/i6_core/returnn/compile/CompileTFGraphJob.L1VLSMvSu4PB/output/graph.meta
saved-model-file = /u/raissi/setups/librispeech/960h/work/i6_experiments/users/raissi/costum/returnn/rasr_returnn_bw/ReturnnRasrTrainingBWJob.qckoKdPLkSdj/output/models/epoch.492
type = meta

# Output mapping: the graph tensor center__output/output_batch_major is exposed
# as the "posteriors" port, which feature.flow links to the network output.
[flf-lattice-tool.network.recognizer.feature-extraction.tf-fwd.output-map.info-0]
param-name = posteriors
tensor-name = center__output/output_batch_major

# Language model: ARPA-format file (4-gram.arpa.gz) applied with scale 0.9.
# The LM image comes from the same job output directory as the global cache.
[flf-lattice-tool.network.recognizer.lm]
file = /u/raissi/setups/librispeech/960h/work/i6_core/tools/download/DownloadJob.6ij8dDC1z4zK/output/4-gram.arpa.gz
image = `cf /u/raissi/setups/librispeech/960h/work/i6_core/recognition/advanced_tree_search/AdvancedTreeSearchLmImageAndGlobalCacheJob.Gbj19O0SC51m/output/lm-1.image`
scale = 0.9
type = ARPA

# Search settings: beam pruning threshold 22.0 with a limit of 500000, word-end
# pruning 0.5 with a limit of 10000. Lattice creation and LM look-ahead are
# enabled; lattice optimization is set to "simple".
[flf-lattice-tool.network.recognizer.recognizer]
beam-pruning = 22.0
beam-pruning-limit = 500000
create-lattice = yes
lm-lookahead = yes
lm-lookahead-laziness = 15
optimize-lattice = simple
word-end-pruning = 0.5
word-end-pruning-limit = 10000

# LM look-ahead details: cache size bounds, history limit, minimum
# representation and tree cutoff.
# NOTE(review): units of the cache sizes are not visible here - confirm
# against the RASR documentation before tuning.
[flf-lattice-tool.network.recognizer.recognizer.lm-lookahead]
cache-size-high = 3000
cache-size-low = 2000
history-limit = 1
minimum-representation = 1
tree-cutoff = 30

# Entry node of the network (named in initial-nodes). Its port 1 feeds port 1
# of the recognizer, and its port 0 feeds port 1 of both the archive-writer and
# the evaluator.
[flf-lattice-tool.network.segment]
links = 1->recognizer:1 0->archive-writer:1 0->evaluator:1
type = speech-segment

# Final node: an empty lattice only produces a warning, not an error.
[flf-lattice-tool.network.sink]
error-on-empty-lattice = no
type = sink
warn-on-empty-lattice = yes
22 changes: 22 additions & 0 deletions 2025-factored-fullsum-rightcontext/decoding/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Run RASR recognition (flf-tool) for one task of the decoding job.
#
# Usage: run.sh TASK [LOGFILE] [extra flf-tool options...]
#   TASK     task id; passed as --*.TASK and referenced as $(TASK) in
#            recognition.config and feature.flow
#   LOGFILE  log file name passed as --*.LOGFILE (default: rasr.log)
#   Any remaining arguments are handed to flf-tool unchanged.
#
# Exits with status 1 if no TASK is given; otherwise the exit status is that
# of flf-tool.
set -ueo pipefail

if [[ $# -gt 0 ]]; then
  TASK=$1
  shift
else
  # Report usage errors on stderr so they do not mix with regular output.
  echo "No TASK-id given" >&2
  exit 1
fi

if [[ $# -gt 0 ]]; then
  LOGFILE=$1
  shift
else
  LOGFILE=rasr.log
fi

# Decoding runs on CPU with two OpenMP threads.
export OMP_NUM_THREADS=2
export TF_DEVICE='cpu'

# Every expansion is quoted: the options start with "--*." and would otherwise
# be subject to glob expansion, and unquoted $@ would word-split pass-through
# arguments that contain spaces.
/work/tools/users/raissi/rasr/rasr_tf2/arch/linux-x86_64-standard/flf-tool.linux-x86_64-standard --config=recognition.config "--*.TASK=$TASK" "--*.LOGFILE=$LOGFILE" "$@"
Loading

0 comments on commit 5cb7309

Please sign in to comment.