diff --git a/2024-orthogonal-softmax/README.md b/2024-orthogonal-softmax/README.md
index 5eff80ca..172056a9 100644
--- a/2024-orthogonal-softmax/README.md
+++ b/2024-orthogonal-softmax/README.md
@@ -6,3 +6,10 @@ We use [RETURNN](https://github.com/rwth-i6/returnn) for training and our setups
 
 We use models parts from [i6-models](https://github.com/rwth-i6/i6_models/tree/jing-dynamic-encoder-size)
 
+### Subnet selection with component-wise criterion
+
+ConformerCTCModel, ConformerCTCConfig, and train_step used in the RETURNN config are defined [here](https://github.com/rwth-i6/i6_experiments/blob/main/users/jxu/experiments/ctc/tedlium2/pytorch_networks/dynamic_encoder_size/orthogonal_softmax/joint_train_conformer_orthogonal_softmax_component_wise.py)
+
+### Subnet selection with layer-wise criterion
+
+ConformerCTCModel, ConformerCTCConfig, and train_step used in the RETURNN config are defined [here](https://github.com/rwth-i6/i6_experiments/blob/main/users/jxu/experiments/ctc/tedlium2/pytorch_networks/dynamic_encoder_size/orthogonal_softmax/joint_train_conformer_orthogonal_softmax_layer_wise.py)
\ No newline at end of file
diff --git a/2024-orthogonal-softmax/TED-LIUM-v2/Figure1_orthosoftmax_jointly_train_five_models_FLOPs_aware_cmp_wise.config b/2024-orthogonal-softmax/TED-LIUM-v2/Figure1_orthosoftmax_jointly_train_five_models_FLOPs_aware_cmp_wise.config
index 0fdbfd9d..2c7b0717 100644
--- a/2024-orthogonal-softmax/TED-LIUM-v2/Figure1_orthosoftmax_jointly_train_five_models_FLOPs_aware_cmp_wise.config
+++ b/2024-orthogonal-softmax/TED-LIUM-v2/Figure1_orthosoftmax_jointly_train_five_models_FLOPs_aware_cmp_wise.config
@@ -334,10 +334,10 @@ import os
 import sys
 
 sys.path.insert(0, "/u/jxu/setups/tedlium2/2024-05-14--independent-softmax/recipe")
-from i6_experiments.users.jxu.experiments.ctc.tedlium2.pytorch_networks.independent_softmax.jointly_train_two_models.num_params.conformer_size_384_log_mel_ffn_dim_conv_attn_heads_decomposable_random_pct import (
+from i6_experiments.users.jxu.experiments.ctc.tedlium2.pytorch_networks.dynamic_encoder_size.orthogonal_softmax.joint_train_conformer_orthogonal_softmax_component_wise import (
     ConformerCTCModel,
 )
-from i6_experiments.users.jxu.experiments.ctc.tedlium2.pytorch_networks.independent_softmax.jointly_train_two_models.num_params.conformer_size_384_log_mel_ffn_dim_conv_attn_heads_decomposable_random_pct import (
+from i6_experiments.users.jxu.experiments.ctc.tedlium2.pytorch_networks.dynamic_encoder_size.orthogonal_softmax.joint_train_conformer_orthogonal_softmax_component_wise import (
     ConformerCTCConfig,
 )
 from i6_models.primitives.feature_extraction import LogMelFeatureExtractionV1Config
@@ -641,6 +641,6 @@ def get_model(epoch, step, **kwargs):
     return ConformerCTCModel(epoch=epoch, step=step, **model_kwargs, **kwargs)
 
 
-from i6_experiments.users.jxu.experiments.ctc.tedlium2.pytorch_networks.independent_softmax.jointly_train_two_models.num_params.conformer_size_384_log_mel_ffn_dim_conv_attn_heads_decomposable_random_pct import (
+from i6_experiments.users.jxu.experiments.ctc.tedlium2.pytorch_networks.dynamic_encoder_size.orthogonal_softmax.joint_train_conformer_orthogonal_softmax_component_wise import (
     train_step,
 )
diff --git a/2024-orthogonal-softmax/TED-LIUM-v2/Tab4_orthosoftmax_jointly_train_three_models_FLOPs_aware_cmp_wise.config b/2024-orthogonal-softmax/TED-LIUM-v2/Tab4_orthosoftmax_jointly_train_three_models_FLOPs_aware_cmp_wise.config
index 9badbab5..a376be57 100644
--- a/2024-orthogonal-softmax/TED-LIUM-v2/Tab4_orthosoftmax_jointly_train_three_models_FLOPs_aware_cmp_wise.config
+++ b/2024-orthogonal-softmax/TED-LIUM-v2/Tab4_orthosoftmax_jointly_train_three_models_FLOPs_aware_cmp_wise.config
@@ -334,10 +334,10 @@ import os
 import sys
 
 sys.path.insert(0, "/u/jxu/setups/tedlium2/2024-05-14--independent-softmax/recipe")
-from i6_experiments.users.jxu.experiments.ctc.tedlium2.pytorch_networks.independent_softmax.jointly_train_two_models.num_params.conformer_size_384_log_mel_ffn_dim_conv_attn_heads_decomposable_random_pct import (
+from i6_experiments.users.jxu.experiments.ctc.tedlium2.pytorch_networks.dynamic_encoder_size.orthogonal_softmax.joint_train_conformer_orthogonal_softmax_component_wise import (
     ConformerCTCModel,
 )
-from i6_experiments.users.jxu.experiments.ctc.tedlium2.pytorch_networks.independent_softmax.jointly_train_two_models.num_params.conformer_size_384_log_mel_ffn_dim_conv_attn_heads_decomposable_random_pct import (
+from i6_experiments.users.jxu.experiments.ctc.tedlium2.pytorch_networks.dynamic_encoder_size.orthogonal_softmax.joint_train_conformer_orthogonal_softmax_component_wise import (
     ConformerCTCConfig,
 )
 from i6_models.primitives.feature_extraction import LogMelFeatureExtractionV1Config
@@ -635,6 +635,6 @@ def get_model(epoch, step, **kwargs):
     return ConformerCTCModel(epoch=epoch, step=step, **model_kwargs, **kwargs)
 
 
-from i6_experiments.users.jxu.experiments.ctc.tedlium2.pytorch_networks.independent_softmax.jointly_train_two_models.num_params.conformer_size_384_log_mel_ffn_dim_conv_attn_heads_decomposable_random_pct import (
+from i6_experiments.users.jxu.experiments.ctc.tedlium2.pytorch_networks.dynamic_encoder_size.orthogonal_softmax.joint_train_conformer_orthogonal_softmax_component_wise import (
     train_step,
 )
diff --git a/2024-orthogonal-softmax/TED-LIUM-v2/Tab4_orthosoftmax_jointly_train_three_models_sparsity_aware_cmp_wise.config b/2024-orthogonal-softmax/TED-LIUM-v2/Tab4_orthosoftmax_jointly_train_three_models_sparsity_aware_cmp_wise.config
index 10546d85..f96e3622 100644
--- a/2024-orthogonal-softmax/TED-LIUM-v2/Tab4_orthosoftmax_jointly_train_three_models_sparsity_aware_cmp_wise.config
+++ b/2024-orthogonal-softmax/TED-LIUM-v2/Tab4_orthosoftmax_jointly_train_three_models_sparsity_aware_cmp_wise.config
@@ -334,10 +334,10 @@ import os
 import sys
 
 sys.path.insert(0, "/u/jxu/setups/tedlium2/2024-05-14--independent-softmax/recipe")
-from i6_experiments.users.jxu.experiments.ctc.tedlium2.pytorch_networks.independent_softmax.jointly_train_two_models.num_params.conformer_size_384_log_mel_ffn_dim_conv_attn_heads_decomposable_random_pct import (
+from i6_experiments.users.jxu.experiments.ctc.tedlium2.pytorch_networks.dynamic_encoder_size.orthogonal_softmax.joint_train_conformer_orthogonal_softmax_component_wise import (
     ConformerCTCModel,
 )
-from i6_experiments.users.jxu.experiments.ctc.tedlium2.pytorch_networks.independent_softmax.jointly_train_two_models.num_params.conformer_size_384_log_mel_ffn_dim_conv_attn_heads_decomposable_random_pct import (
+from i6_experiments.users.jxu.experiments.ctc.tedlium2.pytorch_networks.dynamic_encoder_size.orthogonal_softmax.joint_train_conformer_orthogonal_softmax_component_wise import (
     ConformerCTCConfig,
 )
 from i6_models.primitives.feature_extraction import LogMelFeatureExtractionV1Config
@@ -635,6 +635,6 @@ def get_model(epoch, step, **kwargs):
     return ConformerCTCModel(epoch=epoch, step=step, **model_kwargs, **kwargs)
 
 
-from i6_experiments.users.jxu.experiments.ctc.tedlium2.pytorch_networks.independent_softmax.jointly_train_two_models.num_params.conformer_size_384_log_mel_ffn_dim_conv_attn_heads_decomposable_random_pct import (
+from i6_experiments.users.jxu.experiments.ctc.tedlium2.pytorch_networks.dynamic_encoder_size.orthogonal_softmax.joint_train_conformer_orthogonal_softmax_component_wise import (
     train_step,
 )
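
All three config diffs apply the same change: the RETURNN config puts the recipe checkout on `sys.path` and imports `ConformerCTCModel`, `ConformerCTCConfig`, and `train_step` from the component-wise orthogonal-softmax module (the layer-wise setups differ only in the module name). A minimal sketch of that import block, with a placeholder recipe path instead of the i6-internal one used in the published configs:

```python
import sys

# Placeholder; the published configs insert an i6-internal recipe checkout here.
sys.path.insert(0, "/path/to/recipe")

# Model, config dataclass, and training step all live in one module.
# For the layer-wise criterion, import from
# ...orthogonal_softmax.joint_train_conformer_orthogonal_softmax_layer_wise instead.
from i6_experiments.users.jxu.experiments.ctc.tedlium2.pytorch_networks.dynamic_encoder_size.orthogonal_softmax.joint_train_conformer_orthogonal_softmax_component_wise import (
    ConformerCTCModel,
    ConformerCTCConfig,
    train_step,
)
```

RETURNN's PyTorch backend then builds the model via the config's `get_model(epoch, step, **kwargs)`, which returns a `ConformerCTCModel`, and runs training through the imported `train_step`, as shown in the hunks above.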