---
# Experiment name; exp_manager.name picks this value up through ${name}.
name: "Conformer-CTC-Char1"
model:
  # Shared audio sample rate; the dataset and preprocessor sections reference
  # it through ${model.sample_rate}.
  sample_rate: 16000

  # Character vocabulary (28 symbols): space, a-z, apostrophe.
  # 'n' and 'y' stay quoted so YAML 1.1 loaders cannot read them as booleans;
  # '''' is a single apostrophe written with the single-quote escape.
  labels: [
    ' ', a, b, c, d, e, f, g, h, i, j, k, l, m,
    'n', o, p, q, r, s, t, u, v, w, x, 'y', z, ''''
  ]

  # Loss / logging switches.
  ctc_reduction: mean_batch
  skip_nan_grad: false
  log_prediction: true
  train_ds:
    # Data source: a plain (non-tarred) manifest.
    manifest_filepath: /opt/ml/input/data/training/an4/train_manifest.json
    is_tarred: false
    tarred_audio_filepaths: null

    # Values inherited from the model section via interpolation.
    sample_rate: ${model.sample_rate}
    labels: ${model.labels}

    # Utterance filtering (durations presumably in seconds - confirm against
    # the dataset loader).
    min_duration: 0.1
    max_duration: 16.7
    trim_silence: false

    # Batching and loader settings.
    batch_size: 16
    shuffle: true
    shuffle_n: 2048
    num_workers: 8
    pin_memory: true

    # Bucketing options.
    bucketing_strategy: synced_randomized
    bucketing_batch_size: null
  validation_ds:
    # Evaluation manifest; order is kept fixed (shuffle is off).
    manifest_filepath: /opt/ml/input/data/testing/an4/test_manifest.json
    shuffle: false

    # Values inherited from the model section via interpolation.
    sample_rate: ${model.sample_rate}
    labels: ${model.labels}

    # Loader settings.
    batch_size: 16
    num_workers: 8
    pin_memory: true
  test_ds:
    # No test manifest is configured here (null).
    manifest_filepath: null
    shuffle: false

    # Values inherited from the model section via interpolation.
    sample_rate: ${model.sample_rate}
    labels: ${model.labels}

    # Loader settings.
    batch_size: 16
    num_workers: 8
    pin_memory: true
  preprocessor:
    # Class instantiated for feature extraction.
    _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
    sample_rate: ${model.sample_rate}

    # Analysis window (sizes presumably in seconds - confirm against the
    # preprocessor implementation).
    window: hann
    window_size: 0.025
    window_stride: 0.01
    n_fft: 512

    # Feature layout; model.encoder.feat_in interpolates `features`.
    features: 80
    frame_splicing: 1
    log: true
    normalize: per_feature

    # Dithering and padding.
    dither: 1.0e-05
    pad_to: 0
    pad_value: 0.0
  spec_augment:
    # Class instantiated for spectrogram augmentation.
    _target_: nemo.collections.asr.modules.SpectrogramAugmentation
    # Frequency masking.
    freq_masks: 2
    freq_width: 27
    # Time masking.
    time_masks: 5
    time_width: 0.05
  encoder:
    # Class instantiated for the acoustic encoder.
    _target_: nemo.collections.asr.modules.ConformerEncoder

    # Input / output feature sizes; feat_in follows the preprocessor.
    feat_in: ${model.preprocessor.features}
    feat_out: -1

    # Core dimensions; model.optim.sched.d_model interpolates `d_model`.
    d_model: 256
    n_layers: 8
    ff_expansion_factor: 4

    # Subsampling front-end.
    subsampling: striding
    subsampling_factor: 4
    subsampling_conv_channels: -1
    causal_downsampling: false

    # Self-attention.
    self_attention_model: rel_pos
    n_heads: 8
    att_context_size: [-1, -1]
    att_context_style: regular
    xscaling: true
    untie_biases: true
    pos_emb_max_len: 5000

    # Convolution module.
    conv_kernel_size: 31
    conv_norm_type: batch_norm
    conv_context_size: null

    # Regularisation: dropout rates.
    dropout: 0.1
    dropout_pre_encoder: 0.1
    dropout_emb: 0.0
    dropout_att: 0.1

    # Regularisation: stochastic depth (drop probability is 0.0 here).
    stochastic_depth_drop_prob: 0.0
    stochastic_depth_mode: linear
    stochastic_depth_start_layer: 1
  decoder:
    # Class instantiated for the CTC decoder head.
    _target_: nemo.collections.asr.modules.ConvASRDecoder
    # Output vocabulary follows model.labels via interpolation.
    vocabulary: ${model.labels}
    # Left as null / -1 in this file; presumably filled in by the model code
    # at construction time - confirm against the model class.
    feat_in: null
    num_classes: -1
  interctc:
    # Both lists are empty in this configuration.
    apply_at_layers: []
    loss_weights: []
  optim:
    # Optimizer selection and hyper-parameters.
    name: adamw
    # NOTE(review): lr is paired with the NoamAnnealing schedule below and
    # presumably acts as a scale factor rather than an absolute rate - confirm
    # against the scheduler implementation.
    lr: 2.0
    weight_decay: 0.001
    betas: [0.9, 0.98]

    # Learning-rate schedule; d_model tracks the encoder width.
    sched:
      name: NoamAnnealing
      d_model: ${model.encoder.d_model}
      # NOTE(review): trainer.max_epochs is 2 in this file - confirm the run
      # is long enough for a 10000-step warmup to be meaningful.
      warmup_steps: 10000
      warmup_ratio: null
      min_lr: 1.0e-06
  # Model-level head count, tied to the encoder's value so the two can never
  # disagree (the encoder section is the one carrying the ConformerEncoder
  # _target_). NOTE(review): presumably nothing reads this model-level key -
  # confirm and drop it if unused.
  n_heads: ${model.encoder.n_heads}
trainer:
  # Hardware layout: a single node, GPU accelerator, devices set to -1.
  devices: -1
  num_nodes: 1
  accelerator: gpu
  # DDP is selected explicitly. With devices: -1 and sync_batchnorm: true this
  # config anticipates multi-GPU runs, and leaving the strategy unset lets the
  # trainer pick its own default, which on older PyTorch Lightning releases is
  # the spawn-based variant.
  strategy: ddp
  sync_batchnorm: true

  # Run length and validation cadence.
  max_epochs: 2
  max_steps: -1
  val_check_interval: 1.0
  check_val_every_n_epoch: 1
  num_sanity_val_steps: 0

  # Optimisation numerics.
  accumulate_grad_batches: 1
  gradient_clip_val: 0.0
  precision: 32
  benchmark: false

  # Console / step logging.
  log_every_n_steps: 10
  enable_progress_bar: true

  # NOTE(review): resume_from_checkpoint is deprecated in newer PyTorch
  # Lightning releases - confirm the installed version still accepts it.
  resume_from_checkpoint: null

  # The trainer's own checkpointing and logger are switched off; exp_manager
  # in this file enables its checkpoint callback and TensorBoard logger.
  enable_checkpointing: false
  logger: false
exp_manager:
  # Output directory and run name (name resolves from the top-level `name`).
  exp_dir: /opt/ml/model/
  name: ${name}

  # Resume settings.
  resume_if_exists: true
  resume_ignore_no_checkpoint: true

  # Checkpoint callback: monitors val_wer in min mode.
  create_checkpoint_callback: true
  checkpoint_callback_params:
    monitor: val_wer
    mode: min
    save_top_k: 5
    always_save_nemo: true

  # Loggers: TensorBoard on, Weights & Biases off.
  create_tensorboard_logger: true
  create_wandb_logger: false
  wandb_logger_kwargs: {name: null, project: null}
# Path to a .nemo file; presumably used to initialise weights before
# training - confirm against the training script.
init_from_nemo_model: "/opt/ml/input/data/pretrained/CTC.nemo"