# Run-wide settings: device, schedule length, logging and checkpoint cadence,
# file paths, and character-set options.
Global:
  device: gpu
  epoch_num: 100
  log_smooth_window: 20
  print_batch_step: 10
  output_dir: ./output/rec/ch/svtrv2_llada_test_lr00005_fs_reflect_onedecoder_semiar_withoutmask_sample3_3633_ch/
  # Two-element step settings.
  # NOTE(review): presumably [start, interval] pairs - confirm against the trainer.
  save_epoch_step: [10, 1]
  eval_batch_step: [0, 2000]
  eval_epoch_step: [0, 1]
  cal_metric_during_train: false
  # No pretrained weights and no resume checkpoint are configured.
  pretrained_model: null
  checkpoints: null
  use_tensorboard: false
  infer_img: ./img_44_1.jpg
  # The same dictionary path, use_space_char value and max_text_length (25)
  # are repeated in the PostProcess section and in the Train/Eval label encoders.
  character_dict_path: ./tools/utils/ppocr_keys_v1.txt
  max_text_length: 25
  use_space_char: false
  # NOTE(review): this path mentions "u14m_filter" and "nrtr", unlike output_dir
  # above - confirm it is the intended prediction file for this run.
  save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_nrtr.txt
  use_amp: true
  grad_clip_val: 20.0
  distributed: true
# Optimizer selection and hyper-parameters: AdamW, lr 0.0005, weight decay 0.05.
Optimizer:
  name: AdamW
  lr: 0.0005
  weight_decay: 0.05
  # NOTE(review): presumably excludes bias and normalization parameters from
  # weight decay - confirm against the optimizer builder.
  filter_bias_and_bn: true
# Learning-rate schedule selection: OneCycleLR with warmup_epoch set to 5
# (Global.epoch_num is 100).
LRScheduler:
  name: OneCycleLR
  warmup_epoch: 5
  cycle_momentum: false
# Model definition: a recognition model with no Transform, an
# SVTRv2LNConvTwo33 encoder and an MDiffDecoder decoder.
Architecture:
  model_type: rec
  algorithm: NRTR
  in_channels: 3
  Transform: null
  Encoder:
    name: SVTRv2LNConvTwo33
    use_pos_embed: false
    # dims, depths and num_heads each carry three entries; mixer, local_k and
    # sub_k each carry three rows.
    dims: [128, 256, 384]
    depths: [3, 6, 3]
    num_heads: [4, 8, 12]
    # NOTE(review): every mixer row lists six entries while depths is 3/6/3 -
    # presumably only the first `depth` entries of a row are consumed; confirm
    # against the encoder implementation.
    mixer:
    - [Conv, Conv, Conv, Conv, Conv, Conv]
    - [Conv, Conv, FGlobal, Global, Global, Global]
    - [Global, Global, Global, Global, Global, Global]
    local_k:
    - [5, 5]
    - [5, 5]
    - [-1, -1]
    sub_k:
    - [1, 1]
    - [2, 1]
    - [-1, -1]
    last_stage: false
    feat2d: false
  Decoder:
    name: MDiffDecoder
    num_decoder_layers: 3
    nhead: 6
    # Same value as Global.max_text_length (25).
    max_len: 25
    # Of the six *_decoding switches, only semi_autoregressive_decoding is on.
    parallel_decoding: false
    autoregressive_decoding: false
    low_confidence_decoding: false
    random_mask_decoding: false
    semi_autoregressive_decoding: true
    cloze_mask_decoding: false
    sampler_step: 3
    # Same value as sample_num (3) in the Train LLaDALabelEncode transform.
    # NOTE(review): presumably the two must stay in sync - confirm.
    sample_k: 3
# Loss selection.
# NOTE(review): `NoneLoss` suggests the loss is produced elsewhere (for example
# by the decoder itself) - confirm against the loss builder.
Loss:
  name: NoneLoss
# Prediction decoding: ARLabelDecode with the same dictionary path and
# use_space_char value as the Global section.
PostProcess:
  name: ARLabelDecode
  character_dict_path: ./tools/utils/ppocr_keys_v1.txt
  use_space_char: false
# Evaluation metric: RecMetric, with `acc` named as the main indicator.
Metric:
  name: RecMetric
  main_indicator: acc
  is_filter: false
# Training data pipeline: dataset sources and transforms, batch sampler, and
# loader options.
Train:
  dataset:
    name: RatioDataSetTVResize
    ds_width: true
    padding: false
    # Four training sources: document, handwriting, scene and web.
    data_dir_list:
    - ../benchmark_bctr/benchmark_bctr_train/document_train
    - ../benchmark_bctr/benchmark_bctr_train/handwriting_train
    - ../benchmark_bctr/benchmark_bctr_train/scene_train
    - ../benchmark_bctr/benchmark_bctr_train/web_train
    # Transforms are listed in sequence: decode, augment, encode label, then
    # select the keys to keep.
    transforms:
    - DecodeImagePIL:
        img_mode: RGB
    - PARSeqAugPIL: null
    - LLaDALabelEncode:
        # Dictionary path, use_space_char and max_text_length repeat the
        # values given in the Global section.
        character_dict_path: ./tools/utils/ppocr_keys_v1.txt
        use_space_char: false
        max_text_length: 25
        train_all_layer: true
        # Same value as Architecture.Decoder.sample_k (3).
        sample_num: 3
    - KeepKeys:
        keep_keys:
        - image
        - label
        - reflect_ids
        - noisy_batch
        - masked_indices
        - p_mask
        - length
  sampler:
    name: RatioSampler
    scales:
    - [128, 32]
    first_bs: 128
    fix_bs: false
    divided_factor: [4, 16]
    is_training: true
  loader:
    shuffle: true
    # Same value as sampler.first_bs (128).
    batch_size_per_card: 128
    drop_last: true
    max_ratio: 8
    num_workers: 4
# Evaluation data pipeline: a single scene test source, no augmentation
# transform, and a non-shuffled loader that keeps the last partial batch.
Eval:
  dataset:
    name: RatioDataSetTVResize
    ds_width: true
    padding: false
    data_dir_list:
    - ../benchmark_bctr/benchmark_bctr_test/scene_test
    transforms:
    - DecodeImagePIL:
        img_mode: RGB
    # Evaluation uses ARLabelEncode, whereas the Train section uses
    # LLaDALabelEncode.
    - ARLabelEncode:
        character_dict_path: ./tools/utils/ppocr_keys_v1.txt
        use_space_char: false
        max_text_length: 25
    - KeepKeys:
        keep_keys: [image, label, length]
  sampler:
    name: RatioSampler
    scales:
    - [128, 32]
    first_bs: 128
    fix_bs: false
    divided_factor: [4, 16]
    is_training: false
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 128
    max_ratio: 8
    num_workers: 4
# Run-level keys stored at the top level rather than under a section.
# NOTE(review): these look like values recorded by the launcher (local_rank is
# the quoted string '4') rather than hand-written settings - confirm before
# editing or reusing this file as a template.
filename: svtrv2_llada_fs_reflect_onedecoder_reflectwithoutmask_sample3_ch_3633
config: configs/rec/llada/svtrv2_llada_fs_reflect_onedecoder_reflectwithoutmask_sample3_ch_3633.yml
local_rank: '4'
eval: true