---
# Recognition training/evaluation configuration: SVTRv2LNConvTwo33 encoder
# with an MDiffDecoder decoder, trained on the Union14M-L filtered LMDB
# splits and evaluated on six benchmark directories.
#
# NOTE(review): this file was restored to block-style YAML from a
# whitespace-collapsed form. Keys, values and their order are unchanged; the
# nesting (dataset / sampler / loader under Train and Eval, Encoder / Decoder
# under Architecture) is reconstructed from key order -- confirm against the
# consuming config loader.

Global:
  device: gpu
  epoch_num: 40
  log_smooth_window: 20
  print_batch_step: 10
  output_dir: ./output/rec/u14m_filter/svtrv2_llada_test_lr00005_fs_reflect_onedecoder_semiar_withoutmask_sample3/
  save_epoch_step: [10, 1]
  eval_batch_step: [0, 500]
  eval_epoch_step: [0, 1]
  cal_metric_during_train: false
  pretrained_model: null
  checkpoints: null
  use_tensorboard: false
  infer_img: ./img_44_1.jpg
  character_dict_path: ./tools/utils/EN_symbol_dict.txt
  # Same limit is repeated in Decoder.max_len and in both label encoders.
  max_text_length: 25
  use_space_char: false
  # NOTE(review): file name mentions "nrtr" while output_dir mentions
  # "llada" -- presumably inherited from another config; verify it is intended.
  save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_nrtr.txt
  use_amp: true
  grad_clip_val: 20.0
  distributed: true

Optimizer:
  name: AdamW
  lr: 0.0005
  weight_decay: 0.05
  filter_bias_and_bn: true

LRScheduler:
  name: OneCycleLR
  warmup_epoch: 1.5
  cycle_momentum: false

Architecture:
  model_type: rec
  algorithm: NRTR
  in_channels: 3
  Transform: null
  Encoder:
    name: SVTRv2LNConvTwo33
    use_pos_embed: false
    # Three stages; dims / depths / num_heads each carry one value per stage.
    dims: [128, 256, 384]
    depths: [6, 6, 6]
    num_heads: [4, 8, 12]
    # One row per stage, six entries per row (same count as each depth above;
    # presumably one mixer type per layer -- TODO confirm).
    mixer:
      - [Conv, Conv, Conv, Conv, Conv, Conv]
      - [Conv, Conv, FGlobal, Global, Global, Global]
      - [Global, Global, Global, Global, Global, Global]
    local_k:
      - [5, 5]
      - [5, 5]
      - [-1, -1]
    sub_k:
      - [1, 1]
      - [2, 1]
      - [-1, -1]
    last_stage: false
    feat2d: false
  Decoder:
    name: MDiffDecoder
    num_decoder_layers: 6
    nhead: 6
    max_len: 25
    # Decoding-mode switches; only semi_autoregressive_decoding is enabled.
    parallel_decoding: false
    autoregressive_decoding: false
    low_confidence_decoding: false
    random_mask_decoding: false
    semi_autoregressive_decoding: true
    cloze_mask_decoding: false
    sampler_step: 3
    sample_k: 3

Loss:
  name: NoneLoss

PostProcess:
  name: ARLabelDecode
  character_dict_path: ./tools/utils/EN_symbol_dict.txt
  use_space_char: false

Metric:
  name: RecMetric
  main_indicator: acc
  is_filter: true

Train:
  dataset:
    name: RatioDataSetTVResize
    ds_width: true
    padding: false
    data_dir_list:
      - /data/Union14M-L-LMDB-Filtered/filter_train_challenging
      - /data/Union14M-L-LMDB-Filtered/filter_train_hard
      - /data/Union14M-L-LMDB-Filtered/filter_train_medium
      - /data/Union14M-L-LMDB-Filtered/filter_train_normal
      - /data/Union14M-L-LMDB-Filtered/filter_train_easy
    transforms:
      - DecodeImagePIL:
          img_mode: RGB
      - PARSeqAugPIL: null
      - LLaDALabelEncode:
          character_dict_path: ./tools/utils/EN_symbol_dict.txt
          use_space_char: false
          max_text_length: 25
          train_all_layer: true
          # Same value as Architecture.Decoder.sample_k (3).
          sample_num: 3
      - KeepKeys:
          keep_keys:
            - image
            - label
            - reflect_ids
            - noisy_batch
            - masked_indices
            - p_mask
            - length
  sampler:
    name: RatioSampler
    scales:
      - [128, 32]
    first_bs: 256
    fix_bs: false
    divided_factor: [4, 16]
    is_training: true
  loader:
    shuffle: true
    batch_size_per_card: 256
    drop_last: true
    max_ratio: 4
    num_workers: 4

Eval:
  dataset:
    name: RatioDataSetTVResize
    ds_width: true
    padding: false
    data_dir_list:
      - /data/evaluation/CUTE80
      - /data/evaluation/IC13_857
      - /data/evaluation/IC15_1811
      - /data/evaluation/IIIT5k_3000
      - /data/evaluation/SVT
      - /data/evaluation/SVTP
    transforms:
      - DecodeImagePIL:
          img_mode: RGB
      - ARLabelEncode:
          character_dict_path: ./tools/utils/EN_symbol_dict.txt
          use_space_char: false
          max_text_length: 25
      - KeepKeys:
          keep_keys:
            - image
            - label
            - length
  sampler:
    name: RatioSampler
    scales:
      - [128, 32]
    first_bs: 256
    fix_bs: false
    divided_factor: [4, 16]
    is_training: false
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 256
    max_ratio: 4
    num_workers: 4

# NOTE(review): the keys below look like run-time/launch metadata recorded
# alongside the config (config path, rank, eval flag) rather than
# hand-written settings -- confirm before editing them by hand.
filename: svtrv2_llada_fs_reflect_onedecoder_reflectwithoutmask_sample3
config: configs/rec/llada/svtrv2_llada_fs_reflect_onedecoder_reflectwithoutmask_sample3.yml
# Kept as a quoted string, exactly as in the original.
local_rank: '2'
eval: true