---
# Recognition training/eval configuration (encoder SVTRv2LNConvTwo33,
# decoder MDiffDecoder), restored to block-style YAML from a flattened dump.
# NOTE(review): nesting was reconstructed from key order and "-" markers.
# Two placements are inferred rather than proven and should be confirmed
# against the code that loads this file:
#   * `sampler` / `loader` as siblings of `dataset` under Train and Eval;
#   * `filename`, `config`, `local_rank`, `eval` as top-level keys.

Global:
  device: gpu
  epoch_num: 100
  log_smooth_window: 20
  print_batch_step: 10
  output_dir: ./output/rec/ch/svtrv2_llada_test_lr00005_fs_reflect_onedecoder_semiar_withoutmask_sample3_3633_ch/
  save_epoch_step:
    - 10
    - 1
  eval_batch_step:
    - 0
    - 2000
  eval_epoch_step:
    - 0
    - 1
  cal_metric_during_train: false
  pretrained_model: null
  checkpoints: null
  use_tensorboard: false
  infer_img: ./img_44_1.jpg
  character_dict_path: ./tools/utils/ppocr_keys_v1.txt
  max_text_length: 25
  use_space_char: false
  # NOTE(review): this path names a different dataset/model ("u14m_filter",
  # "svtrv2_nrtr") than output_dir above — confirm it is intentional.
  save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_nrtr.txt
  use_amp: true
  grad_clip_val: 20.0
  distributed: true

Optimizer:
  name: AdamW
  lr: 0.0005
  weight_decay: 0.05
  filter_bias_and_bn: true

LRScheduler:
  name: OneCycleLR
  warmup_epoch: 5
  cycle_momentum: false

Architecture:
  model_type: rec
  algorithm: NRTR
  in_channels: 3
  Transform: null
  Encoder:
    name: SVTRv2LNConvTwo33
    use_pos_embed: false
    # The per-stage lists below each have three entries.
    dims: [128, 256, 384]
    depths: [3, 6, 3]
    num_heads: [4, 8, 12]
    # One row per stage. Each row lists six mixer types while `depths` is
    # 3/6/3 — presumably only the first `depth` entries of a row are used;
    # TODO confirm against the encoder implementation.
    mixer:
      - [Conv, Conv, Conv, Conv, Conv, Conv]
      - [Conv, Conv, FGlobal, Global, Global, Global]
      - [Global, Global, Global, Global, Global, Global]
    local_k:
      - [5, 5]
      - [5, 5]
      - [-1, -1]
    sub_k:
      - [1, 1]
      - [2, 1]
      - [-1, -1]
    last_stage: false
    feat2d: false
  Decoder:
    name: MDiffDecoder
    num_decoder_layers: 3
    nhead: 6
    max_len: 25
    # Decoding-strategy switches; only semi_autoregressive_decoding is on.
    parallel_decoding: false
    autoregressive_decoding: false
    low_confidence_decoding: false
    random_mask_decoding: false
    semi_autoregressive_decoding: true
    cloze_mask_decoding: false
    sampler_step: 3
    sample_k: 3

Loss:
  name: NoneLoss

PostProcess:
  name: ARLabelDecode
  character_dict_path: ./tools/utils/ppocr_keys_v1.txt
  use_space_char: false

Metric:
  name: RecMetric
  main_indicator: acc
  is_filter: false

Train:
  dataset:
    name: RatioDataSetTVResize
    ds_width: true
    padding: false
    data_dir_list:
      - ../benchmark_bctr/benchmark_bctr_train/document_train
      - ../benchmark_bctr/benchmark_bctr_train/handwriting_train
      - ../benchmark_bctr/benchmark_bctr_train/scene_train
      - ../benchmark_bctr/benchmark_bctr_train/web_train
    transforms:
      - DecodeImagePIL:
          img_mode: RGB
      - PARSeqAugPIL: null
      - LLaDALabelEncode:
          character_dict_path: ./tools/utils/ppocr_keys_v1.txt
          use_space_char: false
          max_text_length: 25
          train_all_layer: true
          sample_num: 3
      - KeepKeys:
          keep_keys:
            - image
            - label
            - reflect_ids
            - noisy_batch
            - masked_indices
            - p_mask
            - length
  sampler:
    name: RatioSampler
    scales:
      - [128, 32]
    first_bs: 128
    fix_bs: false
    divided_factor: [4, 16]
    is_training: true
  loader:
    shuffle: true
    batch_size_per_card: 128
    drop_last: true
    max_ratio: 8
    num_workers: 4

Eval:
  dataset:
    name: RatioDataSetTVResize
    ds_width: true
    padding: false
    data_dir_list:
      - ../benchmark_bctr/benchmark_bctr_test/scene_test
    transforms:
      - DecodeImagePIL:
          img_mode: RGB
      - ARLabelEncode:
          character_dict_path: ./tools/utils/ppocr_keys_v1.txt
          use_space_char: false
          max_text_length: 25
      - KeepKeys:
          keep_keys:
            - image
            - label
            - length
  sampler:
    name: RatioSampler
    scales:
      - [128, 32]
    first_bs: 128
    fix_bs: false
    divided_factor: [4, 16]
    is_training: false
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 128
    max_ratio: 8
    num_workers: 4

# Run metadata recorded alongside the configuration.
# NOTE(review): presumably merged in from the launch command; verify.
filename: svtrv2_llada_fs_reflect_onedecoder_reflectwithoutmask_sample3_ch_3633
config: configs/rec/llada/svtrv2_llada_fs_reflect_onedecoder_reflectwithoutmask_sample3_ch_3633.yml
# Kept as a quoted string, exactly as in the original dump.
local_rank: '4'
eval: true