# Source-viewer metadata (not part of the config): file size 4,231 bytes, revision 6730f86.
# Run-wide settings: device, schedule length, logging/eval cadence, paths,
# character set and mixed-precision / gradient-clipping switches.
Global:
  device: gpu
  epoch_num: 40
  log_smooth_window: 20
  print_batch_step: 10
  output_dir: ./output/rec/u14m_filter/svtrv2_llada_test_lr00005_fs_reflect_onedecoder_semiar_withoutmask_sample3/
  # NOTE(review): the three two-element lists below are presumably
  # [start, interval] pairs — confirm against the trainer that reads them.
  save_epoch_step:
    - 10
    - 1
  eval_batch_step:
    - 0
    - 500
  eval_epoch_step:
    - 0
    - 1
  cal_metric_during_train: false
  # No pretrained weights and no resume checkpoint are configured.
  pretrained_model: null
  checkpoints: null
  use_tensorboard: false
  infer_img: ./img_44_1.jpg
  # The same dictionary path, space-char flag and max length are repeated in
  # PostProcess and in the Train/Eval label encoders; keep them in sync.
  character_dict_path: ./tools/utils/EN_symbol_dict.txt
  max_text_length: 25
  use_space_char: false
  save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_nrtr.txt
  use_amp: true
  grad_clip_val: 20.0
  distributed: true
# Optimizer selection and hyper-parameters.
Optimizer:
  name: AdamW
  lr: 0.0005
  weight_decay: 0.05
  # NOTE(review): presumably excludes bias and normalization parameters from
  # weight decay — confirm against the optimizer builder.
  filter_bias_and_bn: true
# Learning-rate schedule.
LRScheduler:
  name: OneCycleLR
  # Fractional value: warm-up is specified as one and a half epochs.
  warmup_epoch: 1.5
  cycle_momentum: false
# Model definition: no Transform, an SVTRv2 encoder and an MDiffDecoder.
Architecture:
  model_type: rec
  algorithm: NRTR
  in_channels: 3
  Transform: null
  Encoder:
    name: SVTRv2LNConvTwo33
    use_pos_embed: false
    # Three stages: dims, depths, num_heads, mixer, local_k and sub_k each
    # carry exactly one entry per stage.
    dims:
      - 128
      - 256
      - 384
    depths:
      - 6
      - 6
      - 6
    num_heads:
      - 4
      - 8
      - 12
    # One row per stage, six entries per row (matching depths).
    mixer:
      - - Conv
        - Conv
        - Conv
        - Conv
        - Conv
        - Conv
      - - Conv
        - Conv
        - FGlobal
        - Global
        - Global
        - Global
      - - Global
        - Global
        - Global
        - Global
        - Global
        - Global
    # One pair per stage; the last stage uses -1 for both values.
    local_k:
      - - 5
        - 5
      - - 5
        - 5
      - - -1
        - -1
    sub_k:
      - - 1
        - 1
      - - 2
        - 1
      - - -1
        - -1
    last_stage: false
    feat2d: false
  Decoder:
    name: MDiffDecoder
    num_decoder_layers: 6
    nhead: 6
    # Same value as Global.max_text_length (25).
    max_len: 25
    # Decoding-strategy switches: only semi_autoregressive_decoding is on.
    parallel_decoding: false
    autoregressive_decoding: false
    low_confidence_decoding: false
    random_mask_decoding: false
    semi_autoregressive_decoding: true
    cloze_mask_decoding: false
    sampler_step: 3
    sample_k: 3
# Loss selection.
# NOTE(review): "NoneLoss" suggests the loss is computed elsewhere (e.g. inside
# the decoder) — confirm.
Loss:
  name: NoneLoss
# Prediction decoding; the dictionary path and space-char flag repeat the
# values set under Global.
PostProcess:
  name: ARLabelDecode
  character_dict_path: ./tools/utils/EN_symbol_dict.txt
  use_space_char: false
# Evaluation metric; "acc" is the indicator used to rank results.
Metric:
  name: RecMetric
  main_indicator: acc
  is_filter: true
# Training data pipeline: dataset + transforms, batch sampler, and loader.
Train:
  dataset:
    name: RatioDataSetTVResize
    ds_width: true
    padding: false
    # Five Union14M-L filtered LMDB splits.
    data_dir_list:
      - /data/Union14M-L-LMDB-Filtered/filter_train_challenging
      - /data/Union14M-L-LMDB-Filtered/filter_train_hard
      - /data/Union14M-L-LMDB-Filtered/filter_train_medium
      - /data/Union14M-L-LMDB-Filtered/filter_train_normal
      - /data/Union14M-L-LMDB-Filtered/filter_train_easy
    # Applied in list order; each item is a single-key mapping of
    # transform name -> its arguments (null when there are none).
    transforms:
      - DecodeImagePIL:
          img_mode: RGB
      - PARSeqAugPIL: null
      - LLaDALabelEncode:
          character_dict_path: ./tools/utils/EN_symbol_dict.txt
          use_space_char: false
          max_text_length: 25
          train_all_layer: true
          # Same value as Architecture.Decoder.sample_k (3).
          sample_num: 3
      - KeepKeys:
          keep_keys:
            - image
            - label
            - reflect_ids
            - noisy_batch
            - masked_indices
            - p_mask
            - length
  sampler:
    name: RatioSampler
    # A list holding one [128, 32] pair.
    # NOTE(review): presumably [width, height] — confirm against RatioSampler.
    scales:
      - - 128
        - 32
    first_bs: 256
    fix_bs: false
    divided_factor:
      - 4
      - 16
    is_training: true
  loader:
    shuffle: true
    # Same value as sampler.first_bs (256).
    batch_size_per_card: 256
    drop_last: true
    max_ratio: 4
    num_workers: 4
# Evaluation data pipeline: no augmentation, no shuffling, no dropped batches.
Eval:
  dataset:
    name: RatioDataSetTVResize
    ds_width: true
    padding: false
    # Six benchmark sets evaluated together.
    data_dir_list:
      - /data/evaluation/CUTE80
      - /data/evaluation/IC13_857
      - /data/evaluation/IC15_1811
      - /data/evaluation/IIIT5k_3000
      - /data/evaluation/SVT
      - /data/evaluation/SVTP
    # Applied in list order; labels are encoded with ARLabelEncode here,
    # whereas Train uses LLaDALabelEncode.
    transforms:
      - DecodeImagePIL:
          img_mode: RGB
      - ARLabelEncode:
          character_dict_path: ./tools/utils/EN_symbol_dict.txt
          use_space_char: false
          max_text_length: 25
      - KeepKeys:
          keep_keys:
            - image
            - label
            - length
  sampler:
    name: RatioSampler
    # A list holding one [128, 32] pair.
    # NOTE(review): presumably [width, height] — confirm against RatioSampler.
    scales:
      - - 128
        - 32
    first_bs: 256
    fix_bs: false
    divided_factor:
      - 4
      - 16
    is_training: false
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 256
    max_ratio: 4
    num_workers: 4
# Top-level run metadata.
# NOTE(review): these look like launcher/CLI values merged into the config
# rather than hand-written settings — confirm before editing them by hand.
filename: svtrv2_llada_fs_reflect_onedecoder_reflectwithoutmask_sample3
config: configs/rec/llada/svtrv2_llada_fs_reflect_onedecoder_reflectwithoutmask_sample3.yml
# Quoted, so the rank is a string rather than an integer.
local_rank: '2'
eval: true