model_name: molmo
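# LLM backbone settings (Qwen2.5-7B-scale decoder: d_model 3584, 28 layers, 28 query heads / 4 KV heads).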
llm:
  d_model: 3584
  n_heads: 28
  n_kv_heads: 4
  head_dim: null
  qkv_bias: true
  clip_qkv: null
  n_layers: 28
  mlp_ratio: 4
  mlp_hidden_size: 37888
  activation_type: swiglu
  block_type: sequential
  rope: true
  rope_full_precision: true
  rope_theta: 1000000.0
  rope_type: default
  rope_factor: null
  rope_high_freq_factor: null
  rope_low_freq_factor: null
  rope_original_max_position_embeddings: null
  attention_type: sdpa
  float32_attention: true
  attention_dropout: 0.0
  attention_layer_norm: false
  attention_layer_norm_type: olmo
  residual_dropout: 0.1
  response_residual_dropout: 0.0
  layer_norm_type: rms
  layer_norm_with_affine: true
  layer_norm_eps: 1.0e-06
  attention_layer_norm_with_affine: true
  max_sequence_length: 4096
  max_position_embeddings: null
  include_bias: false
  bias_for_layer_norm: null
  norm_after: false
  moe_num_experts: 8
  moe_top_k: 2
  moe_mlp_impl: sparse
  moe_log_expert_assignment: false
  moe_shared_expert: false
  moe_lbl_in_fp32: false
  moe_interleave: false
  moe_loss_weight: 0.1
  moe_zloss_weight: null
  moe_dropless: true
  moe_capacity_factor: 1.25
  embedding_dropout: 0.0
  scale_logits: false
  vocab_size: 152064
  additional_vocab_size: 128
  weight_tying: false
  embedding_size: 152064
  use_position_ids: true
  tokenizer:
    identifier: Qwen/Qwen2.5-7B
    tokenizer_dir: null
    depth_tokens: true
  init_path: gs://mm-olmo/pretrained_llms/qwen2.5-7b.pt
  init_incremental: null
  new_embedding_init_range: 0.02
  initializer_range: 0.02
  normalize_input_embeds: false
  activation_checkpoint: whole_layer
  compile: blocks
  fix_pad_tokenizer: false
  resize_vocab: false
  init_std: 0.02
  init_fn: normal
  init_cutoff_factor: null
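# Vision backbone: SigLIP2 SO400M ViT (378x378 input, 14x14 patches, 27 layers) plus the connector (2D attention pooling and an MLP projector).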
vision_backbone:
  vit:
    image_model_type: siglip
    image_default_input_size:
    - 378
    - 378
    image_patch_size: 14
    image_pos_patch_size: 14
    image_emb_dim: 1152
    image_num_heads: 16
    image_num_key_value_heads: 16
    image_num_layers: 27
    image_head_dim: 72
    image_mlp_dim: 4304
    image_mlp_activations: gelu_pytorch_tanh
    image_dropout_rate: 0.0
    image_num_pos: 729
    image_norm_eps: 1.0e-06
    attention_dropout: 0.0
    residual_dropout: 0.0
    initializer_range: 0.02
    float32_attention: true
    attention_type: sdpa
    activation_checkpointing: true
    init_path: gs://mm-olmo/pretrained_image_encoders/siglip2-so400m-14-384.pt
    resize_mode: siglip
    pad_value: 0.0
    normalize: siglip
  image_pooling_2d: attention_meanq
  pooling_attention_mask: false
  image_projector: mlp
  image_padding_embed: null
  vit_layers:
  - -3
  - -9
  skip_unused_layers: true
  image_feature_dropout: 0.0
  connector_activation_checkpointing: true
  compile_vit: blocks
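# Prompt and message formatting options.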
data_formatter:
  prompt_templates: uber_model
  message_format: role
  system_prompt: demo_or_style
  always_start_with_space: false
  default_inference_len: 65
  select_answer: best
  debug: false
  image_last: false
  format_message_list: null
  p_one_message: 0.0
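# Multimodal preprocessing: overlapping image crops (up to 8 per image, 2 images max), 2x2 token pooling, and image augmentation.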
mm_preprocessor:
  crop_mode: overlap-and-resize-c2
  max_crops: 8
  max_images: 2
  max_multi_image_crops: 8
  pooling_w: 2
  pooling_h: 2
  overlap_margins:
  - 4
  - 4
  use_col_tokens: true
  loss_token_weighting: root_subsegments
  legacy_image_mask: false
  max_answer_len: null
  img_aug: true
  bi_directional_attn: null
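# LoRA fine-tuning settings and action discretization (256 action bins).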
lora_enable: true
lora_rank: 32
lora_alpha: 16
lora_dropout: 0.0
lora_bias: none
n_action_bins: 256
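# Per-dimension action and proprioception statistics (mean/std/min/max and 1st/99th percentiles) for the LIBERO-Object dataset,
# computed over 454 trajectories / 66984 transitions; presumably used to normalize actions for training and to de-normalize
# predicted actions at inference (assumption based on the q01/q99 bounds and n_action_bins above).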
norm_stats:
  libero_object_no_noops_modified:
    action:
      mean:
      - 0.07096529006958008
      - 0.13498851656913757
      - -0.04601382836699486
      - 0.00123520044144243
      - 0.006998839322477579
      - -0.015027612447738647
      - 0.46428999304771423
      std:
      - 0.2681235373020172
      - 0.43846824765205383
      - 0.4474974274635315
      - 0.024446550756692886
      - 0.049355510622262955
      - 0.042107198387384415
      - 0.49879148602485657
      max:
      - 0.9375
      - 0.8919642567634583
      - 0.9375
      - 0.17678570747375488
      - 0.35035714507102966
      - 0.1810714304447174
      - 1.0
      min:
      - -0.8839285969734192
      - -0.9375
      - -0.9375
      - -0.15000000596046448
      - -0.29035714268684387
      - -0.32892856001853943
      - 0.0
      q01:
      - -0.5383928418159485
      - -0.8758928775787354
      - -0.9375
      - -0.06964285671710968
      - -0.11678571254014969
      - -0.15964286029338837
      - 0.0
      q99:
      - 0.8464285731315613
      - 0.84375
      - 0.9375
      - 0.08142857253551483
      - 0.14892856776714325
      - 0.0867857113480568
      - 1.0
    proprio:
      mean:
      - -0.02999030612409115
      - -0.007947085425257683
      - 0.20293472707271576
      - 3.1086409091949463
      - -0.21404768526554108
      - -0.11307074874639511
      - 0.0
      - 0.029380427673459053
      - -0.030556727200746536
      std:
      - 0.06694897264242172
      - 0.17608462274074554
      - 0.07807064801454544
      - 0.0868484303355217
      - 0.33540457487106323
      - 0.20728276669979095
      - 0.0
      - 0.00956575945019722
      - 0.009197483770549297
      max:
      - 0.14580604434013367
      - 0.33216384053230286
      - 0.3857804834842682
      - 3.4003844261169434
      - 0.7954911589622498
      - 0.6642207503318787
      - 0.0
      - 0.04104341194033623
      - -0.00018117300351150334
      min:
      - -0.1765444278717041
      - -0.29457300901412964
      - 0.008128180168569088
      - 2.2890501022338867
      - -1.883241891860962
      - -1.0600427389144897
      - 0.0
      - 0.0006495157140307128
      - -0.041782498359680176
      q01:
      - -0.14911890715360643
      - -0.25978428691625594
      - 0.009925739830359817
      - 2.7545341420173646
      - -1.3996034812927245
      - -0.6867720144987106
      - 0.0
      - 0.008197814421728254
      - -0.04015838988125324
      q99:
      - 0.09063626825809479
      - 0.29066365867853167
      - 0.3370887073874472
      - 3.2611824750900267
      - 0.32092821151018125
      - 0.4037663781642913
      - 0.0
      - 0.039891827926039694
      - -0.009106044843792932
    num_transitions: 66984
    num_trajectories: 454