mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
Support Mimo-2.5 (#1723)
This commit is contained in:
parent
2dd3818083
commit
e76700119d
@ -168,6 +168,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
||||
{ LLM_KV_ATTENTION_SHARED_KV_LAYERS, "%s.attention.shared_kv_layers" },
|
||||
{ LLM_KV_ATTENTION_KEY_LENGTH_SWA, "%s.attention.key_length_swa" },
|
||||
{ LLM_KV_ATTENTION_VALUE_LENGTH_SWA, "%s.attention.value_length_swa" },
|
||||
{ LLM_KV_ATTENTION_VALUE_SCALE, "%s.attention.value_scale" },
|
||||
|
||||
{ LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
|
||||
{ LLM_KV_ROPE_DIMENSION_COUNT_SWA, "%s.rope.dimension_count_swa" },
|
||||
|
||||
@ -161,6 +161,7 @@ enum llm_kv {
|
||||
LLM_KV_ATTENTION_SHARED_KV_LAYERS,
|
||||
LLM_KV_ATTENTION_KEY_LENGTH_SWA,
|
||||
LLM_KV_ATTENTION_VALUE_LENGTH_SWA,
|
||||
LLM_KV_ATTENTION_VALUE_SCALE,
|
||||
|
||||
LLM_KV_ROPE_DIMENSION_COUNT,
|
||||
LLM_KV_ROPE_DIMENSION_COUNT_SWA,
|
||||
|
||||
@ -2575,6 +2575,10 @@ ggml_tensor * llm_build_context::build_std_attention(ggml_cgraph * gf, ggml_tens
|
||||
split_wv, bv ? bv->splits[id] : nullptr,
|
||||
the_q_norm, the_k_norm, f_attn_scale, il, add_graph_split);
|
||||
Qcur = Q; Kcur = K; Vcur = V;
|
||||
// MIMO2 only: multiply V by hparams.f_attn_v_scale. The scale op is skipped
// when the factor is within 1e-4 of 1, so no extra graph node is added then.
if (model.arch == LLM_ARCH_MIMO2 && std::abs(model.hparams.f_attn_v_scale - 1) > 1e-4f) {
|
||||
Vcur = ggml_scale(ctx0, Vcur, model.hparams.f_attn_v_scale);
|
||||
cb(Vcur, "Vcur_scales", il_cb);
|
||||
}
|
||||
}
|
||||
auto rope_factors = rope_factors_in;
|
||||
if (rope_factors) {
|
||||
@ -2781,6 +2785,10 @@ ggml_tensor * llm_build_context::build_std_attention(ggml_cgraph * gf, ggml_tens
|
||||
model.layers[il].wq, model.layers[il].bq, model.layers[il].wk, model.layers[il].bk, model.layers[il].wv, model.layers[il].bv,
|
||||
model.layers[il].attn_q_norm, model.layers[il].attn_k_norm, f_attn_scale, il);
|
||||
Qcur = Q; Kcur = K; Vcur = V;
|
||||
// MIMO2 only: multiply V by hparams.f_attn_v_scale. The scale op is skipped
// when the factor is within 1e-4 of 1, so no extra graph node is added then.
if (model.arch == LLM_ARCH_MIMO2 && std::abs(model.hparams.f_attn_v_scale - 1) > 1e-4f) {
|
||||
Vcur = ggml_scale(ctx0, Vcur, model.hparams.f_attn_v_scale);
|
||||
cb(Vcur, "Vcur_scales", il);
|
||||
}
|
||||
if (model.arch == LLM_ARCH_GEMMA4) {
|
||||
Vcur = ggml_reshape_3d(ctx0, Vcur, model.hparams.n_embd_head_v(il), model.hparams.n_head_kv(il), n_tokens);
|
||||
Vcur = ggml_rms_norm(ctx0, Vcur, model.hparams.f_norm_rms_eps);
|
||||
|
||||
@ -1248,6 +1248,7 @@ void llm_load_hparams(
|
||||
ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
|
||||
ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa);
|
||||
ml.get_key(LLM_KV_ROPE_FREQ_BASE_SWA, hparams.rope_freq_base_train_swa);
|
||||
// Read the "%s.attention.value_scale" metadata key into hparams.f_attn_v_scale.
// NOTE(review): the trailing `false` presumably marks the key as optional, in
// which case a file without it keeps the field's in-struct default — confirm
// against get_key's signature.
ml.get_key(LLM_KV_ATTENTION_VALUE_SCALE, hparams.f_attn_v_scale, false);
|
||||
//TODO
|
||||
//hparams.swa_type = LLAMA_SWA_TYPE_STANDARD; // which is the same as OpenAI
|
||||
ml.get_key_or_arr(LLM_KV_ATTENTION_SLIDING_WINDOW_PATTERN, hparams.swa_layers, hparams.n_layer);
|
||||
|
||||
@ -105,6 +105,7 @@ struct llama_hparams {
|
||||
float f_residual_scale = 0.0f;
|
||||
float f_embedding_scale = 0.0f;
|
||||
float f_attention_scale = 0.0f;
|
||||
// Multiplier applied to the attention V tensor. Loaded from
// LLM_KV_ATTENTION_VALUE_SCALE; build_std_attention applies it for
// LLM_ARCH_MIMO2 when it differs from 1 by more than 1e-4. Defaults to 1.0f.
float f_attn_v_scale = 1.0f;
|
||||
|
||||
// grok-2
|
||||
float f_attn_out_scale = 0.0f;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user