mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
Fix compiler warnings
This commit is contained in:
parent
7b73f45541
commit
3dd282358b
@@ -157,10 +157,8 @@ ggml_tensor * llm_build_context::build_deepseek2_tp_attention(
|
||||
// Per-rank wk_b/wv_b slices already exist from distribute_mla_tensors:
|
||||
// wk_b_local_pp: [n_embd_head_qk_nope, kv_lora_rank, n_head_local]
|
||||
// wv_b_local_pp: [kv_lora_rank, n_embd_head_v, n_head_local]
|
||||
auto wk_b_pp_split_raw = (const ggml_split_tensor_t *)model.layers[il].wk_b->extra;
|
||||
auto wv_b_pp_split_raw = (const ggml_split_tensor_t *)model.layers[il].wv_b->extra;
|
||||
GGML_ASSERT(wk_b_pp_split_raw && wv_b_pp_split_raw);
|
||||
ggml_tensor * wk_b_local_pp = wk_b_pp_split_raw->splits[id];
|
||||
GGML_ASSERT(wv_b_pp_split_raw);
|
||||
ggml_tensor * wv_b_local_pp = wv_b_pp_split_raw->splits[id];
|
||||
|
||||
ggml_tensor * kv_cache_nope = ggml_view_2d(ctx0, cache_local,
|
||||
|
||||
@@ -3812,8 +3812,6 @@ static void distribute_mla_tensors_for_split_mode_graph(
|
||||
const int n_head = hparams.n_head(il);
|
||||
const int n_embd_head_k = hparams.n_embd_head_k(il);
|
||||
const int n_embd_head_v = hparams.n_embd_head_v(il);
|
||||
const int qk_rope = hparams.n_rot;
|
||||
const int qk_nope = n_embd_head_k - qk_rope;
|
||||
|
||||
// granularity=4: keeps wo row blocks K-quant-aligned (% 256) and gqa_ratio % 4 == 0 for FA-MMA.
|
||||
auto split_heads = create_split(n_head, 4, cur_splits, mem_used);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user