diff --git a/src/graphs/build_deepseek2.cpp b/src/graphs/build_deepseek2.cpp index 9d9bc558..0c4a2f70 100644 --- a/src/graphs/build_deepseek2.cpp +++ b/src/graphs/build_deepseek2.cpp @@ -157,10 +157,8 @@ ggml_tensor * llm_build_context::build_deepseek2_tp_attention( // Per-rank wk_b/wv_b slices already exist from distribute_mla_tensors: // wk_b_local_pp: [n_embd_head_qk_nope, kv_lora_rank, n_head_local] // wv_b_local_pp: [kv_lora_rank, n_embd_head_v, n_head_local] - auto wk_b_pp_split_raw = (const ggml_split_tensor_t *)model.layers[il].wk_b->extra; auto wv_b_pp_split_raw = (const ggml_split_tensor_t *)model.layers[il].wv_b->extra; - GGML_ASSERT(wk_b_pp_split_raw && wv_b_pp_split_raw); - ggml_tensor * wk_b_local_pp = wk_b_pp_split_raw->splits[id]; + GGML_ASSERT(wv_b_pp_split_raw); ggml_tensor * wv_b_local_pp = wv_b_pp_split_raw->splits[id]; ggml_tensor * kv_cache_nope = ggml_view_2d(ctx0, cache_local, diff --git a/src/llama-load-tensors.cpp b/src/llama-load-tensors.cpp index b27884d8..1fb84421 100644 --- a/src/llama-load-tensors.cpp +++ b/src/llama-load-tensors.cpp @@ -3812,8 +3812,6 @@ static void distribute_mla_tensors_for_split_mode_graph( const int n_head = hparams.n_head(il); const int n_embd_head_k = hparams.n_embd_head_k(il); const int n_embd_head_v = hparams.n_embd_head_v(il); - const int qk_rope = hparams.n_rot; - const int qk_nope = n_embd_head_k - qk_rope; // granularity=4: keeps wo row blocks K-quant-aligned (% 256) and gqa_ratio % 4 == 0 for FA-MMA. auto split_heads = create_split(n_head, 4, cur_splits, mem_used);