From f96eaddba8bed6a9a5e628bbf6a566775c70b49c Mon Sep 17 00:00:00 2001 From: Kawrakow Date: Fri, 26 Jun 2026 11:00:09 +0200 Subject: [PATCH] Revert DFlash SWA optimization (#2039) --- src/graphs/build_dflash.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/graphs/build_dflash.cpp b/src/graphs/build_dflash.cpp index 273372e6..725d08e5 100644 --- a/src/graphs/build_dflash.cpp +++ b/src/graphs/build_dflash.cpp @@ -315,9 +315,11 @@ ggml_cgraph * llm_build_context::build_dflash() { hparams.attn_soft_cap ? hparams.f_attn_logit_softcapping : 0.0f); cb(cur, "flash_attn", il); ggml_build_forward_expand(gf, cur); - if (use_swa) { - cur->op_params[4] = hparams.n_swa; - } + // Something goes wrong with this optimization. + // I guess, the cross context does not mingle well with it. + //if (use_swa) { + // cur->op_params[4] = hparams.n_swa; + //} cur = ggml_reshape_2d(ctx0, cur, model.layers[il].wo->ne[0], n_tokens); cb(cur, "flash_attn_reshaped", il);