Ignore MTP layer(s) when computing required memory

2026-06-28 04:30:15 -05:00 · 2026-03-26 09:00:10 +00:00 · 2026-03-26 09:00:10 +00:00 · 0c280a1bd2
commit 0c280a1bd2
parent aa7fdb3259
1 changed files with 3 additions and 0 deletions
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -2064,6 +2064,9 @@ static std::pair<std::vector<double>, double> get_layer_sizes(const llama_model_
            LLAMA_LOG_WARN("Oops: strange layer index %d for tensor %s\n", il, name.c_str());
            continue;
        }
+        if (!model.mtp && model.hparams.nextn_predict_layers > 0 && il >= n_layer - model.hparams.nextn_predict_layers) {
+            continue;
+        }
        result[il] += size;
        if (auto pos = name.rfind(".bias"); pos < name.size() && name.size() - pos == 4) {
            // bias, we don't need to account for those