mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
Ignore MTP layer(s) when computing required memory
This commit is contained in:
parent
aa7fdb3259
commit
0c280a1bd2
@ -2064,6 +2064,9 @@ static std::pair<std::vector<double>, double> get_layer_sizes(const llama_model_
|
||||
LLAMA_LOG_WARN("Oops: strange layer index %d for tensor %s\n", il, name.c_str());
|
||||
continue;
|
||||
}
|
||||
if (!model.mtp && model.hparams.nextn_predict_layers > 0 && il >= n_layer - model.hparams.nextn_predict_layers) {
|
||||
continue;
|
||||
}
|
||||
result[il] += size;
|
||||
if (auto pos = name.rfind(".bias"); pos < name.size() && name.size() - pos == 4) {
|
||||
// bias, we don't need to account for those
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user