mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
Fix GLM MTP with split mode graph (#1887)
* Fix crash with GLM and MTP
* Fix GLM MTP with split mode graph
This commit is contained in:
parent
1f66f9912f
commit
d503b046f7
@ -174,7 +174,9 @@ struct create_tensors_helper : public create_tensors_helper_interface {
|
||||
return ctx_map.at(model.buft_layer[i].buft);
|
||||
}
|
||||
// Selects the ggml context used for the "split" tensors of layer `i`.
//
// A layer counts as an MTP layer when `nextn_predict_layers` is non-zero and
// `i` falls within the last `nextn_predict_layers` of the model's `n_layer`
// layers. For such layers the context is looked up by the layer's `buft`;
// for every other layer it is looked up by the layer's `buft_matrix`.
//
// The stale unconditional `return ctx_map.at(model.buft_layer[i].buft_matrix);`
// that preceded this logic is removed: it made the MTP check unreachable.
//
// NOTE(review): `n_layer - nextn_predict_layers` looks like unsigned arithmetic
// (the index is cast to uint32_t for the comparison) — confirm that
// nextn_predict_layers <= n_layer always holds, otherwise the difference wraps.
// NOTE(review): presumably `ctx_map.at()` throws when no context is registered
// for the chosen buffer type — verify against the declaration of `ctx_map`.
inline ggml_context * ctx_for_layer_split(int i) const {
    const bool is_mtp_layer = model.hparams.nextn_predict_layers > 0 &&
        static_cast<uint32_t>(i) >= model.hparams.n_layer - model.hparams.nextn_predict_layers;
    return is_mtp_layer ? ctx_map.at(model.buft_layer[i].buft) : ctx_map.at(model.buft_layer[i].buft_matrix);
}
|
||||
|
||||
std::map<ggml_backend_buffer_type_t, int> buft_layer_count;
|
||||
@ -2729,6 +2731,10 @@ bool create_tensors_helper::create_glm4_moe_tensors(const LLM_TN & tn) {
|
||||
const bool is_mtp_layer = hparams.nextn_predict_layers > 0 &&
|
||||
static_cast<uint32_t>(i) >= n_layer - hparams.nextn_predict_layers;
|
||||
|
||||
if (is_mtp_layer) {
|
||||
ctx_split = ctx_layer;
|
||||
}
|
||||
|
||||
int flags = 0;
|
||||
// Skip loading MTP layers if the feature is disabled
|
||||
if (!model.mtp) {
|
||||
@ -2780,7 +2786,7 @@ bool create_tensors_helper::create_glm4_moe_tensors(const LLM_TN & tn) {
|
||||
layer.ffn_exp_probs_b = create_tensor(ffn_ctx, tn(LLM_TENSOR_FFN_EXP_PROBS_B, "bias", i), { n_expert }, flags);
|
||||
|
||||
// MoE branch
|
||||
use_mmap_buffer &= !create_std_ffn_exps(n_embd, tn, i, flags);
|
||||
use_mmap_buffer &= !create_std_ffn_exps(n_embd, tn, i, flags, 0, ffn_ctx);
|
||||
|
||||
// Shared expert
|
||||
if (n_expert_shared > 0) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user