mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
Minor logging cleanup (#1873)
This commit is contained in:
parent
809a63bbb7
commit
0c45696db4
@ -938,15 +938,15 @@ void llm_load_hparams(
|
||||
int n_nead_kv = hparams.n_gqa();
|
||||
if (n_nead_kv%4 != 0 || hparams.n_embd_head_k(0) != expected_head_size_k || hparams.n_embd_head_v(0) != expected_head_size_v ||
|
||||
hparams.n_rot != 64) {
|
||||
printf("==========================================================================\n");
|
||||
printf("Detected incompatible DeepSeek model without a known way to fix it.\n");
|
||||
printf("Consider making your own ik_llama.cpp compatible model or\n");
|
||||
printf("ask the model provider to make one for you,\n\n");
|
||||
printf("Sorry, uknown model => cannot fix it => bailing out\n");
|
||||
printf("==========================================================================\n");
|
||||
LLAMA_LOG_ERROR("==========================================================================\n");
|
||||
LLAMA_LOG_ERROR("Detected incompatible DeepSeek model without a known way to fix it.\n");
|
||||
LLAMA_LOG_ERROR("Consider making your own ik_llama.cpp compatible model or\n");
|
||||
LLAMA_LOG_ERROR("ask the model provider to make one for you,\n\n");
|
||||
LLAMA_LOG_ERROR("Sorry, uknown model => cannot fix it => bailing out\n");
|
||||
LLAMA_LOG_ERROR("==========================================================================\n");
|
||||
GGML_ABORT("Fatal error");
|
||||
}
|
||||
printf("================= Adjusted mainline llama.cpp MLA tensors to ik_llama.cpp\n");
|
||||
LLAMA_LOG_INFO("================= Adjusted mainline llama.cpp MLA tensors to ik_llama.cpp\n");
|
||||
for (auto& item : hparams.n_head_kv_arr) item = n_nead_kv;
|
||||
hparams.n_embd_head_k_full = 192;
|
||||
hparams.n_embd_head_v_full = 128;
|
||||
@ -976,7 +976,7 @@ void llm_load_hparams(
|
||||
// GLM-4.7-Flash has 47 layers (or 48, if an MTP layer is included in the GGUF).
|
||||
hparams.expert_gating_func = hparams.n_layer == 47 || hparams.n_layer == 48 ?
|
||||
LLM_EXPERT_GATING_FUNC_SIGMOID : LLM_EXPERT_GATING_FUNC_SOFTMAX;
|
||||
printf("================= Missing experts gating function -> set to %s\n",
|
||||
LLAMA_LOG_INFO("================= Missing experts gating function -> set to %s\n",
|
||||
llm_expert_gating_func_name(llm_expert_gating_func_type(hparams.expert_gating_func)));
|
||||
}
|
||||
ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false);
|
||||
@ -1390,13 +1390,13 @@ void llm_load_hparams(
|
||||
int n_nead_kv = hparams.n_gqa();
|
||||
if (n_nead_kv%4 != 0 || hparams.n_embd_head_k_full != 576 || hparams.n_embd_head_v_full != 512 ||
|
||||
hparams.n_rot != 64) {
|
||||
printf("==========================================================================\n");
|
||||
printf("Detected incompatible DeepSeek model without a known way to fix it.\n");
|
||||
printf("Sorry, uknown model => cannot fix it => bailing out\n");
|
||||
printf("==========================================================================\n");
|
||||
LLAMA_LOG_ERROR("==========================================================================\n");
|
||||
LLAMA_LOG_ERROR("Detected incompatible DeepSeek model without a known way to fix it.\n");
|
||||
LLAMA_LOG_ERROR("Sorry, uknown model => cannot fix it => bailing out\n");
|
||||
LLAMA_LOG_ERROR("==========================================================================\n");
|
||||
GGML_ABORT("Fatal error");
|
||||
}
|
||||
printf("================= Adjusted mainline llama.cpp MLA tensors to ik_llama.cpp\n");
|
||||
LLAMA_LOG_INFO("================= Adjusted mainline llama.cpp MLA tensors to ik_llama.cpp\n");
|
||||
for (auto& item : hparams.n_head_kv_arr) item = n_nead_kv;
|
||||
hparams.n_embd_head_k_full = 192;
|
||||
hparams.n_embd_head_v_full = 128;
|
||||
|
||||
@ -1111,7 +1111,7 @@ static bool llama_kv_cache_init(
|
||||
if (split_cache || replicate_mla) {
|
||||
LLAMA_LOG_INFO("%s: KV cache size per device%s:\n", __func__,
|
||||
replicate_mla ? " (MLA replicated)" : "");
|
||||
for (int i = 0; i < int(mem_split.size()); ++i) printf(" Device %d: %g MiB\n", i, mem_split[i]/1024./1024.);
|
||||
for (int i = 0; i < int(mem_split.size()); ++i) LLAMA_LOG_INFO(" Device %d: %g MiB\n", i, mem_split[i]/1024./1024.);
|
||||
}
|
||||
|
||||
#if 0
|
||||
@ -2414,7 +2414,7 @@ static void llm_prepare_mla(llama_model & model, int mla) {
|
||||
split.ggml.splits = split.tensor_splits.data();
|
||||
computed->extra = (void *)&split.ggml;
|
||||
|
||||
printf("Computed %s as %d x %d x %d of type %s, split across %d devices on dim=2\n",
|
||||
LLAMA_LOG_INFO("Computed %s as %d x %d x %d of type %s, split across %d devices on dim=2\n",
|
||||
tname.c_str(), (int)source->ne[0], (int)source->ne[1], (int)source->ne[2],
|
||||
ggml_type_name(source->type), n_device);
|
||||
} else {
|
||||
@ -2431,7 +2431,7 @@ static void llm_prepare_mla(llama_model & model, int mla) {
|
||||
iqk_modify_tensor(computed.get());
|
||||
}
|
||||
|
||||
printf("Computed %s as %d x %d x %d of type %s and stored in buffer %s\n",
|
||||
LLAMA_LOG_INFO("Computed %s as %d x %d x %d of type %s and stored in buffer %s\n",
|
||||
tname.c_str(), (int)source->ne[0], (int)source->ne[1], (int)source->ne[2],
|
||||
ggml_type_name(source->type), ggml_backend_buffer_name(computed->buffer));
|
||||
}
|
||||
@ -2664,7 +2664,7 @@ static void llm_prepare_mla(llama_model & model, int mla) {
|
||||
model.tensors_by_name.push_back(std::make_pair(name, l.computed_wk_b_pp.get()));
|
||||
l.wk_b_pp = l.computed_wk_b_pp.get();
|
||||
|
||||
printf("Computed %s as %d x %d x %d of type %s, split across %d devices on dim=2\n",
|
||||
LLAMA_LOG_INFO("Computed %s as %d x %d x %d of type %s, split across %d devices on dim=2\n",
|
||||
name.c_str(),
|
||||
(int)l.computed_wk_b_pp->ne[0],
|
||||
(int)l.computed_wk_b_pp->ne[1],
|
||||
@ -2803,7 +2803,7 @@ static void llm_prepare_mla(llama_model & model, int mla) {
|
||||
l.wkv_b = l.computed_wkv_b.get();
|
||||
model.tensors_by_name.push_back(std::make_pair(name, l.wkv_b));
|
||||
|
||||
printf("Computed %s as %d x %d of type %s and stored in buffer %s\n", name.c_str(), (int)wkv_b->ne[0], (int)wkv_b->ne[1],
|
||||
LLAMA_LOG_INFO("Computed %s as %d x %d of type %s and stored in buffer %s\n", name.c_str(), (int)wkv_b->ne[0], (int)wkv_b->ne[1],
|
||||
ggml_type_name(wkv_b->type), ggml_backend_buffer_name(l.computed_wkv_b->buffer));
|
||||
|
||||
ggml_graph_clear(graph);
|
||||
@ -2930,7 +2930,7 @@ static void llm_apply_khad_pretransform(llama_model & model) {
|
||||
|
||||
static void llm_scale_gate_inp_s(llama_model & model, bool uses_mmap) {
|
||||
auto & hparams = model.hparams;
|
||||
printf("%s: n_embd = %d\n", __func__, hparams.n_embd);
|
||||
LLAMA_LOG_INFO("%s: n_embd = %d\n", __func__, hparams.n_embd);
|
||||
std::vector<float> values(hparams.n_embd);
|
||||
float scale = 1.0f/sqrtf((float)hparams.n_embd);
|
||||
int n_host = 0;
|
||||
@ -10776,7 +10776,7 @@ void llama_log_callback_default(ggml_log_level level, const char * text, void *
|
||||
void llama_set_offload_policy(struct llama_context * lctx, int op, bool on_or_off) {
|
||||
if (!lctx || !lctx->sched) return;
|
||||
const char * op_name = op < 0 || op >= int(GGML_OP_COUNT) ? "all ops" : ggml_op_name(ggml_op(op));
|
||||
printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXX offload(%s) = %d\n", op_name, on_or_off);
|
||||
LLAMA_LOG_INFO("XXXXXXXXXXXXXXXXXXXXXXXXXXXX offload(%s) = %d\n", op_name, on_or_off);
|
||||
ggml_backend_sched_set_op_offload(lctx->sched, ggml_op(op), on_or_off);
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user