diff --git a/common/common.cpp b/common/common.cpp index 3512c4f866..0dd9ede5eb 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -225,7 +225,7 @@ bool set_process_priority(enum ggml_sched_priority prio) { } if (!SetPriorityClass(GetCurrentProcess(), p)) { - CMN_WRN("failed to set process priority class %d : (%d)\n", prio, (int) GetLastError()); + COM_WRN("failed to set process priority class %d : (%d)\n", prio, (int) GetLastError()); return false; } @@ -251,7 +251,7 @@ bool set_process_priority(enum ggml_sched_priority prio) { } if (setpriority(PRIO_PROCESS, 0, p) != 0) { - CMN_WRN("failed to set process priority %d : %s (%d)\n", prio, strerror(errno), errno); + COM_WRN("failed to set process priority %d : %s (%d)\n", prio, strerror(errno), errno); return false; } return true; @@ -284,14 +284,14 @@ void postprocess_cpu_params(common_cpu_params & cpuparams, const common_cpu_para if (n_set && n_set < cpuparams.n_threads) { // Not enough set bits, may experience performance issues. - CMN_WRN("Not enough set bits in CPU mask (%d) to satisfy requested thread count: %d\n", n_set, cpuparams.n_threads); + COM_WRN("Not enough set bits in CPU mask (%d) to satisfy requested thread count: %d\n", n_set, cpuparams.n_threads); } } bool parse_cpu_range(const std::string & range, bool (&boolmask)[GGML_MAX_N_THREADS]) { size_t dash_loc = range.find('-'); if (dash_loc == std::string::npos) { - CMN_ERR("%s", "Format of CPU range is invalid! Expected []-[].\n"); + COM_ERR("%s", "Format of CPU range is invalid! Expected []-[].\n"); return false; } @@ -303,7 +303,7 @@ bool parse_cpu_range(const std::string & range, bool (&boolmask)[GGML_MAX_N_THRE } else { start_i = std::stoull(range.substr(0, dash_loc)); if (start_i >= GGML_MAX_N_THREADS) { - CMN_ERR("%s", "Start index out of bounds!\n"); + COM_ERR("%s", "Start index out of bounds!\n"); return false; } } @@ -313,7 +313,7 @@ bool parse_cpu_range(const std::string & range, bool (&boolmask)[GGML_MAX_N_THRE } else { end_i = std::stoull(range.substr(dash_loc + 1)); if (end_i >= GGML_MAX_N_THREADS) { - CMN_ERR("%s", "End index out of bounds!\n"); + COM_ERR("%s", "End index out of bounds!\n"); return false; } } @@ -348,7 +348,7 @@ bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREAD } else if (c >= 'A' && c <= 'F') { id -= 'A' - 10; } else { - CMN_ERR("Invalid hex character '%c' at position %d\n", c, int32_t(i)); + COM_ERR("Invalid hex character '%c' at position %d\n", c, int32_t(i)); return false; } @@ -379,21 +379,21 @@ void common_params_print_info(const common_params & params, bool print_devices) #else const char * build_type = " (debug)"; #endif - CMN_TRC("%s: build %d (%s) with %s for %s%s\n", __func__, llama_build_number(), llama_commit(), llama_compiler(), llama_build_target(), build_type); + COM_TRC("%s: build %d (%s) with %s for %s%s\n", __func__, llama_build_number(), llama_commit(), llama_compiler(), llama_build_target(), build_type); - CMN_INF("%s: verbosity = %d (adjust with the `-lv N` CLI arg)\n", __func__, common_log_get_verbosity_thold()); + COM_INF("%s: verbosity = %d (adjust with the `-lv N` CLI arg)\n", __func__, common_log_get_verbosity_thold()); // device enumeration creates a primary context on CUDA backends, skip it when the caller does not own any device if (print_devices) { - CMN_TRC("%s", "device_info:\n"); + COM_TRC("%s", "device_info:\n"); for (size_t i = 0; i < ggml_backend_dev_count(); ++i) { auto * dev = ggml_backend_dev_get(i); size_t free, total; ggml_backend_dev_memory(dev, &free, &total); - CMN_TRC(" - %-8s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024); + COM_TRC(" - %-8s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024); } } - CMN_TRC("%s\n", common_params_get_system_info(params).c_str()); + COM_TRC("%s\n", common_params_get_system_info(params).c_str()); } std::string common_params_get_system_info(const common_params & params) { @@ -660,7 +660,7 @@ void string_process_escapes(std::string & input) { bool string_parse_kv_override(const char * data, std::vector & overrides) { const char * sep = strchr(data, '='); if (sep == nullptr || sep - data >= 128) { - CMN_ERR("%s: malformed KV override '%s'\n", __func__, data); + COM_ERR("%s: malformed KV override '%s'\n", __func__, data); return false; } llama_model_kv_override kvo; @@ -683,20 +683,20 @@ bool string_parse_kv_override(const char * data, std::vector 127) { - CMN_ERR("%s: malformed KV override '%s', value cannot exceed 127 chars\n", __func__, data); + COM_ERR("%s: malformed KV override '%s', value cannot exceed 127 chars\n", __func__, data); return false; } strncpy(kvo.val_str, sep, 127); kvo.val_str[127] = '\0'; } else { - CMN_ERR("%s: invalid type for KV override '%s'\n", __func__, data); + COM_ERR("%s: invalid type for KV override '%s'\n", __func__, data); return false; } overrides.emplace_back(std::move(kvo)); @@ -1199,8 +1199,8 @@ common_init_result::common_init_result(common_params & params, bool model_only) auto cparams = common_context_params_to_llama(params); if (params.fit_params) { - CMN_TRC("%s", "fitting params to device memory ...\n"); - CMN_TRC("%s", "(for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on)\n"); + COM_TRC("%s", "fitting params to device memory ...\n"); + COM_TRC("%s", "(for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on)\n"); common_fit_params(params.model.path.c_str(), &mparams, &cparams, params.tensor_split, params.tensor_buft_overrides.data(), @@ -1227,7 +1227,7 @@ common_init_result::common_init_result(common_params & params, bool model_only) llama_adapter_lora_ptr lora; lora.reset(llama_adapter_lora_init(model, la.path.c_str())); if (lora == nullptr) { - CMN_ERR("failed to load lora adapter '%s'\n", la.path.c_str()); + COM_ERR("failed to load lora adapter '%s'\n", la.path.c_str()); pimpl->model.reset(model); return; } @@ -1246,14 +1246,14 @@ common_init_result::common_init_result(common_params & params, bool model_only) common_init_sampler_from_model(model, params.sampling); if (params.sampling.ignore_eos && llama_vocab_eos(vocab) == LLAMA_TOKEN_NULL) { - CMN_WRN("%s", "vocab does not have an EOS token, ignoring --ignore-eos\n"); + COM_WRN("%s", "vocab does not have an EOS token, ignoring --ignore-eos\n"); params.sampling.ignore_eos = false; } // initialize once for (llama_token i = 0; i < llama_vocab_n_tokens(vocab); i++) { if (llama_vocab_is_eog(vocab, i)) { - CMN_TRC("added %s logit bias = %f\n", common_token_to_piece(vocab, i).c_str(), -INFINITY); + COM_TRC("added %s logit bias = %f\n", common_token_to_piece(vocab, i).c_str(), -INFINITY); params.sampling.logit_bias_eog.push_back({i, -INFINITY}); } } @@ -1291,7 +1291,7 @@ common_init_result::common_init_result(common_params & params, bool model_only) llama_context * lctx = llama_init_from_model(model, cparams); if (lctx == NULL) { - CMN_ERR("failed to create context with model '%s'\n", params.model.path.c_str()); + COM_ERR("failed to create context with model '%s'\n", params.model.path.c_str()); return; } @@ -1328,7 +1328,7 @@ common_init_result_ptr common_init_from_params(common_params & params, bool mode llama_model * model = res->model(); if (model == NULL) { - CMN_ERR("failed to load model '%s'\n", params.model.path.c_str()); + COM_ERR("failed to load model '%s'\n", params.model.path.c_str()); return res; } @@ -1338,14 +1338,14 @@ common_init_result_ptr common_init_from_params(common_params & params, bool mode llama_context * lctx = res->context(); if (lctx == NULL) { - CMN_ERR("failed to create context with model '%s'\n", params.model.path.c_str()); + COM_ERR("failed to create context with model '%s'\n", params.model.path.c_str()); return res; } const llama_vocab * vocab = llama_model_get_vocab(model); if (params.ctx_shift && !llama_memory_can_shift(llama_get_memory(lctx))) { - CMN_WRN("%s", "KV cache shifting is not supported for this context, disabling KV cache shifting\n"); + COM_WRN("%s", "KV cache shifting is not supported for this context, disabling KV cache shifting\n"); params.ctx_shift = false; } @@ -1374,7 +1374,7 @@ common_init_result_ptr common_init_from_params(common_params & params, bool mode bool ok = true; if (llama_vocab_bos(vocab) == LLAMA_TOKEN_NULL) { - CMN_WRN("%s", "vocab does not have a BOS token, reranking will not work\n"); + COM_WRN("%s", "vocab does not have a BOS token, reranking will not work\n"); ok = false; } @@ -1383,10 +1383,10 @@ common_init_result_ptr common_init_from_params(common_params & params, bool mode bool has_rerank_prompt = llama_model_chat_template(model, "rerank") != NULL; if (!has_eos && !has_sep && !has_rerank_prompt) { - CMN_WRN("%s", "vocab does not have an EOS token, SEP token, or rerank prompt. Reranking will not work\n"); + COM_WRN("%s", "vocab does not have an EOS token, SEP token, or rerank prompt. Reranking will not work\n"); ok = false; } else if (!has_eos) { - CMN_WRN("%s", "vocab does not have an EOS token, using SEP token as fallback\n"); + COM_WRN("%s", "vocab does not have an EOS token, using SEP token as fallback\n"); } if (!ok) { @@ -1399,7 +1399,7 @@ common_init_result_ptr common_init_from_params(common_params & params, bool mode } if (params.warmup) { - CMN_TRC("%s", "warming up the model with an empty run - please wait ... (--no-warmup to disable)\n"); + COM_TRC("%s", "warming up the model with an empty run - please wait ... (--no-warmup to disable)\n"); std::vector tmp; llama_token bos = llama_vocab_bos(vocab); @@ -1473,20 +1473,20 @@ common_context_seq_rm_type common_context_can_seq_rm(llama_context * ctx) { int ret = llama_decode(ctx, llama_batch_get_one(tmp.data(), tmp.size())); if (ret != 0) { - CMN_ERR("llama_decode() failed: %d\n", ret); + COM_ERR("llama_decode() failed: %d\n", ret); res = COMMON_CONTEXT_SEQ_RM_TYPE_NO; goto done; } if (llama_n_rs_seq(ctx) > 0) { - CMN_TRC("%s", "the context supports bounded partial sequence removal\n"); + COM_TRC("%s", "the context supports bounded partial sequence removal\n"); res = COMMON_CONTEXT_SEQ_RM_TYPE_RS; goto done; } // try to remove the last tokens if (!llama_memory_seq_rm(mem, 0, 1, -1)) { - CMN_TRC("%s", "the context does not support partial sequence removal\n"); + COM_TRC("%s", "the context does not support partial sequence removal\n"); res = COMMON_CONTEXT_SEQ_RM_TYPE_FULL; goto done; } @@ -1803,13 +1803,13 @@ static common_control_vector_data common_control_vector_load_one(const common_co }; struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params); if (!ctx_gguf) { - CMN_ERR("failed to load control vector file from %s\n", load_info.fname.c_str()); + COM_ERR("failed to load control vector file from %s\n", load_info.fname.c_str()); return result; } int32_t n_tensors = gguf_get_n_tensors(ctx_gguf); if (n_tensors == 0) { - CMN_WRN("no direction tensors found in %s\n", load_info.fname.c_str()); + COM_WRN("no direction tensors found in %s\n", load_info.fname.c_str()); } for (int i = 0; i < n_tensors; i++) { @@ -1827,23 +1827,23 @@ static common_control_vector_data common_control_vector_load_one(const common_co } } if (layer_idx < 0) { - CMN_ERR("invalid/unparsable direction tensor layer index in %s\n", load_info.fname.c_str()); + COM_ERR("invalid/unparsable direction tensor layer index in %s\n", load_info.fname.c_str()); result.n_embd = -1; break; } else if (layer_idx == 0) { - CMN_ERR("invalid (zero) direction tensor layer index in %s\n", load_info.fname.c_str()); + COM_ERR("invalid (zero) direction tensor layer index in %s\n", load_info.fname.c_str()); result.n_embd = -1; break; } struct ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str()); if (tensor->type != GGML_TYPE_F32) { - CMN_ERR("invalid (non-F32) direction tensor type in %s\n", load_info.fname.c_str()); + COM_ERR("invalid (non-F32) direction tensor type in %s\n", load_info.fname.c_str()); result.n_embd = -1; break; } if (ggml_n_dims(tensor) != 1) { - CMN_ERR("invalid (non-1D) direction tensor shape in %s\n", load_info.fname.c_str()); + COM_ERR("invalid (non-1D) direction tensor shape in %s\n", load_info.fname.c_str()); result.n_embd = -1; break; } @@ -1851,7 +1851,7 @@ static common_control_vector_data common_control_vector_load_one(const common_co if (result.n_embd == -1) { result.n_embd = ggml_nelements(tensor); } else if (ggml_nelements(tensor) != result.n_embd) { - CMN_ERR("direction tensor in %s does not match previous dimensions\n", load_info.fname.c_str()); + COM_ERR("direction tensor in %s does not match previous dimensions\n", load_info.fname.c_str()); result.n_embd = -1; break; } @@ -1868,7 +1868,7 @@ static common_control_vector_data common_control_vector_load_one(const common_co } if (result.n_embd == -1) { - CMN_WRN("skipping %s due to invalid direction tensors\n", load_info.fname.c_str()); + COM_WRN("skipping %s due to invalid direction tensors\n", load_info.fname.c_str()); result.data.clear(); } @@ -1889,7 +1889,7 @@ common_control_vector_data common_control_vector_load(const std::vector(all_tokens.data() + offset), n_tokens_before_last))) { - CMN_ERR("%s", "failed to eval\n"); + COM_ERR("%s", "failed to eval\n"); return false; } n_past += n_tokens_before_last; llama_state_save_file(ctx, state_path.data(), all_tokens.data(), all_tokens.size()); - CMN_INF("saved session before last token to %s, n_new = %zu\n", state_path.data(), all_tokens.size()); + COM_INF("saved session before last token to %s, n_new = %zu\n", state_path.data(), all_tokens.size()); llama_token last_token = all_tokens.back(); llama_batch batch = llama_batch_get_one(&last_token, 1); @@ -2030,13 +2030,13 @@ bool common_prompt_batch_decode( batch.pos = &pos; if (llama_decode(ctx, batch)) { - CMN_ERR("%s", "failed to eval last token\n"); + COM_ERR("%s", "failed to eval last token\n"); return false; } n_past++; } else { if (llama_decode(ctx, llama_batch_get_one(const_cast(all_tokens.data() + offset), n_new))) { - CMN_ERR("%s", "failed to eval\n"); + COM_ERR("%s", "failed to eval\n"); return false; } n_past += n_new; diff --git a/common/common.h b/common/common.h index 40eafff31a..d56f6064b1 100644 --- a/common/common.h +++ b/common/common.h @@ -25,12 +25,12 @@ #define DIRECTORY_SEPARATOR '/' #endif // _WIN32 -#define CMN_DBG(fmt, ...) LOG_DBG("cmn %12.*s: " fmt, 12, __func__, __VA_ARGS__) -#define CMN_TRC(fmt, ...) LOG_TRC("cmn %12.*s: " fmt, 12, __func__, __VA_ARGS__) -#define CMN_INF(fmt, ...) LOG_INF("cmn %12.*s: " fmt, 12, __func__, __VA_ARGS__) -#define CMN_WRN(fmt, ...) LOG_WRN("cmn %12.*s: " fmt, 12, __func__, __VA_ARGS__) -#define CMN_ERR(fmt, ...) LOG_ERR("cmn %12.*s: " fmt, 12, __func__, __VA_ARGS__) -#define CMN_CNT(fmt, ...) LOG_CNT("" fmt, __VA_ARGS__) +#define COM_DBG(fmt, ...) LOG_DBG("cmn %12.*s: " fmt, 12, __func__, __VA_ARGS__) +#define COM_TRC(fmt, ...) LOG_TRC("cmn %12.*s: " fmt, 12, __func__, __VA_ARGS__) +#define COM_INF(fmt, ...) LOG_INF("cmn %12.*s: " fmt, 12, __func__, __VA_ARGS__) +#define COM_WRN(fmt, ...) LOG_WRN("cmn %12.*s: " fmt, 12, __func__, __VA_ARGS__) +#define COM_ERR(fmt, ...) LOG_ERR("cmn %12.*s: " fmt, 12, __func__, __VA_ARGS__) +#define COM_CNT(fmt, ...) LOG_CNT("" fmt, __VA_ARGS__) #define die(msg) do { fputs("error: " msg "\n", stderr); exit(1); } while (0) #define die_fmt(fmt, ...) do { fprintf(stderr, "error: " fmt "\n", __VA_ARGS__); exit(1); } while (0) diff --git a/common/reasoning-budget.cpp b/common/reasoning-budget.cpp index f678fce261..7da0bb1c57 100644 --- a/common/reasoning-budget.cpp +++ b/common/reasoning-budget.cpp @@ -65,12 +65,12 @@ static void common_reasoning_budget_accept(struct llama_sampler * smpl, llama_to if (ctx->start_matcher.advance(token)) { ctx->state = REASONING_BUDGET_COUNTING; ctx->remaining = ctx->budget; - CMN_TRC("activated, budget=%d tokens\n", ctx->budget); + COM_TRC("activated, budget=%d tokens\n", ctx->budget); if (ctx->remaining <= 0) { ctx->state = REASONING_BUDGET_FORCING; ctx->force_pos = 0; - CMN_TRC("%s", "budget=0, forcing immediately\n"); + COM_TRC("%s", "budget=0, forcing immediately\n"); } } break; @@ -80,7 +80,7 @@ static void common_reasoning_budget_accept(struct llama_sampler * smpl, llama_to { if (ctx->end_matcher.advance(token)) { ctx->state = REASONING_BUDGET_DONE; - CMN_TRC("%s", "deactivated (natural end)\n"); + COM_TRC("%s", "deactivated (natural end)\n"); break; } @@ -95,7 +95,7 @@ static void common_reasoning_budget_accept(struct llama_sampler * smpl, llama_to ctx->state = REASONING_BUDGET_FORCING; ctx->force_pos = 0; ctx->end_matcher.reset(); - CMN_TRC("%s", "UTF-8 complete, now forcing end sequence\n"); + COM_TRC("%s", "UTF-8 complete, now forcing end sequence\n"); } } else if (ctx->state == REASONING_BUDGET_COUNTING) { ctx->remaining--; @@ -104,11 +104,11 @@ static void common_reasoning_budget_accept(struct llama_sampler * smpl, llama_to ctx->state = REASONING_BUDGET_FORCING; ctx->force_pos = 0; ctx->end_matcher.reset(); - CMN_TRC("%s", "budget exhausted, forcing end sequence\n"); + COM_TRC("%s", "budget exhausted, forcing end sequence\n"); } else { ctx->state = REASONING_BUDGET_WAITING_UTF8; ctx->end_matcher.reset(); - CMN_TRC("%s", "budget exhausted, waiting for UTF-8 completion\n"); + COM_TRC("%s", "budget exhausted, waiting for UTF-8 completion\n"); } } } @@ -118,7 +118,7 @@ static void common_reasoning_budget_accept(struct llama_sampler * smpl, llama_to ctx->force_pos++; if (ctx->force_pos >= ctx->forced_tokens.size()) { ctx->state = REASONING_BUDGET_DONE; - CMN_TRC("%s", "forced sequence complete, done\n"); + COM_TRC("%s", "forced sequence complete, done\n"); } break; case REASONING_BUDGET_DONE: @@ -128,12 +128,12 @@ static void common_reasoning_budget_accept(struct llama_sampler * smpl, llama_to ctx->state = REASONING_BUDGET_COUNTING; ctx->remaining = ctx->budget; ctx->end_matcher.reset(); - CMN_TRC("re-activated on new start tag, budget=%d tokens\n", ctx->budget); + COM_TRC("re-activated on new start tag, budget=%d tokens\n", ctx->budget); if (ctx->remaining <= 0) { ctx->state = REASONING_BUDGET_FORCING; ctx->force_pos = 0; - CMN_TRC("%s", "budget=0, forcing immediately\n"); + COM_TRC("%s", "budget=0, forcing immediately\n"); } } break; @@ -264,7 +264,7 @@ bool common_reasoning_budget_force(struct llama_sampler * smpl) { ctx->state = REASONING_BUDGET_FORCING; ctx->force_pos = 0; ctx->end_matcher.reset(); - CMN_TRC("%s", "forced into forcing state (manual transition)\n"); + COM_TRC("%s", "forced into forcing state (manual transition)\n"); return true; }