mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-27 23:50:20 -05:00
cont : CMN_ -> COM_
This commit is contained in:
parent
127898ee6d
commit
3e26ea607d
@ -225,7 +225,7 @@ bool set_process_priority(enum ggml_sched_priority prio) {
|
||||
}
|
||||
|
||||
if (!SetPriorityClass(GetCurrentProcess(), p)) {
|
||||
CMN_WRN("failed to set process priority class %d : (%d)\n", prio, (int) GetLastError());
|
||||
COM_WRN("failed to set process priority class %d : (%d)\n", prio, (int) GetLastError());
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -251,7 +251,7 @@ bool set_process_priority(enum ggml_sched_priority prio) {
|
||||
}
|
||||
|
||||
if (setpriority(PRIO_PROCESS, 0, p) != 0) {
|
||||
CMN_WRN("failed to set process priority %d : %s (%d)\n", prio, strerror(errno), errno);
|
||||
COM_WRN("failed to set process priority %d : %s (%d)\n", prio, strerror(errno), errno);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -284,14 +284,14 @@ void postprocess_cpu_params(common_cpu_params & cpuparams, const common_cpu_para
|
||||
|
||||
if (n_set && n_set < cpuparams.n_threads) {
|
||||
// Not enough set bits, may experience performance issues.
|
||||
CMN_WRN("Not enough set bits in CPU mask (%d) to satisfy requested thread count: %d\n", n_set, cpuparams.n_threads);
|
||||
COM_WRN("Not enough set bits in CPU mask (%d) to satisfy requested thread count: %d\n", n_set, cpuparams.n_threads);
|
||||
}
|
||||
}
|
||||
|
||||
bool parse_cpu_range(const std::string & range, bool (&boolmask)[GGML_MAX_N_THREADS]) {
|
||||
size_t dash_loc = range.find('-');
|
||||
if (dash_loc == std::string::npos) {
|
||||
CMN_ERR("%s", "Format of CPU range is invalid! Expected [<start>]-[<end>].\n");
|
||||
COM_ERR("%s", "Format of CPU range is invalid! Expected [<start>]-[<end>].\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -303,7 +303,7 @@ bool parse_cpu_range(const std::string & range, bool (&boolmask)[GGML_MAX_N_THRE
|
||||
} else {
|
||||
start_i = std::stoull(range.substr(0, dash_loc));
|
||||
if (start_i >= GGML_MAX_N_THREADS) {
|
||||
CMN_ERR("%s", "Start index out of bounds!\n");
|
||||
COM_ERR("%s", "Start index out of bounds!\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -313,7 +313,7 @@ bool parse_cpu_range(const std::string & range, bool (&boolmask)[GGML_MAX_N_THRE
|
||||
} else {
|
||||
end_i = std::stoull(range.substr(dash_loc + 1));
|
||||
if (end_i >= GGML_MAX_N_THREADS) {
|
||||
CMN_ERR("%s", "End index out of bounds!\n");
|
||||
COM_ERR("%s", "End index out of bounds!\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -348,7 +348,7 @@ bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREAD
|
||||
} else if (c >= 'A' && c <= 'F') {
|
||||
id -= 'A' - 10;
|
||||
} else {
|
||||
CMN_ERR("Invalid hex character '%c' at position %d\n", c, int32_t(i));
|
||||
COM_ERR("Invalid hex character '%c' at position %d\n", c, int32_t(i));
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -379,21 +379,21 @@ void common_params_print_info(const common_params & params, bool print_devices)
|
||||
#else
|
||||
const char * build_type = " (debug)";
|
||||
#endif
|
||||
CMN_TRC("%s: build %d (%s) with %s for %s%s\n", __func__, llama_build_number(), llama_commit(), llama_compiler(), llama_build_target(), build_type);
|
||||
COM_TRC("%s: build %d (%s) with %s for %s%s\n", __func__, llama_build_number(), llama_commit(), llama_compiler(), llama_build_target(), build_type);
|
||||
|
||||
CMN_INF("%s: verbosity = %d (adjust with the `-lv N` CLI arg)\n", __func__, common_log_get_verbosity_thold());
|
||||
COM_INF("%s: verbosity = %d (adjust with the `-lv N` CLI arg)\n", __func__, common_log_get_verbosity_thold());
|
||||
|
||||
// device enumeration creates a primary context on CUDA backends, skip it when the caller does not own any device
|
||||
if (print_devices) {
|
||||
CMN_TRC("%s", "device_info:\n");
|
||||
COM_TRC("%s", "device_info:\n");
|
||||
for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
|
||||
auto * dev = ggml_backend_dev_get(i);
|
||||
size_t free, total;
|
||||
ggml_backend_dev_memory(dev, &free, &total);
|
||||
CMN_TRC(" - %-8s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024);
|
||||
COM_TRC(" - %-8s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024);
|
||||
}
|
||||
}
|
||||
CMN_TRC("%s\n", common_params_get_system_info(params).c_str());
|
||||
COM_TRC("%s\n", common_params_get_system_info(params).c_str());
|
||||
}
|
||||
|
||||
std::string common_params_get_system_info(const common_params & params) {
|
||||
@ -660,7 +660,7 @@ void string_process_escapes(std::string & input) {
|
||||
bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides) {
|
||||
const char * sep = strchr(data, '=');
|
||||
if (sep == nullptr || sep - data >= 128) {
|
||||
CMN_ERR("%s: malformed KV override '%s'\n", __func__, data);
|
||||
COM_ERR("%s: malformed KV override '%s'\n", __func__, data);
|
||||
return false;
|
||||
}
|
||||
llama_model_kv_override kvo;
|
||||
@ -683,20 +683,20 @@ bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_over
|
||||
} else if (std::strcmp(sep, "false") == 0) {
|
||||
kvo.val_bool = false;
|
||||
} else {
|
||||
CMN_ERR("%s: invalid boolean value for KV override '%s'\n", __func__, data);
|
||||
COM_ERR("%s: invalid boolean value for KV override '%s'\n", __func__, data);
|
||||
return false;
|
||||
}
|
||||
} else if (strncmp(sep, "str:", 4) == 0) {
|
||||
sep += 4;
|
||||
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
|
||||
if (strlen(sep) > 127) {
|
||||
CMN_ERR("%s: malformed KV override '%s', value cannot exceed 127 chars\n", __func__, data);
|
||||
COM_ERR("%s: malformed KV override '%s', value cannot exceed 127 chars\n", __func__, data);
|
||||
return false;
|
||||
}
|
||||
strncpy(kvo.val_str, sep, 127);
|
||||
kvo.val_str[127] = '\0';
|
||||
} else {
|
||||
CMN_ERR("%s: invalid type for KV override '%s'\n", __func__, data);
|
||||
COM_ERR("%s: invalid type for KV override '%s'\n", __func__, data);
|
||||
return false;
|
||||
}
|
||||
overrides.emplace_back(std::move(kvo));
|
||||
@ -1199,8 +1199,8 @@ common_init_result::common_init_result(common_params & params, bool model_only)
|
||||
auto cparams = common_context_params_to_llama(params);
|
||||
|
||||
if (params.fit_params) {
|
||||
CMN_TRC("%s", "fitting params to device memory ...\n");
|
||||
CMN_TRC("%s", "(for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on)\n");
|
||||
COM_TRC("%s", "fitting params to device memory ...\n");
|
||||
COM_TRC("%s", "(for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on)\n");
|
||||
common_fit_params(params.model.path.c_str(), &mparams, &cparams,
|
||||
params.tensor_split,
|
||||
params.tensor_buft_overrides.data(),
|
||||
@ -1227,7 +1227,7 @@ common_init_result::common_init_result(common_params & params, bool model_only)
|
||||
llama_adapter_lora_ptr lora;
|
||||
lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
|
||||
if (lora == nullptr) {
|
||||
CMN_ERR("failed to load lora adapter '%s'\n", la.path.c_str());
|
||||
COM_ERR("failed to load lora adapter '%s'\n", la.path.c_str());
|
||||
pimpl->model.reset(model);
|
||||
return;
|
||||
}
|
||||
@ -1246,14 +1246,14 @@ common_init_result::common_init_result(common_params & params, bool model_only)
|
||||
common_init_sampler_from_model(model, params.sampling);
|
||||
|
||||
if (params.sampling.ignore_eos && llama_vocab_eos(vocab) == LLAMA_TOKEN_NULL) {
|
||||
CMN_WRN("%s", "vocab does not have an EOS token, ignoring --ignore-eos\n");
|
||||
COM_WRN("%s", "vocab does not have an EOS token, ignoring --ignore-eos\n");
|
||||
params.sampling.ignore_eos = false;
|
||||
}
|
||||
|
||||
// initialize once
|
||||
for (llama_token i = 0; i < llama_vocab_n_tokens(vocab); i++) {
|
||||
if (llama_vocab_is_eog(vocab, i)) {
|
||||
CMN_TRC("added %s logit bias = %f\n", common_token_to_piece(vocab, i).c_str(), -INFINITY);
|
||||
COM_TRC("added %s logit bias = %f\n", common_token_to_piece(vocab, i).c_str(), -INFINITY);
|
||||
params.sampling.logit_bias_eog.push_back({i, -INFINITY});
|
||||
}
|
||||
}
|
||||
@ -1291,7 +1291,7 @@ common_init_result::common_init_result(common_params & params, bool model_only)
|
||||
|
||||
llama_context * lctx = llama_init_from_model(model, cparams);
|
||||
if (lctx == NULL) {
|
||||
CMN_ERR("failed to create context with model '%s'\n", params.model.path.c_str());
|
||||
COM_ERR("failed to create context with model '%s'\n", params.model.path.c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1328,7 +1328,7 @@ common_init_result_ptr common_init_from_params(common_params & params, bool mode
|
||||
|
||||
llama_model * model = res->model();
|
||||
if (model == NULL) {
|
||||
CMN_ERR("failed to load model '%s'\n", params.model.path.c_str());
|
||||
COM_ERR("failed to load model '%s'\n", params.model.path.c_str());
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -1338,14 +1338,14 @@ common_init_result_ptr common_init_from_params(common_params & params, bool mode
|
||||
|
||||
llama_context * lctx = res->context();
|
||||
if (lctx == NULL) {
|
||||
CMN_ERR("failed to create context with model '%s'\n", params.model.path.c_str());
|
||||
COM_ERR("failed to create context with model '%s'\n", params.model.path.c_str());
|
||||
return res;
|
||||
}
|
||||
|
||||
const llama_vocab * vocab = llama_model_get_vocab(model);
|
||||
|
||||
if (params.ctx_shift && !llama_memory_can_shift(llama_get_memory(lctx))) {
|
||||
CMN_WRN("%s", "KV cache shifting is not supported for this context, disabling KV cache shifting\n");
|
||||
COM_WRN("%s", "KV cache shifting is not supported for this context, disabling KV cache shifting\n");
|
||||
params.ctx_shift = false;
|
||||
}
|
||||
|
||||
@ -1374,7 +1374,7 @@ common_init_result_ptr common_init_from_params(common_params & params, bool mode
|
||||
bool ok = true;
|
||||
|
||||
if (llama_vocab_bos(vocab) == LLAMA_TOKEN_NULL) {
|
||||
CMN_WRN("%s", "vocab does not have a BOS token, reranking will not work\n");
|
||||
COM_WRN("%s", "vocab does not have a BOS token, reranking will not work\n");
|
||||
ok = false;
|
||||
}
|
||||
|
||||
@ -1383,10 +1383,10 @@ common_init_result_ptr common_init_from_params(common_params & params, bool mode
|
||||
bool has_rerank_prompt = llama_model_chat_template(model, "rerank") != NULL;
|
||||
|
||||
if (!has_eos && !has_sep && !has_rerank_prompt) {
|
||||
CMN_WRN("%s", "vocab does not have an EOS token, SEP token, or rerank prompt. Reranking will not work\n");
|
||||
COM_WRN("%s", "vocab does not have an EOS token, SEP token, or rerank prompt. Reranking will not work\n");
|
||||
ok = false;
|
||||
} else if (!has_eos) {
|
||||
CMN_WRN("%s", "vocab does not have an EOS token, using SEP token as fallback\n");
|
||||
COM_WRN("%s", "vocab does not have an EOS token, using SEP token as fallback\n");
|
||||
}
|
||||
|
||||
if (!ok) {
|
||||
@ -1399,7 +1399,7 @@ common_init_result_ptr common_init_from_params(common_params & params, bool mode
|
||||
}
|
||||
|
||||
if (params.warmup) {
|
||||
CMN_TRC("%s", "warming up the model with an empty run - please wait ... (--no-warmup to disable)\n");
|
||||
COM_TRC("%s", "warming up the model with an empty run - please wait ... (--no-warmup to disable)\n");
|
||||
|
||||
std::vector<llama_token> tmp;
|
||||
llama_token bos = llama_vocab_bos(vocab);
|
||||
@ -1473,20 +1473,20 @@ common_context_seq_rm_type common_context_can_seq_rm(llama_context * ctx) {
|
||||
|
||||
int ret = llama_decode(ctx, llama_batch_get_one(tmp.data(), tmp.size()));
|
||||
if (ret != 0) {
|
||||
CMN_ERR("llama_decode() failed: %d\n", ret);
|
||||
COM_ERR("llama_decode() failed: %d\n", ret);
|
||||
res = COMMON_CONTEXT_SEQ_RM_TYPE_NO;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (llama_n_rs_seq(ctx) > 0) {
|
||||
CMN_TRC("%s", "the context supports bounded partial sequence removal\n");
|
||||
COM_TRC("%s", "the context supports bounded partial sequence removal\n");
|
||||
res = COMMON_CONTEXT_SEQ_RM_TYPE_RS;
|
||||
goto done;
|
||||
}
|
||||
|
||||
// try to remove the last tokens
|
||||
if (!llama_memory_seq_rm(mem, 0, 1, -1)) {
|
||||
CMN_TRC("%s", "the context does not support partial sequence removal\n");
|
||||
COM_TRC("%s", "the context does not support partial sequence removal\n");
|
||||
res = COMMON_CONTEXT_SEQ_RM_TYPE_FULL;
|
||||
goto done;
|
||||
}
|
||||
@ -1803,13 +1803,13 @@ static common_control_vector_data common_control_vector_load_one(const common_co
|
||||
};
|
||||
struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
|
||||
if (!ctx_gguf) {
|
||||
CMN_ERR("failed to load control vector file from %s\n", load_info.fname.c_str());
|
||||
COM_ERR("failed to load control vector file from %s\n", load_info.fname.c_str());
|
||||
return result;
|
||||
}
|
||||
|
||||
int32_t n_tensors = gguf_get_n_tensors(ctx_gguf);
|
||||
if (n_tensors == 0) {
|
||||
CMN_WRN("no direction tensors found in %s\n", load_info.fname.c_str());
|
||||
COM_WRN("no direction tensors found in %s\n", load_info.fname.c_str());
|
||||
}
|
||||
|
||||
for (int i = 0; i < n_tensors; i++) {
|
||||
@ -1827,23 +1827,23 @@ static common_control_vector_data common_control_vector_load_one(const common_co
|
||||
}
|
||||
}
|
||||
if (layer_idx < 0) {
|
||||
CMN_ERR("invalid/unparsable direction tensor layer index in %s\n", load_info.fname.c_str());
|
||||
COM_ERR("invalid/unparsable direction tensor layer index in %s\n", load_info.fname.c_str());
|
||||
result.n_embd = -1;
|
||||
break;
|
||||
} else if (layer_idx == 0) {
|
||||
CMN_ERR("invalid (zero) direction tensor layer index in %s\n", load_info.fname.c_str());
|
||||
COM_ERR("invalid (zero) direction tensor layer index in %s\n", load_info.fname.c_str());
|
||||
result.n_embd = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
struct ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
|
||||
if (tensor->type != GGML_TYPE_F32) {
|
||||
CMN_ERR("invalid (non-F32) direction tensor type in %s\n", load_info.fname.c_str());
|
||||
COM_ERR("invalid (non-F32) direction tensor type in %s\n", load_info.fname.c_str());
|
||||
result.n_embd = -1;
|
||||
break;
|
||||
}
|
||||
if (ggml_n_dims(tensor) != 1) {
|
||||
CMN_ERR("invalid (non-1D) direction tensor shape in %s\n", load_info.fname.c_str());
|
||||
COM_ERR("invalid (non-1D) direction tensor shape in %s\n", load_info.fname.c_str());
|
||||
result.n_embd = -1;
|
||||
break;
|
||||
}
|
||||
@ -1851,7 +1851,7 @@ static common_control_vector_data common_control_vector_load_one(const common_co
|
||||
if (result.n_embd == -1) {
|
||||
result.n_embd = ggml_nelements(tensor);
|
||||
} else if (ggml_nelements(tensor) != result.n_embd) {
|
||||
CMN_ERR("direction tensor in %s does not match previous dimensions\n", load_info.fname.c_str());
|
||||
COM_ERR("direction tensor in %s does not match previous dimensions\n", load_info.fname.c_str());
|
||||
result.n_embd = -1;
|
||||
break;
|
||||
}
|
||||
@ -1868,7 +1868,7 @@ static common_control_vector_data common_control_vector_load_one(const common_co
|
||||
}
|
||||
|
||||
if (result.n_embd == -1) {
|
||||
CMN_WRN("skipping %s due to invalid direction tensors\n", load_info.fname.c_str());
|
||||
COM_WRN("skipping %s due to invalid direction tensors\n", load_info.fname.c_str());
|
||||
result.data.clear();
|
||||
}
|
||||
|
||||
@ -1889,7 +1889,7 @@ common_control_vector_data common_control_vector_load(const std::vector<common_c
|
||||
break;
|
||||
}
|
||||
if (result.n_embd != -1 && result.n_embd != cur.n_embd) {
|
||||
CMN_ERR("control vectors in %s does not match previous dimensions\n", info.fname.c_str());
|
||||
COM_ERR("control vectors in %s does not match previous dimensions\n", info.fname.c_str());
|
||||
result.n_embd = -1;
|
||||
break;
|
||||
}
|
||||
@ -1905,7 +1905,7 @@ common_control_vector_data common_control_vector_load(const std::vector<common_c
|
||||
}
|
||||
|
||||
if (result.n_embd == -1) {
|
||||
CMN_ERR("%s", "no valid control vector files passed\n");
|
||||
COM_ERR("%s", "no valid control vector files passed\n");
|
||||
result.data.clear();
|
||||
}
|
||||
|
||||
@ -2016,13 +2016,13 @@ bool common_prompt_batch_decode(
|
||||
// memory, so we can't just remove the last token from the memory and replay the last token which
|
||||
// is the reason for this logic.
|
||||
if (llama_decode(ctx, llama_batch_get_one(const_cast<llama_token*>(all_tokens.data() + offset), n_tokens_before_last))) {
|
||||
CMN_ERR("%s", "failed to eval\n");
|
||||
COM_ERR("%s", "failed to eval\n");
|
||||
return false;
|
||||
}
|
||||
n_past += n_tokens_before_last;
|
||||
|
||||
llama_state_save_file(ctx, state_path.data(), all_tokens.data(), all_tokens.size());
|
||||
CMN_INF("saved session before last token to %s, n_new = %zu\n", state_path.data(), all_tokens.size());
|
||||
COM_INF("saved session before last token to %s, n_new = %zu\n", state_path.data(), all_tokens.size());
|
||||
|
||||
llama_token last_token = all_tokens.back();
|
||||
llama_batch batch = llama_batch_get_one(&last_token, 1);
|
||||
@ -2030,13 +2030,13 @@ bool common_prompt_batch_decode(
|
||||
batch.pos = &pos;
|
||||
|
||||
if (llama_decode(ctx, batch)) {
|
||||
CMN_ERR("%s", "failed to eval last token\n");
|
||||
COM_ERR("%s", "failed to eval last token\n");
|
||||
return false;
|
||||
}
|
||||
n_past++;
|
||||
} else {
|
||||
if (llama_decode(ctx, llama_batch_get_one(const_cast<llama_token*>(all_tokens.data() + offset), n_new))) {
|
||||
CMN_ERR("%s", "failed to eval\n");
|
||||
COM_ERR("%s", "failed to eval\n");
|
||||
return false;
|
||||
}
|
||||
n_past += n_new;
|
||||
|
||||
@ -25,12 +25,12 @@
|
||||
#define DIRECTORY_SEPARATOR '/'
|
||||
#endif // _WIN32
|
||||
|
||||
#define CMN_DBG(fmt, ...) LOG_DBG("cmn %12.*s: " fmt, 12, __func__, __VA_ARGS__)
|
||||
#define CMN_TRC(fmt, ...) LOG_TRC("cmn %12.*s: " fmt, 12, __func__, __VA_ARGS__)
|
||||
#define CMN_INF(fmt, ...) LOG_INF("cmn %12.*s: " fmt, 12, __func__, __VA_ARGS__)
|
||||
#define CMN_WRN(fmt, ...) LOG_WRN("cmn %12.*s: " fmt, 12, __func__, __VA_ARGS__)
|
||||
#define CMN_ERR(fmt, ...) LOG_ERR("cmn %12.*s: " fmt, 12, __func__, __VA_ARGS__)
|
||||
#define CMN_CNT(fmt, ...) LOG_CNT("" fmt, __VA_ARGS__)
|
||||
#define COM_DBG(fmt, ...) LOG_DBG("cmn %12.*s: " fmt, 12, __func__, __VA_ARGS__)
|
||||
#define COM_TRC(fmt, ...) LOG_TRC("cmn %12.*s: " fmt, 12, __func__, __VA_ARGS__)
|
||||
#define COM_INF(fmt, ...) LOG_INF("cmn %12.*s: " fmt, 12, __func__, __VA_ARGS__)
|
||||
#define COM_WRN(fmt, ...) LOG_WRN("cmn %12.*s: " fmt, 12, __func__, __VA_ARGS__)
|
||||
#define COM_ERR(fmt, ...) LOG_ERR("cmn %12.*s: " fmt, 12, __func__, __VA_ARGS__)
|
||||
#define COM_CNT(fmt, ...) LOG_CNT("" fmt, __VA_ARGS__)
|
||||
|
||||
#define die(msg) do { fputs("error: " msg "\n", stderr); exit(1); } while (0)
|
||||
#define die_fmt(fmt, ...) do { fprintf(stderr, "error: " fmt "\n", __VA_ARGS__); exit(1); } while (0)
|
||||
|
||||
@ -65,12 +65,12 @@ static void common_reasoning_budget_accept(struct llama_sampler * smpl, llama_to
|
||||
if (ctx->start_matcher.advance(token)) {
|
||||
ctx->state = REASONING_BUDGET_COUNTING;
|
||||
ctx->remaining = ctx->budget;
|
||||
CMN_TRC("activated, budget=%d tokens\n", ctx->budget);
|
||||
COM_TRC("activated, budget=%d tokens\n", ctx->budget);
|
||||
|
||||
if (ctx->remaining <= 0) {
|
||||
ctx->state = REASONING_BUDGET_FORCING;
|
||||
ctx->force_pos = 0;
|
||||
CMN_TRC("%s", "budget=0, forcing immediately\n");
|
||||
COM_TRC("%s", "budget=0, forcing immediately\n");
|
||||
}
|
||||
}
|
||||
break;
|
||||
@ -80,7 +80,7 @@ static void common_reasoning_budget_accept(struct llama_sampler * smpl, llama_to
|
||||
{
|
||||
if (ctx->end_matcher.advance(token)) {
|
||||
ctx->state = REASONING_BUDGET_DONE;
|
||||
CMN_TRC("%s", "deactivated (natural end)\n");
|
||||
COM_TRC("%s", "deactivated (natural end)\n");
|
||||
break;
|
||||
}
|
||||
|
||||
@ -95,7 +95,7 @@ static void common_reasoning_budget_accept(struct llama_sampler * smpl, llama_to
|
||||
ctx->state = REASONING_BUDGET_FORCING;
|
||||
ctx->force_pos = 0;
|
||||
ctx->end_matcher.reset();
|
||||
CMN_TRC("%s", "UTF-8 complete, now forcing end sequence\n");
|
||||
COM_TRC("%s", "UTF-8 complete, now forcing end sequence\n");
|
||||
}
|
||||
} else if (ctx->state == REASONING_BUDGET_COUNTING) {
|
||||
ctx->remaining--;
|
||||
@ -104,11 +104,11 @@ static void common_reasoning_budget_accept(struct llama_sampler * smpl, llama_to
|
||||
ctx->state = REASONING_BUDGET_FORCING;
|
||||
ctx->force_pos = 0;
|
||||
ctx->end_matcher.reset();
|
||||
CMN_TRC("%s", "budget exhausted, forcing end sequence\n");
|
||||
COM_TRC("%s", "budget exhausted, forcing end sequence\n");
|
||||
} else {
|
||||
ctx->state = REASONING_BUDGET_WAITING_UTF8;
|
||||
ctx->end_matcher.reset();
|
||||
CMN_TRC("%s", "budget exhausted, waiting for UTF-8 completion\n");
|
||||
COM_TRC("%s", "budget exhausted, waiting for UTF-8 completion\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -118,7 +118,7 @@ static void common_reasoning_budget_accept(struct llama_sampler * smpl, llama_to
|
||||
ctx->force_pos++;
|
||||
if (ctx->force_pos >= ctx->forced_tokens.size()) {
|
||||
ctx->state = REASONING_BUDGET_DONE;
|
||||
CMN_TRC("%s", "forced sequence complete, done\n");
|
||||
COM_TRC("%s", "forced sequence complete, done\n");
|
||||
}
|
||||
break;
|
||||
case REASONING_BUDGET_DONE:
|
||||
@ -128,12 +128,12 @@ static void common_reasoning_budget_accept(struct llama_sampler * smpl, llama_to
|
||||
ctx->state = REASONING_BUDGET_COUNTING;
|
||||
ctx->remaining = ctx->budget;
|
||||
ctx->end_matcher.reset();
|
||||
CMN_TRC("re-activated on new start tag, budget=%d tokens\n", ctx->budget);
|
||||
COM_TRC("re-activated on new start tag, budget=%d tokens\n", ctx->budget);
|
||||
|
||||
if (ctx->remaining <= 0) {
|
||||
ctx->state = REASONING_BUDGET_FORCING;
|
||||
ctx->force_pos = 0;
|
||||
CMN_TRC("%s", "budget=0, forcing immediately\n");
|
||||
COM_TRC("%s", "budget=0, forcing immediately\n");
|
||||
}
|
||||
}
|
||||
break;
|
||||
@ -264,7 +264,7 @@ bool common_reasoning_budget_force(struct llama_sampler * smpl) {
|
||||
ctx->state = REASONING_BUDGET_FORCING;
|
||||
ctx->force_pos = 0;
|
||||
ctx->end_matcher.reset();
|
||||
CMN_TRC("%s", "forced into forcing state (manual transition)\n");
|
||||
COM_TRC("%s", "forced into forcing state (manual transition)\n");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user