mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-27 23:50:20 -05:00
server : hint preserve_thinking when supported by chat template
Detect if the chat template supports the 'preserve_thinking' kwarg (by checking for its presence in the template source) and print a hint suggesting users enable it via --chat-template-kwargs. This is particularly useful for models like Qwen3.6 where preserve_thinking is recommended but many users are unaware of the option. ref: https://docs.z.ai/guides/capabilities/thinking-mode#preserved-thinking Assisted-by: pi:llama.cpp/Qwen3.6-27B
This commit is contained in:
parent
0ed235ea2c
commit
eae7149824
@ -367,6 +367,13 @@ bool common_chat_templates_support_enable_thinking(const common_chat_templates *
|
||||
return params.supports_thinking;
|
||||
}
|
||||
|
||||
// Check if the template source contains the preserve_thinking kwarg.
|
||||
// This is useful for printing a hint to the user to enable it.
|
||||
bool common_chat_templates_support_preserve_thinking(const common_chat_templates * chat_templates) {
|
||||
std::string src = common_chat_templates_source(chat_templates);
|
||||
return src.find("preserve_thinking") != std::string::npos;
|
||||
}
|
||||
|
||||
std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messages) {
|
||||
std::vector<common_chat_msg> msgs;
|
||||
|
||||
|
||||
@ -347,6 +347,7 @@ common_reasoning_format common_reasoning_format_from_name(const std::string & fo
|
||||
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
|
||||
|
||||
bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates);
|
||||
// Returns true when the chat template source text contains the substring "preserve_thinking".
// This is a textual check on the template source; it is used to decide whether to print a hint.
bool common_chat_templates_support_preserve_thinking(const common_chat_templates * chat_templates);
|
||||
|
||||
// Parses a JSON array of messages in OpenAI's chat completion API format.
|
||||
std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const nlohmann::ordered_json & messages);
|
||||
|
||||
@ -1517,6 +1517,14 @@ private:
|
||||
const bool enable_thinking = params_base.enable_reasoning != 0 && template_supports_thinking;
|
||||
SRV_INF("%s: chat template, thinking = %d\n", __func__, enable_thinking);
|
||||
|
||||
// hint: suggest preserve_thinking if the template supports it but user hasn't set it
|
||||
if (params_base.use_jinja && common_chat_templates_support_preserve_thinking(chat_templates.get())) {
|
||||
auto it = params_base.default_template_kwargs.find("preserve_thinking");
|
||||
if (it == params_base.default_template_kwargs.end()) {
|
||||
SRV_WRN("%s\n", "chat template supports 'preserve_thinking' - consider using --chat-template-kwargs \"{\\\"preserve_thinking\\\": true}\"");
|
||||
}
|
||||
}
|
||||
|
||||
// IMPORTANT: chat_params is reused across sleeping / resuming states,
|
||||
// never store llama_context/llama_model pointers in chat_params,
|
||||
// as they may be invalidated after sleeping
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user