diff --git a/.gitignore b/.gitignore index f1f9207b..b2996d5a 100644 --- a/.gitignore +++ b/.gitignore @@ -96,7 +96,7 @@ lcov-report/ !/examples/sycl/*.sh # Server Web UI temporary files - +/examples/server/webui/node_modules /examples/server/webui_llamacpp/.svelte-kit /examples/server/webui_llamacpp/node_modules /examples/server/webui_llamacpp/build diff --git a/README.md b/README.md index 239cd881..91a0e006 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,7 @@ That's all! Open [http://127.0.0.1:8080](http://127.0.0.1:8080) in Browser start ### Model Support -LlaMA-3-Nemotron [PR 377](https://github.com/ikawrakow/ik_llama.cpp/pull/377), Qwen3 [PR 355](https://github.com/ikawrakow/ik_llama.cpp/pull/355), GLM-4 [PR 344](https://github.com/ikawrakow/ik_llama.cpp/pull/344), Command-A [PR 341](https://github.com/ikawrakow/ik_llama.cpp/pull/341), bitnet-b1.58-2B-4T [PR 337](https://github.com/ikawrakow/ik_llama.cpp/pull/337), LLaMA-4 [PR 321](https://github.com/ikawrakow/ik_llama.cpp/pull/321), Gemma3 [PR 276](https://github.com/ikawrakow/ik_llama.cpp/pull/276), DeepSeek-V3 [PR 176](https://github.com/ikawrakow/ik_llama.cpp/pull/176), Kimi-2 [PR 609](https://github.com/ikawrakow/ik_llama.cpp/pull/609), dots.llm1 [PR 573](https://github.com/ikawrakow/ik_llama.cpp/pull/573), Hunyuan [PR 565](https://github.com/ikawrakow/ik_llama.cpp/pull/565), GLM-4.5 [PR 668](https://github.com/ikawrakow/ik_llama.cpp/pull/668) (4.5/4.6/4.7/AIR), Ernie 4.5 MOE and 0.3B [PR 759](https://github.com/ikawrakow/ik_llama.cpp/pull/759), grok-2 [PR 782](https://github.com/ikawrakow/ik_llama.cpp/pull/782), Ling/Ring (Bailing-MoE2) [PR 833](https://github.com/ikawrakow/ik_llama.cpp/pull/833), Qwen3-VL [PR 883](https://github.com/ikawrakow/ik_llama.cpp/pull/883), SmolLM3 [PR 934](https://github.com/ikawrakow/ik_llama.cpp/pull/934), GigaChat3 [PR 995](https://github.com/ikawrakow/ik_llama.cpp/pull/995), ministral3 [PR 1030](https://github.com/ikawrakow/ik_llama.cpp/pull/1030), Mimo-V2-Flash [PR 
1096](https://github.com/ikawrakow/ik_llama.cpp/pull/1096), GLM-4.7-Flash [PR 1168](https://github.com/ikawrakow/ik_llama.cpp/pull/1168), Seed-OSS [PR 1218](https://github.com/ikawrakow/ik_llama.cpp/pull/1218), Step-3.5-Flash [PR 1231](https://github.com/ikawrakow/ik_llama.cpp/pull/1231), GLM-5 [PR 1268](https://github.com/ikawrakow/ik_llama.cpp/pull/1268), Qwen3-Next [PR 1266](https://github.com/ikawrakow/ik_llama.cpp/pull/1266), Qwen3.5-MoE [PR 1288](https://github.com/ikawrakow/ik_llama.cpp/pull/1288) and dense Qwen-3.5 [1326](https://github.com/ikawrakow/ik_llama.cpp/pull/1326), Mistral 4 [PR 1450](https://github.com/ikawrakow/ik_llama.cpp/pull/1450), Bonsai 1-bit [PR 1570](https://github.com/ikawrakow/ik_llama.cpp/pull/1570), Gemma4 [PR 1581](https://github.com/ikawrakow/ik_llama.cpp/pull/1581), Mimo-2.5 [PR 1723](https://github.com/ikawrakow/ik_llama.cpp/pull/1723) +LlaMA-3-Nemotron [PR 377](https://github.com/ikawrakow/ik_llama.cpp/pull/377), Qwen3 [PR 355](https://github.com/ikawrakow/ik_llama.cpp/pull/355), GLM-4 [PR 344](https://github.com/ikawrakow/ik_llama.cpp/pull/344), Command-A [PR 341](https://github.com/ikawrakow/ik_llama.cpp/pull/341), bitnet-b1.58-2B-4T [PR 337](https://github.com/ikawrakow/ik_llama.cpp/pull/337), LLaMA-4 [PR 321](https://github.com/ikawrakow/ik_llama.cpp/pull/321), Gemma3 [PR 276](https://github.com/ikawrakow/ik_llama.cpp/pull/276), DeepSeek-V3 [PR 176](https://github.com/ikawrakow/ik_llama.cpp/pull/176), Kimi-2 [PR 609](https://github.com/ikawrakow/ik_llama.cpp/pull/609), dots.llm1 [PR 573](https://github.com/ikawrakow/ik_llama.cpp/pull/573), Hunyuan [PR 565](https://github.com/ikawrakow/ik_llama.cpp/pull/565), GLM-4.5 [PR 668](https://github.com/ikawrakow/ik_llama.cpp/pull/668) (4.5/4.6/4.7/AIR), Ernie 4.5 MOE and 0.3B [PR 759](https://github.com/ikawrakow/ik_llama.cpp/pull/759), grok-2 [PR 782](https://github.com/ikawrakow/ik_llama.cpp/pull/782), Ling/Ring (Bailing-MoE2) [PR 
833](https://github.com/ikawrakow/ik_llama.cpp/pull/833), Qwen3-VL [PR 883](https://github.com/ikawrakow/ik_llama.cpp/pull/883), SmolLM3 [PR 934](https://github.com/ikawrakow/ik_llama.cpp/pull/934), GigaChat3 [PR 995](https://github.com/ikawrakow/ik_llama.cpp/pull/995), ministral3 [PR 1030](https://github.com/ikawrakow/ik_llama.cpp/pull/1030), Mimo-V2-Flash [PR 1096](https://github.com/ikawrakow/ik_llama.cpp/pull/1096), GLM-4.7-Flash [PR 1168](https://github.com/ikawrakow/ik_llama.cpp/pull/1168), Seed-OSS [PR 1218](https://github.com/ikawrakow/ik_llama.cpp/pull/1218), Step-3.5-Flash [PR 1231](https://github.com/ikawrakow/ik_llama.cpp/pull/1231), GLM-5 [PR 1268](https://github.com/ikawrakow/ik_llama.cpp/pull/1268), Qwen3-Next [PR 1266](https://github.com/ikawrakow/ik_llama.cpp/pull/1266), Qwen3.5-MoE [PR 1288](https://github.com/ikawrakow/ik_llama.cpp/pull/1288) and dense Qwen-3.5 [1326](https://github.com/ikawrakow/ik_llama.cpp/pull/1326), Mistral 4 [PR 1450](https://github.com/ikawrakow/ik_llama.cpp/pull/1450), Bonsai 1-bit [PR 1570](https://github.com/ikawrakow/ik_llama.cpp/pull/1570), Gemma4 [PR 1581](https://github.com/ikawrakow/ik_llama.cpp/pull/1581), Mimo-2.5 [PR 1723](https://github.com/ikawrakow/ik_llama.cpp/pull/1723), JetBrains Mellum2 [PR 1919](https://github.com/ikawrakow/ik_llama.cpp/pull/1919), Poolside Laguna XS.2 [PR 1911](https://github.com/ikawrakow/ik_llama.cpp/pull/1911), Cohere2-MoE North Mini Code [PR 1945](https://github.com/ikawrakow/ik_llama.cpp/pull/1945) ### Quantization @@ -125,6 +125,7 @@ Implemented for Zen4, AVX2, ARM_NEON, Metal, CUDA [PR 682](https://github.com/ik * `IQ1_M` [PR 327](https://github.com/ikawrakow/ik_llama.cpp/pull/327), `IQ2_XS` [PR 312](https://github.com/ikawrakow/ik_llama.cpp/pull/312), `Q2_K, Q4_K, Q5_K, Q4_1, Q5_1` [PR 302](https://github.com/ikawrakow/ik_llama.cpp/pull/302), `Q4_0, Q5_0, Q6_0, Q3_K, Q6_K, IQ4_XS, IQ4_NL` [PR 295](https://github.com/ikawrakow/ik_llama.cpp/pull/295) * Low perplexity `Q4_0` KV 
cache [PR 1547](https://github.com/ikawrakow/ik_llama.cpp/pull/1547) [PR 1556](https://github.com/ikawrakow/ik_llama.cpp/pull/1556) +* MTP: option to use re-quantized output tensor `--mtp-requantize-output-tensor new_type` [PR 1809](https://github.com/ikawrakow/ik_llama.cpp/pull/1809) #### Quantization performance improvements @@ -143,16 +144,16 @@ Implemented for Zen4, AVX2, ARM_NEON, Metal, CUDA [PR 682](https://github.com/ik * New split mode "graph" for multi GPU setups [PR 1022](https://github.com/ikawrakow/ik_llama.cpp/pull/1022) * Fused delta-net for Qwen3-Next and Qwen3.5-MoE [PR 1315](https://github.com/ikawrakow/ik_llama.cpp/pull/1315) [PR 1333](https://github.com/ikawrakow/ik_llama.cpp/pull/1333) [PR 1362](https://github.com/ikawrakow/ik_llama.cpp/pull/1362) [PR 1373](https://github.com/ikawrakow/ik_llama.cpp/pull/1373) * Hadamard transforms for K-cache and V-cache [PR 1033](https://github.com/ikawrakow/ik_llama.cpp/pull/1033) [PR 1034](https://github.com/ikawrakow/ik_llama.cpp/pull/1034) [PR 1527](https://github.com/ikawrakow/ik_llama.cpp/pull/1527) -* Auto-fit offloaded tensors to available VRAM (MoE and dense models) [PR 1501](https://github.com/ikawrakow/ik_llama.cpp/pull/1501) [PR 1504](https://github.com/ikawrakow/ik_llama.cpp/pull/1504) +* Auto-fit offloaded tensors to available VRAM (MoE and dense models) [PR 1501](https://github.com/ikawrakow/ik_llama.cpp/pull/1501) [PR 1504](https://github.com/ikawrakow/ik_llama.cpp/pull/1504), allows per GPU fit margin [PR 1872](https://github.com/ikawrakow/ik_llama.cpp/pull/1872) * Checkpoints for recurrent models [PR 1310](https://github.com/ikawrakow/ik_llama.cpp/pull/1310) [PR 1398](https://github.com/ikawrakow/ik_llama.cpp/pull/1398) -* MTP decoding support for popular models like GLM-4.x MoE [1270](https://github.com/ikawrakow/ik_llama.cpp/pull/1270), Qwen 3.5/3.6 [1698](https://github.com/ikawrakow/ik_llama.cpp/pull/1698) [1745](https://github.com/ikawrakow/ik_llama.cpp/pull/1745), Gemma 4 
[1744](https://github.com/ikawrakow/ik_llama.cpp/pull/1744) +* MTP decoding support for popular models like GLM-4.x MoE [1270](https://github.com/ikawrakow/ik_llama.cpp/pull/1270), Qwen 3.5/3.6 [1698](https://github.com/ikawrakow/ik_llama.cpp/pull/1698) [1745](https://github.com/ikawrakow/ik_llama.cpp/pull/1745), Gemma 4 [1744](https://github.com/ikawrakow/ik_llama.cpp/pull/1744), GLM 5 [1890](https://github.com/ikawrakow/ik_llama.cpp/pull/1890) * Self speculative decoding, ngram [PR 1261](https://github.com/ikawrakow/ik_llama.cpp/pull/1261), suffix [PR 1646](https://github.com/ikawrakow/ik_llama.cpp/pull/1646) * String ban function for all completions [PR 1185](https://github.com/ikawrakow/ik_llama.cpp/pull/1185) [PR 1243](https://github.com/ikawrakow/ik_llama.cpp/pull/1243) * Expiring Logit Bias [PR 1731](https://github.com/ikawrakow/ik_llama.cpp/pull/1731) * OpenAI `/v1/responses` API endpoint [PR 1184](https://github.com/ikawrakow/ik_llama.cpp/pull/1184) * Function call support [PR 628](https://github.com/ikawrakow/ik_llama.cpp/pull/628) * jinja template support [PR 677](https://github.com/ikawrakow/ik_llama.cpp/pull/677) -* Webui: New Features for Conversations, Settings, and Chat Messages [PR 618](https://github.com/ikawrakow/ik_llama.cpp/pull/618) +* Webui: New Features for Conversations, Settings, and Chat Messages [PR 618](https://github.com/ikawrakow/ik_llama.cpp/pull/618), MCP [PR 1904](https://github.com/ikawrakow/ik_llama.cpp/pull/1904) * Dynamic control vector management endpoints [PR 1223](https://github.com/ikawrakow/ik_llama.cpp/pull/1223) * Legacy quants conversion schemes in `convert_hf_to_gguf.py` [PR 449](https://github.com/ikawrakow/ik_llama.cpp/pull/449), `Q6_0` in [PR 483](https://github.com/ikawrakow/ik_llama.cpp/pull/483) * Adaptive-P Sampler [PR 1100](https://github.com/ikawrakow/ik_llama.cpp/pull/1100) implemented as designed by it's author; supported on Webui diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 
e2e5c60a..920f9f31 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -71,6 +71,7 @@ add_library(${TARGET} STATIC train.cpp log.cpp log.h + http.h ngram-cache.cpp ngram-cache.h ngram-map.cpp diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index 6021fc4e..d78d4a3c 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -93,7 +93,8 @@ common_peg_arena autoparser::build_parser(const generation_params & inputs) cons } return build_chat_peg_parser([&](common_chat_peg_builder & p) { parser_build_context ctx(p, inputs); - bool extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE; + bool extract_reasoning = + inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE && (inputs.enable_thinking || !reasoning.start.empty()); ctx.extracting_reasoning = extract_reasoning && reasoning.mode != reasoning_mode::NONE; ctx.content = &content; @@ -155,6 +156,16 @@ common_peg_parser analyze_content::build_parser(parser_build_context & ctx) cons } return p.content(p.until(start)) + start + p.content(p.until(end)) + end + p.end(); } + if (is_end_delimited()) { + auto content = p.choice({ + p.content(p.until(end)) + p.optspace(end), + p.content(p.rest()), + }); + if (ctx.extracting_reasoning) { + return ctx.reasoning_parser + p.space() + content + p.end(); + } + return content + p.end(); + } return ctx.reasoning_parser + p.content(p.rest()) + p.end(); } @@ -216,7 +227,6 @@ common_peg_parser analyze_tools::build_tool_parser_json_native(parser_build_cont auto wrapped_content = ctx.content->build_optional_wrapped(ctx); return ctx.reasoning_parser + wrapped_content + tools_parser + p.end(); } - std::string tool_start = "{"; if (!format.section_start.empty()) { tool_start = format.section_start; @@ -224,7 +234,12 @@ common_peg_parser analyze_tools::build_tool_parser_json_native(parser_build_cont tool_start = format.per_call_start; } - return ctx.reasoning_parser + 
p.optional(p.content(p.until(tool_start))) + tools_parser + p.end(); + if (!ctx.content || !ctx.content->is_end_delimited()) { + return ctx.reasoning_parser + p.optional(p.content(p.until(tool_start))) + tools_parser + p.end(); + } + + auto content_end = p.optional(p.optspace(ctx.content->end)); + return ctx.reasoning_parser + p.space() + p.optional(p.content(p.until(tool_start))) + tools_parser + content_end + p.end(); } common_peg_parser analyze_tools::build_func_parser(common_chat_peg_builder & p, const std::string & name, @@ -333,7 +348,13 @@ common_peg_parser analyze_tools::build_tool_parser_tag_json(parser_build_context std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start; auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker); - return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); + + if (!ctx.content || !ctx.content->is_end_delimited()) { + return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); + } + + auto content_end = p.optional(p.optspace(ctx.content->end)); + return ctx.reasoning_parser + p.space() + p.optional(p.content(content_before_tools)) + tool_calls + content_end + p.end(); } common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_context & ctx) const { @@ -464,7 +485,13 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start; auto content_before_tools = trigger_marker.empty() ? 
p.eps() : p.until(trigger_marker); - return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); + + if (!ctx.content || !ctx.content->is_end_delimited()) { + return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); + } + + auto content_end = p.optional(p.optspace(ctx.content->end)); + return ctx.reasoning_parser + p.space() + p.optional(p.content(content_before_tools)) + tool_calls + content_end + p.end(); } } // namespace autoparser diff --git a/common/chat-auto-parser.h b/common/chat-auto-parser.h index 6c547409..406afbee 100644 --- a/common/chat-auto-parser.h +++ b/common/chat-auto-parser.h @@ -101,6 +101,7 @@ enum class content_mode { PLAIN, // No content markers ALWAYS_WRAPPED, // Content always wrapped with markers WRAPPED_WITH_REASONING, // Content wrapped only when reasoning present + END_DELIMITED, // Content is terminated by a marker but has no start marker }; inline std::ostream & operator<<(std::ostream & os, const content_mode & mode) { @@ -111,6 +112,8 @@ inline std::ostream & operator<<(std::ostream & os, const content_mode & mode) { return os << "ALWAYS_WRAPPED"; case content_mode::WRAPPED_WITH_REASONING: return os << "WRAPPED_WITH_REASONING"; + case content_mode::END_DELIMITED: + return os << "END_DELIMITED"; default: return os << "UNKNOWN"; } @@ -286,6 +289,7 @@ struct analyze_content : analyze_base { common_peg_parser build_parser(parser_build_context & ctx) const override; bool is_always_wrapped() const; + bool is_end_delimited() const; common_peg_parser build_optional_wrapped(parser_build_context & ctx) const; }; diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp index 9c7c9678..8eba8bff 100644 --- a/common/chat-diff-analyzer.cpp +++ b/common/chat-diff-analyzer.cpp @@ -45,6 +45,28 @@ static std::vector in the generation prompt, so generated + // reasoning starts immediately and is delimited only by . 
+ [](const common_chat_template & tmpl, autoparser & analysis) -> void { + if (tmpl.src.find("laguna_glm_thinking") != std::string::npos && + tmpl.src.find("{{- \"\\n\" -}}") != std::string::npos && + tmpl.src.find("{{- '' -}}") != std::string::npos) { + analysis.reasoning.mode = reasoning_mode::TAG_BASED; + analysis.reasoning.start = ""; + analysis.reasoning.end = ""; + analysis.content.mode = content_mode::END_DELIMITED; + analysis.content.end = ""; + if (std::find(analysis.preserved_tokens.begin(), analysis.preserved_tokens.end(), "") == + analysis.preserved_tokens.end()) { + analysis.preserved_tokens.push_back(""); + } + if (std::find(analysis.preserved_tokens.begin(), analysis.preserved_tokens.end(), "") == + analysis.preserved_tokens.end()) { + analysis.preserved_tokens.push_back(""); + } + LOG_DBG(ANSI_ORANGE "[Patch: Poolside Laguna thinking template]\n" ANSI_RESET); + } + }, // Granite 3.3, with separate reasoning and content markers [](const common_chat_template & tmpl, autoparser & analysis) -> void { if (tmpl.src.find("Write your thoughts between and write your response between " @@ -552,6 +574,10 @@ bool analyze_content::is_always_wrapped() const { return mode == content_mode::ALWAYS_WRAPPED && !start.empty() && !end.empty(); } +bool analyze_content::is_end_delimited() const { + return mode == content_mode::END_DELIMITED && !end.empty(); +} + analyze_tools::analyze_tools(const common_chat_template & tmpl, const jinja::caps & caps, const analyze_reasoning & reasoning) diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp index 79274feb..a2865d94 100644 --- a/common/chat-peg-parser.cpp +++ b/common/chat-peg-parser.cpp @@ -785,7 +785,22 @@ common_peg_parser common_chat_peg_builder::prefix(const std::string & s, const s if (delimiter.empty()) { return literal(s); } - return literal(s.substr(0, s.rfind(delimiter))); + auto pos = s.rfind(delimiter); + if (pos == std::string::npos) { + // The generation prompt may force-open the reasoning block 
without the + // whitespace that surrounds the detected tag (e.g. a prompt ending in + // '' while history renders '\n'). Only strip when the + // prompt ends exactly with the trimmed tag, so prompts with trailing + // whitespace after the tag (e.g. '\n') keep their behavior. + if (auto b = delimiter.find_first_not_of(" \t\n\r"); b != std::string::npos) { + auto e = delimiter.find_last_not_of (" \t\n\r"); + auto trimmed = delimiter.substr(b, e - b + 1); + if (s.size() >= trimmed.size() && s.compare(s.size() - trimmed.size(), trimmed.size(), trimmed) == 0) { + pos = s.size() - trimmed.size(); + } + } + } + return literal(s.substr(0, pos)); } common_peg_parser common_chat_peg_builder::optspace(const std::string & tag) { diff --git a/common/common.cpp b/common/common.cpp index 99c517e4..544eed2c 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -124,7 +124,16 @@ static int32_t common_speculative_stage_effective_n_min( std::vector common_params_speculative::get_resolved_stages() const { if (!stages.empty()) { - return stages; + std::vector resolved; + resolved.reserve(stages.size()); + + for (const auto & stage : stages) { + if (stage.type != COMMON_SPECULATIVE_TYPE_NONE) { + resolved.push_back(stage); + } + } + + return resolved; } if (type == COMMON_SPECULATIVE_TYPE_NONE) { @@ -167,6 +176,9 @@ common_params_speculative common_params_speculative::with_stage_overrides(const if (stage.has_suffix_max_depth_override()) { result.suffix_max_depth = stage.suffix_max_depth; } + if (stage.has_suffix_corpus_override()) { + result.suffix_corpus = stage.suffix_corpus; + } result.n_max = std::max(result.n_max, 0); result.n_min = std::max(0, std::min(result.n_min, result.n_max)); @@ -186,10 +198,39 @@ bool common_params_speculative::has_stage_type(common_speculative_type stage_typ }); } +void common_params_speculative::remove_stage_type(common_speculative_type stage_type) { + stages.erase(std::remove_if(stages.begin(), stages.end(), [stage_type](const 
common_speculative_stage_params & stage) { + return stage.type == stage_type; + }), stages.end()); + + if (type == stage_type) { + const auto resolved = get_resolved_stages(); + type = resolved.empty() ? COMMON_SPECULATIVE_TYPE_NONE : resolved.front().type; + } +} + bool common_params_speculative::has_composite_stage_chain() const { return get_resolved_stages().size() > 1; } +bool common_params_speculative::needs_dft_model() const { + return has_stage_type(COMMON_SPECULATIVE_TYPE_DRAFT) || + has_stage_type(COMMON_SPECULATIVE_TYPE_DFLASH) || + (has_stage_type(COMMON_SPECULATIVE_TYPE_MTP) && has_dft()); +} + +void common_params_speculative::clear_dft() { + if (model_dft != nullptr) { + llama_free_model(model_dft); + model_dft = nullptr; + } + + model.clear(); + params.clear(); + mparams_dft.path.clear(); + cparams_dft = llama_context_default_params(); +} + int32_t common_params_speculative::get_max_stage_n_max() const { const auto resolved = get_resolved_stages(); if (resolved.empty()) { @@ -619,28 +660,20 @@ static void common_speculative_finalize_stages(gpt_params & params) { auto & spec = params.speculative; if (!spec.stages.empty()) { - spec.type = spec.stages.front().type; + const auto resolved = spec.get_resolved_stages(); + if (resolved.size() != spec.stages.size()) { + spec.stages = resolved; + } + + spec.type = resolved.empty() ? 
COMMON_SPECULATIVE_TYPE_NONE : resolved.front().type; params.has_mtp = spec.has_stage_type(COMMON_SPECULATIVE_TYPE_MTP); return; } - const bool wants_mtp = params.has_mtp; - const bool wants_draft = spec.has_dft(); - if (spec.type != COMMON_SPECULATIVE_TYPE_NONE) { spec.stages.push_back({ .type = spec.type }); - - if (common_speculative_type_is_self_spec(spec.type)) { - if (wants_mtp) { - spec.stages.push_back({ .type = COMMON_SPECULATIVE_TYPE_MTP }); - } else if (wants_draft) { - spec.stages.push_back({ .type = COMMON_SPECULATIVE_TYPE_DRAFT }); - } - } - } else if (wants_mtp) { + } else if (params.has_mtp) { spec.stages.push_back({ .type = COMMON_SPECULATIVE_TYPE_MTP }); - } else if (wants_draft) { - spec.stages.push_back({ .type = COMMON_SPECULATIVE_TYPE_DRAFT }); } spec.type = spec.stages.empty() ? COMMON_SPECULATIVE_TYPE_NONE : spec.stages.front().type; @@ -834,13 +867,16 @@ static std::string common_normalize_spec_stage_key(std::string key) { std::replace(key.begin(), key.end(), '-', '_'); - if (key.rfind("spec_", 0) == 0) { - key.erase(0, 5); - } - return key; } +static std::invalid_argument common_speculative_legacy_option_error( + const std::string & arg, + const std::string & replacement) { + return std::invalid_argument( + "legacy speculative option '" + arg + "' is disabled; use " + replacement); +} + static void common_speculative_remove_explicit_stage(common_params_speculative & params, common_speculative_type type) { params.stages.erase(std::remove_if(params.stages.begin(), params.stages.end(), [type](const common_speculative_stage_params & stage) { return stage.type == type; @@ -857,21 +893,21 @@ static void common_speculative_stage_apply_kv( const std::string & value_raw) { const std::string key = common_normalize_spec_stage_key(key_raw); - if (key == "draft" || key == "draft_max" || key == "draft_n" || key == "n_max") { + if (key == "n_max") { stage.n_max = std::stoi(value_raw); if (stage.n_max < 0) { throw std::invalid_argument("speculative stage 
n_max must be >= 0"); } return; } - if (key == "draft_min" || key == "draft_n_min" || key == "n_min") { + if (key == "n_min") { stage.n_min = std::stoi(value_raw); if (stage.n_min < 0) { throw std::invalid_argument("speculative stage n_min must be >= 0"); } return; } - if (key == "draft_p_min" || key == "p_min") { + if (key == "p_min") { stage.p_min = std::stof(value_raw); if (stage.p_min < 0.0f) { throw std::invalid_argument("speculative stage p_min must be >= 0"); @@ -906,7 +942,7 @@ static void common_speculative_stage_apply_kv( } return; } - if (key == "suffix_min_match_len" || key == "suffix_pattern_len") { + if (key == "suffix_min_match_len") { stage.suffix_min_match_len = std::stoi(value_raw); if (stage.suffix_min_match_len < 1) { throw std::invalid_argument("speculative stage suffix_min_match_len must be at least 1"); @@ -920,10 +956,100 @@ static void common_speculative_stage_apply_kv( } return; } + if (key == "suffix_corpus") { + stage.suffix_corpus = value_raw; + if (stage.suffix_corpus.empty()) { + throw std::invalid_argument("speculative stage suffix_corpus must not be empty"); + } + return; + } throw std::invalid_argument("unknown speculative stage parameter: " + key_raw); } +static std::vector common_speculative_stage_split_kvs(const std::string & values) { + std::vector result; + std::string current; + char quote = '\0'; + bool escaped = false; + + for (char ch : values) { + if (escaped) { + current += ch; + escaped = false; + continue; + } + + if (ch == '\\') { + current += ch; + escaped = true; + continue; + } + + if (quote != '\0') { + if (ch == quote) { + quote = '\0'; + } + current += ch; + continue; + } + + if ((ch == '\'' || ch == '"') && !current.empty() && current.back() == '=') { + quote = ch; + current += ch; + continue; + } + + if (ch == ',') { + result.push_back(current); + current.clear(); + continue; + } + + current += ch; + } + + if (quote != '\0') { + throw std::invalid_argument("invalid speculative stage option list: unterminated 
quote"); + } + + result.push_back(current); + return result; +} + +static std::string common_speculative_stage_unescape_value(const std::string & value_raw) { + std::string value = value_raw; + if (value.size() >= 2) { + const char first = value.front(); + const char last = value.back(); + if ((first == '\'' && last == '\'') || (first == '"' && last == '"')) { + value = value.substr(1, value.size() - 2); + } + } + + std::string result; + result.reserve(value.size()); + + for (size_t i = 0; i < value.size(); ++i) { + const char ch = value[i]; + if (ch != '\\' || i + 1 >= value.size()) { + result += ch; + continue; + } + + const char next = value[i + 1]; + if (next == '\\' || next == ',' || next == '\'' || next == '"') { + result += next; + ++i; + continue; + } + + result += ch; + } + + return result; +} + static common_speculative_stage_params common_speculative_stage_from_arg(const std::string & value) { const auto spec_pos = value.find(':'); const std::string type_name = value.substr(0, spec_pos); @@ -938,15 +1064,13 @@ static common_speculative_stage_params common_speculative_stage_from_arg(const s return stage; } - std::stringstream ss(value.substr(spec_pos + 1)); - std::string kv; - while (std::getline(ss, kv, ',')) { + for (const std::string & kv : common_speculative_stage_split_kvs(value.substr(spec_pos + 1))) { const auto eq_pos = kv.find('='); if (eq_pos == std::string::npos) { throw std::invalid_argument("invalid speculative stage option: " + kv); } - common_speculative_stage_apply_kv(stage, kv.substr(0, eq_pos), kv.substr(eq_pos + 1)); + common_speculative_stage_apply_kv(stage, kv.substr(0, eq_pos), common_speculative_stage_unescape_value(kv.substr(eq_pos + 1))); } return stage; @@ -1393,18 +1517,18 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa } if (arg == "--draft" || arg == "--draft-max" || arg == "--draft-n") { CHECK_ARG - params.speculative.n_max = std::stoi(argv[i]); - return true; + throw 
common_speculative_legacy_option_error(arg, + "the value inside the relevant repeated --spec-type entry, e.g. --spec-type mtp:n_max=" + std::string(argv[i]) + ",p_min=0.0 or --spec-type draft:n_max=" + std::string(argv[i]) + ",p_min=0.0"); } if (arg == "--draft-min" || arg == "--draft-n-min") { CHECK_ARG - params.speculative.n_min = std::stoi(argv[i]); - return true; + throw common_speculative_legacy_option_error(arg, + "the value inside the relevant repeated --spec-type entry using the canonical key n_min, e.g. --spec-type ngram-mod:n_min=" + std::string(argv[i])); } if (arg == "--draft-p-min") { CHECK_ARG - params.speculative.p_min = std::stof(argv[i]); - return true; + throw common_speculative_legacy_option_error(arg, + "the value inside the relevant repeated --spec-type entry using the canonical key p_min, e.g. --spec-type mtp:p_min=" + std::string(argv[i])); } if (arg == "--recurrent-ckpt-mode") { CHECK_ARG @@ -1459,91 +1583,46 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa } if (arg == "--spec-stage") { CHECK_ARG - - if (params.speculative.stages.empty()) { - if (params.speculative.type != COMMON_SPECULATIVE_TYPE_NONE) { - throw std::invalid_argument("--spec-stage cannot be combined with --spec-type; use only --spec-stage for explicit stage chains"); - } - if (params.has_mtp) { - throw std::invalid_argument("--spec-stage cannot be combined with -mtp/--multi-token-prediction; add the mtp fallback explicitly with --spec-stage mtp[:k=v,...]"); - } - } - - params.speculative.stages.push_back(common_speculative_stage_from_arg(argv[i])); - if (params.speculative.stages.size() == 1) { - params.speculative.type = params.speculative.stages.front().type; - } - params.has_mtp = params.speculative.has_stage_type(COMMON_SPECULATIVE_TYPE_MTP); - return true; + throw common_speculative_legacy_option_error(arg, + "repeated --spec-type SPEC[:k=v,...] entries, e.g. 
--spec-type ngram-mod:n_max=64,n_min=2,ngram_size_n=8 --spec-type mtp:n_max=1,p_min=0.0"); } if (arg == "--spec-type") { CHECK_ARG - if (!params.speculative.stages.empty()) { - throw std::invalid_argument("--spec-type cannot be combined with --spec-stage; use only --spec-stage for explicit stage chains"); - } - - const auto stage = common_speculative_stage_from_arg(argv[i]); - const auto type = stage.type; - if (type == COMMON_SPECULATIVE_TYPE_NONE || type == COMMON_SPECULATIVE_TYPE_DFLASH || type == COMMON_SPECULATIVE_TYPE_MTP || common_speculative_type_is_self_spec(type)) { - params.speculative = params.speculative.with_stage_overrides(stage); - params.speculative.type = type; - if (type == COMMON_SPECULATIVE_TYPE_MTP) { - params.has_mtp = true; - } - } else { - throw std::invalid_argument("unknown speculative decoding type"); - } + params.speculative.stages.push_back(common_speculative_stage_from_arg(argv[i])); + const auto resolved = params.speculative.get_resolved_stages(); + params.speculative.type = resolved.empty() ? COMMON_SPECULATIVE_TYPE_NONE : resolved.front().type; + params.has_mtp = params.speculative.has_stage_type(COMMON_SPECULATIVE_TYPE_MTP); return true; } if (arg == "--spec-ngram-size-n") { CHECK_ARG - int value = std::stoi(argv[i]); - if (value < 1 || value > 1024) { - throw std::invalid_argument("ngram size N must be between 1 and 1024 inclusive"); - } - params.speculative.ngram_size_n = value; - return true; + throw common_speculative_legacy_option_error(arg, + "the canonical stage key inside --spec-type, e.g. --spec-type ngram-mod:ngram_size_n=" + std::string(argv[i])); } if (arg == "--spec-ngram-size-m") { CHECK_ARG - int value = std::stoi(argv[i]); - if (value < 1 || value > 1024) { - throw std::invalid_argument("ngram size M must be between 1 and 1024 inclusive"); - } - params.speculative.ngram_size_m = value; - return true; + throw common_speculative_legacy_option_error(arg, + "the canonical stage key inside --spec-type, e.g. 
--spec-type ngram-map-k4v:ngram_size_m=" + std::string(argv[i])); } if (arg == "--spec-ngram-min-hits") { CHECK_ARG - int value = std::stoi(argv[i]); - if (value < 1) { - throw std::invalid_argument("ngram min hits must be at least 1"); - } - params.speculative.ngram_min_hits = value; - return true; + throw common_speculative_legacy_option_error(arg, + "the canonical stage key inside --spec-type, e.g. --spec-type ngram-map-k4v:ngram_min_hits=" + std::string(argv[i])); } if (arg == "--suffix-pattern-len") { CHECK_ARG - int value = std::stoi(argv[i]); - if (value < 1) { - throw std::invalid_argument("suffix pattern length must be at least 1"); - } - params.speculative.suffix_min_match_len = value; - return true; + throw common_speculative_legacy_option_error(arg, + "the canonical stage key inside --spec-type, e.g. --spec-type suffix:suffix_min_match_len=" + std::string(argv[i])); } if (arg == "--suffix-max-depth") { CHECK_ARG - int value = std::stoi(argv[i]); - if (value < 1) { - throw std::invalid_argument("suffix max depth must be at least 1"); - } - params.speculative.suffix_max_depth = value; - return true; + throw common_speculative_legacy_option_error(arg, + "the canonical stage key inside --spec-type, e.g. --spec-type suffix:suffix_max_depth=" + std::string(argv[i])); } if (arg == "--suffix-corpus") { CHECK_ARG - params.speculative.suffix_corpus = argv[i]; - return true; + throw common_speculative_legacy_option_error(arg, + "the canonical stage key inside --spec-type, e.g. --spec-type suffix:suffix_corpus=" + std::string(argv[i])); } if (arg == "-a" || arg == "--alias") { CHECK_ARG @@ -1804,6 +1883,10 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa if (arg == "-amb" || arg == "--attention-max-batch") { CHECK_ARG params.attn_max_batch = std::stoi(argv[i]); + if (params.attn_max_batch > 0 && params.attn_max_batch < 128) { + LLAMA_LOG_WARN("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX amb = %d is too low. 
Changing to 128\n", params.attn_max_batch); + params.attn_max_batch = 128; + } return true; } if (arg == "-no-fmoe" || arg == "--no-fused-moe") { @@ -1988,17 +2071,12 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa return true; } if (arg == "-mtp" || arg == "--multi-token-prediction") { - if (!params.speculative.stages.empty()) { - throw std::invalid_argument("-mtp/--multi-token-prediction cannot be combined with --spec-stage; add the mtp fallback explicitly with --spec-stage mtp[:k=v,...]"); - } - - params.has_mtp = true; - return true; + throw common_speculative_legacy_option_error(arg, + "--spec-type mtp:n_max=1,p_min=0.0"); } if (arg == "-no-mtp" || arg == "--no-multi-token-prediction") { - params.has_mtp = false; - common_speculative_remove_explicit_stage(params.speculative, COMMON_SPECULATIVE_TYPE_MTP); - return true; + throw common_speculative_legacy_option_error(arg, + "remove the mtp entry from repeated --spec-type arguments"); } if (arg == "-draft" || arg == "--draft-params") { CHECK_ARG @@ -2409,6 +2487,10 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa params.webui = common_webui_from_name(std::string(argv[i])); return true; } + if (arg == "--webui-mcp-proxy" || arg == "--ui-mcp-proxy") { + params.webui_mcp_proxy = true; + return true; + } if (arg == "--api-key") { CHECK_ARG params.api_keys.push_back(argv[i]); @@ -3180,29 +3262,21 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param options.push_back({ "*", "-hfr, --hf-repo REPO", "Hugging Face model repository (default: unused)" }); options.push_back({ "*", "-hff, --hf-file FILE", "Hugging Face model file (default: unused)" }); options.push_back({ "*", "-hft, --hf-token TOKEN", "Hugging Face access token (default: value from HF_TOKEN environment variable)" }); - options.push_back({ "*", "-mtp, --multi-token-prediction", "legacy shortcut for enabling MTP when --spec-stage is not used (default: %s)", 
params.has_mtp ? "true" : "false" }); - options.push_back({ "*", "-no-mtp, --no-multi-token-prediction", "disable the legacy MTP shortcut or remove an explicit MTP stage (default: %s)", !params.has_mtp ? "true" : "false" }); - options.push_back({ "*", "--draft-max, --draft, --draft-n N", - "global default number of tokens to draft for speculative decoding or for stages without an explicit n_max override (default: %d)", params.speculative.n_max }); - options.push_back({ "*", "--draft-min, --draft-n-min N", "global default minimum draft threshold or fallback threshold for stages without an explicit n_min override" }); - options.push_back({ "*", "--draft-p-min P", "global default minimum speculative decoding probability (greedy) for stages without an explicit p_min override (default: %.1f)", (double)params.speculative.p_min }); options.push_back({ "*", "--recurrent-ckpt-mode MODE", "checkpoint strategy for recurrent/hybrid speculative decoding\n" " auto auto-select: per-step if CUDA full-GPU, gpu-fallback otherwise (default)\n" " per-step save SSM state per draft step in VRAM; no re-decode on rejection\n" " gpu-fallback copy state to GPU buffer; re-decode on rejection\n" " cpu serialise state via llama_state_seq; re-decode on rejection" }); - options.push_back({ "*", "--spec-stage SPEC[:k=v,...]", "explicit speculative stage. repeat once for a supported two-stage chain.\n" - "examples: --spec-stage ngram-mod:n_max=64,n_min=2 --spec-stage mtp:n_max=1\n" - "supported two-stage shape in this PR: self-spec first, then mtp or draft fallback" }); - options.push_back({ "*", "--spec-type Name[:k=v,...] 
[none | dflash | mtp | ngram-cache | ngram-simple | ngram-map-k | ngram-map-k4v | ngram-mod | suffix]", "single-stage speculative selection when --spec-stage is not used (default: %d)\n", (int)params.speculative.type}); - options.push_back({ "*", "--spec-ngram-size-n N", "ngram size N for ngram-simple/ngram-map speculative decoding, length of lookup n-gram (default: %d)\n",params.speculative.ngram_size_n }); - - options.push_back({ "*", "--spec-ngram-size-m N", "ngram size M for ngram-simple/ngram-map speculative decoding, length of draft m-gram (default: %d)\n", params.speculative.ngram_size_m }); - - options.push_back({ "*", "--spec-ngram-min-hits N", "minimum hits for ngram-map speculative decoding (default: %d)\n", params.speculative.ngram_min_hits }); - options.push_back({ "*", "--suffix-pattern-len N", "minimum context match length for suffix decoding (default: %d)", params.speculative.suffix_min_match_len }); - options.push_back({ "*", "--suffix-max-depth N", "suffix tree maximum depth for suffix decoding (default: %d)", params.speculative.suffix_max_depth }); - options.push_back({ "*", "--suffix-corpus PATH", "corpus file to pre-warm the suffix tree: .json (array of strings or conversation messages) or .bin (raw int32 token IDs)" }); + options.push_back({ "*", "--spec-type SPEC[:k=v,...]", "canonical speculative stage entry; repeat for a supported two-stage chain.\n" + "types: none, draft, dflash, mtp, ngram-cache, ngram-simple, ngram-map-k, ngram-map-k4v, ngram-mod, suffix\n" + "canonical keys: n_max,n_min,p_min,cross_ctx,ngram_size_n,ngram_size_m,ngram_min_hits,suffix_min_match_len,suffix_max_depth,suffix_corpus\n" + "for comma-bearing string values, quote the value inside the stage payload for normal shell use\n" + "if argv is passed directly without shell unescaping, the parser also accepts escaped commas as \\,\n" + "examples: --spec-type mtp:n_max=1,p_min=0.0\n" + " --model-draft draft.gguf --spec-type dflash:n_max=4,cross_ctx=512\n" + " --spec-type 
ngram-mod:n_max=64,n_min=2,ngram_size_n=8 --spec-type mtp:n_max=1,p_min=0.0\n" + " --spec-type \"suffix:n_max=16,n_min=2,suffix_min_match_len=5,suffix_max_depth=64,suffix_corpus='/tmp/spec,type-corpus.json'\"\n" + "legacy --spec-stage, --draft-*, --spec-ngram-*, --suffix-* and -mtp flags are rejected" }); options.push_back({ "*", "--spec-autotune", "automatically tune speculative params to maximize tokens/sec" }); options.push_back({ "retrieval" }); @@ -3246,6 +3320,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param "- auto: default webui \n" "- llamacpp: llamacpp webui \n" "(default: auto)", }); + options.push_back({ "server", " --ui-mcp-proxy, --webui-mcp-proxy", "experimental: whether to enable MCP CORS proxy - do not enable in untrusted environments (default: disabled)" }); options.push_back({ "server", " --api-key KEY", "API key to use for authentication (default: none)" }); options.push_back({ "server", " --api-key-file FNAME", "path to file containing API keys (default: none)" }); options.push_back({ "server", " --ssl-key-file FNAME", "path to file a PEM-encoded SSL private key" }); @@ -4024,14 +4099,7 @@ static std::pair get_batch_ubatch(const gpt_params & params) { if (params.n_ctx > 0) { n_batch = std::min(n_batch, params.n_ctx); } - if (!params.mmproj.path.empty() && params.mmproj_use_gpu) { - // temporary fix for qwen mtmd (only when mmproj is on GPU) - n_batch = std::max(n_batch, n_ubatch); - n_ubatch = n_batch; - fprintf(stdout, "Adjust batch size for mtmd: u_batch = %d, batch = %d\n", n_ubatch, n_batch); - } else { - n_ubatch = std::min(n_batch, n_ubatch); - } + n_ubatch = std::min(n_batch, n_ubatch); return {n_batch, n_ubatch}; } @@ -5121,7 +5189,7 @@ void yaml_dump_non_result_info(FILE * stream, const gpt_params & params, const l yaml_dump_string_multiline(stream, "in_prefix", params.input_prefix.c_str()); fprintf(stream, "in_prefix_bos: %s # default: false\n", params.input_prefix_bos ? 
"true" : "false"); - yaml_dump_string_multiline(stream, "in_suffix", params.input_prefix.c_str()); + yaml_dump_string_multiline(stream, "in_suffix", params.input_suffix.c_str()); fprintf(stream, "interactive: %s # default: false\n", params.interactive ? "true" : "false"); fprintf(stream, "interactive_first: %s # default: false\n", params.interactive_first ? "true" : "false"); fprintf(stream, "keep: %d # default: 0\n", params.n_keep); diff --git a/common/common.h b/common/common.h index 87de68d9..a6946528 100644 --- a/common/common.h +++ b/common/common.h @@ -171,6 +171,7 @@ struct common_speculative_stage_params { int32_t suffix_min_match_len = -1; int32_t suffix_max_depth = -1; + std::string suffix_corpus; bool has_n_max_override() const { return n_max >= 0; } bool has_n_min_override() const { return n_min >= 0; } @@ -181,6 +182,7 @@ struct common_speculative_stage_params { bool has_ngram_min_hits_override() const { return ngram_min_hits > 0; } bool has_suffix_min_match_len_override() const { return suffix_min_match_len >= 0; } bool has_suffix_max_depth_override() const { return suffix_max_depth >= 0; } + bool has_suffix_corpus_override() const { return !suffix_corpus.empty(); } }; struct common_params_model { @@ -254,7 +256,10 @@ struct common_params_speculative { common_params_speculative with_stage_overrides(const common_speculative_stage_params & stage) const; bool has_stage_chain() const; bool has_stage_type(common_speculative_type stage_type) const; + void remove_stage_type(common_speculative_type stage_type); bool has_composite_stage_chain() const; + bool needs_dft_model() const; + void clear_dft(); int32_t get_max_stage_n_max() const; int32_t get_min_usable_stage_n_min() const; @@ -505,6 +510,7 @@ struct gpt_params { // "advanced" endpoints are disabled by default for better security common_webui webui = COMMON_WEBUI_AUTO; + bool webui_mcp_proxy = false; bool endpoint_slots = true; bool endpoint_props = false; // only control POST requests, not GET bool 
endpoint_metrics = false; diff --git a/common/http.h b/common/http.h new file mode 100644 index 00000000..d3daccd6 --- /dev/null +++ b/common/http.h @@ -0,0 +1,99 @@ +#pragma once + +#include + +struct common_http_url { + std::string scheme; + std::string user; + std::string password; + std::string host; + int port; + std::string path; +}; + +static common_http_url common_http_parse_url(const std::string & url) { + common_http_url parts; + auto scheme_end = url.find("://"); + + if (scheme_end == std::string::npos) { + throw std::runtime_error("invalid URL: no scheme"); + } + parts.scheme = url.substr(0, scheme_end); + + if (parts.scheme != "http" && parts.scheme != "https") { + throw std::runtime_error("unsupported URL scheme: " + parts.scheme); + } + + auto rest = url.substr(scheme_end + 3); + auto at_pos = rest.find('@'); + + if (at_pos != std::string::npos) { + auto auth = rest.substr(0, at_pos); + auto colon_pos = auth.find(':'); + if (colon_pos != std::string::npos) { + parts.user = auth.substr(0, colon_pos); + parts.password = auth.substr(colon_pos + 1); + } else { + parts.user = auth; + } + rest = rest.substr(at_pos + 1); + } + + auto slash_pos = rest.find('/'); + + if (slash_pos != std::string::npos) { + parts.host = rest.substr(0, slash_pos); + parts.path = rest.substr(slash_pos); + } else { + parts.host = rest; + parts.path = "/"; + } + + auto colon_pos = parts.host.find(':'); + + if (colon_pos != std::string::npos) { + parts.port = std::stoi(parts.host.substr(colon_pos + 1)); + parts.host = parts.host.substr(0, colon_pos); + } else if (parts.scheme == "http") { + parts.port = 80; + } else if (parts.scheme == "https") { + parts.port = 443; + } else { + throw std::runtime_error("unsupported URL scheme: " + parts.scheme); + } + + return parts; +} + +static std::pair common_http_client(const std::string & url) { + common_http_url parts = common_http_parse_url(url); + + if (parts.host.empty()) { + throw std::runtime_error("error: invalid URL format"); + } + 
+#ifndef CPPHTTPLIB_OPENSSL_SUPPORT + if (parts.scheme == "https") { + throw std::runtime_error( + "HTTPS is not supported. Please rebuild with one of:\n" + " -DLLAMA_BUILD_BORINGSSL=ON\n" + " -DLLAMA_BUILD_LIBRESSL=ON\n" + " -DLLAMA_OPENSSL=ON (default, requires OpenSSL dev files installed)" + ); + } +#endif + + httplib::Client cli(parts.scheme + "://" + parts.host + ":" + std::to_string(parts.port)); + + if (!parts.user.empty()) { + cli.set_basic_auth(parts.user, parts.password); + } + + cli.set_follow_location(true); + + return { std::move(cli), std::move(parts) }; +} + +static std::string common_http_show_masked_url(const common_http_url & parts) { + return parts.scheme + "://" + (parts.user.empty() ? "" : "****:****@") + parts.host + parts.path; +} diff --git a/common/sampling.cpp b/common/sampling.cpp index 03504bee..5a7a9b69 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -24,7 +24,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co result->grammar = nullptr; result->rbudget = nullptr; - struct llama_grammar* grmr; + struct llama_grammar* grmr = nullptr; const std::string & grammar_str = common_grammar_value(params.grammar); if (grammar_str.compare(0, 11, "%llguidance") == 0) { #ifdef LLAMA_USE_LLGUIDANCE diff --git a/common/spec-tuner.cpp b/common/spec-tuner.cpp index 80427d41..52733877 100644 --- a/common/spec-tuner.cpp +++ b/common/spec-tuner.cpp @@ -357,20 +357,15 @@ void spec_tuner::print_best() const { { std::ostringstream oss; - oss << "Autotune reuse: "; + oss << "Autotune reuse: --spec-type " << common_speculative_type_to_str(spec_type); + bool first_kv = true; for (const auto & coord : coords) { bool is_int = (coord.name != "p_min"); - if (coord.name == "n_max") oss << "--draft-max "; - else if (coord.name == "p_min") oss << "--draft-p-min "; - else if (coord.name == "n_min") oss << "--draft-min "; - else if (coord.name == "ngram_size_n") oss << "--spec-ngram-size-n "; - else if (coord.name == 
"ngram_size_m") oss << "--spec-ngram-size-m "; - else if (coord.name == "ngram_min_hits") oss << "--spec-ngram-min-hits "; - else if (coord.name == "suffix_min_match_len") oss << "--suffix-pattern-len "; - else oss << "--" << coord.name << " "; + oss << (first_kv ? ':' : ',') << coord.name << '='; + first_kv = false; - if (is_int) oss << (int)coord.arms[coord.best_idx].value << " "; - else oss << std::fixed << std::setprecision(2) << coord.arms[coord.best_idx].value << " "; + if (is_int) oss << (int)coord.arms[coord.best_idx].value; + else oss << std::fixed << std::setprecision(2) << coord.arms[coord.best_idx].value; } LOG_INF("%s\n", oss.str().c_str()); } diff --git a/common/speculative-impl.h b/common/speculative-impl.h index ccec2e9e..48d810e7 100644 --- a/common/speculative-impl.h +++ b/common/speculative-impl.h @@ -1128,6 +1128,7 @@ struct common_speculative_state_suffix : public common_speculative_state { struct common_speculative { std::vector configs; // resolved stage config for each implementation std::vector> impls; // list of implementations to use and their states + common_speculative_checkpoint checkpoint; common_speculative_state * curr_impl = nullptr; // current implementation in use (for stats) std::unique_ptr tuner; int last_n_drafted = 0; @@ -1522,6 +1523,7 @@ void common_speculative_free(common_speculative * spec) { return; } + spec->checkpoint.clear(); delete spec; } diff --git a/common/speculative.cpp b/common/speculative.cpp index e7e6559f..d0825387 100644 --- a/common/speculative.cpp +++ b/common/speculative.cpp @@ -53,6 +53,18 @@ const std::map common_speculative_typ {"suffix", COMMON_SPECULATIVE_TYPE_SUFFIX} }; +void common_speculative_checkpoint::clear() { + valid = false; + per_step_enabled = false; + n_past = 0; + sampled = LLAMA_TOKEN_NULL; + + if (sampler != nullptr) { + common_sampler_free(sampler); + sampler = nullptr; + } +} + struct common_speculative_config { common_speculative_stage_params stage; common_speculative_type type; @@ 
-70,10 +82,10 @@ static bool common_speculative_are_compatible( const llama_vocab * vocab_tgt = llama_model_get_vocab(model_tgt); const llama_vocab * vocab_dft = llama_model_get_vocab(model_dft); - const bool vocab_type_tgt = llama_vocab_type(vocab_tgt); + const auto vocab_type_tgt = llama_vocab_type(vocab_tgt); LOG_DBG("%s: vocab_type tgt: %d\n", __func__, vocab_type_tgt); - const bool vocab_type_dft = llama_vocab_type(vocab_dft); + const auto vocab_type_dft = llama_vocab_type(vocab_dft); LOG_DBG("%s: vocab_type dft: %d\n", __func__, vocab_type_dft); if (vocab_type_tgt != vocab_type_dft) { @@ -261,6 +273,17 @@ static void dflash_append_target_features( int32_t n_rows); static void dflash_clear_target_features(common_speculative_state_dflash & state); static void mtp_invalidate_cached_drafts(common_speculative_state_mtp & state); +static bool common_speculative_checkpoint_save( + common_speculative_checkpoint & ckpt, + llama_model * model, + llama_context * ctx, + common_sampler * sampler_src, + const common_params_sampling & sparams, + llama_seq_id seq_id, + llama_pos n_past, + llama_token sampled, + int max_tokens, + int ckpt_mode); static std::vector mtp_speculative_gen_draft( common_speculative_state_mtp & state, @@ -583,6 +606,251 @@ bool common_speculative_ensure_sequence_hidden( return common_speculative_capture_output_hidden(spec, ctx, -1, seq_id, pos); } +common_speculative_draft_result common_speculative_draft_ex( + common_speculative * spec, + llama_context * ctx, + common_params_speculative & params, + const llama_tokens & prompt_tgt, + llama_token id_last, + llama_pos draft_base_pos, + llama_seq_id draft_seq_id) { + common_speculative_draft_result result = {}; + + if (common_speculative_has_type(spec, COMMON_SPECULATIVE_TYPE_MTP)) { + if (!common_speculative_ensure_sequence_hidden(spec, ctx, draft_seq_id, draft_base_pos - 1)) { + LOG_ERR("%s: seq_id=%d MTP hidden state is empty during speculation\n", + __func__, (int) draft_seq_id); + return result; + 
} + } + + result.tokens = common_speculative_draft( + spec, + params, + prompt_tgt, + id_last, + draft_base_pos, + draft_seq_id); + result.type = spec != nullptr && spec->curr_impl != nullptr + ? spec->curr_impl->type + : COMMON_SPECULATIVE_TYPE_NONE; + + return result; +} + +static bool common_speculative_has_target_features(const common_speculative * spec) { + return common_speculative_has_type(spec, COMMON_SPECULATIVE_TYPE_MTP) || + common_speculative_has_type(spec, COMMON_SPECULATIVE_TYPE_DFLASH); +} + +bool common_speculative_load_draft_model( + common_params_speculative & params, + const gpt_params & params_base) { + if (!params.has_dft()) { + return true; + } + + gpt_params params_dft; + params_dft.devices = params.devices; + params_dft.model = params.model; + params_dft.main_gpu = params_base.main_gpu; + params_dft.n_gpu_layers = params.n_gpu_layers; + params_dft.rpc_servers = params_base.rpc_servers; + params_dft.cache_type_k = params.cache_type_k.empty() ? params_base.cache_type_k : params.cache_type_k; + params_dft.cache_type_v = params.cache_type_v.empty() ? 
params_base.cache_type_v : params.cache_type_v; + params_dft.flash_attn = params_base.flash_attn; + params_dft.k_cache_hadamard = params_base.k_cache_hadamard; + params_dft.v_cache_hadamard = params_base.v_cache_hadamard; + + if (params.has_stage_type(COMMON_SPECULATIVE_TYPE_DFLASH)) { + params_dft.split_mode = params_base.split_mode; + for (size_t i = 0; i < std::size(params_dft.tensor_split); ++i) { + params_dft.tensor_split[i] = params_base.tensor_split[i]; + } + params_dft.attn_max_batch = params_base.attn_max_batch; + params_dft.graph_reuse = params_base.graph_reuse; + params_dft.split_mode_graph_scheduling = params_base.split_mode_graph_scheduling; + params_dft.scheduler_async = params_base.scheduler_async; + params_dft.max_extra_alloc_MiB = params_base.max_extra_alloc_MiB; + params_dft.reduce_type = params_base.reduce_type; + } + + if (!params.params.empty()) { + auto [argc, argv] = parse_command_line("llama-server " + params.params); + if (!gpt_params_parse(argc, argv, params_dft)) { + gpt_params_print_usage(argc, argv, params_dft); + free_command_line(argc, argv); + return false; + } + free_command_line(argc, argv); + } + + LOG_INF("%s: loading draft model '%s'\n", __func__, params_dft.model.c_str()); + + if (params_dft.n_ctx == 0) { + params_dft.n_ctx = params.n_ctx; + } + if (params.has_stage_type(COMMON_SPECULATIVE_TYPE_DFLASH) && params_dft.n_gpu_layers < 0) { + params_dft.n_gpu_layers = params_base.n_gpu_layers; + } + params_dft.n_ctx = params_dft.n_ctx == 0 ? 
params_base.n_ctx / params_base.n_parallel : params_dft.n_ctx; + params_dft.n_parallel = 1; + params_dft.n_batch = params_dft.n_ctx; + + params.mparams_dft.path = params_dft.model; + + llama_model_params mparams_dft = common_model_params_to_llama(params_dft); + llama_model * loaded_model = llama_model_load_from_file(params_dft.model.c_str(), mparams_dft); + if (loaded_model == nullptr) { + LOG_ERR("%s: failed to load draft model '%s'\n", __func__, params.model.c_str()); + return false; + } + + params.model_dft = loaded_model; + params.cparams_dft = common_context_params_to_llama(params_dft); + return true; +} + +bool common_speculative_prepare_mtp_runtime( + common_params_speculative & params, + const gpt_params & params_base, + const llama_model * model, + bool has_external_mtp) { + if (!params.has_stage_type(COMMON_SPECULATIVE_TYPE_MTP)) { + return false; + } + + if (llama_model_n_nextn_layer(model) == 0 && !has_external_mtp) { + LOG_WRN("%s: MTP speculative stage requested, but model has 0 NextN layers. 
Removing MTP from the configured stage chain.\n", + __func__); + params.remove_stage_type(COMMON_SPECULATIVE_TYPE_MTP); + if (!params.needs_dft_model()) { + params.clear_dft(); + } + return false; + } + + if (!has_external_mtp) { + gpt_params params_mtp = params_base; + params_mtp.pooling_type = LLAMA_POOLING_TYPE_NONE; + params.cparams_dft = common_context_params_to_llama(params_mtp); + } + + params.cparams_dft.mtp = true; + params.cparams_dft.mtp_op_type = MTP_OP_WARMUP; + params.cparams_dft.embeddings = true; + + return true; +} + +common_speculative_init_status common_speculative_try_init( + common_params_speculative & params, + llama_context * ctx_tgt, + common_speculative ** out_spec) { + if (out_spec != nullptr) { + *out_spec = nullptr; + } + + if (!params.has_stage_chain()) { + return COMMON_SPECULATIVE_INIT_SKIPPED; + } + + common_speculative * spec = common_speculative_init(params, ctx_tgt); + if (spec != nullptr) { + if (out_spec != nullptr) { + *out_spec = spec; + } + return COMMON_SPECULATIVE_INIT_READY; + } + + const llama_model * model = ctx_tgt != nullptr ? llama_get_model(ctx_tgt) : nullptr; + if (model != nullptr && llama_model_has_recurrent(model)) { + return COMMON_SPECULATIVE_INIT_ERR_RECURRENT; + } + if (params.has_stage_type(COMMON_SPECULATIVE_TYPE_MTP)) { + return COMMON_SPECULATIVE_INIT_ERR_MTP; + } + return COMMON_SPECULATIVE_INIT_ERR_GENERIC; +} + +void common_speculative_prepare_startup( + gpt_params & params_base, + bool allow_parallel_mtp) { + auto & params = params_base.speculative; + + if (!allow_parallel_mtp && params_base.n_parallel > 1 && params.has_stage_type(COMMON_SPECULATIVE_TYPE_MTP)) { + LOG_WRN("%s: MTP is not supported with parallel slots yet, removing the MTP stage to avoid cross-slot corruption. 
n_parallel=%d, stage_chain=%s\n", + __func__, params_base.n_parallel, common_speculative_stage_chain_to_str(params).c_str()); + params.remove_stage_type(COMMON_SPECULATIVE_TYPE_MTP); + } + + if (!params.needs_dft_model()) { + params.clear_dft(); + } + + params_base.has_mtp = params.has_stage_type(COMMON_SPECULATIVE_TYPE_MTP); +} + +bool common_speculative_finalize_startup( + gpt_params & params_base, + const llama_model * model) { + auto & params = params_base.speculative; + + if (!params.needs_dft_model()) { + params.clear_dft(); + } + + if (params.has_dft()) { + LLAMA_LOG_INFO("\n\n==================================loading DRAFT model==================================\n\n"); + if (!common_speculative_load_draft_model(params, params_base)) { + return false; + } + } + + params_base.has_mtp = params.has_stage_type(COMMON_SPECULATIVE_TYPE_MTP); + const bool has_external_mtp = params_base.has_mtp && + llama_model_is_gemma4_mtp_assistant(params.model_dft); + + params_base.has_mtp = common_speculative_prepare_mtp_runtime( + params, + params_base, + model, + has_external_mtp); + if (params_base.has_mtp) { + params_base.pooling_type = LLAMA_POOLING_TYPE_NONE; + } + + return true; +} + +bool common_speculative_before_draft( + common_speculative * spec, + llama_model * model, + llama_context * ctx, + common_sampler * sampler_src, + const common_params_sampling & sparams, + llama_seq_id seq_id, + llama_pos n_past, + llama_token sampled, + int max_tokens, + int ckpt_mode) { + if (spec == nullptr) { + return false; + } + + return common_speculative_checkpoint_save( + spec->checkpoint, + model, + ctx, + sampler_src, + sparams, + seq_id, + n_past, + sampled, + max_tokens, + ckpt_mode); +} + int32_t common_speculative_on_target_seq_batch( common_speculative * spec, llama_context * ctx_tgt, @@ -794,6 +1062,234 @@ bool common_speculative_commit_accepted_output( hidden_rows); } +static bool common_speculative_checkpoint_save( + common_speculative_checkpoint & ckpt, + llama_model * 
model, + llama_context * ctx, + common_sampler * sampler_src, + const common_params_sampling & sparams, + llama_seq_id seq_id, + llama_pos n_past, + llama_token sampled, + int max_tokens, + int ckpt_mode) { + ckpt.clear(); + ckpt.n_past = n_past; + ckpt.sampled = sampled; + + const int actual_mode = llama_spec_ckpt_init(ctx, ckpt_mode, max_tokens); + if (actual_mode == LLAMA_SPEC_CKPT_NONE) { + return false; + } + ckpt.per_step_enabled = (actual_mode == LLAMA_SPEC_CKPT_PER_STEP); + + ckpt.valid = llama_spec_ckpt_save(ctx, seq_id); + if (!ckpt.valid) { + llama_spec_ckpt_discard(ctx); + return false; + } + + ckpt.sampler = common_sampler_init(model, sparams); + if (ckpt.sampler == nullptr) { + common_speculative_checkpoint_discard(ckpt, ctx); + return false; + } + + if (sampler_src != nullptr) { + common_sampler_clone(sampler_src, ckpt.sampler); + } + + return true; +} + +const common_speculative_checkpoint * common_speculative_get_checkpoint(const common_speculative * spec) { + return spec != nullptr ? 
&spec->checkpoint : nullptr; +} + +void common_speculative_checkpoint_discard( + common_speculative_checkpoint & ckpt, + llama_context * ctx) { + ckpt.clear(); + llama_spec_ckpt_discard(ctx); +} + +void common_speculative_checkpoint_restore( + common_speculative_checkpoint & ckpt, + common_speculative * spec, + llama_context * ctx, + common_sampler * sampler_dst, + llama_seq_id seq_id, + common_speculative_type spec_type_used, + llama_token sampled_before, + const std::vector & ids, + int n_draft, + const std::vector & mtp_hidden_state_pre, + int32_t mtp_n_past_base) { + if (!ckpt.valid) { + return; + } + + if (ckpt.per_step_enabled) { + const int step = (int) ids.size() - 1; + llama_spec_ckpt_restore(ctx, seq_id, ckpt.n_past, step); + + if (ckpt.sampler != nullptr && sampler_dst != nullptr) { + common_sampler_clone(ckpt.sampler, sampler_dst); + } + if (sampler_dst != nullptr) { + for (llama_token id : ids) { + common_sampler_accept(sampler_dst, ctx, id, true); + } + } + + if (common_speculative_has_target_features(spec) && !mtp_hidden_state_pre.empty()) { + if (!common_speculative_commit_accepted_hidden_rows( + spec, + spec_type_used, + seq_id, + mtp_n_past_base, + sampled_before, + ids, + mtp_hidden_state_pre)) { + common_speculative_clear_sequence_hidden(spec, seq_id); + } else if (spec_type_used != COMMON_SPECULATIVE_TYPE_MTP) { + LOG_DBG("%s: seq_id=%d synced MTP target hidden state from accepted-prefix rows after per-step restore\n", + __func__, (int) seq_id); + } + } + + LOG_DBG("%s: seq_id=%d per-step restore: step=%d (rejected %d drafts)\n", + __func__, (int) seq_id, step, (int) (n_draft - (ids.size() - 1))); + } else { + llama_spec_ckpt_restore(ctx, seq_id, ckpt.n_past, 0); + + if (ckpt.sampler != nullptr && sampler_dst != nullptr) { + common_sampler_clone(ckpt.sampler, sampler_dst); + } + + if (!ids.empty()) { + const int n_re = (int) ids.size(); + llama_batch re_batch = llama_batch_init(n_re, 0, 1); + common_batch_add(re_batch, ckpt.sampled, 
ckpt.n_past, { seq_id }, n_re == 1); + for (int j = 0; j < n_re - 1; ++j) { + common_batch_add(re_batch, ids[j], ckpt.n_past + 1 + j, { seq_id }, j == n_re - 2); + } + + if (common_speculative_has_type(spec, COMMON_SPECULATIVE_TYPE_MTP)) { + for (int j = 0; j < re_batch.n_tokens; ++j) { + re_batch.logits[j] = true; + } + llama_set_embeddings(ctx, true); + } + + const int ret = llama_decode(ctx, re_batch); + if (ret != 0) { + LOG_ERR("%s: seq_id=%d failed to re-decode accepted tokens after checkpoint restore: %d\n", + __func__, (int) seq_id, ret); + } + + if (common_speculative_has_target_features(spec)) { + std::vector redecoded_indices(n_re); + for (int j = 0; j < n_re; ++j) { + redecoded_indices[j] = j; + } + + if (!common_speculative_commit_accepted_output( + spec, + ctx, + spec_type_used, + seq_id, + ckpt.n_past, + sampled_before, + ids, + redecoded_indices)) { + common_speculative_clear_sequence_hidden(spec, seq_id); + } + } + + if (sampler_dst != nullptr) { + for (llama_token id : ids) { + common_sampler_accept(sampler_dst, ctx, id, true); + } + } + + llama_batch_free(re_batch); + LOG_DBG("%s: seq_id=%d spec checkpoint restored: re-decoded %d tokens (rejected %d drafts)\n", + __func__, (int) seq_id, n_re, (int) (n_draft - (ids.size() - 1))); + } + } + + common_speculative_checkpoint_discard(ckpt, ctx); +} + +void common_speculative_commit( + common_speculative * spec, + llama_context * ctx, + common_sampler * sampler_dst, + llama_seq_id seq_id, + llama_token sampled_before, + const std::vector & ids, + int n_draft, + llama_pos pos_base, + const std::vector & accepted_output_indices) { + GGML_ASSERT(spec != nullptr); + GGML_ASSERT(!ids.empty()); + + common_speculative_checkpoint & ckpt = spec->checkpoint; + const common_speculative_type spec_type_used = spec->curr_impl != nullptr + ? 
spec->curr_impl->type + : COMMON_SPECULATIVE_TYPE_NONE; + const bool any_rejected = (int) ids.size() - 1 < n_draft; + std::vector mtp_hidden_state_pre; + + common_speculative_accept(spec, ids.size() - 1); + + if (common_speculative_has_target_features(spec) && + any_rejected && + ckpt.valid && + !accepted_output_indices.empty()) { + if (!common_speculative_copy_output_hidden_rows(spec, ctx, accepted_output_indices, mtp_hidden_state_pre)) { + mtp_hidden_state_pre.clear(); + } + } + + if (any_rejected && ckpt.valid) { + common_speculative_checkpoint_restore( + ckpt, + spec, + ctx, + sampler_dst, + seq_id, + spec_type_used, + sampled_before, + ids, + n_draft, + mtp_hidden_state_pre, + pos_base); + return; + } + + if (common_speculative_has_target_features(spec) && !accepted_output_indices.empty()) { + if (!common_speculative_commit_accepted_output( + spec, + ctx, + spec_type_used, + seq_id, + pos_base, + sampled_before, + ids, + accepted_output_indices)) { + common_speculative_clear_sequence_hidden(spec, seq_id); + } else if (spec_type_used != COMMON_SPECULATIVE_TYPE_MTP) { + LOG_DBG("%s: seq_id=%d synced MTP target hidden state from accepted-prefix rows\n", + __func__, (int) seq_id); + } + } + + llama_kv_cache_seq_rm(ctx, seq_id, pos_base + (llama_pos) (ids.size() - 1), -1); + common_speculative_checkpoint_discard(ckpt, ctx); +} + void common_speculative_print_stats(const common_speculative * spec, double slot_tps, int n_decoded, int n_past, common_params_speculative * active_params) { if (spec == nullptr) { return; @@ -1592,6 +2088,50 @@ void common_speculative_clear_sequence_hidden(common_speculative * spec, llama_s } } +void common_speculative_clear_sequence( + common_speculative * spec, + llama_seq_id seq_id, + bool clear_companion_ctx) { + if (spec != nullptr) { + spec->checkpoint.clear(); + spec->curr_impl = nullptr; + spec->last_n_drafted = 0; + spec->t_step_start_us = 0; + } + + common_speculative_clear_sequence_hidden(spec, seq_id); + + if 
(clear_companion_ctx) { + if (auto * ctx_mtp = common_speculative_get_companion_ctx(spec); ctx_mtp != nullptr) { + llama_kv_cache_clear(ctx_mtp); + } + } +} + +bool common_speculative_trim_sequence( + common_speculative * spec, + llama_context * ctx, + llama_seq_id seq_id, + llama_pos pos_begin) { + const bool target_trimmed = llama_kv_cache_seq_rm(ctx, seq_id, pos_begin, -1); + if (auto * ctx_mtp = common_speculative_get_companion_ctx(spec); ctx_mtp != nullptr) { + return target_trimmed && llama_kv_cache_seq_rm(ctx_mtp, seq_id, pos_begin, -1); + } + + return target_trimmed; +} + +void common_speculative_clear_sequence_kv( + common_speculative * spec, + llama_context * ctx, + llama_seq_id seq_id) { + common_speculative_clear_sequence(spec, seq_id); + llama_kv_cache_seq_rm(ctx, seq_id, -1, -1); + if (auto * ctx_mtp = common_speculative_get_companion_ctx(spec); ctx_mtp != nullptr) { + llama_kv_cache_seq_rm(ctx_mtp, seq_id, -1, -1); + } +} + llama_context * common_speculative_get_companion_ctx(common_speculative * spec) { if (auto * mtp_state = common_speculative_get_mtp_state(spec); mtp_state != nullptr) { return mtp_state->ctx_mtp; @@ -1858,13 +2398,10 @@ std::vector mtp_speculative_gen_draft( // This prevents cache state corruption where two cells map to the same logical position. // If the state contained in `last` had a valid token id and probability, it means that we // have previously run an "accept" batch, where the token sampled from the main model was included. - // In that case, we need to discard all tokens that we ran here to get the KV cache to the correct state. - // => for i0 = 1 we discard from n_past - // But if we did not have a valid last token_id, it means the first token we run was sampled from the - // main model. Hence we want to keep this token in the KV cache and discard all other tokens. 
- // => for i0 = 0 we discard from n_past + 1 + // Even in that case, the token at `n_past` is already committed and must remain in the KV cache, + // so we only discard the speculative tail starting at `n_past + 1`. if (n_decode > 0) { - llama_kv_cache_seq_rm(ctx, seq_id, n_past + 1 - i0, n_past + n_decode + 2); + llama_kv_cache_seq_rm(ctx, seq_id, n_past + 1, n_past + n_decode + 2); } return drafts; diff --git a/common/speculative.h b/common/speculative.h index 06d4b580..da740c0b 100644 --- a/common/speculative.h +++ b/common/speculative.h @@ -7,6 +7,14 @@ struct common_speculative; +enum common_speculative_init_status { + COMMON_SPECULATIVE_INIT_SKIPPED, + COMMON_SPECULATIVE_INIT_READY, + COMMON_SPECULATIVE_INIT_ERR_RECURRENT, + COMMON_SPECULATIVE_INIT_ERR_MTP, + COMMON_SPECULATIVE_INIT_ERR_GENERIC, +}; + using common_speculative_feature_kind = llama_spec_feature_kind; using common_speculative_feature_row_view = llama_spec_feature_row_view; using common_speculative_feature_view = llama_spec_feature_view; @@ -14,6 +22,21 @@ using common_speculative_feature_view = llama_spec_feature_view; static constexpr common_speculative_feature_kind COMMON_SPECULATIVE_FEATURE_NONE = LLAMA_SPEC_FEATURE_NONE; static constexpr common_speculative_feature_kind COMMON_SPECULATIVE_FEATURE_HIDDEN_STATE = LLAMA_SPEC_FEATURE_HIDDEN_STATE; +struct common_speculative_checkpoint { + bool valid = false; + bool per_step_enabled = false; + llama_pos n_past = 0; + llama_token sampled = LLAMA_TOKEN_NULL; + common_sampler * sampler = nullptr; + + void clear(); +}; + +struct common_speculative_draft_result { + llama_tokens tokens; + common_speculative_type type = COMMON_SPECULATIVE_TYPE_NONE; +}; + // comma separated list of all types std::string common_speculative_type_name_str(); @@ -31,6 +54,29 @@ common_speculative * common_speculative_init( common_params_speculative & params, llama_context * ctx_tgt); +common_speculative_init_status common_speculative_try_init( + common_params_speculative & 
params, + llama_context * ctx_tgt, + common_speculative ** out_spec); + +void common_speculative_prepare_startup( + gpt_params & params_base, + bool allow_parallel_mtp = true); + +bool common_speculative_finalize_startup( + gpt_params & params_base, + const llama_model * model); + +bool common_speculative_load_draft_model( + common_params_speculative & params, + const gpt_params & params_base); + +bool common_speculative_prepare_mtp_runtime( + common_params_speculative & params, + const gpt_params & params_base, + const llama_model * model, + bool has_external_mtp); + void common_speculative_free(common_speculative * spec); // optionally call once at the beginning of a new generation @@ -46,9 +92,30 @@ llama_tokens common_speculative_draft( llama_pos draft_base_pos = -1, llama_seq_id draft_seq_id = 0); +common_speculative_draft_result common_speculative_draft_ex( + common_speculative * spec, + llama_context * ctx, + common_params_speculative & params, + const llama_tokens & prompt, + llama_token id_last, + llama_pos draft_base_pos = -1, + llama_seq_id draft_seq_id = 0); + // informs the speculative decoder that n_accepted tokens were accepted by the target model void common_speculative_accept(common_speculative * spec, uint16_t n_accepted); +bool common_speculative_before_draft( + common_speculative * spec, + llama_model * model, + llama_context * ctx, + common_sampler * sampler_src, + const common_params_sampling & sparams, + llama_seq_id seq_id, + llama_pos n_past, + llama_token sampled, + int max_tokens, + int ckpt_mode); + bool common_speculative_ensure_sequence_hidden( common_speculative * spec, llama_context * ctx, @@ -87,10 +154,56 @@ bool common_speculative_commit_accepted_output( const std::vector & ids, const std::vector & output_indices); +const common_speculative_checkpoint * common_speculative_get_checkpoint(const common_speculative * spec); + +void common_speculative_checkpoint_discard( + common_speculative_checkpoint & ckpt, + llama_context * ctx); + 
+void common_speculative_checkpoint_restore( + common_speculative_checkpoint & ckpt, + common_speculative * spec, + llama_context * ctx, + common_sampler * sampler_dst, + llama_seq_id seq_id, + common_speculative_type spec_type_used, + llama_token sampled_before, + const std::vector & ids, + int n_draft, + const std::vector & mtp_hidden_state_pre, + int32_t mtp_n_past_base); + +void common_speculative_commit( + common_speculative * spec, + llama_context * ctx, + common_sampler * sampler_dst, + llama_seq_id seq_id, + llama_token sampled_before, + const std::vector & ids, + int n_draft, + llama_pos pos_base, + const std::vector & accepted_output_indices); + bool common_speculative_has_sequence_hidden(const common_speculative * spec, llama_seq_id seq_id); void common_speculative_clear_sequence_hidden(common_speculative * spec, llama_seq_id seq_id); +void common_speculative_clear_sequence( + common_speculative * spec, + llama_seq_id seq_id, + bool clear_companion_ctx = false); + +bool common_speculative_trim_sequence( + common_speculative * spec, + llama_context * ctx, + llama_seq_id seq_id, + llama_pos pos_begin); + +void common_speculative_clear_sequence_kv( + common_speculative * spec, + llama_context * ctx, + llama_seq_id seq_id); + llama_context * common_speculative_get_companion_ctx(common_speculative * spec); int32_t common_speculative_on_target_seq_batch( diff --git a/common/suffix-tree.cpp b/common/suffix-tree.cpp index 6e0c0691..09c2ff3c 100644 --- a/common/suffix-tree.cpp +++ b/common/suffix-tree.cpp @@ -209,7 +209,7 @@ static bool suffix_corpus_check_limit(const std::string & path, size_t n_tokens, return true; } - LOG_ERR("load_corpus: refusing suffix corpus '%s' - estimated insert work %llu exceeds limit %llu (tokens=%zu, depth=%d); reduce corpus size or --suffix-max-depth\n", + LOG_ERR("load_corpus: refusing suffix corpus '%s' - estimated insert work %llu exceeds limit %llu (tokens=%zu, depth=%d); reduce corpus size or lower suffix_max_depth inside 
--spec-type suffix:suffix_max_depth=...\n", path.c_str(), (unsigned long long) estimated_work, (unsigned long long) SUFFIX_CORPUS_MAX_INSERT_WORK, diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index dd2766fa..3cd8f9fe 100644 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -2519,6 +2519,144 @@ class DFlashDraftModel(Qwen3Model): return tensors +@Model.register("MellumForCausalLM") +class MellumModel(Model): + model_arch = gguf.MODEL_ARCH.MELLUM + + def set_vocab(self): + tokenizer_path = self.dir_model / "tokenizer.json" + with open(tokenizer_path, "r", encoding="utf-8") as f: + tokenizer_json = json.load(f) + + from tokenizers import Tokenizer + tokenizer = Tokenizer.from_file(str(tokenizer_path)) + + class TokenizerShim: + def encode(self, text: str) -> list[int]: + return tokenizer.encode(text).ids + + vocab: dict[str, int] = tokenizer_json["model"]["vocab"] + vocab_size = self.hparams.get("vocab_size", len(vocab)) + assert max(vocab.values()) < vocab_size + + tokpre = self.get_vocab_base_pre(TokenizerShim()) + reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in vocab.items()} + added_vocab = { + item["content"]: item + for item in tokenizer_json.get("added_tokens", []) + if isinstance(item.get("content"), str) + } + + tokens: list[str] = [] + toktypes: list[int] = [] + for i in range(vocab_size): + if i not in reverse_vocab: + tokens.append(f"[PAD{i}]") + toktypes.append(gguf.TokenType.UNUSED) + continue + + token = reverse_vocab[i] + added_token = added_vocab.get(token) + if added_token is not None: + if added_token.get("special", False) or self.does_token_look_special(token): + toktypes.append(gguf.TokenType.CONTROL) + else: + token = token.replace("\u2581", " ") + toktypes.append(gguf.TokenType.USER_DEFINED) + else: + toktypes.append(gguf.TokenType.NORMAL) + tokens.append(token) + + self.gguf_writer.add_tokenizer_model("gpt2") + self.gguf_writer.add_tokenizer_pre(tokpre) + self.gguf_writer.add_token_list(tokens) + 
self.gguf_writer.add_token_types(toktypes) + + special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True) + special_vocab.add_to_gguf(self.gguf_writer) + + def set_gguf_parameters(self): + super().set_gguf_parameters() + if self.hparams.get("num_local_experts") is None and (n_experts := self.hparams.get("num_experts")) is not None: + self.gguf_writer.add_expert_count(n_experts) + + if (moe_intermediate_size := self.hparams.get("moe_intermediate_size")) is not None: + self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size) + logger.info(f"gguf: expert feed forward length = {moe_intermediate_size}") + + use_sliding_window = self.hparams.get("use_sliding_window") + sliding_window = self.hparams.get("sliding_window") + if (use_sliding_window is True or use_sliding_window is None) and sliding_window is not None: + self.gguf_writer.add_sliding_window(sliding_window) + logger.info(f"gguf: sliding window = {sliding_window}") + self.gguf_writer.add_sliding_window_pattern([t == "sliding_attention" for t in self.hparams["layer_types"]]) + logger.info(f"gguf: sliding window pattern length = {len(self.hparams['layer_types'])}") + + rope_parameters = self.hparams.get("rope_parameters", {}) + if full_attention_rope := rope_parameters.get("full_attention"): + if rope_theta := full_attention_rope.get("rope_theta"): + self.gguf_writer.add_rope_freq_base(rope_theta) + logger.info(f"gguf: rope freq base = {rope_theta}") + + if full_attention_rope.get("rope_type") == "yarn": + self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN) + + if factor := full_attention_rope.get("factor"): + self.gguf_writer.add_rope_scaling_factor(factor) + if original_context_length := full_attention_rope.get("original_max_position_embeddings"): + self.gguf_writer.add_rope_scaling_orig_ctx_len(original_context_length) + if attention_factor := full_attention_rope.get("attention_factor"): + self.gguf_writer.add_rope_scaling_yarn_attn_factor(attention_factor) + if beta_fast := 
full_attention_rope.get("beta_fast"): + self.gguf_writer.add_rope_scaling_yarn_beta_fast(beta_fast) + if beta_slow := full_attention_rope.get("beta_slow"): + self.gguf_writer.add_rope_scaling_yarn_beta_slow(beta_slow) + + if sliding_attention_rope := rope_parameters.get("sliding_attention"): + if rope_theta_swa := sliding_attention_rope.get("rope_theta"): + self.gguf_writer.add_rope_freq_base_swa(rope_theta_swa) + logger.info(f"gguf: rope freq base swa = {rope_theta_swa}") + + _experts: list[dict[str, Tensor]] | None = None + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: + if "experts" in name: + n_experts = self.find_hparam(["num_local_experts", "num_experts"]) + assert bid is not None + + if self._experts is None: + self._experts = [{} for _ in range(self.block_count)] + + self._experts[bid][name] = data_torch + + if len(self._experts[bid]) >= n_experts * 3: + tensors: list[tuple[str, Tensor]] = [] + + for w_name in ["down_proj", "gate_proj", "up_proj"]: + datas: list[Tensor] = [] + + for xid in range(n_experts): + ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight" + datas.append(self._experts[bid][ename]) + del self._experts[bid][ename] + + data_torch = torch.stack(datas, dim=0) + merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight" + tensors.append((self.map_tensor_name(merged_name), data_torch)) + return tensors + return [] + + return [(self.map_tensor_name(name), data_torch)] + + def prepare_tensors(self): + super().prepare_tensors() + + if self._experts is not None: + experts = [k for d in self._experts for k in d.keys()] + if len(experts) > 0: + raise ValueError(f"Unprocessed experts: {experts}") + + @Model.register("Ernie4_5_ForCausalLM", "Ernie4_5ForCausalLM") class Ernie4_5Model(Model): model_arch = gguf.MODEL_ARCH.ERNIE4_5 diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py index da894158..9168f903 100755 --- a/convert_hf_to_gguf_update.py +++ 
b/convert_hf_to_gguf_update.py @@ -105,6 +105,7 @@ models = [ {"name": "kimi-k2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/moonshotai/Kimi-K2-Base", "chkhsh": "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890", }, {"name": "grok-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/alvarobartt/grok-2-tokenizer", "chkhsh": "66b8d4e19ab16c3bfd89bce5d785fb7e0155e8648708a1f42077cb9fe002c273"}, {"name": "minimax-m2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/MiniMaxAI/MiniMax-M2", }, + {"name": "mellum2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/JetBrains/Mellum2-12B-A2.5B-Base", }, ] diff --git a/docs/autoparser.md b/docs/autoparser.md index adc4d43e..0b167789 100644 --- a/docs/autoparser.md +++ b/docs/autoparser.md @@ -69,6 +69,7 @@ Three outcomes for reasoning-prefill handling (in `generate_parser()`): | `PLAIN` | No content markers | | `ALWAYS_WRAPPED` | Content always wrapped: `...` | | `WRAPPED_WITH_REASONING` | Content wrapped only when reasoning is present | +| `END_DELIMITED` | Content has no start marker but ends at a marker | **`tool_format`**: Classification of tool call structure. @@ -357,6 +358,7 @@ A workaround array in `common/chat-diff-analyzer.cpp` applies post-hoc patches a 3. **Cohere Command R+** — source contains `<|CHATBOT_TOKEN|>`: sets `ALWAYS_WRAPPED` content mode if no content start is already set 4. **Functionary 3.1** — source contains `set has_code_interpreter`: forces `PLAIN` content, specific `per_call_start/end`, clears preserved tokens to only keep Functionary-specific markers 5. **DeepSeek-R1-Distill-Qwen** — source contains `tool▁calls▁begin` markers: overrides tool section/per-call markers with the correct Unicode block characters +6. 
**Poolside Laguna** — source contains `laguna_glm_thinking` and the Laguna generation prompt pattern: sets delimiter-style reasoning ending at `` and `END_DELIMITED` content ending at `` ### Parser Building @@ -380,6 +382,7 @@ Note: The start marker may be empty either because the analyzer detected delimit | Tools present | Dispatches to `analyze_tools::build_parser()` | | `ALWAYS_WRAPPED` with reasoning | `reasoning + start + content(until(end)) + end + end()` | | `ALWAYS_WRAPPED` without reasoning | `content(until(start)) + start + content(until(end)) + end + end()` | +| `END_DELIMITED` | `reasoning + content(until(end) or rest()) + optional end marker + end()` | | Default (PLAIN) | `reasoning + content(rest()) + end()` | #### Tool Parsers (`analyze_tools::build_parser`) @@ -392,7 +395,7 @@ Dispatches by `format.mode`: - `build_json_tools_nested_keys()` — nested: `{"function": {"name": "X", "arguments": {...}}}` - `build_json_tools_flat_keys()` — flat: `{"name": "X", "arguments": {...}}` -Handles content wrappers, array wrapping (`tools_array_wrapped`), parallel calls, and `parameter_order`. +Handles content wrappers, array wrapping (`tools_array_wrapped`), parallel calls, and `parameter_order`. If content is `END_DELIMITED`, the content end marker is also accepted after parsed tool calls. **`build_tool_parser_tag_json()`**: For each tool function: @@ -417,7 +420,7 @@ For closing: uses `function.close` if present; otherwise uses `peek(per_call_end All three tool parsers return: ```text -reasoning + optional(content(until(trigger_marker))) + tool_calls + end() +reasoning + optional(content(until(trigger_marker))) + tool_calls + optional(content_end) + end() ``` Each returned parser is wrapped by `wrap_for_generation_prompt()`, which prepends a literal for any boilerplate prefix of the generation prompt (the portion before the reasoning start marker). 
diff --git a/docs/parameters.md b/docs/parameters.md index 77ec4fa2..f9d8ca8a 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -1,6 +1,6 @@ # Parameters Documentation -Overview of the most common command-line parameters in `ik_llama.cpp` and some info how to use them. +Overview of the most common command-line parameters in `ik_llama.cpp` and some info how to use them. It is not exhaustive and may omit some available options. ## Table of Contents @@ -58,6 +58,10 @@ Some often used terms. | t/s | Token/second, measures PP and TG. | | full gpu | All processes offloaded to the GPU. | | hybrid cpu/gpu | Partial offload to the GPU. | +| RAG | Retrieval Augmented Generation. Provide external documents to the LLM for information lookup. | +| MCP | Model Context Protocol, an [open standard](https://en.wikipedia.org/wiki/Model_Context_Protocol) for the way artificial intelligence (AI) systems like large language models (LLMs) integrate and share data with external tools, systems, and data sources. | +| AI agent | Tool/program that uses an LLM to achieve a goal/task via a series of planning/steps/actions/tool-calling/etc. `Coding agents` are specialized in software goals. | +| Agent harness | The tools and the infrastructure around the LLM in an AI Agent. `AI Agent = LLM + Agent harness` | ## General Parameters @@ -65,7 +69,8 @@ Some often used terms. | - | - | - | - | | `-h, --help, --usage` | Print usage and exit | - | - | | `--fit` | Automatically fit to available VRAM | off | Loads as many tensors to the GPU(s) as available VRAM will permit. [PR 1501](https://github.com/ikawrakow/ik_llama.cpp/pull/1501) [PR 1504](https://github.com/ikawrakow/ik_llama.cpp/pull/1504) | -| `--fit-margin N` | Safety VRAM margin in MiB when using `--fit` | 1024 | Increase this value in case of CUDA OOM when loading the model.
Decrease to less than 1024 if the model loads successfully and you feel that too much VRAM has been left unused | +| `--fit-margin N` | Safety VRAM margin in MiB when using `--fit` | 1024 | Increase this value in case of CUDA OOM when loading the model. Decrease to less than 1024 if the model loads successfully and you feel that too much VRAM has been left unused | +| `--gpu-fit-margin GPU1,M1,...` | Per GPU fit margin | - | Set the fit margin per GPU when auto-fitting the model. [PR 1872](https://github.com/ikawrakow/ik_llama.cpp/pull/1872) | | `-wgt, --worst-graph-tokens N` | Number of tokens to use for worst-case graph | - | Control compute buffer sizes for large batches. Provided "as is" for users that understand the limitations, please don't open issues when using this. [PR 1560](https://github.com/ikawrakow/ik_llama.cpp/pull/1560) | | `-t, --threads N` | Number of threads to use during generation | 4 | Try to match the number of physical CPU cores. Avoid odd numbers (e.g. 1,3,...). | | `-tb, --threads-batch N` | Number of threads to use during batch and prompt processing | Same as `--threads` | Same as `--threads` When doing full GPU offload, use a lower number (e.g. 2) | @@ -80,7 +85,7 @@ Some often used terms. | `--minilog` | Print important information | - | For `llama-server`, log request message for completions/response/anthropic and response. The prompt in the json format and the text response are saved in the log file and printed to the console. [PR 1477](https://github.com/ikawrakow/ik_llama.cpp/pull/1477) | | `-fa, --flash-attn` | Enables Flash Attention | on | auto / on / off Improves t/s and reduces memory usage. | | `--no-fa, --no-flash-attn` | Disable Flash Attention | | Alternative parameter to turn of FA. See `--flash-attn` | -| `-mla, --mla-use` | Enable MLA | 3 | 0 / 1 / 2 / 3 For DeepSeek models, and other recent models that are using MLA. 
[PR 188](https://github.com/ikawrakow/ik_llama.cpp/pull/188) [PR 205](https://github.com/ikawrakow/ik_llama.cpp/pull/205) [PR 235](https://github.com/ikawrakow/ik_llama.cpp/pull/235) [PR 243](https://github.com/ikawrakow/ik_llama.cpp/pull/243) [PR 252](https://github.com/ikawrakow/ik_llama.cpp/pull/252) [PR 253](https://github.com/ikawrakow/ik_llama.cpp/pull/253) [PR 273](https://github.com/ikawrakow/ik_llama.cpp/pull/273) [PR 386](https://github.com/ikawrakow/ik_llama.cpp/pull/386) [PR 497](https://github.com/ikawrakow/ik_llama.cpp/pull/497) [PR 943](https://github.com/ikawrakow/ik_llama.cpp/pull/943)| +| `-mla, --mla-use` | Enable MLA | 3 | 0 / 1 / 2 / 3 For DeepSeek models, and other recent models that are using MLA. [PR 188](https://github.com/ikawrakow/ik_llama.cpp/pull/188) [PR 205](https://github.com/ikawrakow/ik_llama.cpp/pull/205) [PR 235](https://github.com/ikawrakow/ik_llama.cpp/pull/235) [PR 243](https://github.com/ikawrakow/ik_llama.cpp/pull/243) [PR 252](https://github.com/ikawrakow/ik_llama.cpp/pull/252) [PR 253](https://github.com/ikawrakow/ik_llama.cpp/pull/253) [PR 273](https://github.com/ikawrakow/ik_llama.cpp/pull/273) [PR 386](https://github.com/ikawrakow/ik_llama.cpp/pull/386) [PR 497](https://github.com/ikawrakow/ik_llama.cpp/pull/497) [PR 943](https://github.com/ikawrakow/ik_llama.cpp/pull/943) [PR 1821](https://github.com/ikawrakow/ik_llama.cpp/pull/1821) | | `-amb, --attention-max-batch` | Max batch size for attention computations | 0 | Specifies the maximum K*Q size in MB we want to tolerate. [PR 237](https://github.com/ikawrakow/ik_llama.cpp/pull/237) | | `-fmoe or --fused-moe` | Fused MoE ffn_up and ffn_gate | - | Speedup for MoE models. [PR 229](https://github.com/ikawrakow/ik_llama.cpp/pull/229) | | `--no-fmoe, --no-fused-moe` | Disable fused MoE | Enabled | See `--fused-moe` | @@ -100,6 +105,7 @@ Some often used terms. 
| `--no-warmup` | Skip warming up the model with an empty run | - | | | `--mlock` | Force system to keep model in RAM rather than swapping or compressing | - | | | `--no-mmap` | Do not memory-map model (slower load but may reduce pageouts) | - | | +| `--ui-mcp-proxy, --webui-mcp-proxy` | Experimental: whether to enable MCP CORS proxy - do not enable in untrusted environments | disabled | Supports a CORS proxy on the `llama-server` backend side. It is required for external MCP servers to work with the llama.cpp web UI. [PR 1904](https://github.com/ikawrakow/ik_llama.cpp/pull/1904) | | `--defer-experts` | Defer expert mmap residency on Linux to reduce model load time | false | Using this flag, expert tensor pages are faulted in on demand rather than being eagerly loaded during initialization. This allows us to reduce cold-start latency, thus improving the load time of MoE models, particularly on systems where users are running models off of storage. [PR 1634](https://github.com/ikawrakow/ik_llama.cpp/pull/1634) | | `-rtr, --run-time-repack` | Repack tensors if interleaved variant is available | - | May improve performance on some systems. [PR 147](https://github.com/ikawrakow/ik_llama.cpp/pull/147) | | `--ctx-checkpoints` | set the number of checkpoints per slot | - | enable checkpoint for recurrent models Qwen3-Next and Qwen3.5-MoE. [PR 1310](https://github.com/ikawrakow/ik_llama.cpp/pull/1310) | @@ -120,21 +126,13 @@ Check the details [here](./speculative.md).
| `-ctkd, --cache-type-k-draft TYPE` | KV cache data type for K for the draft model | - | For draft model, see: `-ctk` | | `-ctvd, --cache-type-v-draft TYPE` | KV cache data type for V for the draft model | - | For draft model, see: `-ctk` | | `-draft, --draft-params` | Comma-separated list of draft model parameters | - | | -| `--spec-ngram-size-n N` | ngram size N for ngram-simple/ngram-map speculative decoding, length of lookup n-gram| 12 | [PR 1261](https://github.com/ikawrakow/ik_llama.cpp/pull/1261) | -| `--spec-ngram-size-m N` | ngram size M for ngram-simple/ngram-map speculative decoding, length of draft m-gram | 48 | [PR 1261](https://github.com/ikawrakow/ik_llama.cpp/pull/1261) | -| `--spec-ngram-min-hits N` | minimum hits for ngram-map speculative decoding | 1 | [PR 1261](https://github.com/ikawrakow/ik_llama.cpp/pull/1261) | -| `--spec-type Name` | Comma-separated list of draft model parameters | - | none / ngram - cache / ngram - simple / ngram - map - k / ngram - map - k4v / ngram - mod / suffix [PR 1261](https://github.com/ikawrakow/ik_llama.cpp/pull/1261) [PR 1646](https://github.com/ikawrakow/ik_llama.cpp/pull/1646) | -| `--spec-stage SPEC[:k=v,...]` | Add an explicit speculative stage; repeat once for a supported two-stage chain | - | Supported two-stage shape: self-spec first, then `mtp` or `draft` fallback. 
[PR 1789](https://github.com/ikawrakow/ik_llama.cpp/pull/1789) | -| `-mtp, --multi-token-prediction` | | - | MTP decoding [PR 1270](https://github.com/ikawrakow/ik_llama.cpp/pull/1270) [1698](https://github.com/ikawrakow/ik_llama.cpp/pull/1698) | -| `-no-mtp, --no-multi-token-prediction` | | - | MTP decoding [PR 1270](https://github.com/ikawrakow/ik_llama.cpp/pull/1270) [1698](https://github.com/ikawrakow/ik_llama.cpp/pull/1698) | -| `--draft-max` | | - | MTP decoding [PR 1270](https://github.com/ikawrakow/ik_llama.cpp/pull/1270) [1698](https://github.com/ikawrakow/ik_llama.cpp/pull/1698) | -| `--draft-p-min` | | - | MTP decoding [PR 1270](https://github.com/ikawrakow/ik_llama.cpp/pull/1270) [1698](https://github.com/ikawrakow/ik_llama.cpp/pull/1698) | +| `--spec-type SPEC[:k=v,...]` | Canonical speculative stage entry; repeat to configure the supported two-stage chain | - | Types: `none`, `draft`, `mtp`, `ngram-cache`, `ngram-simple`, `ngram-map-k`, `ngram-map-k4v`, `ngram-mod`, `suffix`. Canonical keys: `n_max`, `n_min`, `p_min`, `ngram_size_n`, `ngram_size_m`, `ngram_min_hits`, `suffix_min_match_len`, `suffix_max_depth`, `suffix_corpus`. String values may escape commas as `\,` or quote the value inside the stage payload. 
Example: `--spec-type ngram-mod:n_max=64,n_min=2,ngram_size_n=8 --spec-type mtp:n_max=1,p_min=0.0` | | `--spec-autotune` | Automatically tune speculative params to maximize tokens/sec | - | Automatically determines the near-optimal arguments for the type of speculation being performed [PR 1595](https://github.com/ikawrakow/ik_llama.cpp/pull/1595) | | `--recurrent-ckpt-mode MODE` | Checkpoint strategy for recurrent/hybrid speculative decoding | auto | One of: - `auto` auto-select: per-step if CUDA full-GPU, gpu-fallback otherwise - `per-step` save SSM state per draft step in VRAM; no re-decode on rejection - `gpu-fallback` copy state to GPU buffer; re-decode on rejection - `cpu` serialise state via llama_state_seq; re-decode on rejection [PR 1669](https://github.com/ikawrakow/ik_llama.cpp/pull/1669) [PR 1774](https://github.com/ikawrakow/ik_llama.cpp/pull/1774) | Notes: -- `--spec-type` cannot be combined with `--spec-stage`. +- Legacy `--spec-stage`, `--draft-*`, `--spec-ngram-*`, `--suffix-*`, and `-mtp` flags are rejected with replacement guidance. - Explicit stage chains currently support at most two stages. - Supported self-spec stage names are `ngram-cache`, `ngram-simple`, `ngram-map-k`, `ngram-map-k4v`, `ngram-mod`, and `suffix`. - Composite stage chains disable speculative autotune. @@ -163,7 +161,7 @@ Good overview on [kalomaze/llm_samplers_explained.md](https://gist.github.com/ka | `--sampling-seq SEQUENCE` | Simplified sequence for samplers | dkfypmxntw | Same as `--samplers`, just shorter format. | | `--banned-string-file` | File path of the list of banned strings on each line | | | | `--banned-n` | Number of tokens banned in the phrase during rewind. 
| -1 | -1 means all tokens [PR 1185](https://github.com/ikawrakow/ik_llama.cpp/pull/1185) | -| `--expiring-logit-bias-file FILENAME` | Load bias states from a custom file format | - | [PR 1731](https://github.com/ikawrakow/ik_llama.cpp/pull/1731) | +| `--expiring-logit-bias-file FILENAME` | Load bias states from a custom file format | - | [PR 1731](https://github.com/ikawrakow/ik_llama.cpp/pull/1731) [PR 1770](https://github.com/ikawrakow/ik_llama.cpp/pull/1770) | ## Prompt Template @@ -201,7 +199,8 @@ MLA models already have the cache compressed, it doesn't really makes sense to c | `-nkvo, --no-kv-offload` | Disable KV offload | - | Keep KV on CPU. | | `-ctk, --cache-type-k TYPE` | KV cache data type for K | f16 | Reduces K size in KV which improves speed and reduces memory requirements, but may reduce output quality. | | `-ctv, --cache-type-v TYPE` | KV cache data type for V | f16 | See: `-ctk` | -| `--mtmd-kq-type type` | Define the type used for the `K*Q` matrix multiplication | - | Use une of `f16`/`bf16` instead of `f32` to improve speed up multimodal | +| `-mtprot, --mtp-requantize-output-tensor type` | Use output requantized to type for MTP | - | Improves TG performance when using MTP. It requantizes the tensor on-the-fly while loading the model; see [PR 1809](https://github.com/ikawrakow/ik_llama.cpp/pull/1809) for details, and [PR 1810](https://github.com/ikawrakow/ik_llama.cpp/pull/1810) `--extra-output-tensor` for an offline requantization alternative.
| +| `--mtmd-kq-type type` | Define the type used for the `K*Q` matrix multiplication | - | Use one of `f16`/`bf16` instead of `f32` to improve speed up multimodal | | `--no-context-shift` | Disable context-shift | - | | | `--context-shift` | Set context-shift | on | auto / on / off / 0 / 1 [PR 973](https://github.com/ikawrakow/ik_llama.cpp/pull/973) | @@ -318,7 +317,7 @@ python3 gguf-py/scripts/gguf_dump.py /models/Qwen_Qwen3-0.6B-IQ4_NL.gguf - `-ngl`, `-ot`, `--cpu-moe`, `--n-cpu-moe N` - For MoE models, use a number greater than the number of model layers with `-ngl`. If unsure, use a large number like `-ngl 999`. - - It's good to explicitly put up/down/gate onto the GPU for speedups. + - It's good to explicitly put up/down/gate onto the GPU for speedups. - Up/Gate shouldn't be on separate GPU devices because it might cause a bit of a deadlock. - For models with shared experts (like GPT-OSS), they should end up on GPU. - In some quants the layers aren't uniform so it can be better to skip larger layers if more smaller blocks will fit without empty space where nothing fits. @@ -328,7 +327,7 @@ python3 gguf-py/scripts/gguf_dump.py /models/Qwen_Qwen3-0.6B-IQ4_NL.gguf - In general, in a single GPU + CPU system, you just do something like this: `-ngl 999` To put all layers in VRAM by default - + `-ot "blk.(?:[0-9]|[1-7][0-9]|[8][0-7]).ffn._exps.=CPU"` To create exceptions and put back in ram anything that has "ffn" and "_exps" in its name, and that sits in layers called "blk.n", where "n" (the lawyer number) is any match between 0 and 9, or between 1 to 7 + 0 to 9 (aka a number between 10 and 79), or 8 + 0 to 7 (aka a number between 80 and 87). Basically a complicated way of saying put all experts from layer 0 to 87 in ram. Experts from layer 88 to 93 (there's 93 layers in qwen3vl 235b) can sit in VRAM still. (Thats all I can load on a 5090). @@ -342,7 +341,7 @@ C. 
Other tips - If you are not happy with the allocations done by `--fit` across GPUs, use `-ts` to manually tweak. - Look for `ReBAR`/`Resizable BAR` support for your Motherboard, CPU, BIOS/UEFI and GPU. Then for the "patched driver" for your GPUs to enable GPU to GPU direct communication. -### Common GPU configurations and popular models +### Common GPU configurations and popular models WIP @@ -368,7 +367,7 @@ WIP | `-grt, --graph-reduce-type` | Type for data exchange between GPUs | f32 | q8_0 / bf16 / f16 / f32 Reduce the data transferred between GPUs [PR 1154](https://github.com/ikawrakow/ik_llama.cpp/pull/1154) | | `-smgs, --split-mode-graph-scheduling` | Force Split Mode Graph Scheduling | 0 | [PR 1068](https://github.com/ikawrakow/ik_llama.cpp/pull/1068) | | `--max-gpu N` | Define (and use) a maximum number of GPUs per layer with split mode "graph" | | This is of interest when there are more than 2 GPUs available, but using all of them leads to a lower performance than using just 2 (or using the default split mode "layer") [PR 1051](https://github.com/ikawrakow/ik_llama.cpp/pull/1051) | -| `-cuda, --cuda-params` | Comma-separated list of cuda parameters | - | Powerful way to tweak Fusion, GPU offload threshold, and MMQ-ID threshold. [PR 910](https://github.com/ikawrakow/ik_llama.cpp/pull/910) | +| `-cuda, --cuda-params` | Comma-separated list of cuda parameters | - | Powerful way to tweak Fusion, GPU offload threshold, and MMQ-ID threshold. [PR 910](https://github.com/ikawrakow/ik_llama.cpp/pull/910) [PR 1813](https://github.com/ikawrakow/ik_llama.cpp/pull/1813) | ## Model Options @@ -378,9 +377,7 @@ WIP | `--override-kv KEY=TYPE:VALUE` | Override model metadata by key | - | Advanced option to override model metadata by key. May be specified multiple times. types: int, float, bool, str. Example: `--override-kv tokenizer.ggml.add_bos_token=bool:false` | | `-m, --model FNAME` | Model path | models/$filename | Mandatory, the GGUF model file to be served. 
| | `-md, --model-draft FNAME` | Draft model for speculative decoding | unused | Required when an explicit `draft` stage is used. | -| `--draft-max, --draft, --draft-n N` | Global speculative draft cap, or fallback value for stages without an explicit `n_max` override | 16 | Also used by single-stage MTP and draft-model speculation. | -| `--draft-min, --draft-n-min N` | Global minimum speculative draft threshold, or fallback value for stages without an explicit `n_min` override | 0 | | -| `--draft-p-min P` | Global minimum speculative decoding probability (greedy), or fallback value for stages without an explicit `p_min` override | 0.8 | | +| `--spec-type SPEC[:k=v,...]` | Canonical speculative stage entry; repeat for the supported two-stage chain | none | Use stage-local keys like `n_max`, `n_min`, `p_min`, `ngram_size_n`, `ngram_size_m`, `ngram_min_hits`, `suffix_min_match_len`, `suffix_max_depth`, and `suffix_corpus`. | ### Request-Level Speculative Overrides @@ -453,6 +450,7 @@ llama-imatrix -m /models/model-bf16.gguf -f /models/calibration_data_v5_rc.txt - | - | - | - | - | | `--layer-similarity or -lsim` | Collect statistics about activations change caused by a layer using cosine similarity | - | [PR 328](https://github.com/ikawrakow/ik_llama.cpp/pull/328) | | `--hide-imatrix` | Store "top_secret" in the imatrix data file name | - | And in calibration dataset fields, and zeros in the batch size and number of chunks used to compute the imatrix. [PR 329](https://github.com/ikawrakow/ik_llama.cpp/pull/329) | +| `--output-draft FNAME` | Paired draft output file | derived from `--output` | [PR 1803](https://github.com/ikawrakow/ik_llama.cpp/pull/1803) | Notes: - Use `convert_imatrix_gguf_to_dat.py` to convert the "new" GGUF imatrix files to the format supported here.
[PR 1405](https://github.com/ikawrakow/ik_llama.cpp/pull/1405) @@ -479,6 +477,7 @@ llama-gguf-split --split --split-max-size 1G --no-tensor-first-split /models/mod | `--partial-requant` | quantize only missing split files in the split quantized .gguf destination directory | - | - | | `--symmetric-q40` | Use [-7:7] range for Q4_0 quantization (turns off imatrix) | - | This is useful for some models that have been trained to int4 using this specific quantization range (e.g., Kimi-2.6) [PR 1677](https://github.com/ikawrakow/ik_llama.cpp/pull/1677) | | `--slow-iq2ks` | Use the original very slow IQ2_KS quantization method | - | Alternative to the compile-time option [PR 1677](https://github.com/ikawrakow/ik_llama.cpp/pull/1677) | +| `--extra-output-tensor ggml_type` | Requantize and add output tensor of that type. | - | [PR 1810](https://github.com/ikawrakow/ik_llama.cpp/pull/1810). See `--mtp-requantize-output-tensor type` for an on-the-fly alternative. | ### Build Arguments @@ -531,7 +530,7 @@ WIP ## Graph parallel models -Models architectures [supported](https://github.com/ikawrakow/ik_llama.cpp/blob/90de8e31db79fb3503da5e20db0d3e46726a2117/src/llama.cpp#L1986) by `--split-mode graph` +Model architectures [supported](https://github.com/ikawrakow/ik_llama.cpp/blob/022bd00aab9ec8428c4811275de89796c677d278/src/llama.cpp#L3056) by `--split-mode graph` ``` LLM_ARCH_LLAMA, @@ -552,4 +551,9 @@ LLM_ARCH_STEP35, LLM_ARCH_QWEN35, LLM_ARCH_QWEN35MOE, LLM_ARCH_GEMMA4, +LLM_ARCH_DEEPSEEK2, +LLM_ARCH_GLM_DSA, +LLM_ARCH_MISTRAL4, +LLM_ARCH_MELLUM, +LLM_ARCH_LAGUNA, ``` diff --git a/docs/speculative.md b/docs/speculative.md index 29da3328..6665dc3a 100644 --- a/docs/speculative.md +++ b/docs/speculative.md @@ -33,18 +33,18 @@ An example to use this approach can be the rewriting of source code by a LLM. This implementation looks for the last n-gram in history that matches the current n-gram and creates a draft using the m tokens following the matched n-gram.
It is the simplest self-speculative approach with minimal overhead. ``` -llama-server [...] --spec-type ngram-simple --draft-max 64 +llama-server [...] --spec-type ngram-simple:n_max=64 ``` #### n-gram Map Key (`ngram-map-k`) -This implementation looks for the current n-gram of size n (called the _key_) in the token history. If the key n-gram is followed by the same m tokens (called the _mgram_) multiple times, it creates a draft using these m tokens. This approach requires a minimum number of occurrences (argument `--spec-ngram-min-hits`, default is 1) before generating drafts. +This implementation looks for the current n-gram of size n (called the _key_) in the token history. If the key n-gram is followed by the same m tokens (called the _mgram_) multiple times, it creates a draft using these m tokens. This approach requires a minimum number of occurrences (stage key `ngram_min_hits`, default is 1) before generating drafts. The number of accepted tokens is stored for each used n-gram. **Example:** ``` -llama-server [...] --spec-type ngram-map-k --draft-max 64 +llama-server [...] --spec-type ngram-map-k:n_max=64,ngram_min_hits=1 ``` #### n-gram Map Key-4-Values (`ngram-map-k4v`) @@ -55,7 +55,7 @@ The number of accepted tokens is stored for each used n-gram. **Example:** Server options to be used if there are a lot of longer repetitions. ``` -llama-server [...] --spec-type ngram-map-k4v --spec-ngram-size-n 8 --spec-ngram-size-m 8 --spec-ngram-min-hits 2 --draft-max 64 +llama-server [...] --spec-type ngram-map-k4v:n_max=64,ngram_size_n=8,ngram_size_m=8,ngram_min_hits=2 ``` ### n-gram Mod (`ngram-mod`) @@ -80,9 +80,9 @@ Currently, a single hash pool is shared across all server slots, so different re # notes: # - small `n` are not recommended # - MoEs require long drafts -# - dense models: can reduce `--draft-min` and `--draft-max` +# - dense models: can reduce `n_min` and `n_max` -llama-server ... 
--spec-type ngram-mod --spec-ngram-size-n 24 --draft-min 48 --draft-max 64 +llama-server ... --spec-type ngram-mod:n_max=64,n_min=48,ngram_size_n=24 ``` Applications: @@ -103,57 +103,78 @@ Example Video: ## Command-Line Options -If a draft model is combined with a draftless decoding the draftless decoding has higher precedence. +The canonical startup surface is repeated `--spec-type SPEC[:k=v,...]`. Legacy `--spec-stage`, `--draft-*`, `--spec-ngram-*`, `--suffix-*`, and `-mtp` flags are rejected with replacement guidance. -``` ---draft, --draft-n, --draft-max N number of tokens to draft for speculative decoding (default: 16) - (env: LLAMA_ARG_DRAFT_MAX) ---draft-min, --draft-n-min N minimum number of draft tokens to use for speculative decoding - (default: 0) - (env: LLAMA_ARG_DRAFT_MIN) -[...] ---spec-type [none|ngram-cache|ngram-simple|ngram-map-k|ngram-map-k4v|ngram-mod] - type of speculative decoding to use when no draft model is provided - (default: none) ---spec-ngram-size-n N ngram size N for ngram-simple/ngram-map speculative decoding, length - of lookup n-gram (default: 12) ---spec-ngram-size-m N ngram size M for ngram-simple/ngram-map speculative decoding, length - of draft m-gram (default: 48) ---spec-ngram-min-hits N minimum hits for ngram-map speculative decoding (default: 1) -``` +### `--spec-type SPEC[:k=v,...]` -### `--spec-type TYPE` - -Specifies a type of speculative decoding without draft model. +Each `--spec-type` entry defines one speculative stage. Repeat it to configure the supported two-stage path. 
| Type | Description | |------|-------------| -| `none` | No speculative decoding (default) | +| `none` | No speculative decoding | +| `draft` | Draft-model speculative decoding; pair with `-md/--model-draft` | +| `mtp` | Embedded or assistant-backed MTP | | `ngram-cache` | Use n-gram cache lookup | | `ngram-simple` | Use simple n-gram pattern matching | -| `ngram-map-k` | Use n-gram pattern matching with n-gram-keys | -| `ngram-map-k4v` | Use n-gram pattern matching with n-gram-keys and up to four m-gram values (experimental) | -| `ngram-mod` | Use basic ngram hasher for speculative decoding with shared pool | +| `ngram-map-k` | Use n-gram pattern matching with n-gram keys | +| `ngram-map-k4v` | Use n-gram pattern matching with n-gram keys and up to four m-gram values | +| `ngram-mod` | Use the shared n-gram hasher | +| `suffix` | Use suffix-tree speculative decoding | + +Canonical stage keys: + +| Key | Meaning | +|-----|---------| +| `n_max` | Maximum drafted tokens for that stage | +| `n_min` | Minimum usable drafted tokens for that stage | +| `p_min` | Minimum speculative probability threshold | +| `ngram_size_n` | Lookup n-gram size | +| `ngram_size_m` | Draft m-gram size | +| `ngram_min_hits` | Minimum matching hits for n-gram map stages | +| `suffix_min_match_len` | Minimum suffix context match length | +| `suffix_max_depth` | Maximum suffix-tree depth | +| `suffix_corpus` | Optional suffix corpus file for pre-warming | + +String-valued stage keys such as `suffix_corpus` need shell-safe quoting when the value contains commas. From a normal shell, quote the value inside the stage payload so the parser sees the comma as part of the string value. + +Example shell-safe form: -**Example:** Server-instance used to refactor source code. ```bash -./llama-server [...] --spec-type ngram-simple +./llama-server [...] 
\ + --spec-type "suffix:n_max=16,n_min=2,suffix_min_match_len=5,suffix_max_depth=64,suffix_corpus='/tmp/spec,type-corpus.json'" ``` -### `--spec-ngram-size-n N` +If you are constructing `argv` directly without shell unescaping, the parser also accepts escaped commas as `\,`. -Sets the size N of the lookup n-gram for n-gram map based speculative decoding. -The n-gram size N determines how many tokens in a row to look back when searching for matching patterns. +Examples: -### `--spec-ngram-size-m M` +```bash +# Single-stage MTP +./llama-server [...] --spec-type mtp:n_max=1,p_min=0.0 -Sets the size M of the draft m-gram for n-gram map based speculative decoding. -The m-gram size determines how many tokens to draft when a match is found. -Larger values can provide more speedup but may reduce acceptance rate. +# Single-stage ngram-mod +./llama-server [...] --spec-type ngram-mod:n_max=64,n_min=48,ngram_size_n=24 -### `--spec-ngram-min-hits H` +# Draft-model speculation +./llama-server [...] --model-draft draft.gguf --spec-type draft:n_max=4,p_min=0.0 -This option defines how often a key has to appear in the token history to be used as a draft (default is 1). +# Two-stage self-spec -> MTP fallback +./llama-server [...] \ + --spec-type ngram-mod:n_max=64,n_min=2,ngram_size_n=8 \ + --spec-type mtp:n_max=1,p_min=0.0 + +# Suffix stage with pre-warmed corpus +./llama-server [...] \ + --spec-type suffix:n_max=16,n_min=2,suffix_min_match_len=5,suffix_max_depth=64,suffix_corpus=/path/to/corpus.json + +# Suffix stage with a comma-bearing corpus path from a normal shell +./llama-server [...] \ + --spec-type "suffix:n_max=16,n_min=2,suffix_min_match_len=5,suffix_max_depth=64,suffix_corpus='/tmp/spec,type-corpus.json'" +``` + +### `--spec-autotune` + +Autotunes the active stage parameters and reports the best configuration back as a canonical `--spec-type ...` snippet. ## Statistics Each speculative decoding implementation prints statistics. 
@@ -180,4 +201,3 @@ statistics ngram_map_k: #calls(b,g,a) = 6 1690 26, #gen drafts = 26, #acc drafts - `#gen tokens`: number of tokens generated by this implementation (including rejected tokens) - `#acc tokens`: number of tokens accepted by the main model - `dur(b,g,a): durations of begin (new prompt), generation and accumulation (process acceptance). - diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index cff41565..0cf70853 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -1232,7 +1232,7 @@ int main(int argc, char ** argv) { } if (!use_paired_gemma4_mtp && llama_model_is_gemma4_mtp_assistant(model) && !params.process_output) { - fprintf(stderr, "%s: warning: standalone Gemma 4 assistant imatrix does not exercise the assistant layers. Use '-m -md -mtp' for meaningful calibration.\n", __func__); + fprintf(stderr, "%s: warning: standalone Gemma 4 assistant imatrix does not exercise the assistant layers. Use '-m -md --spec-type mtp:n_max=1,p_min=0.0' for meaningful calibration.\n", __func__); } const int n_ctx_train = llama_n_ctx_train(model); diff --git a/examples/mtmd/mtmd-helper.cpp b/examples/mtmd/mtmd-helper.cpp index 7a5d0ee5..ab227d47 100644 --- a/examples/mtmd/mtmd-helper.cpp +++ b/examples/mtmd/mtmd-helper.cpp @@ -183,7 +183,7 @@ static int32_t mtmd_helper_decode_image_chunk_impl( } const llama_model * model = llama_get_model(lctx); - int n_mmproj_embd = llama_model_n_embd_inp(model); + int n_mmproj_embd = llama_model_n_embd(model); int n_pos_per_embd = mtmd_decode_use_mrope(ctx) ? 
4 : 1; int32_t n_tokens = mtmd_input_chunk_get_n_tokens(chunk); diff --git a/examples/mtmd/mtmd.cpp b/examples/mtmd/mtmd.cpp index e89edc5d..0490f2c6 100644 --- a/examples/mtmd/mtmd.cpp +++ b/examples/mtmd/mtmd.cpp @@ -307,6 +307,11 @@ struct mtmd_context { img_end = ""; //image_preproc = std::make_unique(ctx_v); } + else if (proj == PROJECTOR_TYPE_KIMIK25) { + // template renders: <|media_begin|>image<|media_content|> <|media_end|> + img_beg = "<|media_begin|>image<|media_content|>"; + img_end = "<|media_end|>"; + } } void init_audio() { diff --git a/examples/server/README.md b/examples/server/README.md index be3a7f74..a56aeecd 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -210,10 +210,10 @@ model: -m, --model FNAME model path (default: models/$filename with filename from --hf-file or --model-url if set, otherwise models/7B/ggml-model-f16.gguf) -md, --model-draft FNAME draft model for speculative decoding (default: unused) - --spec-stage SPEC[:k=v,...] - explicit speculative stage. repeat once for a supported two-stage chain - examples: --spec-stage ngram-mod:n_max=64,n_min=2 --spec-stage mtp:n_max=1 - supported two-stage shape: self-spec first, then mtp or draft fallback + --spec-type SPEC[:k=v,...] + canonical speculative stage entry; repeat for a supported two-stage chain + examples: --spec-type mtp:n_max=1,p_min=0.0 + --spec-type ngram-mod:n_max=64,n_min=2,ngram_size_n=8 --spec-type mtp:n_max=1,p_min=0.0 -mu, --model-url MODEL_URL model download url (default: unused) -hfr, --hf-repo REPO Hugging Face model repository (default: unused) -hff, --hf-file FILE Hugging Face model file (default: unused) @@ -966,15 +966,15 @@ To know the `id` of the adapter, use GET `/lora-adapters` ### Composite speculative decoding -Use `--spec-stage` for explicit stage chains. The currently supported two-stage shape is self-spec first, then `mtp` or `draft` fallback. +Use repeated `--spec-type SPEC[:k=v,...]` entries for explicit stage chains. 
The currently supported two-stage shape is self-spec first, then `mtp` or `draft` fallback. Example with `ngram-mod` plus MTP fallback: ```bash ./build/bin/llama-server \ --model /models/target-mtp.gguf \ - --spec-stage ngram-mod:n_max=64,n_min=2,ngram_size_n=8 \ - --spec-stage mtp:n_max=1,p_min=0.0 + --spec-type ngram-mod:n_max=64,n_min=2,ngram_size_n=8 \ + --spec-type mtp:n_max=1,p_min=0.0 ``` Example with `ngram-mod` plus draft-model fallback: @@ -983,14 +983,13 @@ Example with `ngram-mod` plus draft-model fallback: ./build/bin/llama-server \ --model /models/target.gguf \ --model-draft /models/draft.gguf \ - --spec-stage ngram-mod:n_max=64,n_min=2,ngram_size_n=8 \ - --spec-stage draft:n_max=4,p_min=0.0 + --spec-type ngram-mod:n_max=64,n_min=2,ngram_size_n=8 \ + --spec-type draft:n_max=4,p_min=0.0 ``` Notes: -- Use `--spec-type` when you want a single self-spec stage only. -- `--spec-type` cannot be combined with `--spec-stage`. +- Use `--spec-type` for both single-stage and two-stage startup configuration. - Explicit stage chains currently support at most two stages. ### Change system prompt on runtime diff --git a/examples/server/public/index.html.gz b/examples/server/public/index.html.gz index 8a136975..073c5da0 100644 Binary files a/examples/server/public/index.html.gz and b/examples/server/public/index.html.gz differ diff --git a/examples/server/public_llamacpp/index_llamacpp.html b/examples/server/public_llamacpp/index_llamacpp.html index 92895f26..3caa34b4 100644 --- a/examples/server/public_llamacpp/index_llamacpp.html +++ b/examples/server/public_llamacpp/index_llamacpp.html @@ -5,116 +5,212 @@ -
diff --git a/examples/server/public_llamacpp/index_llamacpp.html.gz b/examples/server/public_llamacpp/index_llamacpp.html.gz index f9b0f8f7..50078611 100644 Binary files a/examples/server/public_llamacpp/index_llamacpp.html.gz and b/examples/server/public_llamacpp/index_llamacpp.html.gz differ diff --git a/examples/server/server-common.cpp b/examples/server/server-common.cpp index 3ec73450..865b1f13 100644 --- a/examples/server/server-common.cpp +++ b/examples/server/server-common.cpp @@ -1250,7 +1250,7 @@ const mtmd::input_chunk_ptr& server_tokens::find_chunk(size_t idx) const { if (it != map_idx_to_media.end()) { return it->second; } - throw std::runtime_error("Chunk not found"); + throw std::runtime_error("Chunk not found, or idx is not the first token of a chunk"); } void server_tokens::push_back(llama_token tok) { @@ -1295,7 +1295,7 @@ void server_tokens::push_back(server_tokens& tokens) { // Assert if we are copying MTMD chunks to a server_tokens that does not have mtmd. // We could also just check, but this will prevent silently dropping MTMD data. GGML_ASSERT(has_mtmd); - for (auto it = tokens.map_idx_to_media.begin(); it != tokens.map_idx_to_media.end(); ) { + for (auto it = tokens.map_idx_to_media.begin(); it != tokens.map_idx_to_media.end(); it++) { auto* chunk = tokens.map_idx_to_media[it->first].get(); mtmd::input_chunk_ptr new_chunk(mtmd_input_chunk_copy(chunk)); map_idx_to_media[start_idx + it->first] = std::move(new_chunk); @@ -1369,18 +1369,10 @@ void server_tokens::keep_first(size_t n) { if (n == tokens.size()) { return; // nothing to do } - // we throw an error if we try to remove a token in the middle of an image - // for ex. 
with input of 5 text tokens and 2 images: - // [0] [1] [2] [3] [4] [img0] [img0] [img0] [img1] [img1] - // n 1 2 3 4 5 6 7 8 9 10 - // allowed to resize ^ ^ - // disallowed to resize ^ ^ ^ - if (n > 0) { - llama_token last_token = tokens[n - 1]; - // make sure we never remove tokens in the middle of an image - if (last_token == LLAMA_TOKEN_NULL) { - find_chunk(n - 1); // will throw an error if the token is not begin-of-chunk - } + // It is an internal error if the longest common prefix ends in the middle of an image + llama_token first_removed_token = tokens[n]; + if (first_removed_token == LLAMA_TOKEN_NULL) { + find_chunk(n); // will throw an error if the token is not begin-of-chunk } // remove all image chunks that are not used anymore for (auto it = map_idx_to_media.begin(); it != map_idx_to_media.end(); ) { diff --git a/examples/server/server-context.cpp b/examples/server/server-context.cpp index 25d2e09d..dcf3469d 100644 --- a/examples/server/server-context.cpp +++ b/examples/server/server-context.cpp @@ -136,12 +136,6 @@ static bool server_slot_prompt_batch_overlaps( return slot.prompt_batch_i0 < batch_i1 && batch_i0 < slot.prompt_batch_i1; } - -static bool params_use_gemma4_external_mtp(const gpt_params & params_base) { - return params_base.has_mtp && - llama_model_is_gemma4_mtp_assistant(params_base.speculative.model_dft); -} - struct server_mtp_warmup { llama_context * ctx_tgt; server_slot * slot; @@ -164,74 +158,12 @@ static bool server_response_needs_chat_parse(oaicompat_type oaicompat) { oaicompat == OAICOMPAT_TYPE_RESP; } -void server_speculative_checkpoint::clear() { - valid = false; - per_step_enabled = false; - n_past = 0; - sampled = LLAMA_TOKEN_NULL; - - if (sampler != nullptr) { - common_sampler_free(sampler); - sampler = nullptr; - } -} - -static void discard_speculative_checkpoint(server_slot & slot, llama_context * ctx) { - slot.spec_ckpt.clear(); - llama_spec_ckpt_discard(ctx); -} - -static bool save_speculative_checkpoint(server_slot & slot, 
llama_model * model, llama_context * ctx, int ckpt_mode) { - slot.spec_ckpt.clear(); - const int32_t n_pre_spec_tokens = slot.cache_tokens.n_tokens() - (int32_t)(slot.drafted.size() + 1); - slot.spec_ckpt.n_past = slot.cache_tokens.pos_next(n_pre_spec_tokens); - slot.spec_ckpt.sampled = slot.sampled; - - const int max_tokens = (int)slot.drafted.size() + 1; - const int actual_mode = llama_spec_ckpt_init(ctx, ckpt_mode, max_tokens); - if (actual_mode == LLAMA_SPEC_CKPT_NONE) { - return false; - } - slot.spec_ckpt.per_step_enabled = (actual_mode == LLAMA_SPEC_CKPT_PER_STEP); - - slot.spec_ckpt.valid = llama_spec_ckpt_save(ctx, slot.id); - if (!slot.spec_ckpt.valid) { - llama_spec_ckpt_discard(ctx); - return false; - } - - slot.spec_ckpt.sampler = common_sampler_init(model, slot.sparams); - if (slot.spec_ckpt.sampler == nullptr) { - discard_speculative_checkpoint(slot, ctx); - return false; - } - - common_sampler_clone(slot.ctx_sampling, slot.spec_ckpt.sampler); - return true; -} - -static void server_remove_speculative_stage(common_params_speculative & spec, common_speculative_type type) { - spec.stages.erase(std::remove_if(spec.stages.begin(), spec.stages.end(), [type](const common_speculative_stage_params & stage) { - return stage.type == type; - }), spec.stages.end()); - - if (spec.type == type) { - spec.type = COMMON_SPECULATIVE_TYPE_NONE; - const auto resolved = spec.get_resolved_stages(); - spec.type = resolved.empty() ? 
COMMON_SPECULATIVE_TYPE_NONE : resolved.front().type; - } -} - -static bool server_speculative_has_mtp(const common_params_speculative & spec) { - return spec.has_stage_type(COMMON_SPECULATIVE_TYPE_MTP); -} - static bool server_speculative_has_dflash(const common_params_speculative & spec) { return spec.has_stage_type(COMMON_SPECULATIVE_TYPE_DFLASH); } static bool server_speculative_has_target_features(const common_params_speculative & spec) { - return server_speculative_has_mtp(spec) || server_speculative_has_dflash(spec); + return spec.has_stage_type(COMMON_SPECULATIVE_TYPE_MTP) || server_speculative_has_dflash(spec); } static bool server_speculative_same_stage_types( @@ -262,7 +194,8 @@ static void server_reject_dead_speculative_request_overrides(const json & data) json_value_ptr(data, "speculative.ngram_size_m") != nullptr || json_value_ptr(data, "speculative.ngram_min_hits") != nullptr || json_value_ptr(data, "speculative.suffix_min_match_len") != nullptr || - json_value_ptr(data, "speculative.suffix_max_depth") != nullptr) { + json_value_ptr(data, "speculative.suffix_max_depth") != nullptr || + json_value_ptr(data, "speculative.suffix_corpus") != nullptr) { throw std::runtime_error("Error: structural speculative overrides are startup-only; per-request overrides only support speculative.n_max, speculative.n_min, speculative.p_min, and speculative.stages"); } } @@ -322,11 +255,8 @@ server_context::~server_context() { if (slot.ctx_sampling != nullptr) { common_sampler_free(slot.ctx_sampling); } - slot.spec_ckpt.clear(); common_speculative_free(slot.spec); slot.spec = nullptr; - slot.ctx_dft = nullptr; - llama_batch_free(slot.batch_spec); } if (ctx) { @@ -340,16 +270,7 @@ server_context::~server_context() { } // Free multimodal mtmd_free(mctx); - // Free draft model and context if they exist - if (ctx_draft) { - llama_free(ctx_draft); - ctx_draft = nullptr; - } - if (model_draft) { - llama_free_model(model_draft); - model_draft = nullptr; - } - + 
params_base.speculative.clear_dft(); llama_batch_free(batch); } @@ -372,18 +293,7 @@ bool server_context::load_model(const gpt_params& params_) { add_bos_token = llama_should_add_bos_token(model); has_eos_token = llama_add_eos_token(model) != 1; - if (params_base.has_mtp && params_base.n_parallel > 1) { - LOG_WARNING("MTP is not supported with parallel slots yet, disabling MTP to avoid cross-slot corruption.\n", { - {"n_parallel", params_base.n_parallel}, - }); - params_base.has_mtp = false; - if (params_base.speculative.type == COMMON_SPECULATIVE_TYPE_MTP) { - params_base.speculative.type = COMMON_SPECULATIVE_TYPE_NONE; - } - params_base.speculative.model.clear(); - params_base.speculative.params.clear(); - params_base.speculative.model_dft = nullptr; - } + common_speculative_prepare_startup(params_base, false); if (server_speculative_has_dflash(params_base.speculative) && params_base.n_parallel > 1) { LOG_ERROR("DFlash is currently limited to a single server slot (-np 1).\n", { @@ -391,9 +301,8 @@ bool server_context::load_model(const gpt_params& params_) { }); return false; } - - bool has_draft_model = !params_base.speculative.model.empty() || !params_base.speculative.params.empty(); - std::string& mmproj_path = params_base.mmproj.path; + const bool has_draft_model = params_base.speculative.has_dft(); + std::string & mmproj_path = params_base.mmproj.path; if (!mmproj_path.empty()) { mtmd_context_params mparams = mtmd_context_params_default(); mparams.use_gpu = params_base.mmproj_use_gpu; @@ -407,10 +316,10 @@ bool server_context::load_model(const gpt_params& params_) { mparams.image_max_tokens = params_base.image_max_tokens; mctx = mtmd_init_from_file(mmproj_path.c_str(), model, mparams); if (mctx == nullptr) { - LOG_ERROR("failed to load multimodal model, '%s'\n", mmproj_path.c_str()); + LOG_ERROR("failed to load multimodal model, %s\n", mmproj_path.c_str()); return false; } - LOG_INFO("loaded multimodal model, '%s'\n", mmproj_path.c_str()); + LOG_INFO("loaded 
multimodal model, %s\n", mmproj_path.c_str()); //if (params.n_cache_reuse) { // params_base.n_cache_reuse = 0; @@ -421,86 +330,22 @@ bool server_context::load_model(const gpt_params& params_) { LOG_ERROR("%s\n", "err: speculative decode is not supported by multimodal"); return false; } - const auto spec_stages = params_base.speculative.get_resolved_stages(); - const bool multimodal_spec_supported = spec_stages.empty() || - (spec_stages.size() == 1 && spec_stages.front().type == COMMON_SPECULATIVE_TYPE_MTP); - if (!multimodal_spec_supported) { + + const auto spec_stages = params_base.speculative.get_resolved_stages(); + const bool multimodal_spec_supported = spec_stages.empty() || + (spec_stages.size() == 1 && spec_stages.front().type == COMMON_SPECULATIVE_TYPE_MTP); + if (!multimodal_spec_supported) { params_base.speculative.type = COMMON_SPECULATIVE_TYPE_NONE; params_base.speculative.stages.clear(); params_base.has_mtp = false; SRV_WRN("%s\n", "speculative decoding is not supported by multimodal, it will be disabled"); } } - // Load draft model for speculative decoding if specified - if (has_draft_model) { - LLAMA_LOG_INFO("\n\n==================================loading DRAFT model==================================\n\n"); - - gpt_params params_dft; - params_dft.devices = params_base.speculative.devices; - params_dft.model = params_base.speculative.model; - params_dft.main_gpu = params_base.main_gpu; - params_dft.n_gpu_layers = params_base.speculative.n_gpu_layers; - params_dft.rpc_servers = params_base.rpc_servers; - params_dft.cache_type_k = params_base.speculative.cache_type_k.empty() ? params_base.cache_type_k : params_base.speculative.cache_type_k; - params_dft.cache_type_v = params_base.speculative.cache_type_v.empty() ? 
params_base.cache_type_v : params_base.speculative.cache_type_v; - params_dft.flash_attn = params_base.flash_attn; - params_dft.k_cache_hadamard = params_base.k_cache_hadamard; - params_dft.v_cache_hadamard = params_base.v_cache_hadamard; - if (server_speculative_has_dflash(params_base.speculative)) { - params_dft.split_mode = params_base.split_mode; - for (size_t i = 0; i < std::size(params_dft.tensor_split); ++i) { - params_dft.tensor_split[i] = params_base.tensor_split[i]; - } - params_dft.attn_max_batch = params_base.attn_max_batch; - params_dft.graph_reuse = params_base.graph_reuse; - params_dft.split_mode_graph_scheduling = params_base.split_mode_graph_scheduling; - params_dft.scheduler_async = params_base.scheduler_async; - params_dft.max_extra_alloc_MiB = params_base.max_extra_alloc_MiB; - params_dft.reduce_type = params_base.reduce_type; - } - if (!params_base.speculative.params.empty()) { - auto [argc, argv] = parse_command_line("llama-server " + params_base.speculative.params); - if (!gpt_params_parse(argc, argv, params_dft)) { - gpt_params_print_usage(argc, argv, params_dft); - free_command_line(argc, argv); - return false; - }; - free_command_line(argc, argv); - } - LOG_INFO("", { {"model", params_dft.model} }); - if (params_dft.n_ctx == 0) { - params_dft.n_ctx = params_base.speculative.n_ctx; - } - if (server_speculative_has_dflash(params_base.speculative) && params_dft.n_gpu_layers < 0) { - params_dft.n_gpu_layers = params_base.n_gpu_layers; - } - params_dft.n_ctx = params_dft.n_ctx == 0 ? 
params_base.n_ctx / params_base.n_parallel : params_dft.n_ctx; - params_dft.n_parallel = 1; - params_dft.n_batch = params_dft.n_ctx; - - params_base.speculative.mparams_dft.path = params_dft.model; // - - llama_model_params mparams_dft = common_model_params_to_llama(params_dft); - - llama_model * model_dft = llama_model_load_from_file(params_dft.model.c_str(), mparams_dft); - if (model_dft == nullptr) { - LOG_ERROR("failed to load draft model", { {"model", params_base.speculative.model} }); - return false; - } - - cparams_dft = common_context_params_to_llama(params_dft); - - params_base.speculative.model_dft = model_dft; - params_base.speculative.cparams_dft = cparams_dft; + if (!common_speculative_finalize_startup(params_base, model)) { + return false; } - if (server_speculative_has_mtp(params_base.speculative) && - llama_model_n_nextn_layer(model) == 0 && - !params_use_gemma4_external_mtp(params_base)) { - LOG_WARNING("WARNING: MTP speculative stage requested, but model has 0 NextN layers. 
MTP will be disabled.\n", {}); - params_base.has_mtp = false; - server_remove_speculative_stage(params_base.speculative, COMMON_SPECULATIVE_TYPE_MTP); - } + return true; } @@ -509,6 +354,20 @@ void server_context::init() { LOG_INFO("initializing slots", { {"n_slots", params_base.n_parallel} }); + if (params_base.has_mtp) { + SRV_INF("%s\n", "MTP needs embeddings on decode, enabling"); + llama_set_embeddings(ctx, true); + } + + const bool requested_spec = params_base.speculative.has_stage_chain(); + bool can_spec = true; + if (!params_base.dry_run) { + can_spec = common_speculative_is_compat(ctx); + } + if (!can_spec && requested_spec) { + SRV_WRN("%s", "speculative decoding not supported by this context\n"); + } + for (int i = 0; i < params_base.n_parallel; i++) { server_slot slot; @@ -552,69 +411,27 @@ void server_context::init() { slot.params.speculative = params_base.speculative; slot.sparams = params_base.sparams; - - const bool wants_mtp_stage = server_speculative_has_mtp(params_base.speculative); - if (wants_mtp_stage) { - const bool has_external_mtp = params_use_gemma4_external_mtp(params_base); - - if (llama_model_n_nextn_layer(model) > 0 || has_external_mtp) { - params_base.pooling_type = LLAMA_POOLING_TYPE_NONE; - - if (!has_external_mtp) { - params_base.speculative.cparams_dft = common_context_params_to_llama(params_base); - } - - params_base.speculative.cparams_dft.mtp = true; - params_base.speculative.cparams_dft.mtp_op_type = MTP_OP_WARMUP; - params_base.speculative.cparams_dft.embeddings = true; - - slot.has_mtp = true; - slot.params.speculative.cparams_dft = params_base.speculative.cparams_dft; - - slot.batch_spec = llama_batch_init(slot.params.speculative.get_max_stage_n_max() + 1, 0, 1); - SLT_DBG(slot, "batch_spec contains %d tokens\n", slot.batch_spec.n_tokens); - - SRV_INF("%s\n", "MTP needs embeddings on decode, enabling"); - llama_set_embeddings(ctx, true); - } - else { - SRV_WRN("%s\n", "MTP speculative stage requested, but model has 0 NextN 
layers. Removing MTP from the configured stage chain."); - params_base.has_mtp = false; - server_remove_speculative_stage(params_base.speculative, COMMON_SPECULATIVE_TYPE_MTP); - slot.params.speculative = params_base.speculative; - slot.has_mtp = false; - } - } - - const bool requested_spec = !params_base.speculative.get_resolved_stages().empty(); - - bool can_spec = true; - if (!params_base.dry_run) { - can_spec = common_speculative_is_compat(ctx); - } - if (!can_spec) { - SRV_WRN("%s", "speculative decoding not supported by this context\n"); - } // try speculative decoding if (can_spec && requested_spec) { - slot.spec = common_speculative_init(params_base.speculative, slot.ctx); - if (slot.spec) { - if (mctx && !slot.has_mtp) { + switch (common_speculative_try_init(params_base.speculative, slot.ctx, &slot.spec)) { + case COMMON_SPECULATIVE_INIT_READY: + if (mctx && !slot.uses_mtp()) { SRV_ERR("%s\n", "speculative decoding is not supported with multimodal"); return; } SLT_INF(slot, "%s", "speculative decoding context initialized\n"); - } else { - if (llama_model_has_recurrent(model)) { - SRV_ERR("%s", "failed to initialize recurrent speculative context\n"); - throw std::runtime_error("recurrent speculative context initialization failed"); - } else if (slot.has_mtp) { - SRV_ERR("%s", "failed to initialize MTP speculative context\n"); - throw std::runtime_error("MTP speculative context initialization failed"); - } else { - SRV_ERR("%s", "failed to initialize speculative decoding context\n"); - throw std::runtime_error("speculative decoding context initialization failed"); - } + break; + case COMMON_SPECULATIVE_INIT_ERR_RECURRENT: + SRV_ERR("%s", "failed to initialize recurrent speculative context\n"); + throw std::runtime_error("recurrent speculative context initialization failed"); + case COMMON_SPECULATIVE_INIT_ERR_MTP: + SRV_ERR("%s", "failed to initialize MTP speculative context\n"); + throw std::runtime_error("MTP speculative context initialization failed"); + 
case COMMON_SPECULATIVE_INIT_ERR_GENERIC: + SRV_ERR("%s", "failed to initialize speculative decoding context\n"); + throw std::runtime_error("speculative decoding context initialization failed"); + case COMMON_SPECULATIVE_INIT_SKIPPED: + break; } } @@ -735,9 +552,7 @@ void server_slot::reset() { prompt_batch_i1 = -1; n_sent_text = 0; drafted.clear(); - drafted_spec_type = COMMON_SPECULATIVE_TYPE_NONE; i_batch_dft.clear(); - spec_ckpt.clear(); spec_prompt_warmup_failed = false; n_sent_token_probs = 0; infill = false; @@ -756,7 +571,7 @@ void server_slot::reset() { image_just_processed = false; do_checkpoint = false; if (spec != nullptr) { - common_speculative_clear_sequence_hidden(spec, id); + common_speculative_clear_sequence(spec, id); } positional_bans.clear(); @@ -791,7 +606,11 @@ void server_slot::reset() { } bool server_slot::need_embd() const { - return embedding || has_mtp; + return embedding || uses_mtp(); +} + +bool server_slot::uses_mtp() const { + return params.speculative.has_stage_type(COMMON_SPECULATIVE_TYPE_MTP); } bool server_slot::has_budget(gpt_params& global_params) { @@ -827,7 +646,7 @@ void server_slot::add_token_string(const completion_token_output& token) { } bool server_slot::can_speculate() const { - return !spec_prompt_warmup_failed && (!!spec || has_mtp); + return !spec_prompt_warmup_failed && (!!spec || uses_mtp()); } int server_slot::get_n_draft_max() const { @@ -1367,6 +1186,10 @@ bool server_context::launch_slot_with_task(server_slot& slot, server_task& task) // speculative decoding parameters try { slot.params.speculative = defaults.speculative; + const bool has_flat_n_max = json_value_ptr(data, "speculative.n_max") != nullptr; + const bool has_flat_n_min = json_value_ptr(data, "speculative.n_min") != nullptr; + const bool has_flat_p_min = json_value_ptr(data, "speculative.p_min") != nullptr; + slot.params.speculative.n_max = json_value(data, "speculative.n_max", params_base.speculative.n_max); slot.params.speculative.n_min = 
json_value(data, "speculative.n_min", params_base.speculative.n_min); slot.params.speculative.p_min = json_value(data, "speculative.p_min", params_base.speculative.p_min); @@ -1374,6 +1197,20 @@ bool server_context::launch_slot_with_task(server_slot& slot, server_task& task) server_reject_dead_speculative_request_overrides(data); const json stages = json_value(data, "speculative.stages", json()); + if (stages.is_null() && !slot.params.speculative.stages.empty()) { + for (auto & stage : slot.params.speculative.stages) { + if (has_flat_n_max) { + stage.n_max = -1; + } + if (has_flat_n_min) { + stage.n_min = -1; + } + if (has_flat_p_min) { + stage.p_min = -1.0f; + } + } + } + if (!stages.is_null()) { if (!stages.is_array()) { throw std::runtime_error("Error: speculative.stages must be an array"); @@ -1412,11 +1249,11 @@ bool server_context::launch_slot_with_task(server_slot& slot, server_task& task) if (slot.can_speculate() && llama_model_has_recurrent(model) && - slot.params.speculative.n_max > params_base.speculative.n_max) { + slot.params.speculative.get_max_stage_n_max() > params_base.speculative.get_max_stage_n_max()) { send_error(task, - "Error: speculative.n_max=" + std::to_string(slot.params.speculative.n_max) + - " exceeds the recurrent speculative startup limit of " + std::to_string(params_base.speculative.n_max) + - "; restart the server with a higher --draft-max to reserve checkpoint capacity", + "Error: speculative n_max=" + std::to_string(slot.params.speculative.get_max_stage_n_max()) + + " exceeds the recurrent speculative startup limit of " + std::to_string(params_base.speculative.get_max_stage_n_max()) + + "; restart the server with a higher n_max inside the configured --spec-type stages to reserve checkpoint capacity", ERROR_TYPE_INVALID_REQUEST); return false; } @@ -1425,7 +1262,7 @@ bool server_context::launch_slot_with_task(server_slot& slot, server_task& task) throw std::runtime_error("Error: per-request speculative stages must match the server 
startup stage types; only stage parameter overrides are supported"); } - if (slot.params.speculative.has_stage_type(COMMON_SPECULATIVE_TYPE_MTP) && !slot.has_mtp) { + if (slot.params.speculative.has_stage_type(COMMON_SPECULATIVE_TYPE_MTP) && !params_base.has_mtp) { throw std::runtime_error("Error: MTP speculative stage requested, but the server was not started with MTP support"); } @@ -2205,10 +2042,7 @@ void server_context::kv_cache_clear() { continue; } - common_speculative_clear_sequence_hidden(slot.spec, slot.id); - if (auto * ctx_companion = common_speculative_get_companion_ctx(slot.spec); ctx_companion != nullptr) { - llama_kv_cache_clear(ctx_companion); - } + common_speculative_clear_sequence(slot.spec, slot.id, true); } clean_kv_cache = false; } @@ -2759,7 +2593,7 @@ void server_context::apply_server_biases(server_slot& slot) { } } -void server_context::request_completion(int id_task, int id_multi, json data, bool infill, bool embedding, server_tokens&& inputs) { +void server_context::request_completion(int id_task, int id_multi, json data, bool infill, bool embedding, server_tokens & inputs) { server_task task; task.id = id_task; task.id_multi = id_multi; @@ -2768,7 +2602,7 @@ void server_context::request_completion(int id_task, int id_multi, json data, bo task.infill = infill; task.embedding = embedding; task.type = SERVER_TASK_TYPE_COMPLETION; - task.tokens = std::move(inputs); + task.tokens = inputs.clone(); // when a completion task's prompt array is not a singleton, we split it into multiple requests // otherwise, it's a single-prompt task, we actually queue it // if there's numbers in the prompt array it will be treated as an array of tokens @@ -2807,7 +2641,8 @@ void server_context::request_cancel(int id_task) { } void server_context::split_multiprompt_task(int id_multi, server_task& multiprompt_task) { - const int prompt_count = multiprompt_task.data.at("prompt").size(); + auto prompts = multiprompt_task.data.at("prompt"); + const int prompt_count 
= prompts.size(); if (prompt_count <= 1) { send_error(multiprompt_task, "error while handling multiple prompts"); return; @@ -2825,11 +2660,11 @@ void server_context::split_multiprompt_task(int id_multi, server_task& multiprom // add subtasks for (int i = 0; i < prompt_count; i++) { json subtask_data = multiprompt_task.data; - subtask_data["prompt"] = subtask_data.at("prompt")[i]; + subtask_data["prompt"] = prompts[i]; // subtasks inherit everything else (infill mode, embedding mode, etc.) request_completion(subtask_ids[i], id_multi, subtask_data, multiprompt_task.infill, multiprompt_task.embedding, - std::move(multiprompt_task.tokens)); + multiprompt_task.tokens); } } @@ -3666,33 +3501,27 @@ void server_context::add_sampled_tokens() { // perform the speculative drafting for all sequences at the same time in a single batch const int n_draft_max_pre = slot.get_n_draft_max(); if (n_draft_max_pre > 0) { - if (mctx && !slot.has_mtp) { + if (mctx && !slot.uses_mtp()) { // we should never reach this, as speculative is automatically disabled if mmproj is loaded GGML_ABORT("not supported by multimodal"); } static const llama_tokens empty_prompt; - const llama_tokens & cached_text_tokens = slot.has_mtp && !slot.params.speculative.has_composite_stage_chain() + const llama_tokens & cached_text_tokens = slot.uses_mtp() && !slot.params.speculative.has_composite_stage_chain() ? empty_prompt : slot.cache_tokens.get_text_tokens(); auto & params_spec = slot.params.speculative; - const llama_pos draft_base_pos = slot.has_mtp ? slot.cache_tokens.pos_next() : -1; - - if (slot.has_mtp) { - if (!common_speculative_ensure_sequence_hidden(slot.spec, ctx, slot.id, draft_base_pos - 1)) { - LOG_ERROR("MTP hidden state is empty during speculation", {}); - } - } - - llama_tokens draft = common_speculative_draft( + const llama_pos draft_base_pos = slot.uses_mtp() ? 
slot.cache_tokens.pos_next() : -1; + common_speculative_draft_result draft_result = common_speculative_draft_ex( slot.spec, + ctx, params_spec, cached_text_tokens, slot.sampled, draft_base_pos, slot.id); - slot.drafted_spec_type = common_speculative_current_type(slot.spec); + llama_tokens & draft = draft_result.tokens; const int n_draft_max = slot.get_n_draft_max(); @@ -3717,7 +3546,6 @@ void server_context::add_sampled_tokens() { // fallback to normal decoding slot.i_batch = slot.i_batch_dft[0]; slot.drafted.clear(); - slot.drafted_spec_type = COMMON_SPECULATIVE_TYPE_NONE; slot.i_batch_dft.clear(); } else { // keep track of total number of drafted tokens tested @@ -3734,7 +3562,6 @@ void server_context::add_sampled_tokens() { } else { // no speculative decoding - slot.drafted_spec_type = COMMON_SPECULATIVE_TYPE_NONE; slot.i_batch = batch.n_tokens; common_batch_add(batch, slot.sampled, slot.cache_tokens.pos_next(), { slot.id }, true); @@ -4077,15 +3904,10 @@ void server_context::batch_pending_prompt(const int32_t n_ubatch, const int32_t slot.cache_tokens.keep_first(slot.n_past); int p0 = (int)system_tokens.size() + slot.n_past; p0 = system_tokens.size() + slot.cache_tokens.pos_next(); - auto * ctx_companion = slot.spec ? 
common_speculative_get_companion_ctx(slot.spec) : nullptr; - const bool target_trimmed = llama_kv_cache_seq_rm(ctx, slot.id, p0, -1); - const bool companion_trimmed = ctx_companion == nullptr || llama_kv_cache_seq_rm(ctx_companion, slot.id, p0, -1); - if (!target_trimmed || !companion_trimmed) { + const bool trimmed = common_speculative_trim_sequence(slot.spec, ctx, slot.id, p0); + if (!trimmed) { // could not partially delete (likely using a non-Transformer model) - llama_kv_cache_seq_rm(ctx, slot.id, -1, -1); - if (ctx_companion != nullptr) { - llama_kv_cache_seq_rm(ctx_companion, slot.id, -1, -1); - } + common_speculative_clear_sequence_kv(slot.spec, ctx, slot.id); p0 = (int)system_tokens.size(); if (p0 != 0) { @@ -4122,7 +3944,7 @@ void server_context::batch_pending_prompt(const int32_t n_ubatch, const int32_t llama_pos p1 = slot.cache_tokens.pos_next() + slot.n_past_prompt - slot.n_past; // add offset to prompt server_mtp_warmup mtp_media_warmup { ctx, - slot.has_mtp && slot.spec ? &slot : nullptr, + slot.uses_mtp() && slot.spec ? &slot : nullptr, }; mtmd_helper_eval_batch_callback mtp_media_callback = mtp_media_warmup.slot ? server_mtp_media_warmup_callback : nullptr; @@ -4268,113 +4090,6 @@ void server_context::extend_context(const int32_t n_tokens) { } } -// Restore recurrent state and re-decode accepted tokens after speculative-decode rejection. 
-static void restore_speculative_checkpoint( - server_slot & slot, llama_context * ctx, llama_model * model, - common_speculative_type spec_type_used, - llama_token sampled_before, - const std::vector & ids, int n_draft, - const std::vector & spec_feature_rows_pre, int32_t spec_n_past_base) { - if (slot.spec_ckpt.per_step_enabled) { - const int step = (int)ids.size() - 1; - llama_spec_ckpt_restore(ctx, slot.id, slot.spec_ckpt.n_past, step); - - if (slot.spec_ckpt.sampler) { - common_sampler_clone(slot.spec_ckpt.sampler, slot.ctx_sampling); - } - for (llama_token id : ids) { - common_sampler_accept(slot.ctx_sampling, ctx, id, true); - } - - // Update speculative target features using rows collected before checkpoint restore. - if (server_speculative_has_target_features(slot.params.speculative) && !spec_feature_rows_pre.empty()) { - if (!common_speculative_commit_accepted_hidden_rows( - slot.spec, - spec_type_used, - slot.id, - spec_n_past_base, - sampled_before, - ids, - spec_feature_rows_pre)) { - common_speculative_clear_sequence_hidden(slot.spec, slot.id); - } else if (spec_type_used != COMMON_SPECULATIVE_TYPE_MTP) { - SLT_DBG(slot, "%s", "synced MTP target hidden state from accepted-prefix rows after per-step restore"); - } - } - - SLT_DBG(slot, "per-step restore: step=%d (rejected %d drafts)\n", - step, (int)(n_draft - (ids.size() - 1))); - } else { - // Restore pre-speculation recurrent state then re-decode accepted tokens. - llama_spec_ckpt_restore(ctx, slot.id, slot.spec_ckpt.n_past, 0); - - if (slot.spec_ckpt.sampler) { - common_sampler_clone(slot.spec_ckpt.sampler, slot.ctx_sampling); - } - - if (!ids.empty()) { - // Re-decode to advance recurrent state to the accepted position. 
- const int n_re = (int)ids.size(); - llama_batch re_batch = llama_batch_init(n_re, 0, 1); - common_batch_add(re_batch, slot.spec_ckpt.sampled, slot.spec_ckpt.n_past, { slot.id }, n_re == 1); - for (int j = 0; j < n_re - 1; j++) { - common_batch_add(re_batch, ids[j], slot.spec_ckpt.n_past + 1 + j, { slot.id }, j == n_re - 2); - } - - if (slot.has_mtp) { - for (int j = 0; j < re_batch.n_tokens; j++) { - re_batch.logits[j] = true; - } - llama_set_embeddings(ctx, true); - } - - const int ret = llama_decode(ctx, re_batch); - if (ret != 0) { - SLT_ERR(slot, "failed to re-decode accepted tokens after checkpoint restore: %d\n", ret); - } - if (server_speculative_has_target_features(slot.params.speculative)) { - const int n_accepted = (int)ids.size(); - std::vector redecoded_indices(n_accepted); - for (int j = 0; j < n_accepted; ++j) { - redecoded_indices[j] = j; - } - - server_dflash_contract_log_accept( - slot, - spec_type_used, - "restore", - true, - n_draft, - ids, - slot.spec_ckpt.n_past, - redecoded_indices); - - if (!common_speculative_commit_accepted_output( - slot.spec, - ctx, - spec_type_used, - slot.id, - slot.spec_ckpt.n_past, - sampled_before, - ids, - redecoded_indices)) { - common_speculative_clear_sequence_hidden(slot.spec, slot.id); - } - } - - for (llama_token id : ids) { - common_sampler_accept(slot.ctx_sampling, ctx, id, true); - } - - llama_batch_free(re_batch); - SLT_DBG(slot, "spec checkpoint restored: re-decoded %d tokens (rejected %d drafts)\n", - n_re, (int)(n_draft - (ids.size() - 1))); - } - } - - discard_speculative_checkpoint(slot, ctx); -} - void server_context::speculative_decoding_accept() { for (auto& slot : slots) { if (slot.state != SLOT_STATE_PROCESSING || slot.i_batch_dft.empty()) { @@ -4382,7 +4097,6 @@ void server_context::speculative_decoding_accept() { } const llama_token sampled_before = slot.sampled; - const common_speculative_type spec_type_used = slot.drafted_spec_type; size_t n_draft = slot.drafted.size(); 
slot.ctx_sampling->to_generated_text = &slot.generated_text; @@ -4412,28 +4126,15 @@ void server_context::speculative_decoding_accept() { continue; } - const bool any_rejected = (ids.size() - 1) < n_draft; - int32_t spec_n_past_base = 0; - std::vector spec_feature_rows_pre; std::vector accepted_output_indices; if (server_speculative_has_target_features(slot.params.speculative)) { - const int32_t n_pre_spec_tokens = slot.cache_tokens.n_tokens() - (int32_t)(slot.drafted.size() + 1); - spec_n_past_base = slot.cache_tokens.pos_next(n_pre_spec_tokens); - if (!ids.empty()) { accepted_output_indices.assign(slot.i_batch_dft.begin(), slot.i_batch_dft.begin() + ids.size()); } - - if (any_rejected && slot.spec_ckpt.valid && !accepted_output_indices.empty()) { - if (!common_speculative_copy_output_hidden_rows(slot.spec, ctx, accepted_output_indices, spec_feature_rows_pre)) { - spec_feature_rows_pre.clear(); - } - } } slot.i_batch_dft.clear(); slot.drafted.clear(); - slot.drafted_spec_type = COMMON_SPECULATIVE_TYPE_NONE; slot.n_past += ids.size(); slot.n_decoded += ids.size(); @@ -4443,11 +4144,9 @@ void server_context::speculative_decoding_accept() { // update how many tokens out of those tested were accepted slot.n_draft_accepted += ids.size() - 1; - // inform the speculative decoding about the number of accepted tokens - common_speculative_accept(slot.spec, ids.size() - 1); - // rollback to the state before sampling the draft tokens slot.cache_tokens.keep_first(slot.cache_tokens.n_tokens() - n_draft); + const llama_pos spec_pos_base = slot.cache_tokens.pos_next(); // add accepted tokens to the prompt for (auto it = ids.begin(); it != ids.end() - 1; ++it) { @@ -4456,39 +4155,34 @@ void server_context::speculative_decoding_accept() { slot.sampled = ids.back(); // last accepted token slot.n_past = slot.cache_tokens.n_tokens(); - // for recurrent/hybrid models: if any drafts were rejected, restore recurrent state - if (any_rejected && slot.spec_ckpt.valid) { - 
restore_speculative_checkpoint(slot, ctx, model, spec_type_used, sampled_before, ids, n_draft, spec_feature_rows_pre, spec_n_past_base); - } else { - if (server_speculative_has_target_features(slot.params.speculative) && !accepted_output_indices.empty()) { - server_dflash_contract_log_accept( - slot, - spec_type_used, - "direct", - false, - n_draft, - ids, - spec_n_past_base, - accepted_output_indices); + const common_speculative_type spec_type_used = common_speculative_current_type(slot.spec); + const bool any_rejected = (ids.size() - 1) < n_draft; + const common_speculative_checkpoint * ckpt = common_speculative_get_checkpoint(slot.spec); + const bool will_restore = any_rejected && ckpt != nullptr && ckpt->valid; - if (!common_speculative_commit_accepted_output( - slot.spec, - ctx, - spec_type_used, - slot.id, - spec_n_past_base, - sampled_before, - ids, - accepted_output_indices)) { - common_speculative_clear_sequence_hidden(slot.spec, slot.id); - } else if (spec_type_used != COMMON_SPECULATIVE_TYPE_MTP) { - SLT_DBG(slot, "%s", "synced MTP target hidden state from accepted-prefix rows"); - } - } - llama_kv_cache_seq_rm(ctx, slot.id, slot.cache_tokens.pos_next(slot.n_past), -1); - discard_speculative_checkpoint(slot, ctx); + if (server_speculative_has_target_features(slot.params.speculative) && !accepted_output_indices.empty()) { + server_dflash_contract_log_accept( + slot, + spec_type_used, + will_restore ? 
"restore" : "direct", + any_rejected, + n_draft, + ids, + spec_pos_base, + accepted_output_indices); } + common_speculative_commit( + slot.spec, + ctx, + slot.ctx_sampling, + slot.id, + sampled_before, + ids, + n_draft, + spec_pos_base, + accepted_output_indices); + for (size_t i = 0; i < ids.size(); ++i) { completion_token_output result; @@ -4911,7 +4605,7 @@ void server_context::process_batch_tokens(int32_t & n_batch) { if (slot.n_decoded == 0 && slot.can_speculate()) { static const llama_tokens empty_prompt; - const llama_tokens & spec_prompt = slot.has_mtp && !slot.params.speculative.has_composite_stage_chain() + const llama_tokens & spec_prompt = slot.uses_mtp() && !slot.params.speculative.has_composite_stage_chain() ? empty_prompt : slot.cache_tokens.get_text_tokens(); common_speculative_begin(slot.spec, spec_prompt); @@ -4935,7 +4629,7 @@ void server_context::process_batch_tokens(int32_t & n_batch) { completion_token_output result; const int tok_idx = slot.i_batch - i; - if (slot.has_mtp && slot.n_decoded == 0) { + if (slot.uses_mtp() && slot.n_decoded == 0) { (void) common_speculative_capture_output_hidden(slot.spec, ctx, tok_idx, slot.id, slot.n_past); } @@ -5076,10 +4770,25 @@ void server_context::update_slots() { if (slot.state != SLOT_STATE_PROCESSING || slot.i_batch_dft.empty()) { continue; } - if (save_speculative_checkpoint(slot, model, ctx, ckpt_mode)) { - const char * mode_name = slot.spec_ckpt.per_step_enabled ? 
"per-step" : "shadow/cpu"; + const int32_t n_pre_spec_tokens = slot.cache_tokens.n_tokens() - (int32_t) (slot.drafted.size() + 1); + const llama_pos n_past_pre_spec = slot.cache_tokens.pos_next(n_pre_spec_tokens); + const int max_tokens = (int) slot.drafted.size() + 1; + if (common_speculative_before_draft( + slot.spec, + model, + ctx, + slot.ctx_sampling, + slot.sparams, + slot.id, + n_past_pre_spec, + slot.sampled, + max_tokens, + ckpt_mode)) { + const common_speculative_checkpoint * ckpt = common_speculative_get_checkpoint(slot.spec); + GGML_ASSERT(ckpt != nullptr); + const char * mode_name = ckpt->per_step_enabled ? "per-step" : "shadow/cpu"; SLT_DBG(slot, "spec checkpoint saved (mode=%s), n_past_pre_spec=%d\n", - mode_name, slot.spec_ckpt.n_past); + mode_name, ckpt->n_past); } else { SLT_WRN(slot, "%s", "failed to save spec checkpoint\n"); } diff --git a/examples/server/server-context.h b/examples/server/server-context.h index f1e25ecd..68e0d215 100644 --- a/examples/server/server-context.h +++ b/examples/server/server-context.h @@ -22,16 +22,6 @@ enum slot_command { SLOT_COMMAND_RELEASE, }; -struct server_speculative_checkpoint { - bool valid = false; - bool per_step_enabled = false; // per-step SSM checkpoints active - llama_pos n_past = 0; - llama_token sampled = LLAMA_TOKEN_NULL; - common_sampler * sampler = nullptr; // saved sampler state - - void clear(); -}; - struct server_slot { int id; int id_task = -1; @@ -39,9 +29,6 @@ struct server_slot { struct slot_params params; - llama_batch batch_spec = {}; - llama_context * ctx_dft = nullptr; - bool released = false; slot_state state = SLOT_STATE_IDLE; slot_command command = SLOT_COMMAND_NONE; @@ -138,7 +125,6 @@ struct server_slot { // sampling llama_token sampled; // in speculative mode, this is the last accepted token llama_tokens drafted; - common_speculative_type drafted_spec_type = COMMON_SPECULATIVE_TYPE_NONE; json json_schema; @@ -173,13 +159,7 @@ struct server_slot { // expiring logit bias 
std::vector prev_elb_states; - bool has_mtp = false; - - // saves recurrent state before a speculative batch so it can be restored on rejection - server_speculative_checkpoint spec_ckpt; - bool spec_prompt_warmup_failed = false; - // speculative decoding stats int32_t n_draft_total = 0; // Total draft tokens generated int32_t n_draft_accepted = 0; // Draft tokens actually accepted @@ -199,6 +179,7 @@ struct server_slot { void reset(); bool need_embd() const; + bool uses_mtp() const; bool has_budget(gpt_params& global_params); @@ -270,11 +251,6 @@ struct server_context { // multimodal mtmd_context* mctx = nullptr; - // For speculative decoding - llama_model* model_draft = nullptr; - llama_context* ctx_draft = nullptr; - llama_context_params cparams_dft; - int32_t n_ctx; // total context for all clients / slots // system prompt @@ -354,7 +330,7 @@ struct server_context { void apply_server_biases(server_slot& slot); - void request_completion(int id_task, int id_multi, json data, bool infill, bool embedding, server_tokens&& inputs); + void request_completion(int id_task, int id_multi, json data, bool infill, bool embedding, server_tokens & inputs); void request_cancel(int id_task); diff --git a/examples/server/server-cors-proxy.h b/examples/server/server-cors-proxy.h new file mode 100644 index 00000000..4deb3f3a --- /dev/null +++ b/examples/server/server-cors-proxy.h @@ -0,0 +1,170 @@ +#pragma once + +#include "common.h" +#include "http.h" +#include +#include +#include +#include + +static std::string to_lower_copy(const std::string & value) { + std::string lowered(value.size(), '\0'); + std::transform(value.begin(), value.end(), lowered.begin(), [](unsigned char c) { return std::tolower(c); }); + return lowered; +} + +static httplib::Request prepare_proxy_req_header(const std::string & method, + const std::string & scheme, + const std::string & host, + int port, + const std::string & path, + const std::map & headers, + const std::string & body, + const 
httplib::FormFiles & files) { + httplib::Request req; + bool has_files = !files.empty(); + req.form.files = files; + std::string effective_body = body; + std::string override_content_type; + req.method = method; + req.path = path; + for (const auto & [key, value] : headers) { + const auto lowered = to_lower_copy(key); + if (lowered == "accept-encoding") { + // disable Accept-Encoding to avoid compressed responses + continue; + } + if (lowered == "transfer-encoding") { + // the body is already decoded + continue; + } + if (lowered == "content-length") { + // let httplib calculate Content-Length from the actual body + continue; + } + if (lowered == "content-type") { + if (has_files) { + // we set our own Content-Type with the new boundary + continue; + } + // when no files but the original request was multipart, + // the body is now JSON, so correct the Content-Type + if (value.find("multipart/form-data") != std::string::npos) { + override_content_type = "application/json; charset=utf-8"; + continue; + } + } + if (lowered == "host") { + bool is_default_port = (scheme == "https" && port == 443) || (scheme == "http" && port == 80); + req.set_header(key, is_default_port ? 
host : host + ":" + std::to_string(port)); + } else { + req.set_header(key, value); + } + } + req.body = effective_body; + if (!override_content_type.empty()) { + req.set_header("Content-Type", override_content_type); + } + //req.response_handler = response_handler; + //req.content_receiver = content_receiver; + + return req; +} + +static std::string get_param(httplib::Params params,const std::string & key, const std::string & def = "") { + auto it = params.find("url"); + if (it != params.end()) { + return it->second; + } + return def; +} + +static void proxy_request(const httplib::Request & req, + httplib::Response & res, + const std::string & method) { + std::string target_url = get_param(req.params, "url"); + common_http_url parsed_url = common_http_parse_url(target_url); + if (parsed_url.host.empty()) { + throw std::runtime_error("invalid target URL: missing host"); + } + + if (parsed_url.path.empty()) { + parsed_url.path = "/"; + } + + if (!parsed_url.password.empty()) { + throw std::runtime_error("authentication in target URL is not supported"); + } + + if (parsed_url.scheme != "http" && parsed_url.scheme != "https") { + throw std::runtime_error("unsupported URL scheme in target URL: " + parsed_url.scheme); + } + + SRV_INF("proxying %s request to %s://%s:%i%s\n", method.c_str(), parsed_url.scheme.c_str(), parsed_url.host.c_str(), parsed_url.port, parsed_url.path.c_str()); + std::map headers; + for (auto [key, value] : req.headers) { + auto new_key = key; + if (string_starts_with(new_key, "x-proxy-header-")) { + string_replace_all(new_key, "x-proxy-header-", ""); + } + headers[new_key] = value; + } + + httplib::Request proxy_req = prepare_proxy_req_header(method, + parsed_url.scheme, + parsed_url.host, + parsed_url.port, + parsed_url.path, + headers, + req.body, + req.form.files); + + // Make the proxied request + httplib::Result proxy_res; + + if (parsed_url.scheme == "https") { +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + httplib::SSLClient cli(parsed_url.host, 
parsed_url.port); + // set timeouts, follow redirects as needed + cli.set_connection_timeout(600); + cli.set_read_timeout(600); + cli.set_write_timeout(600); + cli.set_follow_location(true); + proxy_res = cli.send(proxy_req); +#else + res.status = 501; + res.set_content("HTTPS not supported (build with OpenSSL)", "text/plain"); + return; +#endif + } else { + httplib::Client cli(parsed_url.host, parsed_url.port); + cli.set_connection_timeout(600); + cli.set_read_timeout(600); + cli.set_write_timeout(600); + proxy_res = cli.send(std::move(proxy_req)); + } + + if (!proxy_res) { + std::string error_data = "Proxy failed: " + httplib::to_string(proxy_res.error()); + json final_response{ {"error", error_data} }; + res.set_content(safe_json_to_str(final_response), "application/json; charset=utf-8"); + res.status = json_value(error_data, "code", 500); + return; + } + + res.status = proxy_res->status; + res.set_content(proxy_res->body, proxy_res->get_header_value("Content-Type")); + for (const auto & h : proxy_res->headers) { + // skip hop-by-hop headers + if (h.first != "Transfer-Encoding" && h.first != "Connection") + res.set_header(h.first, h.second); + } +} + +static void proxy_handler_get(const httplib::Request & req, httplib::Response & res) { + proxy_request(req, res, "GET"); +} + +static void proxy_handler_post(const httplib::Request & req, httplib::Response & res) { + proxy_request(req, res, "POST"); +} diff --git a/examples/server/server.cpp b/examples/server/server.cpp index e7e55634..b51f8e4f 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2,6 +2,7 @@ #include "server-context.h" #include "server-common.h" #include "server-chat.h" +#include "server-cors-proxy.h" #include "chat.h" #include "common.h" @@ -329,6 +330,18 @@ struct server_response_reader { return !cancelled && received_count < id_tasks.size(); } + // cancel-cascade fix: true only if one of THIS reader's tasks is on a + // slot (the active decode). 
Used to gate llama_decode_stop() so a queued/ + // deferred task's disconnect cannot abort another task's active decode via + // the process-global stop_internal_decode flag. Best-effort cross-thread + // read (slots are not resized at runtime; same race class as the global). + bool any_task_on_slot() const { + for (const auto & slot : ctx_server.slots) { + if (slot.is_processing() && id_tasks.count(slot.id_task)) return true; + } + return false; + } + // return nullptr if should_stop() is true before receiving a result // note: if one error is received, it will stop further processing and return error result server_task_result_ptr next(const std::function& should_stop) { @@ -1020,7 +1033,8 @@ int main(int argc, char ** argv) { {"vision", ctx_server.chat_params.allow_image}, {"audio", ctx_server.chat_params.allow_audio}, } }, - { "n_ctx", ctx_server.n_ctx } + { "n_ctx", ctx_server.n_ctx }, + { "cors_proxy_enabled", ctx_server.params_base.webui_mcp_proxy}, }; @@ -1125,7 +1139,7 @@ int main(int argc, char ** argv) { // non-stream, wait for the results auto all_results = rd->wait_for_all(is_connection_closed); if (all_results.is_terminated) { - llama_decode_stop(); // send a signal to stop decode process + if (rd->any_task_on_slot()) llama_decode_stop(); // cancel-cascade fix: stop only if OUR task is the active decode return; // connection is closed } else if (all_results.error) { @@ -1139,8 +1153,8 @@ int main(int argc, char ** argv) { arr.push_back(res->to_json()); } // if single request, return single object instead of array - res_ok(res, arr.size() == 1 ? arr[0] : arr); - } + res_ok(res, arr.size() == 1 ? 
arr[0] : arr); + } } else { // in streaming mode, the first error must be treated as non-stream response @@ -1148,7 +1162,7 @@ int main(int argc, char ** argv) { // ref: https://github.com/ggml-org/llama.cpp/pull/16486#discussion_r2419657309 server_task_result_ptr first_result = rd->next(is_connection_closed); if (first_result == nullptr) { - llama_decode_stop(); // send a signal to stop decode process + if (rd->any_task_on_slot()) llama_decode_stop(); // cancel-cascade fix: stop only if OUR task is the active decode return; // connection is closed } else if (first_result->is_error()) { @@ -1356,10 +1370,11 @@ int main(int argc, char ** argv) { const auto handle_infill = [&ctx_server, &handle_completions_impl](const httplib::Request & req, httplib::Response & res) { log_prompt(ctx_server.params_base, json::parse(req.body)); json data = json::parse(req.body); - const int id_task = ctx_server.queue_tasks.get_new_id(); - server_tokens token; // dummy tokens - ctx_server.queue_results.add_waiting_task_id(id_task); - ctx_server.request_completion(id_task, -1, data, true, false, std::move(token)); + //avoid double submits + //const int id_task = ctx_server.queue_tasks.get_new_id(); + //server_tokens token; // dummy tokens + //ctx_server.queue_results.add_waiting_task_id(id_task); + //ctx_server.request_completion(id_task, -1, data, true, false, token); std::vector files; // dummy handle_completions_impl( SERVER_TASK_TYPE_INFILL, @@ -1477,7 +1492,7 @@ int main(int argc, char ** argv) { // collect results if (all_results.is_terminated) { - llama_decode_stop(); + if (rd.any_task_on_slot()) llama_decode_stop(); // cancel-cascade fix: stop only if OUR task is the active decode return; // connection is closed } else if (all_results.error) { @@ -2108,6 +2123,16 @@ int main(int argc, char ** argv) { } #endif } + + // CORS proxy (EXPERIMENTAL, only used by the Web UI for MCP) + if (params.webui_mcp_proxy) { + SRV_WRN("%s", "-----------------\n"); + SRV_WRN("%s", "CORS proxy is 
enabled, do not expose server to untrusted environments\n"); + SRV_WRN("%s", "This feature is EXPERIMENTAL and may be removed or changed in future versions\n"); + SRV_WRN("%s", "-----------------\n"); + svr->Get("/cors-proxy", proxy_handler_get); + svr->Post("/cors-proxy", proxy_handler_post); + } // // Start the server // diff --git a/examples/server/webui/dist/index.html b/examples/server/webui/dist/index.html index 0d185b68..7ec85dab 100644 --- a/examples/server/webui/dist/index.html +++ b/examples/server/webui/dist/index.html @@ -636,7 +636,7 @@ In order to be iterable, non-array objects must have a [Symbol.iterator]() metho In order to be iterable, non-array objects must have a [Symbol.iterator]() method.`)}function tO(l,t){if(l){if(typeof l=="string")return JL(l,t);var e=Object.prototype.toString.call(l).slice(8,-1);if(e==="Object"&&l.constructor&&(e=l.constructor.name),e==="Map"||e==="Set")return Array.from(l);if(e==="Arguments"||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(e))return JL(l,t)}}function JL(l,t){(t==null||t>l.length)&&(t=l.length);for(var e=0,n=new Array(t);e0&&arguments[0]!==void 0?arguments[0]:"",e=t.split(","),n=e.length>1?"one of ".concat(e.join(", ")):e[0];return{code:J$,message:"File type must be ".concat(n)}},yx=function(t){return{code:v$,message:"File is larger than ".concat(t," ").concat(t===1?"byte":"bytes")}},Wx=function(t){return{code:U$,message:"File is smaller than ".concat(t," ").concat(t===1?"byte":"bytes")}},P$={code:E$,message:"Too many files"};function eO(l,t){var e=l.type==="application/x-moz-file"||F$(l,t);return[e,e?null:K$(t)]}function lO(l,t,e){if(nm(l.size))if(nm(t)&&nm(e)){if(l.size>e)return[!1,yx(e)];if(l.sizee)return[!1,yx(e)]}return[!0,null]}function nm(l){return l!=null}function H$(l){var t=l.files,e=l.accept,n=l.minSize,i=l.maxSize,s=l.multiple,a=l.maxFiles,c=l.validator;return!s&&t.length>1||s&&a>=1&&t.length>a?!1:t.every(function(o){var 
d=eO(o,e),h=F2(d,1),b=h[0],p=lO(o,n,i),y=F2(p,1),M=y[0],T=c?c(o):null;return b&&M&&!T})}function BM(l){return typeof l.isPropagationStopped=="function"?l.isPropagationStopped():typeof l.cancelBubble<"u"?l.cancelBubble:!1}function w1(l){return l.dataTransfer?Array.prototype.some.call(l.dataTransfer.types,function(t){return t==="Files"||t==="application/x-moz-file"}):!!l.target&&!!l.target.files}function Nx(l){l.preventDefault()}function Q$(l){return l.indexOf("MSIE")!==-1||l.indexOf("Trident/")!==-1}function B$(l){return l.indexOf("Edge/")!==-1}function A$(){var l=arguments.length>0&&arguments[0]!==void 0?arguments[0]:window.navigator.userAgent;return Q$(l)||B$(l)}function Xr(){for(var l=arguments.length,t=new Array(l),e=0;e1?i-1:0),a=1;al.length)&&(t=l.length);for(var e=0,n=new Array(t);e=0)&&Object.prototype.propertyIsEnumerable.call(l,n)&&(e[n]=l[n])}return e}function htt(l,t){if(l==null)return{};var e={},n=Object.keys(l),i,s;for(s=0;s=0)&&(e[i]=l[i]);return e}var HG=B.forwardRef(function(l,t){var e=l.children,n=AM(l,ltt),i=btt(n),s=i.open,a=AM(i,ntt);return B.useImperativeHandle(t,function(){return{open:s}},[s]),I2.createElement(B.Fragment,null,e(li(li({},a),{},{open:s})))});HG.displayName="Dropzone";var 
aO={disabled:!1,getFilesFromEvent:X$,maxSize:1/0,minSize:0,multiple:!0,maxFiles:0,preventDropOnDocument:!0,noClick:!1,noKeyboard:!1,noDrag:!1,noDragEventsBubbling:!1,validator:null,useFsAccessApi:!1,autoFocus:!1};HG.defaultProps=aO;HG.propTypes={children:Vn.func,accept:Vn.objectOf(Vn.arrayOf(Vn.string)),multiple:Vn.bool,preventDropOnDocument:Vn.bool,noClick:Vn.bool,noKeyboard:Vn.bool,noDrag:Vn.bool,noDragEventsBubbling:Vn.bool,minSize:Vn.number,maxSize:Vn.number,maxFiles:Vn.number,disabled:Vn.bool,getFilesFromEvent:Vn.func,onFileDialogCancel:Vn.func,onFileDialogOpen:Vn.func,useFsAccessApi:Vn.bool,autoFocus:Vn.bool,onDragEnter:Vn.func,onDragLeave:Vn.func,onDragOver:Vn.func,onDrop:Vn.func,onDropAccepted:Vn.func,onDropRejected:Vn.func,onError:Vn.func,validator:Vn.func};var EL={isFocused:!1,isFileDialogActive:!1,isDragActive:!1,isDragAccept:!1,isDragReject:!1,acceptedFiles:[],fileRejections:[]};function btt(){var l=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},t=li(li({},aO),l),e=t.accept,n=t.disabled,i=t.getFilesFromEvent,s=t.maxSize,a=t.minSize,c=t.multiple,o=t.maxFiles,d=t.onDragEnter,h=t.onDragLeave,b=t.onDragOver,p=t.onDrop,y=t.onDropAccepted,M=t.onDropRejected,T=t.onFileDialogCancel,Y=t.onFileDialogOpen,L=t.useFsAccessApi,x=t.autoFocus,g=t.preventDropOnDocument,F=t.noClick,j=t.noKeyboard,D=t.noDrag,_=t.noDragEventsBubbling,ut=t.onError,K=t.validator,f=B.useMemo(function(){return $$(e)},[e]),lt=B.useMemo(function(){return q$(e)},[e]),tt=B.useMemo(function(){return typeof Y=="function"?Y:Gx},[Y]),ht=B.useMemo(function(){return typeof T=="function"?T:Gx},[T]),dt=B.useRef(null),It=B.useRef(null),kt=B.useReducer(Ztt,EL),Gt=VX(kt,2),gt=Gt[0],vt=Gt[1],H=gt.isFocused,nt=gt.isFileDialogActive,bt=B.useRef(typeof window<"u"&&window.isSecureContext&&L&&_$()),$=function(){!bt.current&&nt&&setTimeout(function(){if(It.current){var de=It.current.files;de.length||(vt({type:"closeDialog"}),ht())}},300)};B.useEffect(function(){return 
window.addEventListener("focus",$,!1),function(){window.removeEventListener("focus",$,!1)}},[It,nt,ht,bt]);var _t=B.useRef([]),ce=function(de){dt.current&&dt.current.contains(de.target)||(de.preventDefault(),_t.current=[])};B.useEffect(function(){return g&&(document.addEventListener("dragover",Nx,!1),document.addEventListener("drop",ce,!1)),function(){g&&(document.removeEventListener("dragover",Nx),document.removeEventListener("drop",ce))}},[dt,g]),B.useEffect(function(){return!n&&x&&dt.current&&dt.current.focus(),function(){}},[dt,x,n]);var ne=B.useCallback(function(qt){ut?ut(qt):console.error(qt)},[ut]),Nt=B.useCallback(function(qt){qt.preventDefault(),qt.persist(),Ql(qt),_t.current=[].concat(att(_t.current),[qt.target]),w1(qt)&&Promise.resolve(i(qt)).then(function(de){if(!(BM(qt)&&!_)){var Yt=de.length,Jt=Yt>0&&H$({files:de,accept:f,minSize:a,maxSize:s,multiple:c,maxFiles:o,validator:K}),be=Yt>0&&!Jt;vt({isDragAccept:Jt,isDragReject:be,isDragActive:!0,type:"setDraggedFiles"}),d&&d(qt)}}).catch(function(de){return ne(de)})},[i,d,ne,_,f,a,s,c,o,K]),xt=B.useCallback(function(qt){qt.preventDefault(),qt.persist(),Ql(qt);var de=w1(qt);if(de&&qt.dataTransfer)try{qt.dataTransfer.dropEffect="copy"}catch{}return de&&b&&b(qt),!1},[b,_]),Zt=B.useCallback(function(qt){qt.preventDefault(),qt.persist(),Ql(qt);var de=_t.current.filter(function(Jt){return dt.current&&dt.current.contains(Jt)}),Yt=de.indexOf(qt.target);Yt!==-1&&de.splice(Yt,1),_t.current=de,!(de.length>0)&&(vt({type:"setDraggedFiles",isDragActive:!1,isDragAccept:!1,isDragReject:!1}),w1(qt)&&h&&h(qt))},[dt,h,_]),Ft=B.useCallback(function(qt,de){var Yt=[],Jt=[];qt.forEach(function(be){var ee=eO(be,f),Q=VX(ee,2),C=Q[0],q=Q[1],mt=lO(be,a,s),Vt=VX(mt,2),ie=Vt[0],Ot=Vt[1],fe=K?K(be):null;if(C&&ie&&!fe)Yt.push(be);else{var oe=[q,Ot];fe&&(oe=oe.concat(fe)),Jt.push({file:be,errors:oe.filter(function(_e){return 
_e})})}}),(!c&&Yt.length>1||c&&o>=1&&Yt.length>o)&&(Yt.forEach(function(be){Jt.push({file:be,errors:[P$]})}),Yt.splice(0)),vt({acceptedFiles:Yt,fileRejections:Jt,isDragReject:Jt.length>0,type:"setFiles"}),p&&p(Yt,Jt,de),Jt.length>0&&M&&M(Jt,de),Yt.length>0&&y&&y(Yt,de)},[vt,c,f,a,s,o,p,y,M,K]),te=B.useCallback(function(qt){qt.preventDefault(),qt.persist(),Ql(qt),_t.current=[],w1(qt)&&Promise.resolve(i(qt)).then(function(de){BM(qt)&&!_||Ft(de,qt)}).catch(function(de){return ne(de)}),vt({type:"reset"})},[i,Ft,ne,_]),he=B.useCallback(function(){if(bt.current){vt({type:"openDialog"}),tt();var qt={multiple:c,types:lt};window.showOpenFilePicker(qt).then(function(de){return i(de)}).then(function(de){Ft(de,null),vt({type:"closeDialog"})}).catch(function(de){ttt(de)?(ht(de),vt({type:"closeDialog"})):ett(de)?(bt.current=!1,It.current?(It.current.value=null,It.current.click()):ne(new Error("Cannot open the file picker because the https://developer.mozilla.org/en-US/docs/Web/API/File_System_Access_API is not supported and no was provided."))):ne(de)});return}It.current&&(vt({type:"openDialog"}),tt(),It.current.value=null,It.current.click())},[vt,tt,ht,L,Ft,ne,lt,c]),$e=B.useCallback(function(qt){!dt.current||!dt.current.isEqualNode(qt.target)||(qt.key===" "||qt.key==="Enter"||qt.keyCode===32||qt.keyCode===13)&&(qt.preventDefault(),he())},[dt,he]),je=B.useCallback(function(){vt({type:"focus"})},[]),xe=B.useCallback(function(){vt({type:"blur"})},[]),Ke=B.useCallback(function(){F||(A$()?setTimeout(he,0):he())},[F,he]),Vl=function(de){return n?null:de},gl=function(de){return j?null:Vl(de)},Pl=function(de){return D?null:Vl(de)},Ql=function(de){_&&de.stopPropagation()},El=B.useMemo(function(){return function(){var qt=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},de=qt.refKey,Yt=de===void 0?"ref":de,Jt=qt.role,be=qt.onKeyDown,ee=qt.onFocus,Q=qt.onBlur,C=qt.onClick,q=qt.onDragEnter,mt=qt.onDragOver,Vt=qt.onDragLeave,ie=qt.onDrop,Ot=AM(qt,itt);return 
li(li(UL({onKeyDown:gl(Xr(be,$e)),onFocus:gl(Xr(ee,je)),onBlur:gl(Xr(Q,xe)),onClick:Vl(Xr(C,Ke)),onDragEnter:Pl(Xr(q,Nt)),onDragOver:Pl(Xr(mt,xt)),onDragLeave:Pl(Xr(Vt,Zt)),onDrop:Pl(Xr(ie,te)),role:typeof Jt=="string"&&Jt!==""?Jt:"presentation"},Yt,dt),!n&&!j?{tabIndex:0}:{}),Ot)}},[dt,$e,je,xe,Ke,Nt,xt,Zt,te,j,D,n]),pn=B.useCallback(function(qt){qt.stopPropagation()},[]),sn=B.useMemo(function(){return function(){var qt=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},de=qt.refKey,Yt=de===void 0?"ref":de,Jt=qt.onChange,be=qt.onClick,ee=AM(qt,stt),Q=UL({accept:f,multiple:c,type:"file",style:{border:0,clip:"rect(0, 0, 0, 0)",clipPath:"inset(50%)",height:"1px",margin:"0 -1px -1px 0",overflow:"hidden",padding:0,position:"absolute",width:"1px",whiteSpace:"nowrap"},onChange:Vl(Xr(Jt,te)),onClick:Vl(Xr(be,pn)),tabIndex:-1},Yt,It);return li(li({},Q),ee)}},[It,e,c,te,n]);return li(li({},gt),{},{isFocused:H&&!n,getRootProps:El,getInputProps:sn,rootRef:dt,inputRef:It,open:Vl(he)})}function Ztt(l,t){switch(t.type){case"focus":return li(li({},l),{},{isFocused:!0});case"blur":return li(li({},l),{},{isFocused:!1});case"openDialog":return li(li({},EL),{},{isFileDialogActive:!0});case"closeDialog":return li(li({},l),{},{isFileDialogActive:!1});case"setDraggedFiles":return li(li({},l),{},{isDragActive:t.isDragActive,isDragAccept:t.isDragAccept,isDragReject:t.isDragReject});case"setFiles":return li(li({},l),{},{acceptedFiles:t.acceptedFiles,fileRejections:t.fileRejections,isDragReject:t.isDragReject});case"reset":return li({},EL);default:return l}}function Gx(){}const IX={content(){const l=new URL(window.location.href);return l.searchParams.get("m")??l.searchParams.get("q")??""},shouldSend(){return new URL(window.location.href).searchParams.has("q")},clear(){HC(["m","q"])}};function ptt(l,t){const e=rl.filterByLeafNodeId(l,t,!0),n=[],i=new Map;for(const a of l)i.set(a.id,a);const s=a=>{let 
c=i.get(a);for(;c&&c.children.length!==0;)c=i.get(c.children.at(-1)??-1);return(c==null?void 0:c.id)??-1};for(const a of e){const c=i.get(a.parent??-1);if(!c)continue;const o=c.children;a.type!=="root"&&n.push({msg:a,siblingLeafNodeIds:o.map(s),siblingCurrIdx:o.indexOf(a.id)})}return n}function Tx(){const{viewingChat:l,sendMessage:t,isGenerating:e,stopGenerating:n,pendingMessages:i,canvasData:s,replaceMessageAndGenerate:a,continueMessageAndGenerate:c}=xc(),o=u_(IX.content()),d=u$();o_(o,d);const h=B.useRef(null);m_(h);const[b,p]=B.useState(-1),y=B.useMemo(()=>l?ptt(l.messages,b):[],[b,l]),M=(l==null?void 0:l.conv.id)??null,T=i[M??""];B.useEffect(()=>{p(-1)},[M]);const Y=_=>{_&&p(_)},L=async()=>{var ut;const _=o.value();try{const K=e(M??"");if(console.log("IsGenerating",K),_.trim().length===0||K)return;o.setValue(""),p(-1),nu(!1);const f=((ut=y.at(-1))==null?void 0:ut.msg.id)??null,lt=await t(M,f,_,d.items,Y);console.log("Send msg success:",lt),lt||o.setValue(_),d.clearItems()}catch(K){Dn.error(K instanceof Error?K.message:String(K)),o.setValue(_)}},x=async(_,ut)=>{l&&(p(_.id),nu(!1),await a(l.conv.id,_.parent,ut,_.extra,Y),p(-1),nu(!1))},g=async _=>{l&&(p(_.parent),nu(!1),await a(l.conv.id,_.parent,null,_.extra,Y),p(-1),nu(!1))},F=async(_,ut)=>{!l||!c||(p(_.id),nu(!1),await c(l.conv.id,_.id,ut,Y),p(-1),nu(!1))},j=!!s;B.useEffect(()=>{IX.shouldSend()?L():o.focus(),IX.clear()},[o.ref]);const D=T&&!y.some(_=>_.msg.id===T.id)?[{msg:T,siblingLeafNodeIds:[],siblingCurrIdx:0,isPending:!0}]:[];return U.jsxs("div",{className:es({"grid lg:gap-8 grow transition-[300ms]":!0,"grid-cols-[1fr_0fr] lg:grid-cols-[1fr_1fr]":j,"grid-cols-[1fr_0fr]":!j}),children:[U.jsxs("div",{className:es({"flex flex-col w-full lg:w-[75vw] lg:mx-auto":!0,"hidden lg:flex":j,flex:!j}),children:[U.jsx("div",{className:"flex items-center justify-center",children:l==null?void 0:l.conv.model_name}),U.jsxs("div",{id:"messages-list",className:"grow",ref:h,children:[U.jsxs("div",{className:"mt-auto flex 
justify-center",children:[U.jsx("div",{children:""}),l==null&&U.jsx("div",{className:"w-full max-w-2xl px-4",children:U.jsx("div",{className:"mb-8 text-center",children:U.jsx("p",{className:"text-1xl text-muted-foreground",children:"How can I help you today?"})})})]}),[...y,...D].map(_=>{const ut=_.msg,K=(T==null?void 0:T.id)===ut.id;return U.jsx(s_,{msg:K?T:ut,siblingLeafNodeIds:_.siblingLeafNodeIds,siblingCurrIdx:_.siblingCurrIdx,onRegenerateMessage:g,onEditMessage:x,onChangeSibling:p,isPending:K||_.isPending,onContinueMessage:F},ut.id)})]}),U.jsx(ytt,{textarea:o,extraContext:d,onSend:L,onStop:()=>n(M??""),isGenerating:e(M??"")})]}),U.jsx("div",{className:"w-full sticky top-[7em] h-[calc(100vh-9em)]",children:(s==null?void 0:s.type)===CG.PY_INTERPRETER&&U.jsx(r_,{})})]})}function ytt({textarea:l,extraContext:t,onSend:e,onStop:n,isGenerating:i}){const{config:s}=xc(),[a,c]=B.useState(!1);return U.jsx("div",{role:"group","aria-label":"Chat input",className:es({"flex items-end pt-8 pb-6 sticky bottom-0 bg-base-100":!0,"opacity-50":a}),children:U.jsx(HG,{noClick:!0,onDrop:o=>{c(!1),t.onFileAdded(o)},onDragEnter:()=>c(!0),onDragLeave:()=>c(!1),multiple:!0,children:({getRootProps:o,getInputProps:d})=>U.jsxs("div",{className:"flex flex-col rounded-xl border-1 border-base-content/30 p-3 w-full",onPasteCapture:h=>{const b=h.clipboardData.getData("text/plain");if(b.length>0&&s.pasteLongTextToFileLen>0&&b.length>s.pasteLongTextToFileLen){t.addItems([{type:"context",name:"Pasted Content",content:b}]),h.preventDefault();return}const p=Array.from(h.clipboardData.items).filter(y=>y.kind==="file").map(y=>y.getAsFile()).filter(y=>y!==null);p.length>0&&(h.preventDefault(),t.onFileAdded(p))},...o(),children:[!i&&U.jsx(OR,{items:t.items,removeItem:t.removeItem}),U.jsxs("div",{className:"flex flex-row w-full",children:[U.jsx("textarea",{className:"text-md outline-none border-none w-full resize-vertical lg:resize-none lg:max-h-48 lg:overflow-y-auto",placeholder:"Type a 
message...",ref:l.ref,onInput:l.onInput,onKeyDown:h=>{h.nativeEvent.isComposing||h.keyCode===229||h.key==="Enter"&&!h.shiftKey&&(h.preventDefault(),e())},id:"msg-input",dir:"auto",rows:2}),U.jsxs("div",{className:"flex flex-row gap-2 ml-2",children:[U.jsx("label",{htmlFor:"file-upload",className:es({"btn w-8 h-8 p-0 rounded-full":!0,"btn-disabled":i}),"aria-label":"Upload file",tabIndex:0,role:"button",children:U.jsx(p_,{className:"h-5 w-5"})}),U.jsx("input",{id:"file-upload",type:"file",disabled:i,...d(),hidden:!0}),i?U.jsx("button",{className:"btn btn-neutral w-8 h-8 p-0 rounded-full",onClick:n,children:U.jsx(W_,{className:"h-5 w-5"})}):U.jsx("button",{className:"btn btn-primary w-8 h-8 p-0 rounded-full",onClick:e,"aria-label":"Send message",children:U.jsx(b_,{className:"h-5 w-5"})})]})]})]})})})}const Wtt=["prefix_role","stop_string","reasoning_format","temperature","top_k","top_p","min_p","max_tokens"],Ntt=["top_n_sigma","adaptive_target","adaptive_decay","xtc_probability","xtc_threshold","dynatemp_range","dynatemp_exponent","typical_p"],Mtt=["dry_multiplier","dry_base","dry_allowed_length","dry_penalty_last_n","repeat_last_n","repeat_penalty","presence_penalty","frequency_penalty"],q0="w-4 h-4 mr-1 inline";function Gtt({currentConfig:l,onLoadPreset:t}){const[e,n]=B.useState(()=>rl.getPresets()),[i,s]=B.useState(""),[a,c]=B.useState(null),{showConfirm:o,showAlert:d}=$L(),h=async()=>{if(!i.trim()){await d("Please enter a preset name");return}const y=e.find(M=>M.name===i.trim());if(y)await o(`Preset "${i}" already exists. Do you want to overwrite it?`)&&(rl.updatePreset(y.id,l),n(rl.getPresets()),s(""),await d("Preset updated successfully"));else{const M=rl.savePreset(i.trim(),l);n([...e,M]),s(""),await d("Preset saved successfully")}},b=async y=>{await o(`Load preset "${y.name}"? 
Current settings will be replaced.`)&&(t(y.config),c(y.id))},p=async y=>{await o(`Delete preset "${y.name}"?`)&&(rl.deletePreset(y.id),n(e.filter(M=>M.id!==y.id)),a===y.id&&c(null))};return U.jsxs("div",{className:"space-y-4",children:[U.jsxs("div",{className:"form-control",children:[U.jsx("label",{className:"label",children:U.jsx("span",{className:"label-text",children:"Save current settings as preset"})}),U.jsxs("div",{className:"join",children:[U.jsx("input",{type:"text",placeholder:"Enter preset name",className:"input input-bordered join-item flex-1",value:i,onChange:y=>s(y.target.value),onKeyPress:y=>{y.key==="Enter"&&h()}}),U.jsx("button",{className:"btn btn-primary join-item",onClick:h,children:"Save Preset"})]})]}),U.jsxs("div",{className:"form-control",children:[U.jsx("label",{className:"label",children:U.jsx("span",{className:"label-text",children:"Saved presets"})}),e.length===0?U.jsx("div",{className:"alert",children:U.jsx("span",{children:"No presets saved yet"})}):U.jsx("div",{className:"space-y-2 max-h-64 overflow-y-auto",children:e.map(y=>U.jsx("div",{className:es({"card bg-base-200 p-3":!0,"ring-2 ring-primary":a===y.id}),children:U.jsxs("div",{className:"flex items-center justify-between",children:[U.jsxs("div",{children:[U.jsx("h4",{className:"font-semibold",children:y.name}),U.jsxs("p",{className:"text-sm opacity-70",children:["Created: ",new Date(y.createdAt).toLocaleString()]})]}),U.jsxs("div",{className:"flex gap-2",children:[U.jsx("button",{className:"btn btn-sm btn-primary",onClick:()=>b(y),children:"Load"}),U.jsx("button",{className:"btn btn-sm btn-error",onClick:()=>p(y),children:U.jsx(tV,{className:"w-4 h-4"})})]})]})},y.id))})]})]})}const Ttt=(l,t)=>[{title:U.jsxs(U.Fragment,{children:[U.jsx(hJ,{className:q0}),"Presets"]}),fields:[{type:4,key:"custom",component:()=>U.jsx(Gtt,{currentConfig:l,onLoadPreset:t})}]},{title:U.jsxs(U.Fragment,{children:[U.jsx(GJ,{className:q0}),"General"]}),fields:[{type:0,label:"API 
Key",key:"apiKey"},{type:1,label:"System Message (will be disabled if left empty)",key:"systemMessage"},{type:3,label:"Completion Type",key:"completionType",options:"Chat|Text"},...Wtt.map(e=>({type:0,label:e,key:e})),{type:0,label:"Paste length to file",key:"pasteLongTextToFileLen"},{type:2,label:"Parse PDF as image instead of text",key:"pdfAsImage"}]},{title:U.jsxs(U.Fragment,{children:[U.jsx(zJ,{className:q0}),"Samplers"]}),fields:[{type:0,label:"Samplers queue",key:"samplers"},...Ntt.map(e=>({type:0,label:e,key:e}))]},{title:U.jsxs(U.Fragment,{children:[U.jsx(xJ,{className:q0}),"Penalties"]}),fields:Mtt.map(e=>({type:0,label:e,key:e}))},{title:U.jsxs(U.Fragment,{children:[U.jsx(ZJ,{className:q0}),"Reasoning"]}),fields:[{type:2,label:"Expand thought process by default when generating messages",key:"showThoughtInProgress"},{type:2,label:"Exclude thought process when sending requests to API (Recommended for Reasoning Models like Deepseek R1)",key:"excludeThoughtOnReq"}]},{title:U.jsxs(U.Fragment,{children:[U.jsx(FJ,{className:q0}),"Advanced"]}),fields:[{type:4,key:"custom",component:()=>{const e=async()=>{const i=await(await fetch("/demo-conversation.json")).json();rl.remove(i.id);for(const s of i.messages)rl.appendMsg(i.id,s,s.model_name)};return U.jsx("button",{className:"btn",onClick:e,children:"(debug) Import demo conversation"})}},{type:4,key:"custom",component:()=>{const e=async()=>{const n=await rl.exportDB(),i=document.createElement("a");document.body.appendChild(i),i.href=URL.createObjectURL(n),document.body.appendChild(i),i.download="llamawebui_dump.json",i.click(),document.body.removeChild(i)};return U.jsx("button",{className:"btn",onClick:e,children:"Export conversation database"})}},{type:4,key:"custom",component:()=>{const e=async n=>{if(console.log(n),!n.target.files)throw Dn.error("Target.files cant be null"),new Error("e.target.files cant be null");if(n.target.files.length!=1)throw Dn.error("Number of selected files for DB import must be 1 but was 
"+n.target.files.length+"."),new Error("Number of selected files for DB import must be 1 but was "+n.target.files.length+".");const i=n.target.files[0];try{if(!i)throw new Error("No DB found to import.");console.log("Importing DB "+i.name),await rl.importDB(i),Dn.success("Import complete"),window.location.reload()}catch(s){Dn.error(""+s)}};return U.jsxs("div",{children:[U.jsxs("label",{htmlFor:"db-import",className:"btn",role:"button",tabIndex:0,children:[" ","Reset and import conversation database"," "]}),U.jsx("input",{id:"db-import",type:"file",accept:".json",className:"file-upload",onInput:e,hidden:!0})]})}},{type:2,label:"Show generation stats (model name, context size, prompt and token per second)",key:"showTokensPerSecond"},{type:2,label:"Use server defaults for parameters (skip sending temp, top_k, top_p, min_p, typical p from WebUI)",key:"useServerDefaults"},{type:1,label:U.jsxs(U.Fragment,{children:["Custom JSON config (For more info, refer to"," ",U.jsx(D1,{href:"https://github.com/ikawrakow/ik_llama.cpp/tree/main/examples/server/README.md",children:"server documentation"}),")"]}),key:"custom"}]},{title:U.jsxs(U.Fragment,{children:[U.jsx(uJ,{className:q0}),"Experimental"]}),fields:[{type:4,key:"custom",component:()=>U.jsx(U.Fragment,{children:U.jsxs("p",{className:"mb-8",children:["Experimental features are not guaranteed to work correctly.",U.jsx("br",{}),U.jsx("br",{}),"If you encounter any problems, create a"," ",U.jsx(D1,{href:"https://github.com/ikawrakow/ik_llama.cpp/issues/new?template=019-bug-misc.yml",children:"Bug (misc.)"})," ","report on Github. 
Please also specify ",U.jsx("b",{children:"webui/experimental"})," on the report title and include screenshots.",U.jsx("br",{}),U.jsx("br",{}),"Some features may require packages downloaded from CDN, so they need internet connection."]})})},{type:2,label:U.jsxs(U.Fragment,{children:[U.jsx("b",{children:"Enable Python interpreter"}),U.jsx("br",{}),U.jsxs("small",{className:"text-xs",children:["This feature uses"," ",U.jsx(D1,{href:"https://pyodide.org",children:"pyodide"}),', downloaded from CDN. To use this feature, ask the LLM to generate Python code inside a Markdown code block. You will see a "Run" button on the code block, near the "Copy" button.']})]}),key:"pyIntepreterEnabled"}]}];function Xtt({show:l,onClose:t}){const{config:e,saveConfig:n}=xc(),[i,s]=B.useState(0),[a,c]=B.useState(JSON.parse(JSON.stringify(e))),o=Ttt(a,c),d=()=>{window.confirm("Are you sure you want to reset all settings?")&&c(Br)};function h(M){return M in Br}const b=()=>{const M={...Br,...JSON.parse(JSON.stringify(a))};for(const T in M){if(!h(T)){console.log(`Unknown default type for key ${T}`);continue}const Y=M[T],L=fX(Br[T]),x=xX(Br[T]),g=SX(Br[T]);if(x){if(!xX(Y)){alert(`Value for ${T} must be string`);return}}else if(g){const F=Y.toString().trim(),j=Number(F);if(isNaN(j)||!SX(j)||F.length===0){alert(`Value for ${T} must be numeric`);return}M[T]=j}else if(L){if(!fX(Y)){alert(`Value for ${T} must be boolean`);return}}else Dn.error(`Unknown default type for key ${T}`)}n(M),t()},p=M=>T=>{c({...a,[M]:T})},y=B.useRef(null);return U.jsx("dialog",{className:es({modal:!0,"modal-open":l}),children:U.jsxs("div",{className:"modal-box w-11/12 max-w-3xl",children:[U.jsx("h3",{className:"text-lg font-bold mb-6",children:"Settings"}),U.jsxs("div",{className:"flex flex-col md:flex-row h-[calc(90vh-12rem)]",children:[U.jsx("div",{className:"hidden md:flex flex-col items-stretch pr-4 mr-4 border-r-2 border-base-200",children:o.map((M,T)=>U.jsx("div",{className:es({"btn btn-ghost justify-start 
font-normal w-44 mb-1":!0,"btn-active":i===T}),onClick:()=>s(T),dir:"auto",children:M.title},T))}),U.jsx("div",{className:"md:hidden flex flex-row gap-2 mb-4",children:U.jsxs("details",{className:"dropdown",ref:y,children:[U.jsx("summary",{className:"btn bt-sm w-full m-1",children:o[i].title}),U.jsx("ul",{className:"menu dropdown-content bg-base-100 rounded-box z-[1] w-52 p-2 shadow",children:o.map((M,T)=>U.jsx("div",{className:es({"btn btn-ghost justify-start font-normal":!0,"btn-active":i===T}),onClick:()=>{var Y;s(T),(Y=y.current)==null||Y.removeAttribute("open")},dir:"auto",children:M.title},T))})]})}),U.jsxs("div",{className:"grow overflow-y-auto px-4",children:[o[i].fields.map((M,T)=>{const Y=`${i}-${T}`;if(M.type===0)return U.jsx(Ltt,{configKey:M.key,value:a[M.key],onChange:p(M.key),label:M.label},Y);if(M.type===1)return U.jsx(Ytt,{configKey:M.key,value:a[M.key].toString(),onChange:p(M.key),label:M.label},Y);if(M.type===2)return U.jsx(Vtt,{configKey:M.key,value:!!a[M.key],onChange:p(M.key),label:M.label},Y);if(M.type===3)return U.jsx(Itt,{configKey:M.key,value:a[M.key].toString(),onChange:p(M.key),label:M.label,options:M.options?M.options:""},Y);if(M.type===4)return U.jsx("div",{className:"mb-2",children:typeof M.component=="string"?M.component:M.component({value:a[M.key],onChange:p(M.key)})},Y)}),U.jsx("p",{className:"opacity-40 mb-6 text-sm mt-8",children:"Settings are saved in browser's localStorage"})]})]}),U.jsxs("div",{className:"modal-action",children:[U.jsx("button",{className:"btn",onClick:d,children:"Reset to default"}),U.jsx("button",{className:"btn",onClick:t,children:"Close"}),U.jsx("button",{className:"btn btn-primary",onClick:b,children:"Save"})]})]})})}function Ytt({configKey:l,value:t,onChange:e,label:n}){return U.jsxs("label",{className:"form-control mb-2",children:[U.jsx("div",{className:"label inline",children:n||l}),U.jsx("textarea",{className:"textarea textarea-bordered h-24",placeholder:`Default: 
${Br[l]||"none"}`,value:t,onChange:i=>e(i.target.value)})]})}function Ltt({configKey:l,value:t,onChange:e,label:n}){const i=BC[l];return U.jsxs(U.Fragment,{children:[i&&U.jsxs("div",{className:"block md:hidden mb-1",children:[U.jsx("b",{children:n||l}),U.jsx("br",{}),U.jsx("p",{className:"text-xs whitespace-normal",children:i})]}),U.jsxs("label",{className:"input input-bordered join-item grow flex items-center gap-2 mb-2",children:[U.jsxs("div",{className:"dropdown dropdown-hover",children:[U.jsx("div",{tabIndex:0,role:"button",className:"font-bold hidden md:block",children:n||l}),i&&U.jsx("div",{className:"dropdown-content menu bg-base-100 rounded-box z-10 w-64 p-2 shadow mt-4 whitespace-normal break-words",children:i})]}),U.jsx("input",{type:"text",className:"grow",placeholder:`Default: ${Br[l]||"none"}`,value:t,onChange:s=>e(s.target.value)})]})]})}function Vtt({configKey:l,value:t,onChange:e,label:n}){return U.jsxs("div",{className:"flex flex-row items-center mb-2",children:[U.jsx("input",{type:"checkbox",className:"toggle",checked:t,onChange:i=>e(i.target.checked)}),U.jsx("span",{className:"ml-4",children:n||l})]})}function Itt({configKey:l,value:t,onChange:e,label:n,options:i}){const s=i.split("|"),a=i.includes(t)?t:i[0]||"";return U.jsx("div",{className:"flex flex-row items-center mb-2",children:U.jsxs("div",{className:"flex flex-col w-full",children:[U.jsx("label",{className:"mb-1 text-sm font-medium text-gray-700",children:n||l}),U.jsx("select",{className:"select select-bordered w-full max-w-xs",value:a,onChange:c=>e(c.target.value),children:s.map(c=>U.jsx("option",{value:c,children:c},c))})]})})}function wtt(){return U.jsx(sJ,{children:U.jsx(IC,{children:U.jsx("div",{className:"flex flex-row drawer lg:drawer-open h-screen",children:U.jsx(iJ,{children:U.jsx(nC,{children:U.jsxs(z1,{element:U.jsx(ztt,{}),children:[U.jsx(z1,{path:"/chat/:convId",element:U.jsx(Tx,{})}),U.jsx(z1,{path:"*",element:U.jsx(Tx,{})})]})})})})})})}function 
ztt(){const{showSettings:l,setShowSettings:t}=xc();return U.jsxs(U.Fragment,{children:[U.jsx(QU,{}),U.jsxs("main",{className:"drawer-content grow flex flex-col h-screen mx-auto px-4 overflow-auto bg-base-100",id:"main-scroll",children:[U.jsx(KJ,{}),U.jsx(eC,{})]}),U.jsx(Xtt,{show:l,onClose:()=>t(!1)}),U.jsx(lJ,{})]})}oj.createRoot(document.getElementById("root")).render(U.jsx(B.StrictMode,{children:U.jsx(wtt,{})})); - +
diff --git a/examples/server/webui/package-lock.json b/examples/server/webui/package-lock.json index bde1a54d..8e48c013 100644 --- a/examples/server/webui/package-lock.json +++ b/examples/server/webui/package-lock.json @@ -6603,20 +6603,6 @@ "dev": true, "license": "ISC" }, - "node_modules/yaml": { - "version": "2.7.0", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.7.0.tgz", - "integrity": "sha512-+hSoy/QHluxmC9kCIJyL/uyFmLmc+e5CFR5Wa+bpIhIj85LVb9ZH2nVnqrHoSvKogwODv0ClqZkmiSSaIH5LTA==", - "license": "ISC", - "optional": true, - "peer": true, - "bin": { - "yaml": "bin.mjs" - }, - "engines": { - "node": ">= 14" - } - }, "node_modules/yocto-queue": { "version": "0.1.0", "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", diff --git a/examples/server/webui_llamacpp/.prettierignore b/examples/server/webui_llamacpp/.prettierignore index 7d74fe24..7bbdcf6a 100644 --- a/examples/server/webui_llamacpp/.prettierignore +++ b/examples/server/webui_llamacpp/.prettierignore @@ -7,3 +7,12 @@ bun.lockb # Miscellaneous /static/ +dist/ +.svelte-kit/ +build/ + +# Build output +/dist/ +/build/ +/.svelte-kit/ +test-results diff --git a/examples/server/webui_llamacpp/.storybook/ModeWatcherDecorator.svelte b/examples/server/webui_llamacpp/.storybook/decorators/ModeWatcherDecorator.svelte similarity index 100% rename from examples/server/webui_llamacpp/.storybook/ModeWatcherDecorator.svelte rename to examples/server/webui_llamacpp/.storybook/decorators/ModeWatcherDecorator.svelte diff --git a/examples/server/webui_llamacpp/.storybook/TooltipProviderDecorator.svelte b/examples/server/webui_llamacpp/.storybook/decorators/TooltipProviderDecorator.svelte similarity index 72% rename from examples/server/webui_llamacpp/.storybook/TooltipProviderDecorator.svelte rename to examples/server/webui_llamacpp/.storybook/decorators/TooltipProviderDecorator.svelte index 9aad1eaa..ba0cabc5 100644 --- 
a/examples/server/webui_llamacpp/.storybook/TooltipProviderDecorator.svelte +++ b/examples/server/webui_llamacpp/.storybook/decorators/TooltipProviderDecorator.svelte @@ -1,5 +1,5 @@ + + + + + + + +

{tooltip}

+
+
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/actions/ActionIconCopyToClipboard.svelte b/examples/server/webui_llamacpp/src/lib/components/app/actions/ActionIconCopyToClipboard.svelte new file mode 100644 index 00000000..999f0cba --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/actions/ActionIconCopyToClipboard.svelte @@ -0,0 +1,17 @@ + + + canCopy && copyToClipboard(text)} +/> diff --git a/examples/server/webui_llamacpp/src/lib/components/app/actions/index.ts b/examples/server/webui_llamacpp/src/lib/components/app/actions/index.ts new file mode 100644 index 00000000..4bb2a58d --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/actions/index.ts @@ -0,0 +1,13 @@ +/** + * + * ACTIONS + * + * Small interactive components for user actions. + * + */ + +/** Styled icon button for action triggers with tooltip. */ +export { default as ActionIcon } from './ActionIcon.svelte'; + +/** Copy-to-clipboard icon button with clipboard logic. 
*/ +export { default as ActionIconCopyToClipboard } from './ActionIconCopyToClipboard.svelte'; diff --git a/examples/server/webui_llamacpp/src/lib/components/app/badges/BadgeInfo.svelte b/examples/server/webui_llamacpp/src/lib/components/app/badges/BadgeInfo.svelte new file mode 100644 index 00000000..25986082 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/badges/BadgeInfo.svelte @@ -0,0 +1,26 @@ + + + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/badges/BadgesModality.svelte b/examples/server/webui_llamacpp/src/lib/components/app/badges/BadgesModality.svelte new file mode 100644 index 00000000..d87184ea --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/badges/BadgesModality.svelte @@ -0,0 +1,36 @@ + + +{#each modalities as modality (modality)} + {#if modality === ModelModality.VISION || modality === ModelModality.AUDIO || modality === ModelModality.VIDEO} + + {#if modality === ModelModality.VISION} + + + Vision (Image) + {:else if modality === ModelModality.VIDEO} + + {/if} +{/each} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/badges/index.ts b/examples/server/webui_llamacpp/src/lib/components/app/badges/index.ts new file mode 100644 index 00000000..f8098056 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/badges/index.ts @@ -0,0 +1,13 @@ +/** + * + * BADGES & INDICATORS + * + * Small visual indicators for status and metadata. + * + */ + +/** Generic info badge with optional tooltip and click handler. */ +export { default as BadgeInfo } from './BadgeInfo.svelte'; + +/** Badge indicating model modality (vision, audio, tools). 
*/ +export { default as BadgesModality } from './BadgesModality.svelte'; diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentPreview.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentPreview.svelte deleted file mode 100644 index 212b1fe8..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentPreview.svelte +++ /dev/null @@ -1,273 +0,0 @@ - - -
-
- {#if isPdf} -
- - - -
- {/if} -
- -
- {#if isImage && displayPreview} -
- {displayName} -
- {:else if isPdf && pdfViewMode === 'pages'} - {#if pdfImagesLoading} -
-
-
- -

Converting PDF to images...

-
-
- {:else if pdfImagesError} -
-
- - -

Failed to load PDF images

- -

{pdfImagesError}

- - -
-
- {:else if pdfImages.length > 0} -
- {#each pdfImages as image, index (image)} -
-

Page {index + 1}

- - PDF Page {index + 1} -
- {/each} -
- {:else} -
-
- - -

No PDF pages available

-
-
- {/if} - {:else if (isText || (isPdf && pdfViewMode === 'text')) && displayTextContent} -
- {displayTextContent} -
- {:else if isAudio} -
-
- - - {#if attachment?.type === 'audioFile'} - - {:else if uploadedFile?.preview} - - {:else} -

Audio preview not available

- {/if} - -

- {displayName} -

-
-
- {:else} -
-
- {#if IconComponent} - - {/if} - -

Preview not available for this file type

-
-
- {/if} -
-
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentThumbnailFile.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentThumbnailFile.svelte deleted file mode 100644 index 46f0d000..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentThumbnailFile.svelte +++ /dev/null @@ -1,129 +0,0 @@ - - -{#if type === MimeTypeText.PLAIN || type === FileTypeCategory.TEXT} - {#if readonly} - - - {:else} - - - {/if} -{:else} - -{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList.svelte deleted file mode 100644 index 050c7933..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList.svelte +++ /dev/null @@ -1,278 +0,0 @@ - - -{#if displayItems.length > 0} -
-
- - -
- {#each displayItems as item (item.id)} - {#if item.isImage && item.preview} - openPreview(item, event)} - /> - {:else} - openPreview(item, event)} - /> - {/if} - {/each} -
- - -
- - {#if showViewAll} -
- -
- {/if} -
-{/if} - -{#if previewItem} - -{/if} - - diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList/ChatAttachmentsList.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList/ChatAttachmentsList.svelte new file mode 100644 index 00000000..e74bd845 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList/ChatAttachmentsList.svelte @@ -0,0 +1,119 @@ + + +{#snippet attachmentitem(item: ChatAttachmentDisplayItem)} + openPreview(i, event)} + {readonly} + /> +{/snippet} + +{#if displayItems.length > 0} +
+ {#if limitToSingleRow} + + {#each displayItems as item (item.id)} + {@render attachmentitem(item)} + {/each} + + {:else} +
+ {#each displayItems as item (item.id)} + {@render attachmentitem(item)} + {/each} +
+ {/if} +
+{/if} + + + +{#if mcpResourcePreviewExtra} + +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList/ChatAttachmentsListItem/ChatAttachmentsListItem.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList/ChatAttachmentsListItem/ChatAttachmentsListItem.svelte new file mode 100644 index 00000000..143621cd --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList/ChatAttachmentsListItem/ChatAttachmentsListItem.svelte @@ -0,0 +1,132 @@ + + +{#if isMcpPrompt(item)} + {@const mcpPrompt = + item.attachment?.type === AttachmentType.MCP_PROMPT + ? (item.attachment as DatabaseMessageExtraMcpPrompt) + : item.uploadedFile?.mcpPrompt + ? { + type: AttachmentType.MCP_PROMPT as const, + name: item.name, + serverName: item.uploadedFile.mcpPrompt.serverName, + promptName: item.uploadedFile.mcpPrompt.promptName, + content: item.textContent ?? 
'', + arguments: item.uploadedFile.mcpPrompt.arguments + } + : null} + {#if mcpPrompt} + onFileRemove(item.id) : undefined} + /> + {/if} +{:else if isMcpResource(item)} + {@const mcpResource = item.attachment as DatabaseMessageExtraMcpResource} + + onMcpResourcePreview?.(mcpResource)} + /> +{:else if item.isImage && item.preview} + onPreview?.(item)} + /> +{:else if isPdfFile(item.attachment, item.uploadedFile)} + onPreview?.(item)} + /> +{:else} + onPreview?.(item)} + /> +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList/ChatAttachmentsListItem/ChatAttachmentsListItemMcpPrompt.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList/ChatAttachmentsListItem/ChatAttachmentsListItemMcpPrompt.svelte new file mode 100644 index 00000000..636e93f2 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList/ChatAttachmentsListItem/ChatAttachmentsListItemMcpPrompt.svelte @@ -0,0 +1,41 @@ + + +
+ + + {#if !readonly && onRemove} +
+ onRemove?.()} /> +
+ {/if} +
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList/ChatAttachmentsListItem/ChatAttachmentsListItemMcpResource.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList/ChatAttachmentsListItem/ChatAttachmentsListItemMcpResource.svelte new file mode 100644 index 00000000..6e1f639f --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList/ChatAttachmentsListItem/ChatAttachmentsListItemMcpResource.svelte @@ -0,0 +1,89 @@ + + + + + + + + +
+ {#if favicon} + {attachment.resource.serverName} { + (e.currentTarget as HTMLImageElement).style.display = 'none'; + }} + src={favicon} + /> + {/if} + + + {serverName} + +
+
+
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList/ChatAttachmentsListItem/ChatAttachmentsListItemThumbnailFile.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList/ChatAttachmentsListItem/ChatAttachmentsListItemThumbnailFile.svelte new file mode 100644 index 00000000..df49dd46 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList/ChatAttachmentsListItem/ChatAttachmentsListItemThumbnailFile.svelte @@ -0,0 +1,184 @@ + + +{#snippet textPreview(content: string)} +
+
+ {getPreviewText(content)} +
+ + {#if content.length > 150} +
+ {/if} +
+{/snippet} + +{#snippet removeButton()} +
+ onRemove?.(id)} /> +
+{/snippet} + +{#snippet fileIcon()} +
+ {#if isAudio} + + {:else if isVideo} +
+{/snippet} + +{#snippet info(text: string | undefined)} + {#if text} + {text} + {/if} +{/snippet} + +{#if isTextWithContent || isPdfWithContent} + +{:else} + +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentThumbnailImage.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList/ChatAttachmentsListItem/ChatAttachmentsListItemThumbnailImage.svelte similarity index 54% rename from examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentThumbnailImage.svelte rename to examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList/ChatAttachmentsListItem/ChatAttachmentsListItemThumbnailImage.svelte index da9ceb63..b78a6591 100644 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentThumbnailImage.svelte +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList/ChatAttachmentsListItem/ChatAttachmentsListItemThumbnailImage.svelte @@ -1,62 +1,65 @@ -
- {#if onClick} +{#snippet image()} + {name} +{/snippet} + +
+ {#if onclick} {:else} - {name} + {@render image()} {/if} {#if !readonly}
- + onRemove?.(id)} + stopPropagationOnClick + tooltip="Remove" + />
{/if}
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview.svelte new file mode 100644 index 00000000..cba323f2 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview.svelte @@ -0,0 +1,212 @@ + + +
+
+ 1} /> + +
+ {#if currentItem} + + + + {/if} + + +
+
+
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItem.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItem.svelte new file mode 100644 index 00000000..30e84812 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItem.svelte @@ -0,0 +1,74 @@ + + +{#if currentItem} + {#key currentItem.id} + {#if isPdf} + + {:else if isImage} + + {:else if isText && displayTextContent} + + {:else if isAudio} + + {:else if isVideo} + + {:else if isUnavailable} + + {/if} + {/key} +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItemAudio.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItemAudio.svelte new file mode 100644 index 00000000..06e1f592 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItemAudio.svelte @@ -0,0 +1,26 @@ + + +
+
+ + + {#if audioSrc} + + {:else} +

Audio preview not available

+ {/if} + +

{currentItem?.name || 'Audio'}

+
+
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItemImage.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItemImage.svelte new file mode 100644 index 00000000..070ff823 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItemImage.svelte @@ -0,0 +1,18 @@ + + +{#if displayPreview} +
+ {currentItem?.name +
+{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItemPdf.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItemPdf.svelte new file mode 100644 index 00000000..750532a6 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItemPdf.svelte @@ -0,0 +1,174 @@ + + +
+ + + +
+ +{#if !hasVisionModality && activeModelId && currentItem} + + + Preview only + + + The selected model does not support vision. Only the extracted + + + (pdfViewMode = PdfViewMode.TEXT)} + > + text + + will be sent to the model. + + + +{/if} + +{#if pdfImagesLoading} +
+
+
+

Converting PDF to images...

+
+
+{:else if pdfImagesError} +
+
+ +

Failed to load PDF images

+

{pdfImagesError}

+
+
+{:else if pdfImages.length > 0} + {#each pdfImages as image, index (image)} +

Page {index + 1}

+ PDF Page {index + 1} +
+ {/each} +{:else} +
+
+ +

No PDF pages available

+
+
+{/if} + +{#if pdfViewMode === PdfViewMode.TEXT && displayTextContent} +
+ +
+{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItemText.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItemText.svelte new file mode 100644 index 00000000..5977523a --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItemText.svelte @@ -0,0 +1,21 @@ + + +{#if displayTextContent} +
+ +
+{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItemUnavailable.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItemUnavailable.svelte new file mode 100644 index 00000000..d3002a93 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItemUnavailable.svelte @@ -0,0 +1,17 @@ + + +
+
+ + +

Preview not available for this file type

+
+
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItemVideo.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItemVideo.svelte new file mode 100644 index 00000000..62040b36 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItemVideo.svelte @@ -0,0 +1,27 @@ + + +
+
+
+
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewFileInfo.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewFileInfo.svelte new file mode 100644 index 00000000..d27d54a4 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewFileInfo.svelte @@ -0,0 +1,16 @@ + + +
+

{displayName}

+ + {#if fileSize} +

{fileSize}

+ {/if} +
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewNavButtons.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewNavButtons.svelte new file mode 100644 index 00000000..a57e3145 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewNavButtons.svelte @@ -0,0 +1,34 @@ + + +{#if show} + + + +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewThumbnailStrip.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewThumbnailStrip.svelte new file mode 100644 index 00000000..3f19ee39 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewThumbnailStrip.svelte @@ -0,0 +1,66 @@ + + +{#if items.length > 1} +
+ + {#each items as item, index (item.id)} + + {/each} + +
+{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsViewAll.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsViewAll.svelte deleted file mode 100644 index ae82f7b7..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsViewAll.svelte +++ /dev/null @@ -1,190 +0,0 @@ - - -
-
- {#if fileItems.length > 0} -
-

Files ({fileItems.length})

-
- {#each fileItems as item (item.id)} - openPreview(item, event)} - /> - {/each} -
-
- {/if} - - {#if imageItems.length > 0} -
-

Images ({imageItems.length})

-
- {#each imageItems as item (item.id)} - {#if item.preview} - openPreview(item, event)} - /> - {/if} - {/each} -
-
- {/if} -
-
- -{#if previewItem} - -{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatForm.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatForm.svelte index 6c9a1184..ed26f9ea 100644 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatForm.svelte +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatForm.svelte @@ -1,121 +1,251 @@ - +
{ + event.preventDefault(); + + if (!canSubmit || disabled || hasLoadingAttachments) return; + + onSubmit?.(); + }} > -
- - 0 || uploadedFiles.length > 0} - {disabled} - {isLoading} - {isRecording} - onFileUpload={handleFileUpload} - onMicClick={handleMicClick} - onStop={handleStop} - /> +
+ { + handleInput(); + onValueChange?.(value); + }} + {disabled} + {placeholder} + /> + + {#if mcpHasResourceAttachments()} + { + preSelectedResourceUri = uri; + isResourceDialogOpen = true; + }} + /> + {/if} + + onSystemPromptClick?.({ message: value, files: uploadedFiles })} + onMcpPromptClick={showMcpPromptButton ? () => (isPromptPickerOpen = true) : undefined} + onMcpResourcesClick={() => (isResourceDialogOpen = true)} + /> +
- + { + mcpStore.attachResource(resource.uri); + }} + onOpenChange={(newOpen: boolean) => { + if (!newOpen) { + preSelectedResourceUri = undefined; + } + }} +/> diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionAddButton.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionAddButton.svelte new file mode 100644 index 00000000..7175888a --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionAddButton.svelte @@ -0,0 +1,33 @@ + + + + + + + + +

{ATTACHMENT_TOOLTIP_TEXT}

+
+
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionAddDropdown.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionAddDropdown.svelte new file mode 100644 index 00000000..47954032 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionAddDropdown.svelte @@ -0,0 +1,179 @@ + + +
+ + + + {#snippet child({ props })} + + {ATTACHMENT_TOOLTIP_TEXT} + + + + {/snippet} + + + +

{ATTACHMENT_TOOLTIP_TEXT}

+
+
+ + + + + + + Add files + + + + {#each ATTACHMENT_FILE_ITEMS as item (item.id)} + {@const enabled = attachmentMenu.isItemEnabled(item.enabledWhen)} + {#if enabled} + attachmentMenu.callbacks[item.action]()} + > + + + {item.label} + + {:else if item.disabledTooltip} + + + {#snippet child({ props })} +
+ + + + {item.label} + +
+ {/snippet} +
+ + +

{item.disabledTooltip}

+
+
+ {/if} + {/each} +
+
+ + + + + System Message + + + + + + + {#if hasMcpPromptsSupport} + + + + + + MCP Prompt + + {/if} + + {#if hasMcpResourcesSupport} + + + + MCP Resources + + {/if} +
+
+
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionAddMcpServersSubmenu.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionAddMcpServersSubmenu.svelte new file mode 100644 index 00000000..dd357d6c --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionAddMcpServersSubmenu.svelte @@ -0,0 +1,150 @@ + + + + + + + + MCP Servers + + + + {#if hasMcpServers} + +
+ {#each filteredMcpServers as server (server.id)} + {@const healthState = mcpStore.getHealthCheckState(server.id)} + {@const hasError = healthState.status === HealthCheckStatus.ERROR} + {@const isEnabledForChat = isServerEnabledForChat(server.id)} + {@const displayName = getServerLabel(server)} + {@const faviconUrl = mcpStore.getServerFavicon(server.id)} + + + {/each} +
+ + {#snippet footer()} + + + + Manage MCP Servers + + {/snippet} +
+ {:else} +
+ No MCP servers configured +
+ + + + + + + Add MCP Servers + + {/if} +
+
+
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionAddSheet.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionAddSheet.svelte new file mode 100644 index 00000000..c4069163 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionAddSheet.svelte @@ -0,0 +1,297 @@ + + +
+ + {@render trigger({ disabled, onclick: () => (sheetOpen = true) })} + + + + Add to chat + + + Add files, system prompt or configure MCP servers + + + +
+ (filesExpanded = open)}> + + {#if filesExpanded} + + {:else} + + {/if} + + + + Add files + + + +
+ {#each ATTACHMENT_FILE_ITEMS as item (item.id)} + {@const enabled = attachmentMenu.isItemEnabled(item.enabledWhen)} + {#if enabled} + + {:else if item.disabledTooltip} + + + + + + +

{item.disabledTooltip}

+
+
+ {/if} + {/each} +
+
+
+ + (mcpExpanded = open)}> + + {#if mcpExpanded} + + {:else} + + {/if} + + + + MCP Servers + + + {getEnabledMcpServers().length} server{getEnabledMcpServers().length !== 1 ? 's' : ''} + + + + +
+ {#each getEnabledMcpServers() as server (server.id)} + {@const healthState = mcpStore.getHealthCheckState(server.id)} + {@const hasError = healthState.status === HealthCheckStatus.ERROR} + {@const displayName = mcpStore.getServerLabel(server)} + {@const faviconUrl = mcpStore.getServerFavicon(server.id)} + {@const isEnabled = conversationsStore.isMcpServerEnabledForChat(server.id)} + + + {/each} + + {#if getEnabledMcpServers().length === 0} +
+ No MCP servers configured +
+ {/if} +
+
+
+ + {#if toolsPanel.totalToolCount > 0} + (toolsExpanded = open)}> + + {#if toolsExpanded} + + {:else} + + {/if} + + + + Tools + + + {toolsPanel.totalToolCount} tool{toolsPanel.totalToolCount !== 1 ? 's' : ''} + + + + +
+ {#each toolsPanel.activeGroups as group (group.label)} + {@const checked = toolsPanel.isGroupChecked(group)} + {@const enabledCount = toolsPanel.getEnabledToolCount(group)} + {@const favicon = toolsPanel.getFavicon(group)} + + + {/each} +
+
+
+ {/if} + + + + {#if hasMcpPromptsSupport} + + {/if} + + {#if hasMcpResourcesSupport} + + {/if} +
+
+
+
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionAddToolsSubmenu.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionAddToolsSubmenu.svelte new file mode 100644 index 00000000..9a5b0cbe --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionAddToolsSubmenu.svelte @@ -0,0 +1,157 @@ + + + open && toolsPanel.handleOpen()}> + + + + Tools + + + + {#if toolsPanel.totalToolCount === 0} + {#if toolsStore.loading} +
+ + + Loading tools... +
+ {:else if toolsStore.isToolsEndpointUnreachable} +
+ + + + + Run llama-server with {CLI_FLAGS.TOOLS} flag to enable + + Built-in Tools. + + + + + + + + {hasMcpServersAvailable ? 'Enable' : 'Add'} MCP Server(s) to access + + MCP Tools. + + +
+ {:else if toolsStore.error} +
Failed to load tools
+ {:else if toolsPanel.noToolsInfoMessage} +
+ + + {toolsPanel.noToolsInfoMessage} +
+ {:else} +
No tools available
+ {/if} + {:else} +
+ {#each toolsPanel.activeGroups as group (group.label)} + {@const isExpanded = toolsPanel.expandedGroups.has(group.label)} + {@const checked = toolsPanel.isGroupChecked(group)} + {@const favicon = toolsPanel.getFavicon(group)} + + toolsPanel.toggleGroupExpanded(group.label)} + > +
+ + {#if isExpanded} + + {:else} + + {/if} + + + {#if favicon} + { + (e.currentTarget as HTMLImageElement).style.display = 'none'; + }} + /> + {/if} + + {group.label} + + + + {toolsPanel.getEnabledToolCount(group)}/{group.tools.length} + + + + + + {#snippet child({ props })} + toolsPanel.toggleGroupByLabel(group.label)} + class="mr-2 h-4 w-4 shrink-0" + /> + {/snippet} + + + +

+ {checked ? 'Disable' : 'Enable'} + {group.tools.length} tool{group.tools.length !== 1 ? 's' : ''} +

+
+
+
+ + +
+ {#each group.tools as entry (entry.key)} + {@const enabled = toolsStore.isToolEnabled(entry.key)} + + {/each} +
+
+
+ {/each} +
+ {/if} +
+
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionsAdd.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionsAdd.svelte new file mode 100644 index 00000000..6a91bf90 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionsAdd.svelte @@ -0,0 +1,66 @@ + + +{#if isMobile.current} + + {#snippet trigger({ disabled, onclick })} + + {/snippet} + +{:else} + +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionModels.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionModels.svelte new file mode 100644 index 00000000..712326cb --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionModels.svelte @@ -0,0 +1,193 @@ + + +{#if isMobile.current} + +{:else} + +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionRecord.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionRecord.svelte new file mode 100644 index 00000000..f1b08490 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionRecord.svelte @@ -0,0 +1,52 @@ + + +
+ + + + + + {#if !hasAudioModality} + +

Current model does not support audio

+
+ {/if} +
+
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionSubmit.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionSubmit.svelte new file mode 100644 index 00000000..8774bf63 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionSubmit.svelte @@ -0,0 +1,46 @@ + + +{#snippet submitButton(props = {})} + +{/snippet} + +{#if tooltipLabel} + + + {@render submitButton()} + + + +

{tooltipLabel}

+
+
+{:else} + {@render submitButton()} +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActions.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActions.svelte new file mode 100644 index 00000000..a80f00bc --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActions.svelte @@ -0,0 +1,177 @@ + + +
+ {#if showAddButton} +
+ goto(ROUTES.MCP_SERVERS)} + /> +
+ {/if} + +
+ + + {#if showModelSelector} + + {/if} +
+ + {#if isReasoning} + + {/if} + + {#if isLoading && !canSubmit} + + {:else if shouldShowRecordButton} + + {:else} + + {/if} +
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormReasoningEffortSubmenu.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormReasoningEffortSubmenu.svelte new file mode 100644 index 00000000..936e0377 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormReasoningEffortSubmenu.svelte @@ -0,0 +1,132 @@ + + +{#if modelSupportsThinking} + + + {#if thinkingEnabled} + + {:else} + + {/if} + + Thinking + + {#if thinkingEnabled} + {currentEffort} + {:else} + off + {/if} + + + + {#each REASONING_EFFORT_LEVELS as level (level.value)} + + {/each} + + +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormReasoningToggle.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormReasoningToggle.svelte new file mode 100644 index 00000000..f6bcbcb0 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormReasoningToggle.svelte @@ -0,0 +1,145 @@ + + +{#if modelSupportsThinking} + + + + + {#if thinkingEnabled} + + {:else} + + {/if} + + + + +

{tooltipText}

+
+
+ + +
Reasoning effort
+ + {#each REASONING_EFFORT_LEVELS as level (level.value)} + + {/each} +
+
+{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormFileInputInvisible.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormFileInputInvisible.svelte index aa277630..395ecb20 100644 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormFileInputInvisible.svelte +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormFileInputInvisible.svelte @@ -1,31 +1,21 @@ - -{#if show} -
-

- Press Enter to send, - Shift + Enter for new line -

-
-{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormMcpResourcesList.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormMcpResourcesList.svelte new file mode 100644 index 00000000..36c82224 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormMcpResourcesList.svelte @@ -0,0 +1,44 @@ + + +{#if hasAttachments} +
+ + {#each attachments as attachment, i (attachment.id)} + handleResourceClick(attachment.resource.uri)} + /> + {/each} + +
+{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte deleted file mode 100644 index f07eb7a4..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte +++ /dev/null @@ -1,352 +0,0 @@ - - - - - - -
- {#if loading && options.length === 0 && !isMounted} -
- - Loading models… -
- {:else if options.length === 0} -

No models available.

- {:else} - {@const selectedOption = getDisplayOption()} - -
- - - {#if isOpen} -
-
0 - ? `${menuPosition.maxHeight}px` - : undefined} - > - {#each options as option (option.id)} - - {/each} -
-
- {/if} -
- {/if} - - {#if error} -

{error}

- {/if} -
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPicker/ChatFormPickerItemHeader.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPicker/ChatFormPickerItemHeader.svelte new file mode 100644 index 00000000..11ca5204 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPicker/ChatFormPickerItemHeader.svelte @@ -0,0 +1,55 @@ + + +
+
+ {#if faviconUrl} + { + (e.currentTarget as HTMLImageElement).style.display = 'none'; + }} + /> + {/if} + + {serverLabel} +
+ +
+ + {title} + + + {#if titleExtra} + {@render titleExtra()} + {/if} +
+ + {#if description} +

+ {description} +

+ {/if} + + {#if subtitle} + {@render subtitle()} + {/if} +
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPicker/ChatFormPickerList.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPicker/ChatFormPickerList.svelte new file mode 100644 index 00000000..6647928b --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPicker/ChatFormPickerList.svelte @@ -0,0 +1,81 @@ + + + + {#if showSearchInput} +
+ +
+ {/if} + +
+ {#if isLoading} + {#if skeleton} + {@render skeleton()} + {/if} + {:else if items.length === 0} +
{emptyMessage}
+ {:else} + {#each items as itemData, index (itemKey(itemData, index))} + {@render item(itemData, index, index === selectedIndex)} + {/each} + {/if} +
+ + {#if footer} + {@render footer()} + {/if} +
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPicker/ChatFormPickerListItem.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPicker/ChatFormPickerListItem.svelte new file mode 100644 index 00000000..4d82c6b5 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPicker/ChatFormPickerListItem.svelte @@ -0,0 +1,23 @@ + + + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPicker/ChatFormPickerListItemSkeleton.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPicker/ChatFormPickerListItemSkeleton.svelte new file mode 100644 index 00000000..5a2ab26f --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPicker/ChatFormPickerListItemSkeleton.svelte @@ -0,0 +1,30 @@ + + +
+
+ +
+
+
+
+ + +
+
+ + {#if showBadge} +
+ {/if} +
+ + +
+
+
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPicker/ChatFormPickerPopover.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPicker/ChatFormPickerPopover.svelte new file mode 100644 index 00000000..c43a002e --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPicker/ChatFormPickerPopover.svelte @@ -0,0 +1,50 @@ + + + { + if (!open) { + onClose?.(); + } + }} +> + + + event.preventDefault()} + > + {@render children()} + + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPickerMcpPrompts/ChatFormPickerMcpPrompts.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPickerMcpPrompts/ChatFormPickerMcpPrompts.svelte new file mode 100644 index 00000000..ff734ac8 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPickerMcpPrompts/ChatFormPickerMcpPrompts.svelte @@ -0,0 +1,435 @@ + + + + {#if selectedPrompt} + {@const prompt = selectedPrompt} + {@const server = serverSettingsMap.get(prompt.serverName)} + {@const serverLabel = server ? mcpStore.getServerLabel(server) : prompt.serverName} + +
+ + {#snippet titleExtra()} + {#if prompt.arguments?.length} + + {prompt.arguments.length} arg{prompt.arguments.length > 1 ? 's' : ''} + + {/if} + {/snippet} + + + +
+ {:else} + prompt.serverName + ':' + prompt.name} + > + {#snippet item(prompt, index, isSelected)} + {@const server = serverSettingsMap.get(prompt.serverName)} + {@const serverLabel = server ? mcpStore.getServerLabel(server) : prompt.serverName} + + handlePromptClick(prompt)} + > + + {#snippet titleExtra()} + {#if prompt.arguments?.length} + + {prompt.arguments.length} arg{prompt.arguments.length > 1 ? 's' : ''} + + {/if} + {/snippet} + + + {/snippet} + + {#snippet skeleton()} + + {/snippet} + + {/if} +
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPickerMcpPrompts/ChatFormPromptPickerArgumentForm.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPickerMcpPrompts/ChatFormPromptPickerArgumentForm.svelte new file mode 100644 index 00000000..92572b89 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPickerMcpPrompts/ChatFormPromptPickerArgumentForm.svelte @@ -0,0 +1,74 @@ + + +
+ {#each prompt.arguments ?? [] as arg (arg.name)} + onArgInput(arg.name, value)} + onKeydown={(e) => onArgKeydown(e, arg.name)} + onBlur={() => onArgBlur(arg.name)} + onFocus={() => onArgFocus(arg.name)} + onSelectSuggestion={(value) => onSelectSuggestion(arg.name, value)} + /> + {/each} + + {#if promptError} + + {/if} + +
+ + + +
+ diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPickerMcpPrompts/ChatFormPromptPickerArgumentInput.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPickerMcpPrompts/ChatFormPromptPickerArgumentInput.svelte new file mode 100644 index 00000000..638d10ee --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPickerMcpPrompts/ChatFormPromptPickerArgumentInput.svelte @@ -0,0 +1,84 @@ + + +
+ + + onInput(e.currentTarget.value)} + onkeydown={onKeydown} + onblur={onBlur} + onfocus={onFocus} + placeholder={argument.description || argument.name} + required={argument.required} + autocomplete="off" + /> + + {#if isAutocompleteActive && suggestions.length > 0} +
+ {#each suggestions as suggestion, i (suggestion)} + + {/each} +
+ {/if} +
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPickerMcpResources.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPickerMcpResources.svelte new file mode 100644 index 00000000..1125ae8e --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPickerMcpResources.svelte @@ -0,0 +1,237 @@ + + + + resource.serverName + ':' + resource.uri} + > + {#snippet item(resource, index, isSelected)} + {@const server = serverSettingsMap.get(resource.serverName)} + {@const serverLabel = server ? mcpStore.getServerLabel(server) : resource.serverName} + + handleResourceClick(resource)} + > + + {#snippet titleExtra()} + {#if isResourceAttached(resource.uri)} + + attached + + {/if} + {/snippet} + + {#snippet subtitle()} +

+ {resource.uri} +

+ {/snippet} +
+
+ {/snippet} + + {#snippet skeleton()} + + {/snippet} + + {#snippet footer()} + {#if onBrowse && resources.length > 3} + + {/if} + {/snippet} +
+
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPickers.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPickers.svelte new file mode 100644 index 00000000..7c5dc85b --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPickers.svelte @@ -0,0 +1,75 @@ + + + + + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormTextarea.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormTextarea.svelte index 7c0679bd..72e62f31 100644 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormTextarea.svelte +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatForm/ChatFormTextarea.svelte @@ -1,10 +1,11 @@ - -{#if message.role === 'user'} - -{:else} - (shouldBranchAfterEdit = value)} - {showDeleteDialog} - {siblingInfo} - {thinkingContent} - {toolCallContent} - /> -{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessage.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessage.svelte new file mode 100644 index 00000000..4d0b302d --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessage.svelte @@ -0,0 +1,395 @@ + + +
+ {#if message.role === MessageRole.SYSTEM} + + {:else if mcpPromptExtra} + + {:else if message.role === MessageRole.USER} + + {:else} + + {/if} +
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageAssistant/ChatMessageAssistant.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageAssistant/ChatMessageAssistant.svelte new file mode 100644 index 00000000..4c74206f --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageAssistant/ChatMessageAssistant.svelte @@ -0,0 +1,387 @@ + + +
+ {#if showProcessingInfoTop} +
+
+ + {processingState.getPromptProgressText() ?? + processingState.getProcessingMessage() ?? + 'Processing...'} + +
+
+ {/if} + + {#if editCtx.isEditing} + + {:else if message.role === MessageRole.ASSISTANT} + {#if showRawOutput} +
{rawOutputContent || ''}
+ {:else} + + {/if} + {:else} +
+ {messageContent} +
+ {/if} + + {#if showProcessingInfoBottom} +
+
+ + {processingState.getPromptProgressText() ?? + processingState.getProcessingMessage() ?? + 'Processing...'} + +
+
+ {/if} + +
+ {#if displayedModel} +
+ {#if isRouter} + { + const status = modelsStore.getModelStatus(modelId); + + if (status !== ServerModelStatus.LOADED) { + await modelsStore.loadModel(modelId); + } + + onRegenerate(modelName); + return true; + }} + /> + {:else} + + {/if} + + {#if currentConfig.showMessageStats && message.timings && message.timings.predicted_n && message.timings.predicted_ms} + {@const agentic = message.timings.agentic} + + {:else if isLoading() && currentConfig.showMessageStats} + {@const liveStats = processingState.getLiveProcessingStats()} + {@const genStats = processingState.getLiveGenerationStats()} + {@const promptProgress = processingState.processingState?.promptProgress} + {@const isStillProcessingPrompt = + promptProgress && promptProgress.processed < promptProgress.total} + + {#if liveStats || genStats} + + {/if} + {/if} +
+ {/if} +
+ + {#if message.timestamp && !editCtx.isEditing} + (showRawOutput = enabled)} + /> + {/if} +
+ + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageMcpPrompt/ChatMessageMcpPrompt.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageMcpPrompt/ChatMessageMcpPrompt.svelte new file mode 100644 index 00000000..2dcb36ba --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageMcpPrompt/ChatMessageMcpPrompt.svelte @@ -0,0 +1,83 @@ + + +
+ {#if editCtx.isEditing} + + {:else} + + + {#if message.timestamp} +
+ +
+ {/if} + {/if} +
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageMcpPrompt/ChatMessageMcpPromptContent.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageMcpPrompt/ChatMessageMcpPromptContent.svelte new file mode 100644 index 00000000..3d5dec3b --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageMcpPrompt/ChatMessageMcpPromptContent.svelte @@ -0,0 +1,197 @@ + + +
+
+
+ + + {#if serverFavicon} + { + (e.currentTarget as HTMLImageElement).style.display = 'none'; + }} + /> + {/if} + + + + {serverDisplayName} + + + + +
+ + {#if showArgBadges} +
+ {#each argumentEntries as [key, value] (key)} + + + + (hoveredArgKey = key)} + onmouseleave={() => (hoveredArgKey = null)} + > + {key} + + + + + {value} + + + {/each} +
+ {/if} +
+ + {#if loadError} + +
+ {loadError} +
+
+ {:else if isLoading} + +
+
+
+ +
+ +
+
+
+
+ {:else if hasContent} + +
+ + + + {#each contentParts as part, i (i)}{#if part.argKey} (hoveredArgKey = part.argKey)} + onmouseleave={() => (hoveredArgKey = null)}>{part.text}{:else}{part.text}{/if}{/each} +
+
+ {/if} +
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageSystem/ChatMessageSystem.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageSystem/ChatMessageSystem.svelte new file mode 100644 index 00000000..9d3d07a2 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageSystem/ChatMessageSystem.svelte @@ -0,0 +1,232 @@ + + +
+ {#if editCtx.isEditing} +
+ + +
+ + + +
+
+ {:else} + {#if message.content.trim()} +
+ +
+ {/if} +
+ + {#if isExpanded && showExpandButton} +
+ +
+ {/if} + + +
+ {/if} + + {#if message.timestamp} +
+ +
+ {/if} + {/if} + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageUser/ChatMessageUser.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageUser/ChatMessageUser.svelte new file mode 100644 index 00000000..96ec1ddf --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageUser/ChatMessageUser.svelte @@ -0,0 +1,83 @@ + + +
+ {#if editCtx.isEditing} + + {:else} + + + {#if message.timestamp} +
+ +
+ {/if} + {/if} +
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageUser/ChatMessageUserBubble.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageUser/ChatMessageUserBubble.svelte new file mode 100644 index 00000000..dabb337d --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageUser/ChatMessageUserBubble.svelte @@ -0,0 +1,76 @@ + + +{#if attachments && attachments.length > 0} +
+ +
+{/if} + +{#if content.trim()} + + {#if renderMarkdown && currentConfig.renderUserContentAsMarkdown} +
+ +
+ {:else} + + {content} + + {/if} +
+{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageUser/ChatMessageUserPending.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageUser/ChatMessageUserPending.svelte new file mode 100644 index 00000000..4be582b3 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageUser/ChatMessageUserPending.svelte @@ -0,0 +1,69 @@ + + +
+ {#if editCtx.isEditing} + + {:else} + + +
+
+
+
+ + + +
+
+
+
+ {/if} +
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageActions.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageActions.svelte deleted file mode 100644 index ff335c32..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageActions.svelte +++ /dev/null @@ -1,100 +0,0 @@ - - -
-
- {#if siblingInfo && siblingInfo.totalSiblings > 1} - - {/if} - -
- - - {#if onEdit} - - {/if} - - {#if role === 'assistant' && onRegenerate} - - {/if} - - {#if role === 'assistant' && onContinue} - - {/if} - - -
-
-
- - 1 - ? `This will delete ${deletionInfo.totalCount} messages including: ${deletionInfo.userMessages} user message${deletionInfo.userMessages > 1 ? 's' : ''} and ${deletionInfo.assistantMessages} assistant response${deletionInfo.assistantMessages > 1 ? 's' : ''}. All messages in this branch and their responses will be permanently removed. This action cannot be undone.` - : 'Are you sure you want to delete this message? This action cannot be undone.'} - confirmText={deletionInfo && deletionInfo.totalCount > 1 - ? `Delete ${deletionInfo.totalCount} Messages` - : 'Delete'} - cancelText="Cancel" - variant="destructive" - icon={Trash2} - onConfirm={handleConfirmDelete} - onCancel={() => onShowDeleteDialogChange(false)} -/> diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageActions/ChatMessageActionCard/ChatMessageActionCard.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageActions/ChatMessageActionCard/ChatMessageActionCard.svelte new file mode 100644 index 00000000..25403197 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageActions/ChatMessageActionCard/ChatMessageActionCard.svelte @@ -0,0 +1,23 @@ + + +
+
+ + + {@render message()} + +
+
+ {@render actions()} +
+
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageActions/ChatMessageActionCard/ChatMessageActionCardContinueRequest.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageActions/ChatMessageActionCard/ChatMessageActionCardContinueRequest.svelte new file mode 100644 index 00000000..bbb1f0ac --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageActions/ChatMessageActionCard/ChatMessageActionCardContinueRequest.svelte @@ -0,0 +1,30 @@ + + + + {#snippet message()} + Agentic turn limit reached. Continue? + {/snippet} + + {#snippet actions()} + + + + {/snippet} + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageActions/ChatMessageActionCard/ChatMessageActionCardPermissionRequest.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageActions/ChatMessageActionCard/ChatMessageActionCardPermissionRequest.svelte new file mode 100644 index 00000000..e466c84e --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageActions/ChatMessageActionCard/ChatMessageActionCardPermissionRequest.svelte @@ -0,0 +1,88 @@ + + + + {#snippet message()} + Allow use of + + {toolName} + + {#if serverLabel} + from {serverLabel} + {/if} + + ? + {/snippet} + + {#snippet actions()} + + + + + + + + + + + + + onDecision(ToolPermissionDecision.ALWAYS)}> + Always allow
{toolName}
+ tool +
+ {#if serverLabel} + onDecision(ToolPermissionDecision.ALWAYS_SERVER)}> + Always allow all tools from {serverLabel} + + {:else} + {@const source = toolsStore.getToolSource(toolName)} + {@const providerName = + source === ToolSource.BUILTIN + ? TOOL_SERVER_LABELS[ToolSource.BUILTIN] + : source === ToolSource.CUSTOM + ? TOOL_SERVER_LABELS[ToolSource.CUSTOM] + : 'MCP Tools'} + onDecision(ToolPermissionDecision.ALWAYS_SERVER)}> + Approve all tools from {providerName} + + {/if} +
+
+ + + {/snippet} +
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageActions/ChatMessageActionIcons/ChatMessageActionIcons.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageActions/ChatMessageActionIcons/ChatMessageActionIcons.svelte new file mode 100644 index 00000000..503a2d08 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageActions/ChatMessageActionIcons/ChatMessageActionIcons.svelte @@ -0,0 +1,184 @@ + + +
+
+ {#if siblingInfo && siblingInfo.totalSiblings > 1} + + {/if} + +
+ + + {#if onEdit} + + {/if} + + {#if role === MessageRole.ASSISTANT && onRegenerate} + onRegenerate()} /> + {/if} + + {#if role === MessageRole.ASSISTANT && onContinue} + + {/if} + + {#if onForkConversation} + + {/if} + + +
+
+ + {#if showRawOutputSwitch} +
+ Show raw output + onRawOutputToggle?.(checked)} + /> +
+ {/if} +
+ + 1 + ? `This will delete ${deletionInfo.totalCount} messages including: ${deletionInfo.userMessages} user message${deletionInfo.userMessages > 1 ? 's' : ''} and ${deletionInfo.assistantMessages} assistant response${deletionInfo.assistantMessages > 1 ? 's' : ''}. All messages in this branch and their responses will be permanently removed. This action cannot be undone.` + : 'Are you sure you want to delete this message? This action cannot be undone.'} + confirmText={deletionInfo && deletionInfo.totalCount > 1 + ? `Delete ${deletionInfo.totalCount} Messages` + : 'Delete'} + cancelText="Cancel" + variant="destructive" + icon={Trash2} + onConfirm={handleConfirmDelete} + onCancel={() => onShowDeleteDialogChange(false)} +/> + + (showForkDialog = false)} +> +
+
+ + + +
+ +
+ { + forkIncludeAttachments = checked === true; + }} + /> + + +
+
+
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageActions/ChatMessageActionIcons/ChatMessageActionIconsBranchingControls.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageActions/ChatMessageActionIcons/ChatMessageActionIconsBranchingControls.svelte new file mode 100644 index 00000000..465dcab7 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageActions/ChatMessageActionIcons/ChatMessageActionIconsBranchingControls.svelte @@ -0,0 +1,49 @@ + + +{#if siblingInfo && siblingInfo.totalSiblings > 1} + +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageAgenticContent.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageAgenticContent.svelte new file mode 100644 index 00000000..3a9cc7e9 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageAgenticContent.svelte @@ -0,0 +1,415 @@ + + +{#snippet renderSection(section: (typeof sectionsParsed)[number], index: number)} + {#if section.type === AgenticSectionType.TEXT} +
+ +
+ {:else if section.type === AgenticSectionType.TOOL_CALL_STREAMING} + {@const streamingIcon = isStreaming ? Loader2 : Loader2} + {@const streamingIconClass = isStreaming ? 'h-4 w-4 animate-spin' : 'h-4 w-4'} + + toggleExpanded(index, section)} + > +
+
+ Arguments: + + {#if isStreaming} + + {/if} +
+ {#if section.toolArgs} + + {:else if isStreaming} +
+ Receiving arguments... +
+ {:else} +
+ Response was truncated +
+ {/if} +
+
+ {:else if section.type === AgenticSectionType.TOOL_CALL || section.type === AgenticSectionType.TOOL_CALL_PENDING} + {@const isPending = section.type === AgenticSectionType.TOOL_CALL_PENDING} + {@const toolIcon = isPending ? Loader2 : Wrench} + {@const toolIconClass = isPending ? 'h-4 w-4 animate-spin' : 'h-4 w-4'} + + toggleExpanded(index, section)} + > + {#if section.toolArgs && section.toolArgs !== '{}'} +
+
Arguments:
+ + +
+ {/if} + +
+
+ Result: + + {#if isPending} + + {/if} +
+ {#if isPending} +
+ Waiting for result... +
+ {:else if section.toolResult} +
+ {#each section.parsedLines as line, i (i)} +
+ {line.text} +
+ {#if line.image} + {line.image.name} + {/if} + {/each} +
+ {:else} +
No output
+ {/if} +
+
+ {:else if section.type === AgenticSectionType.REASONING} + toggleExpanded(index, section)} + > +
+
+ {section.content} +
+
+
+ {:else if section.type === AgenticSectionType.REASONING_PENDING} + {@const reasoningTitle = isStreaming ? 'Reasoning...' : 'Reasoning'} + {@const reasoningSubtitle = isStreaming ? '' : 'incomplete'} + + toggleExpanded(index, section)} + > +
+
+ {section.content} +
+
+
+ {/if} +{/snippet} + +
+ {#if highlightTurns && turnGroups.length > 1} + {#each turnGroups as turn, turnIndex (turnIndex)} + {@const turnStats = message?.timings?.agentic?.perTurn?.[turnIndex]} +
+ Turn {turnIndex + 1} + {#each turn.sections as section, sIdx (turn.flatIndices[sIdx])} + {@render renderSection(section, turn.flatIndices[sIdx])} + {/each} + {#if turnStats} +
+ 0 + ? buildTurnAgenticTimings(turnStats) + : undefined} + initialView={ChatMessageStatsView.GENERATION} + hideSummary + /> +
+ {/if} +
+ {/each} + {:else} + {#each sectionsParsed as section, index (index)} + {@render renderSection(section, index)} + {/each} + {/if} + + {#if pendingPermission && !permissionDismissed} + + {/if} + + {#if pendingContinue && !continueDismissed} + + {/if} +
+ + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte deleted file mode 100644 index 865d81ba..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte +++ /dev/null @@ -1,420 +0,0 @@ - - -
- {#if thinkingContent} - - {/if} - - {#if message?.role === 'assistant' && isLoading() && !message?.content?.trim()} -
-
- - {processingState.getProcessingMessage()} - -
-
- {/if} - - {#if isEditing} -
- - -
-
- onShouldBranchAfterEditChange?.(checked === true)} - /> - -
-
- - - -
-
-
- {:else if message.role === 'assistant'} - {#if config().disableReasoningFormat} -
{messageContent || ''}
- {:else} - - {/if} - {:else} -
- {messageContent} -
- {/if} - -
- {#if displayedModel()} - - - - - Model used: - - - - - {/if} - - {#if config().showToolCalls} - {#if (toolCalls && toolCalls.length > 0) || fallbackToolCalls} - - - - - Tool calls: - - - {#if toolCalls && toolCalls.length > 0} - {#each toolCalls as toolCall, index (toolCall.id ?? `${index}`)} - {@const badge = formatToolCallBadge(toolCall, index)} - - {/each} - {:else if fallbackToolCalls} - - {/if} - - {/if} - {/if} - - {#if currentConfig.showMessageStats && message.timings && message.timings.predicted_n && message.timings.predicted_ms} - {@const tokensPerSecond = (message.timings.predicted_n / message.timings.predicted_ms) * 1000} - - - - - Statistics: - - -
- - - {tokensPerSecond.toFixed(2)} tokens/s - - - - {message.timings.predicted_n} tokens - - - - {(message.timings.predicted_ms / 1000).toFixed(2)}s - -
-
- {/if} -
- - {#if message.timestamp && !isEditing} - - {/if} -
- - diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageBranchingControls.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageBranchingControls.svelte deleted file mode 100644 index 7420bb19..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageBranchingControls.svelte +++ /dev/null @@ -1,84 +0,0 @@ - - -{#if siblingInfo && siblingInfo.totalSiblings > 1} - -{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageEditForm.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageEditForm.svelte new file mode 100644 index 00000000..962f2a28 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageEditForm.svelte @@ -0,0 +1,154 @@ + + + + +
+ +
+ +
+ {#if isUserMessage && editCtx.showSaveOnlyOption} +
+ + + +
+ {:else if isAssistantMessage} +
+ + + +
+ {:else} +
+ {/if} + + +
+ + (showDiscardDialog = false)} +/> diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageStatistics/ChatMessageStatistics.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageStatistics/ChatMessageStatistics.svelte new file mode 100644 index 00000000..34362e02 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageStatistics/ChatMessageStatistics.svelte @@ -0,0 +1,303 @@ + + +
+
+ {#if hasPromptStats || isLive} + + + + + + +

Reading (prompt processing)

+
+
+ {/if} + + + + + + +

+ {isGenerationDisabled + ? 'Generation (waiting for tokens...)' + : 'Generation (token output)'} +

+
+
+ + {#if hasAgenticStats} + + + + + + +

Tool calls

+
+
+ + {#if !hideSummary} + + + + + + +

Agentic summary

+
+
+ {/if} + {/if} +
+ +
+ {#if activeView === ChatMessageStatsView.GENERATION && hasGenerationStats} + + + + + + {:else if activeView === ChatMessageStatsView.TOOLS && hasAgenticStats} + + + + + + {:else if activeView === ChatMessageStatsView.SUMMARY && hasAgenticStats} + + + + + + {:else if hasPromptStats} + + + + + + {/if} +
+
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageStatistics/ChatMessageStatisticsBadge.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageStatistics/ChatMessageStatisticsBadge.svelte new file mode 100644 index 00000000..eea7da7b --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageStatistics/ChatMessageStatisticsBadge.svelte @@ -0,0 +1,44 @@ + + +{#if tooltipLabel} + + + + {#snippet icon()} + + {/snippet} + + {value} + + + +

{tooltipLabel}

+
+
+{:else} + + {#snippet icon()} + + {/snippet} + + {value} + +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageThinkingBlock.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageThinkingBlock.svelte deleted file mode 100644 index 9245ad51..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageThinkingBlock.svelte +++ /dev/null @@ -1,68 +0,0 @@ - - - - - -
- - - - {isStreaming ? 'Reasoning...' : 'Reasoning'} - -
- -
- - - Toggle reasoning content -
-
- - -
-
-
- {reasoningContent ?? ''} -
-
-
-
-
-
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageUser.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageUser.svelte deleted file mode 100644 index c8b615e1..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessageUser.svelte +++ /dev/null @@ -1,185 +0,0 @@ - - -
- {#if isEditing} -
- - -
- - - {#if onSaveEditOnly} - - {/if} - - -
-
- {:else} - {#if message.extra && message.extra.length > 0} -
- -
- {/if} - - {#if message.content.trim()} - - {#if currentConfig.renderUserContentAsMarkdown} -
- -
- {:else} - - {message.content} - - {/if} -
- {/if} - - {#if message.timestamp} -
- -
- {/if} - {/if} -
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessages.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessages.svelte index ee147858..281e6ad0 100644 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessages.svelte +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatMessages/ChatMessages.svelte @@ -1,33 +1,120 @@ -
- {#each displayMessages as { message, siblingInfo } (message.id)} +
+ {#each displayMessages as { message, toolMessages, isLastAssistantMessage, siblingInfo } (message.id)} {/each} + + {#if activeConversation() && agenticPendingSteeringMessageContent(activeConversation()!.id)} + {@const convId = activeConversation()!.id} + {@const pendingContent = agenticPendingSteeringMessageContent(convId)} + + {#if pendingContent} + chatStore.abortCurrentFlow(convId)} + onEdit={(newContent, extras) => agenticInjectSteeringMessage(convId, newContent, extras)} + onDelete={() => agenticClearSteeringMessage(convId)} + /> + {/if} + {:else if activeConversation() && chatPendingMessageContent(activeConversation()!.id)} + {@const convId = activeConversation()!.id} + {@const pendingContent = chatPendingMessageContent(convId)} + + {#if pendingContent} + chatStore.abortCurrentFlow(convId)} + onEdit={(newContent, extras) => chatInjectPendingMessage(convId, newContent, extras)} + onDelete={() => chatClearPendingMessage(convId)} + /> + {/if} + {/if}
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte index c736178f..e733a64a 100644 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte @@ -1,64 +1,56 @@ @@ -290,9 +383,9 @@ - - -{#if !isEmpty} +{#if isServerLoading} + +{:else}
- { - if (!disableAutoScroll) { - userScrolledUp = false; - autoScrollEnabled = true; - scrollChatToBottom(); - } - }} - /> - -
- - - {#if serverWarning()} - +
+ {#if !isEmpty} + { + autoScroll.enable(); + if (!autoScroll.userScrolledUp) { + autoScroll.scrollToBottom(); + } + }} + /> {/if} -
- stopGeneration()} - showHelperText={false} - bind:uploadedFiles +
+ + + -
-
-
-{:else if isServerLoading} - - -{:else if serverStore.error && !serverStore.modelName} - -{:else if serverStore.modelName} -
-
-
-

llama.cpp

-

How can I help you today?

-
+ -
- -
+ - {#if serverWarning()} - - {/if} - -
- stopGeneration()} - showHelperText={true} - bind:uploadedFiles - /> +
+ chatStore.stopGeneration()} + onSystemPromptAdd={handleSystemPromptAdd} + bind:uploadedFiles + /> +
{/if} - - - - - - - - File Upload Error - - - Some files cannot be uploaded with the current model. - - - -
- {#if fileErrorData.generallyUnsupported.length > 0} -
-

Unsupported File Types

- -
- {#each fileErrorData.generallyUnsupported as file (file.name)} -
-

- {file.name} -

- -

File type not supported

-
- {/each} -
-
- {/if} - - {#if fileErrorData.modalityUnsupported.length > 0} -
-
- {#each fileErrorData.modalityUnsupported as file (file.name)} -
-

- {file.name} -

- -

- {fileErrorData.modalityReasons[file.name] || 'Not supported by current model'} -

-
- {/each} -
-
- {/if} -
- -
-

This model supports:

- -

- {fileErrorData.supportedTypes.join(', ')} -

-
- - - (showFileErrorDialog = false)}> - Got it - - -
-
-
+ - - diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenActionScrollDown.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenActionScrollDown.svelte new file mode 100644 index 00000000..c43bee3e --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenActionScrollDown.svelte @@ -0,0 +1,61 @@ + + +
+ +
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenForm.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenForm.svelte new file mode 100644 index 00000000..aa1c0536 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenForm.svelte @@ -0,0 +1,126 @@ + + +
+ +
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenGreeting.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenGreeting.svelte new file mode 100644 index 00000000..141d4f4e --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenGreeting.svelte @@ -0,0 +1,25 @@ + + + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenHeader.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenHeader.svelte deleted file mode 100644 index 24803d0a..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenHeader.svelte +++ /dev/null @@ -1,23 +0,0 @@ - - -
-
- -
-
- - (settingsOpen = open)} /> diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenProcessingInfo.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenProcessingInfo.svelte index ecab2321..f38f3519 100644 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenProcessingInfo.svelte +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenProcessingInfo.svelte @@ -1,34 +1,55 @@ -
-
+
+
{#each processingDetails as detail (detail)} - {detail} + {detail} {/each}
@@ -89,7 +86,7 @@ position: sticky; top: 0; z-index: 10; - padding: 1.5rem 1rem; + padding: 0 1rem 0.75rem; opacity: 0; transform: translateY(50%); transition: @@ -116,7 +113,6 @@ color: var(--muted-foreground); font-size: 0.75rem; padding: 0.25rem 0.75rem; - background: var(--muted); border-radius: 0.375rem; font-family: ui-monospace, SFMono-Regular, 'SF Mono', Consolas, 'Liberation Mono', Menlo, monospace; diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenServerError.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenServerError.svelte new file mode 100644 index 00000000..2a998dbe --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenServerError.svelte @@ -0,0 +1,34 @@ + + +{#if hasError} +
+ + + + + Server unavailable + + + + + {serverError()} + +
+{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenWarning.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenWarning.svelte deleted file mode 100644 index 8b8d9168..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatScreen/ChatScreenWarning.svelte +++ /dev/null @@ -1,38 +0,0 @@ - - -
-
-
-
- -

- Server `/props` endpoint not available - using cached data -

-
- -
-
-
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSettings/ChatSettings.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSettings/ChatSettings.svelte deleted file mode 100644 index 204f0d75..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSettings/ChatSettings.svelte +++ /dev/null @@ -1,497 +0,0 @@ - - -
- - - - -
-
- -
- - -
-
- {#each settingSections as section (section.title)} - - {/each} -
-
- - -
-
-
- - -
-
- - - {#if currentSection.title === 'Import/Export'} - - {:else} -
- -
- {/if} -
- -
-

Settings are saved in browser's localStorage

-
-
-
-
- - diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSettings/ChatSettingsImportExportTab.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSettings/ChatSettingsImportExportTab.svelte deleted file mode 100644 index b2adf394..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSettings/ChatSettingsImportExportTab.svelte +++ /dev/null @@ -1,255 +0,0 @@ - - -
-
-
-

Export Conversations

- -

- Download all your conversations as a JSON file. This includes all messages, attachments, and - conversation history. -

- - - - {#if showExportSummary && exportedConversations.length > 0} -
-
- Exported {exportedConversations.length} conversation{exportedConversations.length === 1 - ? '' - : 's'} -
- -
    - {#each exportedConversations.slice(0, 10) as conv (conv.id)} -
  • • {conv.name || 'Untitled conversation'}
  • - {/each} - - {#if exportedConversations.length > 10} -
  • - ... and {exportedConversations.length - 10} more -
  • - {/if} -
-
- {/if} -
- -
-

Import Conversations

- -

- Import one or more conversations from a previously exported JSON file. This will merge with - your existing conversations. -

- - - - {#if showImportSummary && importedConversations.length > 0} -
-
- Imported {importedConversations.length} conversation{importedConversations.length === 1 - ? '' - : 's'} -
- -
    - {#each importedConversations.slice(0, 10) as conv (conv.id)} -
  • • {conv.name || 'Untitled conversation'}
  • - {/each} - - {#if importedConversations.length > 10} -
  • - ... and {importedConversations.length - 10} more -
  • - {/if} -
-
- {/if} -
-
-
- - (showExportDialog = false)} - onConfirm={handleExportConfirm} -/> - - (showImportDialog = false)} - onConfirm={handleImportConfirm} -/> diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte deleted file mode 100644 index 34f3da53..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte +++ /dev/null @@ -1,207 +0,0 @@ - - - - - -

llama.cpp

-
- - -
- - - {#if (filteredConversations.length > 0 && isSearchModeActive) || !isSearchModeActive} - - {isSearchModeActive ? 'Search results' : 'Conversations'} - - {/if} - - - - {#each filteredConversations as conversation (conversation.id)} - - - - {/each} - - {#if filteredConversations.length === 0} -
-

- {searchQuery.length > 0 - ? 'No results found' - : isSearchModeActive - ? 'Start typing to see results' - : 'No conversations yet'} -

-
- {/if} -
-
-
- -
-
- - { - showDeleteDialog = false; - selectedConversation = null; - }} -/> - - - - - Edit Conversation Name - - { - if (e.key === 'Enter') { - e.preventDefault(); - handleConfirmEdit(); - } - }} - placeholder="Enter a new name" - type="text" - bind:value={editedName} - /> - - - - { - showEditDialog = false; - selectedConversation = null; - }}>Cancel - Save - - - diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSidebar/ChatSidebarActions.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSidebar/ChatSidebarActions.svelte deleted file mode 100644 index 30d1f9d4..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSidebar/ChatSidebarActions.svelte +++ /dev/null @@ -1,81 +0,0 @@ - - -
- {#if isSearchModeActive} -
- - - e.key === 'Escape' && handleSearchModeDeactivate()} - placeholder="Search conversations..." - class="pl-8" - /> - - -
- {:else} - - - - {/if} -
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSidebar/ChatSidebarSearch.svelte b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSidebar/ChatSidebarSearch.svelte deleted file mode 100644 index c9e6c661..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSidebar/ChatSidebarSearch.svelte +++ /dev/null @@ -1,33 +0,0 @@ - - -
- - - -
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSidebar/handle-mobile-sidebar-item-click.ts b/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSidebar/handle-mobile-sidebar-item-click.ts deleted file mode 100644 index 4b9b8765..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSidebar/handle-mobile-sidebar-item-click.ts +++ /dev/null @@ -1,9 +0,0 @@ -import { useSidebar } from '$lib/components/ui/sidebar'; - -const sidebar = useSidebar(); - -export function handleMobileSidebarItemClick() { - if (sidebar.isMobile) { - sidebar.toggle(); - } -} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/index.ts b/examples/server/webui_llamacpp/src/lib/components/app/chat/index.ts new file mode 100644 index 00000000..8ed3cc65 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/chat/index.ts @@ -0,0 +1,692 @@ +/** + * + * ATTACHMENTS + * + * Components for displaying and managing different attachment types in chat messages. + * Supports two operational modes: + * - **Readonly mode**: For displaying stored attachments in sent messages (DatabaseMessageExtra[]) + * - **Editable mode**: For managing pending uploads in the input form (ChatUploadedFile[]) + * + * The attachment system uses `getAttachmentDisplayItems()` utility to normalize both + * data sources into a unified display format, enabling consistent rendering regardless + * of the attachment origin. + * + */ + +/** + * **ChatAttachmentsList** - Unified display for file attachments in chat + * + * Central component for rendering file attachments in both ChatMessage (readonly) + * and ChatForm (editable) contexts. 
+ * + * **Architecture:** + * - Delegates rendering to specialized thumbnail components based on attachment type + * - Manages scroll state and navigation arrows for horizontal overflow + * - Integrates with DialogChatAttachmentsPreview for full-size gallery/single viewing + * - Validates vision modality support via `activeModelId` prop + * + * **Features:** + * - Horizontal scroll with smooth navigation arrows + * - Image thumbnails with lazy loading and error fallback + * - File type icons for non-image files (PDF, text, audio, etc.) + * - MCP prompt attachments with expandable content preview + * - Click-to-preview with full-size dialog and download option + * - "View All" button when `limitToSingleRow` is enabled and content overflows + * - Vision modality validation to warn about unsupported image uploads + * - Customizable thumbnail dimensions via `imageHeight`/`imageWidth` props + * + * @example + * ```svelte + * + * + * + * + * removeFile(id)} + * limitToSingleRow + * activeModelId={selectedModel} + * /> + * ``` + */ +export { default as ChatAttachmentsList } from './ChatAttachments/ChatAttachmentsList/ChatAttachmentsList.svelte'; + +/** + * Renders a single attachment item based on its type (image, file, MCP prompt, or MCP resource). + * Delegates to specialized sub-components: ChatAttachmentsListItemThumbnailImage, ChatAttachmentsListItemThumbnailFile, + * ChatAttachmentsListItemMcpPrompt, or ChatAttachmentsListItemMcpResource. + */ +export { default as ChatAttachmentsListItem } from './ChatAttachments/ChatAttachmentsList/ChatAttachmentsListItem/ChatAttachmentsListItem.svelte'; + +/** + * Displays MCP Prompt attachment with expandable content preview. + * Shows server name, prompt name, and allows expanding to view full prompt arguments + * and content. Used when user selects a prompt from ChatFormPickerMcpPrompts. 
+ */ +export { default as ChatAttachmentsListItemMcpPrompt } from './ChatAttachments/ChatAttachmentsList/ChatAttachmentsListItem/ChatAttachmentsListItemMcpPrompt.svelte'; + +/** + * Displays a single MCP Resource attachment with icon, name, and server info. + * Shows loading/error states and supports remove action. + * Used within ChatAttachmentMcpResources for individual resource display. + */ +export { default as ChatAttachmentsListItemMcpResource } from './ChatAttachments/ChatAttachmentsList/ChatAttachmentsListItem/ChatAttachmentsListItemMcpResource.svelte'; + +/** + * Thumbnail for non-image file attachments. Displays file type icon based on extension, + * file name (truncated), and file size. + * Handles text files, PDFs, audio, and other document types. + */ +export { default as ChatAttachmentsListItemThumbnailFile } from './ChatAttachments/ChatAttachmentsList/ChatAttachmentsListItem/ChatAttachmentsListItemThumbnailFile.svelte'; + +/** + * Thumbnail for image attachments with lazy loading and error fallback. + * Displays image preview with configurable dimensions. Falls back to placeholder + * on load error. + */ +export { default as ChatAttachmentsListItemThumbnailImage } from './ChatAttachments/ChatAttachmentsList/ChatAttachmentsListItem/ChatAttachmentsListItemThumbnailImage.svelte'; + +/** + * Unified attachment preview component for dialog display. Shows a single file + * preview without carousel, or a gallery/carousel view when multiple items exist. + * Uses ChatAttachmentPreviewSingle internally for each item's content. 
+ */ +export { default as ChatAttachmentsPreview } from './ChatAttachments/ChatAttachmentsPreview.svelte'; +export { default as ChatAttachmentsPreviewNavButtons } from './ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewNavButtons.svelte'; +export { default as ChatAttachmentsPreviewFileInfo } from './ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewFileInfo.svelte'; +export { default as ChatAttachmentsPreviewThumbnailStrip } from './ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewThumbnailStrip.svelte'; +export { default as ChatAttachmentsPreviewCurrentItem } from './ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItem.svelte'; + +/** + * + * FORM + * + * Components for the chat input area. The form handles user input, file attachments, + * audio recording, and MCP prompts & resources selection. It integrates with multiple stores: + * - `chatStore` for message submission and generation control + * - `modelsStore` for model selection and validation + * - `mcpStore` for MCP prompt browsing and loading + * + * The form exposes a public API for programmatic control from parent components + * (focus, height reset, model selector, validation). + * + */ + +/** + * **ChatForm** - Main chat input component with rich features + * + * The primary input interface for composing and sending chat messages. + * Orchestrates text input, file attachments, audio recording, and MCP prompts. + * Used by ChatScreenForm and ChatMessageEditForm for both new conversations and message editing. + * + * **Architecture:** + * - Composes ChatFormTextarea, ChatFormActions, and ChatFormPickerMcpPrompts + * - Manages file upload state via `uploadedFiles` bindable prop + * - Integrates with ModelsSelectorDropdown for model selection in router mode + * - Communicates with parent via callbacks (onSubmit, onFilesAdd, onStop, etc.) 
+ * + * **Input Handling:** + * - IME-safe Enter key handling (waits for composition end) + * - Shift+Enter for newline, Enter for submit + * - Paste handler for files and long text (> {pasteLongTextToFileLen} chars → file conversion) + * - Keyboard shortcut `/` triggers MCP prompt picker + * + * **Features:** + * - Auto-resizing textarea with placeholder + * - File upload via button dropdown (images/text/PDF), drag-drop, or paste + * - Audio recording with WAV conversion (when model supports audio) + * - MCP prompt picker with search and argument forms + * - MCP resource picker with component to list attached resources at the bottom of Chat Form + * - Model selector integration (router mode) + * - Loading state with stop button, disabled state for errors + * + * **Exported API:** + * - `focus()` - Focus the textarea programmatically + * - `resetTextareaHeight()` - Reset textarea to default height after submit + * - `openModelSelector()` - Open model selection dropdown + * - `checkModelSelected(): boolean` - Validate model selection, show error if none + * + * @example + * ```svelte + * + * ``` + */ +export { default as ChatForm } from './ChatForm/ChatForm.svelte'; + +/** + * Wrapper component for the "add to chat" button (Plus icon). + * Exposes a `button` snippet that can be used inside DropdownMenu.Trigger (desktop) + * or Sheet.Root (mobile) to maintain consistent styling while allowing + * platform-specific trigger wrappers. + */ +export { default as ChatFormActionsAdd } from './ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionsAdd.svelte'; + +/** + * Audio recording button with real-time recording indicator. Records audio + * and converts to WAV format for upload. Only visible when the active model + * supports audio modality and setting for automatic audio input is enabled. Shows recording duration while active. 
+ */ +export { default as ChatFormActionRecord } from './ChatForm/ChatFormActions/ChatFormActionRecord.svelte'; + +/** + * Container for chat form action buttons. Arranges file attachment, audio record, + * and submit/stop buttons in a horizontal layout. Handles conditional visibility + * based on model capabilities and loading state. + */ +export { default as ChatFormActions } from './ChatForm/ChatFormActions/ChatFormActions.svelte'; + +/** + * Submit/stop button with loading state. Shows send icon normally, transforms + * to stop icon during generation. Disabled when input is empty or form is disabled. + * Triggers onSubmit or onStop callbacks based on current state. + */ +export { default as ChatFormActionSubmit } from './ChatForm/ChatFormActions/ChatFormActionSubmit.svelte'; + +/** + * Model selector component for the chat form action bar. Renders either a dropdown + * (desktop) or bottom sheet (mobile) for selecting the conversation model in router mode. + * Exposes an `open` method for programmatically opening the selector. + */ +export { default as ChatFormActionModels } from './ChatForm/ChatFormActions/ChatFormActionModels.svelte'; + +/** + * Dropdown submenu for managing tool permissions in the chat form. + * + * Displays a collapsible list of available tools organized by group (Built-in / JSON Schema). + * Each group can be expanded to show individual tools with checkboxes for enabling/disabling. + * Provides bulk enable/disable controls per group and shows enabled/total tool counts. + * Opens the tools panel on the server when the menu opens. 
+ * + * Features: + * - Grouped tools with collapsible sections + * - Group favicon display (MCP server icons) + * - Per-group and per-tool toggle checkboxes + * - Loading/error states for tool discovery + * - Integration with toolsPanel for state management + * + * @example + * ```svelte + * + * ``` + */ +export { default as ChatFormActionAddToolsSubmenu } from './ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionAddToolsSubmenu.svelte'; + +/** + * Dropdown submenu for managing MCP servers in the chat form. + * + * Displays a searchable list of enabled MCP servers with toggle switches + * to enable/disable each server for chat. Shows server favicon, health status, + * and a "Manage MCP Servers" settings link. + * + * Features: + * - Search/filter servers by name or URL + * - Per-server toggle to enable/disable for chat + * - Health check indicator (shows "Error" badge for failed servers) + * - Server favicon display + * - Settings link to manage MCP server configuration + * + * @example + * ```svelte + * + * ``` + */ +export { default as ChatFormActionAddMcpServersSubmenu } from './ChatForm/ChatFormActions/ChatFormActionAdd/ChatFormActionAddMcpServersSubmenu.svelte'; + +/** + * **ChatFormReasoningToggle** - Thinking toggle button with effort dropdown + * + * A toggle button with lightbulb icon that indicates thinking status. + * Shows the reasoning effort dropdown when clicked. + * Only visible when the current model supports thinking. + */ +export { default as ChatFormReasoningToggle } from './ChatForm/ChatFormActions/ChatFormReasoningToggle.svelte'; + +/** + * Hidden file input element for programmatic file selection. + */ +export { default as ChatFormFileInputInvisible } from './ChatForm/ChatFormFileInputInvisible.svelte'; + +/** + * Displays MCP Resource attachments as a horizontal carousel. + * Shows resource name, URI, and allows clicking to view resource content. 
+ */ +export { default as ChatFormMcpResourcesList } from './ChatForm/ChatFormMcpResourcesList.svelte'; + +/** + * Auto-resizing textarea with IME composition support. Automatically adjusts + * height based on content. Handles IME input correctly (waits for composition + * end before processing Enter key). Exposes focus() and resetHeight() methods. + */ +export { default as ChatFormTextarea } from './ChatForm/ChatFormTextarea.svelte'; + +/** + * **ChatFormPickerMcpPrompts** - MCP prompt selection interface + * + * Floating picker for browsing and selecting MCP Server Prompts. + * Triggered by typing `/` in the chat input or choosing `MCP Prompt` option in ChatFormActionAddDropdown. + * Loads prompts from connected MCP servers and allows users to select and configure them. + * + * **Architecture:** + * - Fetches available prompts from mcpStore + * - Manages selection state and keyboard navigation internally + * - Delegates argument input to ChatFormPromptPickerArgumentForm + * - Communicates prompt loading lifecycle via callbacks + * + * **Prompt Loading Flow:** + * 1. User selects prompt → `onPromptLoadStart` called with placeholder ID + * 2. Prompt content fetched from MCP server asynchronously + * 3. On success → `onPromptLoadComplete` with full prompt data + * 4. 
On failure → `onPromptLoadError` with error details + * + * **Features:** + * - Search/filter prompts by name across all connected servers + * - Keyboard navigation (↑/↓ to navigate, Enter to select, Esc to close) + * - Argument input forms for prompts with required parameters + * - Autocomplete suggestions for argument values + * - Loading states with skeleton placeholders + * - Server information header per prompt for visual identification + * + * **Exported API:** + * - `handleKeydown(event): boolean` - Process keyboard events, returns true if handled + * + * @example + * ```svelte + * showPicker = false} + * onPromptLoadStart={(id, info) => addPlaceholder(id, info)} + * onPromptLoadComplete={(id, result) => replacePlaceholder(id, result)} + * onPromptLoadError={(id, error) => handleError(id, error)} + * /> + * ``` + */ +export { default as ChatFormPickerMcpPrompts } from './ChatForm/ChatFormPickers/ChatFormPickerMcpPrompts/ChatFormPickerMcpPrompts.svelte'; + +/** + * Form for entering MCP prompt arguments. Displays input fields for each + * required argument defined by the prompt. Validates input and submits + * when all required fields are filled. Shows argument descriptions as hints. + */ +export { default as ChatFormPromptPickerArgumentForm } from './ChatForm/ChatFormPickers/ChatFormPickerMcpPrompts/ChatFormPromptPickerArgumentForm.svelte'; + +/** + * Single argument input field with autocomplete suggestions. Fetches suggestions + * from MCP server based on argument type. Supports keyboard navigation through + * suggestions list. Used within ChatFormPromptPickerArgumentForm. + */ +export { default as ChatFormPromptPickerArgumentInput } from './ChatForm/ChatFormPickers/ChatFormPickerMcpPrompts/ChatFormPromptPickerArgumentInput.svelte'; + +/** + * Shared popover wrapper for inline picker popovers (prompts, resources). + * Provides consistent positioning, styling, and open/close behavior. 
+ */ +export { default as ChatFormPickerPopover } from './ChatForm/ChatFormPickers/ChatFormPicker/ChatFormPickerPopover.svelte'; + +/** + * Generic scrollable list for picker popovers. Provides search input, + * scroll-into-view for keyboard navigation, loading skeletons, empty state, + * and optional footer. Uses Svelte 5 snippets for item/skeleton/footer rendering. + * Shared by ChatFormPickerMcpPrompts and ChatFormPickerMcpResources. + */ +export { default as ChatFormPickerList } from './ChatForm/ChatFormPickers/ChatFormPicker/ChatFormPickerList.svelte'; + +/** + * Generic button wrapper for picker list items. Provides consistent styling, + * hover/selected states, and data-picker-index attribute for scroll-into-view. + * Shared by ChatFormPickerMcpPrompts and ChatFormPickerMcpResources. + */ +export { default as ChatFormPickerListItem } from './ChatForm/ChatFormPickers/ChatFormPicker/ChatFormPickerListItem.svelte'; + +/** + * Generic header for picker items displaying server favicon, label, item title, + * and optional description. Accepts `titleExtra` and `subtitle` snippets for + * custom content like badges or URIs. Shared by both pickers. + */ +export { default as ChatFormPickerItemHeader } from './ChatForm/ChatFormPickers/ChatFormPicker/ChatFormPickerItemHeader.svelte'; + +/** + * Generic skeleton loading placeholder for picker list items. Configurable + * title width and optional badge skeleton. Shared by both pickers. + */ +export { default as ChatFormPickerListItemSkeleton } from './ChatForm/ChatFormPickers/ChatFormPicker/ChatFormPickerListItemSkeleton.svelte'; + +/** + * **ChatFormPickerMcpResources** - MCP resource selection interface + * + * Floating picker for browsing and attaching MCP Server Resources. + * Triggered by typing `@` in the chat input. + * Loads resources from connected MCP servers and allows users to attach them to the chat context. 
+ * + * **Features:** + * - Search/filter resources by name, title, description, or URI across all connected servers + * - Keyboard navigation (↑/↓ to navigate, Enter to select, Esc to close) + * - Shows attached state for already-attached resources + * - Loading states with skeleton placeholders + * - Server information header per resource for visual identification + * + * **Exported API:** + * - `handleKeydown(event): boolean` - Process keyboard events, returns true if handled + */ +export { default as ChatFormPickerMcpResources } from './ChatForm/ChatFormPickers/ChatFormPickerMcpResources.svelte'; + +/** + * **ChatFormPickers** - Chat input picker container + * + * Container component that hosts both MCP prompt and MCP resource pickers. + * Manages shared state, keyboard navigation, and coordination between the two + * picker interfaces. Used within ChatForm for `/`- and `@`-triggered pickers. + */ +export { default as ChatFormPickers } from './ChatForm/ChatFormPickers/ChatFormPickers.svelte'; + +/** + * + * MESSAGES + * + * Components for displaying chat messages. The message system supports: + * - **Conversation branching**: Messages can have siblings (alternative versions) + * created by editing or regenerating. Users can navigate between branches. + * - **Role-based rendering**: Different layouts for user, assistant, and system messages + * - **Streaming support**: Real-time display of assistant responses as they generate + * - **Agentic workflows**: Special rendering for tool calls and reasoning blocks + * + * The branching system uses `getMessageSiblings()` utility to compute sibling info + * for each message based on the full conversation tree stored in the database. + * + */ + +/** + * **ChatMessages** - Message list container with branching support + * + * Container component that renders the list of messages in a conversation. + * Computes sibling information for each message to enable branch navigation. 
+ * Integrates with conversationsStore for message operations. + * + * **Architecture:** + * - Fetches all conversation messages to compute sibling relationships + * - Filters system messages based on user config (`showSystemMessage`) + * - Delegates rendering to ChatMessage for each message + * - Propagates all message operations to chatStore via callbacks + * + * **Branching Logic:** + * - Uses `getMessageSiblings()` to find all messages with same parent + * - Computes `siblingInfo: { currentIndex, totalSiblings, siblingIds }` + * - Enables navigation between alternative message versions + * + * **Message Operations (delegated to chatStore):** + * - Edit with branching: Creates new message branch, preserves original + * - Edit with replacement: Modifies message in place + * - Regenerate: Creates new assistant response as sibling + * - Delete: Removes message and all descendants (cascade) + * - Continue: Appends to incomplete assistant message + * + * @example + * ```svelte + * + * ``` + */ +export { default as ChatMessages } from './ChatMessages/ChatMessages.svelte'; + +/** + * **ChatMessage** - Single message display with actions + * + * Renders a single chat message with role-specific styling and full action + * support. Delegates to specialized components based on message role: + * ChatMessageUser, ChatMessageAssistant, or ChatMessageSystem. 
+ * + * **Architecture:** + * - Routes to role-specific component based on `message.type` + * - Manages edit mode state and inline editing UI + * - Handles action callbacks (copy, edit, delete, regenerate) + * - Displays branching controls when message has siblings + * + * **User Messages:** + * - Shows attachments via ChatAttachments + * - Displays MCP prompts if present + * - Edit creates new branch or preserves responses + * + * **Assistant Messages:** + * - Renders content via MarkdownContent or ChatMessageAgenticContent + * - Shows model info badge (when enabled) + * - Regenerate creates sibling with optional model override + * - Continue action for incomplete responses + * + * **Features:** + * - Inline editing with file attachments support + * - Copy formatted content to clipboard + * - Delete with confirmation (shows cascade delete count) + * - Branching controls for sibling navigation + * - Statistics display (tokens, timing) + * + * @example + * ```svelte + * + * ``` + */ +export { default as ChatMessage } from './ChatMessages/ChatMessage/ChatMessage.svelte'; + +/** + * **ChatMessageAgenticContent** - Agentic workflow output display + * + * Specialized renderer for assistant messages with tool calls and reasoning. + * Derives display sections from structured message data (toolCalls, reasoningContent, + * and child tool result messages) and renders them as interactive collapsible sections. 
+ * + * **Architecture:** + * - Uses `deriveAgenticSections()` from `$lib/utils` to build sections from structured data + * - Renders sections as CollapsibleContentBlock components + * - Handles streaming state for progressive content display + * - Falls back to MarkdownContent for plain text sections + * + * **Execution States:** + * - **Streaming**: Animated spinner, block expanded, auto-scroll enabled + * - **Pending**: Waiting indicator for queued tool calls + * - **Completed**: Static display, block collapsed by default + * + * **Features:** + * - JSON arguments syntax highlighting via SyntaxHighlightedCode + * - Tool results display with formatting + * - Plain text sections between markers rendered as markdown + * - Smart collapse defaults (expanded while streaming, collapsed when done) + * + * @example + * ```svelte + * + * ``` + */ +export { default as ChatMessageAgenticContent } from './ChatMessages/ChatMessageAgenticContent.svelte'; +export { default as ChatMessageActionCardPermissionRequest } from './ChatMessages/ChatMessageActions/ChatMessageActionCard/ChatMessageActionCardPermissionRequest.svelte'; +export { default as ChatMessageActionCard } from './ChatMessages/ChatMessageActions/ChatMessageActionCard/ChatMessageActionCard.svelte'; +export { default as ChatMessageActionCardContinueRequest } from './ChatMessages/ChatMessageActions/ChatMessageActionCard/ChatMessageActionCardContinueRequest.svelte'; + +/** + * Action buttons toolbar for messages. Displays copy, edit, delete, and regenerate + * buttons based on message role. Includes branching controls when message has siblings. + * Shows delete confirmation dialog with cascade delete count. Handles raw output toggle + * for assistant messages. + */ +export { default as ChatMessageActionIcons } from './ChatMessages/ChatMessageActions/ChatMessageActionIcons/ChatMessageActionIcons.svelte'; + +/** + * Navigation controls for message siblings (conversation branches). 
Displays + * prev/next arrows with current position counter (e.g., "2/5"). Enables users + * to navigate between alternative versions of a message created by editing + * or regenerating. Uses `conversationsStore.navigateToSibling()` for navigation. + */ +export { default as ChatMessageActionIconsBranchingControls } from './ChatMessages/ChatMessageActions/ChatMessageActionIcons/ChatMessageActionIconsBranchingControls.svelte'; + +/** + * Statistics display for assistant messages. Shows token counts (prompt/completion), + * generation timing, tokens per second, and model name (when enabled in settings). + * Data sourced from message.timings stored during generation. + */ +export { default as ChatMessageStatistics } from './ChatMessages/ChatMessageStatistics/ChatMessageStatistics.svelte'; +export { default as ChatMessageStatisticsBadge } from './ChatMessages/ChatMessageStatistics/ChatMessageStatisticsBadge.svelte'; + +/** + * MCP prompt display in user messages. Shows when user selected an MCP prompt + * via ChatFormPickerMcpPrompts. Displays server name, prompt name, and expandable + * content preview. Stored in message.extra as DatabaseMessageExtraMcpPrompt. + */ +export { default as ChatMessageMcpPrompt } from './ChatMessages/ChatMessage/ChatMessageMcpPrompt/ChatMessageMcpPrompt.svelte'; + +/** + * Formatted content display for MCP prompt messages. Renders the full prompt + * content with arguments in a readable format. Used within ChatMessageMcpPrompt + * for the expanded view. + */ +export { default as ChatMessageMcpPromptContent } from './ChatMessages/ChatMessage/ChatMessageMcpPrompt/ChatMessageMcpPromptContent.svelte'; + +/** + * Assistant message display component. Renders assistant responses with left-aligned styling. + * Supports both plain markdown content (via MarkdownContent) and agentic content with tool calls + * (via ChatMessageAgenticContent). Shows model info badge, statistics, and action buttons. 
+ * Handles streaming state with real-time content updates. + */ +export { default as ChatMessageAssistant } from './ChatMessages/ChatMessage/ChatMessageAssistant/ChatMessageAssistant.svelte'; + +/** + * Inline message editing form. Provides textarea for editing message content with + * attachment management. Shows save/cancel buttons and optional "Save only" button + * for editing without regenerating responses. Used within ChatMessage components + * when user enters edit mode. + */ +export { default as ChatMessageEditForm } from './ChatMessages/ChatMessageEditForm.svelte'; + +/** + * User message display component. Renders user messages with right-aligned bubble styling. + * Shows message content, attachments via ChatAttachmentsList, and MCP prompts if present. + * Supports inline editing mode with ChatMessageEditForm integration. + */ +export { default as ChatMessageUser } from './ChatMessages/ChatMessage/ChatMessageUser/ChatMessageUser.svelte'; +export { default as ChatMessageUserBubble } from './ChatMessages/ChatMessage/ChatMessageUser/ChatMessageUserBubble.svelte'; +export { default as ChatMessageUserPending } from './ChatMessages/ChatMessage/ChatMessageUser/ChatMessageUserPending.svelte'; + +/** + * System message display component. Renders system messages with distinct styling. + * Visibility controlled by `showSystemMessage` config setting. + */ +export { default as ChatMessageSystem } from './ChatMessages/ChatMessage/ChatMessageSystem/ChatMessageSystem.svelte'; + +/** + * + * SCREEN + * + * Top-level chat interface components. ChatScreen is the main container that + * orchestrates all chat functionality. 
It integrates with multiple stores: + * - `chatStore` for message operations and generation control + * - `conversationsStore` for conversation management + * - `serverStore` for server connection state + * - `modelsStore` for model capabilities (vision, audio modalities) + * + * The screen handles the complete chat lifecycle from empty state to active + * conversation with streaming responses. + * + */ + +/** + * **ChatScreen** - Main chat interface container + * + * Top-level component that orchestrates the entire chat interface. Manages + * messages display, input form, file handling, auto-scroll, error dialogs, + * and server state. Used as the main content area in chat routes. + * + * **Architecture:** + * - Composes ChatMessages, ChatScreenForm, and dialogs + * - Manages auto-scroll via `createAutoScrollController()` hook + * - Handles file upload pipeline (validation → processing → state update) + * - Integrates with serverStore for loading/error/warning states + * - Tracks active model for modality validation (vision, audio) + * + * **File Upload Pipeline:** + * 1. Files received via drag-drop, paste, or file picker + * 2. Validated against supported types (`isFileTypeSupported()`) + * 3. Filtered by model modalities (`filterFilesByModalities()`) + * 4. Empty files detected and reported via DialogEmptyFileAlert + * 5. Valid files processed to ChatUploadedFile[] format + * 6. 
Unsupported files shown in error dialog with reasons + * + * **State Management:** + * - `isEmpty`: Shows centered welcome UI when no conversation active + * - `isCurrentConversationLoading`: Tracks generation state for current chat + * - `activeModelId`: Determines available modalities for file validation + * - `uploadedFiles`: Pending file attachments for next message + * + * **Features:** + * - Messages display with smart auto-scroll (pauses on user scroll up) + * - File drag-drop with visual overlay indicator + * - File validation with detailed error messages + * - Error dialog management (chat errors, model unavailable) + * - Server loading/error/warning states with appropriate UI + * - Conversation deletion with confirmation dialog + * - Processing info display (tokens/sec, timing) during generation + * - Keyboard shortcuts (Ctrl+Shift+Backspace to delete conversation) + * + * @example + * ```svelte + * + * + * + * + * + * ``` + */ +export { default as ChatScreen } from './ChatScreen/ChatScreen.svelte'; + +/** + * Visual overlay displayed when user drags files over the chat screen. + * Shows drop zone indicator to guide users where to release files. + * Integrated with ChatScreen's drag-drop file upload handling. + */ +export { default as ChatScreenDragOverlay } from './ChatScreen/ChatScreenDragOverlay.svelte'; + +/** + * Chat form wrapper within ChatScreen. Positions the ChatForm component at the + * bottom of the screen with proper padding and max-width constraints. Handles + * the visual container styling for the input area. + */ +export { default as ChatScreenForm } from './ChatScreen/ChatScreenForm.svelte'; + +/** + * Processing info display during generation. Shows real-time statistics: + * tokens per second, prompt/completion token counts, and elapsed time. + * Data sourced from slotsService polling during active generation. + * Only visible when `isCurrentConversationLoading` is true. 
+ */ +export { default as ChatScreenProcessingInfo } from './ChatScreen/ChatScreenProcessingInfo.svelte'; + +/** + * Scroll-to-bottom action button. Displays a floating button when the user + * has scrolled up more than half a viewport height from the bottom. + * Takes the chat container element as a prop to manage scroll state internally. + */ +export { default as ChatScreenActionScrollDown } from './ChatScreen/ChatScreenActionScrollDown.svelte'; + +/** + * Server error alert displayed when the server is unreachable. + * Shows the error message with a retry button. + * Rendered inside ChatScreen when `serverError` store has a value. + */ +export { default as ChatScreenServerError } from './ChatScreen/ChatScreenServerError.svelte'; diff --git a/examples/server/webui_llamacpp/src/lib/components/app/content/CollapsibleContentBlock.svelte b/examples/server/webui_llamacpp/src/lib/components/app/content/CollapsibleContentBlock.svelte new file mode 100644 index 00000000..b7297ab6 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/content/CollapsibleContentBlock.svelte @@ -0,0 +1,98 @@ + + + { + open = value; + onToggle?.(); + }} + class={className} +> + + +
+ {#if IconComponent} + + {/if} + + {title} + + {#if subtitle} + {subtitle} + {/if} +
+ +
+ + + Toggle content +
+
+ + +
+ {@render children()} +
+
+
+
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/MarkdownContent.svelte b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/MarkdownContent.svelte new file mode 100644 index 00000000..9c4c49c0 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/MarkdownContent.svelte @@ -0,0 +1,757 @@ + + + + +
+ {#each renderedBlocks as block (block.id)} +
+ {@html block.html} +
+ {/each} + + {#if unstableBlockHtml} +
+ + {@html unstableBlockHtml} +
+ {/if} + + {#if incompleteCodeBlock} + {#if incompleteCodeBlock.language === 'mermaid'} +
+
+ mermaid +
+ +
+
+
+ Generating diagram... +
+
+ {:else} +
+
+ {incompleteCodeBlock.language || 'text'} + { + previewCode = code; + previewLanguage = lang; + previewDialogOpen = true; + }} + /> +
+ +
streamingAutoScroll.handleScroll()} + > +
{@html highlightCode(
+								incompleteCodeBlock.code,
+								incompleteCodeBlock.language || 'text'
+							)}
+
+
+ {/if} + {/if} +
+ + + + + + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/markdown-content.css b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/markdown-content.css new file mode 100644 index 00000000..07904f76 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/markdown-content.css @@ -0,0 +1,685 @@ +.markdown-block--unstable { + display: contents; +} + +/* Streaming code block uses .code-block-wrapper styles */ +.streaming-code-block .streaming-code-pre { + background: transparent; + padding: 0.5rem; + margin: 0; + overflow-x: visible; + border-radius: 0; + border: none; + font-size: 0.875rem; +} + +/* Base typography styles */ +.markdown-content :global(p) { + margin-block: 1rem; + line-height: 1.75; +} + +.markdown-content :global(:is(h1, h2, h3, h4, h5, h6):first-child) { + margin-top: 0; +} + +/* Headers with consistent spacing */ +.markdown-content :global(h1) { + font-size: 1.875rem; + font-weight: 700; + line-height: 1.2; + margin: 1.5rem 0 0.75rem 0; +} + +.markdown-content :global(h2) { + font-size: 1.5rem; + font-weight: 600; + line-height: 1.3; + margin: 1.25rem 0 0.5rem 0; +} + +.markdown-content :global(h3) { + font-size: 1.25rem; + font-weight: 600; + margin: 1.5rem 0 0.5rem 0; + line-height: 1.4; +} + +.markdown-content :global(h4) { + font-size: 1.125rem; + font-weight: 600; + margin: 0.75rem 0 0.25rem 0; +} + +.markdown-content :global(h5) { + font-size: 1rem; + font-weight: 600; + margin: 0.5rem 0 0.25rem 0; +} + +.markdown-content :global(h6) { + font-size: 0.875rem; + font-weight: 600; + margin: 0.5rem 0 0.25rem 0; +} + +/* Text formatting */ +.markdown-content :global(strong) { + font-weight: 600; +} + +.markdown-content :global(em) { + font-style: italic; +} + +.markdown-content :global(del) { + text-decoration: line-through; + opacity: 0.7; +} + +/* Inline code */ +.markdown-content :global(code:not(pre code)) { + background: 
var(--muted); + color: var(--muted-foreground); + padding: 0.125rem 0.375rem; + border-radius: 0.375rem; + font-size: 0.875rem; +} + +.markdown-content :global(pre) { + display: inline; + margin: 0 !important; + overflow: hidden !important; + background: var(--muted); + overflow-x: auto; + border-radius: 1rem; + border: none; + line-height: 1 !important; +} + +.markdown-content :global(pre code) { + padding: 0 !important; + display: inline !important; +} + +.markdown-content :global(code) { + background: transparent; + color: var(--code-foreground); +} + +/* Links */ +.markdown-content :global(a) { + color: var(--primary); + text-decoration: underline; + text-underline-offset: 2px; + transition: color 0.2s ease; + overflow-wrap: anywhere; + word-break: break-all; +} + +.markdown-content :global(a:hover) { + color: var(--primary); +} + +/* Lists */ +.markdown-content :global(ul) { + list-style-type: disc; + margin-inline-start: 1.5rem; + margin-bottom: 1rem; +} + +.markdown-content :global(ol) { + list-style-type: decimal; + margin-inline-start: 1.5rem; + margin-bottom: 1rem; +} + +.markdown-content :global(li) { + margin-bottom: 0.25rem; + padding-inline-start: 0.5rem; +} + +.markdown-content :global(li::marker) { + color: var(--muted-foreground); +} + +/* Nested lists */ +.markdown-content :global(ul ul) { + list-style-type: circle; + margin-top: 0.25rem; + margin-bottom: 0.25rem; +} + +.markdown-content :global(ol ol) { + list-style-type: lower-alpha; + margin-top: 0.25rem; + margin-bottom: 0.25rem; +} + +/* Task lists */ +.markdown-content :global(.task-list-item) { + list-style: none; + margin-inline-start: 0; + padding-inline-start: 0; +} + +.markdown-content :global(.task-list-item-checkbox) { + margin-right: 0.5rem; + margin-top: 0.125rem; +} + +/* Blockquotes */ +.markdown-content :global(blockquote) { + border-left: 4px solid var(--border); + padding: 0.5rem 1rem; + margin: 1.5rem 0; + font-style: italic; + color: var(--muted-foreground); + background: 
var(--muted); + border-radius: 0 0.375rem 0.375rem 0; +} + +/* Tables */ +.markdown-content :global(table) { + width: 100%; + margin: 1.5rem 0; + border-collapse: collapse; + border: 1px solid var(--border); + border-radius: 0.375rem; + overflow: hidden; +} + +.markdown-content :global(th) { + background: hsl(var(--muted) / 0.3); + border: 1px solid var(--border); + padding: 0.5rem 0.75rem; + text-align: left; + font-weight: 600; +} + +.markdown-content :global(td) { + border: 1px solid var(--border); + padding: 0.5rem 0.75rem; +} + +.markdown-content :global(tr:nth-child(even)) { + background: hsl(var(--muted) / 0.1); +} + +/* User message markdown should keep table borders visible on light primary backgrounds */ +div.markdown-user-content :global(table), +div.markdown-user-content :global(th), +div.markdown-user-content :global(td), +div.markdown-user-content :global(.table-wrapper) { + border-color: currentColor; +} + +/* Horizontal rules */ +.markdown-content :global(hr) { + border: none; + border-top: 1px solid var(--border); + margin: 1.5rem 0; +} + +/* Images */ +.markdown-content :global(img) { + border-radius: 0.5rem; + box-shadow: + 0 1px 3px 0 rgb(0 0 0 / 0.1), + 0 1px 2px -1px rgb(0 0 0 / 0.1); + margin: 1.5rem 0; + max-width: 100%; + height: auto; +} + +/* Code blocks */ + +.markdown-content :global(.code-block-wrapper) { + margin: 1.5rem 0; + border-radius: 0.75rem; + overflow: hidden; + border: 1px solid color-mix(in oklch, var(--border) 30%, transparent); + background: var(--code-background); + box-shadow: 0 1px 2px 0 rgb(0 0 0 / 0.05); + min-height: var(--min-message-height); + max-height: var(--max-message-height); +} + +.markdown-content:global(.dark) :global(.code-block-wrapper) { + border-color: color-mix(in oklch, var(--border) 20%, transparent); +} + +/* Scroll container for code blocks (both streaming and completed) */ +.markdown-content :global(.code-block-scroll-container), +.streaming-code-scroll-container { + min-height: 
var(--min-message-height); + max-height: var(--max-message-height); + overflow-y: auto; + overflow-x: auto; + padding: 3rem 1rem 1rem; + line-height: 1.3; +} + +.full-height-code-blocks :global(.code-block-wrapper) { + max-height: none; +} + +.full-height-code-blocks :global(.code-block-scroll-container), +.full-height-code-blocks .streaming-code-scroll-container { + max-height: none; + overflow-y: visible; +} + +.markdown-content :global(.code-block-header) { + display: flex; + justify-content: space-between; + align-items: center; + padding: 0.5rem 1rem 0; + font-size: 0.875rem; + position: absolute; + top: 0; + left: 0; + right: 0; +} + +.markdown-content :global(.code-language) { + color: var(--color-foreground); + font-weight: 500; + font-family: + ui-monospace, SFMono-Regular, 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, + 'Liberation Mono', Menlo, monospace; + text-transform: uppercase; + font-size: 0.75rem; + letter-spacing: 0.05em; +} + +.markdown-content :global(.code-block-actions) { + display: flex; + align-items: center; + gap: 0.5rem; +} + +.markdown-content :global(.copy-code-btn), +.markdown-content :global(.preview-code-btn) { + display: flex; + align-items: center; + justify-content: center; + padding: 0; + background: transparent; + color: var(--code-foreground); + cursor: pointer; + transition: all 0.2s ease; +} + +.markdown-content :global(.copy-code-btn:hover), +.markdown-content :global(.preview-code-btn:hover) { + transform: scale(1.05); +} + +.markdown-content :global(.copy-code-btn:active), +.markdown-content :global(.preview-code-btn:active) { + transform: scale(0.95); +} + +.markdown-content :global(.code-block-wrapper pre) { + background: transparent; + margin: 0; + border-radius: 0; + border: none; + font-size: 0.875rem; +} + +/* Mentions and hashtags */ +.markdown-content :global(.mention) { + color: hsl(var(--primary)); + font-weight: 500; + text-decoration: none; +} + +.markdown-content :global(.mention:hover) { + 
text-decoration: underline; +} + +.markdown-content :global(.hashtag) { + color: hsl(var(--primary)); + font-weight: 500; + text-decoration: none; +} + +.markdown-content :global(.hashtag:hover) { + text-decoration: underline; +} + +/* Advanced table enhancements */ +.markdown-content :global(table) { + transition: all 0.2s ease; +} + +.markdown-content :global(table:hover) { + box-shadow: + 0 4px 6px -1px rgb(0 0 0 / 0.1), + 0 2px 4px -2px rgb(0 0 0 / 0.1); +} + +.markdown-content :global(th:hover), +.markdown-content :global(td:hover) { + background: var(--muted); +} + +/* Disable hover effects when rendering user messages */ +.markdown-user-content :global(a), +.markdown-user-content :global(a:hover) { + color: inherit; +} + +.markdown-user-content :global(table:hover) { + box-shadow: none; +} + +.markdown-user-content :global(th:hover), +.markdown-user-content :global(td:hover) { + background: inherit; +} + +/* Enhanced blockquotes */ +.markdown-content :global(blockquote) { + transition: all 0.2s ease; + position: relative; +} + +.markdown-content :global(blockquote:hover) { + border-left-width: 6px; + background: var(--muted); + transform: translateX(2px); +} + +.markdown-content :global(blockquote::before) { + content: '"'; + position: absolute; + top: -0.5rem; + left: 0.5rem; + font-size: 3rem; + color: var(--muted-foreground); + font-family: serif; + line-height: 1; +} + +/* Enhanced images */ +.markdown-content :global(img) { + transition: all 0.3s ease; + cursor: pointer; +} + +.markdown-content :global(img:hover) { + transform: scale(1.02); + box-shadow: + 0 10px 15px -3px rgb(0 0 0 / 0.1), + 0 4px 6px -4px rgb(0 0 0 / 0.1); +} + +/* Image zoom overlay */ +.markdown-content :global(.image-zoom-overlay) { + position: fixed; + top: 0; + left: 0; + right: 0; + bottom: 0; + background: rgba(0, 0, 0, 0.8); + display: flex; + align-items: center; + justify-content: center; + z-index: 1000; + cursor: pointer; +} + +.markdown-content :global(.image-zoom-overlay 
img) { + max-width: 90vw; + max-height: 90vh; + border-radius: 0.5rem; + box-shadow: 0 25px 50px -12px rgb(0 0 0 / 0.25); +} + +/* Enhanced horizontal rules */ +.markdown-content :global(hr) { + border: none; + height: 2px; + background: linear-gradient(to right, transparent, var(--border), transparent); + margin: 2rem 0; + position: relative; +} + +.markdown-content :global(hr::after) { + content: ''; + position: absolute; + top: 50%; + left: 50%; + transform: translate(-50%, -50%); + width: 1rem; + height: 1rem; + background: var(--border); + border-radius: 50%; +} + +/* Scrollable tables */ +.markdown-content :global(.table-wrapper) { + overflow-x: auto; + margin: 1.5rem 0; + border-radius: 0.5rem; + border: 1px solid var(--border); +} + +.markdown-content :global(.table-wrapper table) { + margin: 0; + border: none; +} + +/* Responsive adjustments */ +@media (max-width: 640px) { + .markdown-content :global(h1) { + font-size: 1.5rem; + } + + .markdown-content :global(h2) { + font-size: 1.25rem; + } + + .markdown-content :global(h3) { + font-size: 1.125rem; + } + + .markdown-content :global(table) { + font-size: 0.875rem; + } + + .markdown-content :global(th), + .markdown-content :global(td) { + padding: 0.375rem 0.5rem; + } + + .markdown-content :global(.table-wrapper) { + margin: 0.5rem -1rem; + border-radius: 0; + border-left: none; + border-right: none; + } +} + +/* Dark mode adjustments */ +@media (prefers-color-scheme: dark) { + .markdown-content :global(blockquote:hover) { + background: var(--muted); + } +} + +/* Image load error fallback */ +.markdown-content :global(.image-load-error) { + display: flex; + align-items: center; + justify-content: center; + margin: 1.5rem 0; + padding: 1.5rem; + border-radius: 0.5rem; + background: var(--muted); + border: 1px dashed var(--border); +} + +.markdown-content :global(.image-error-content) { + display: flex; + flex-direction: column; + align-items: center; + gap: 0.75rem; + color: var(--muted-foreground); + 
text-align: center; +} + +.markdown-content :global(.image-error-content svg) { + opacity: 0.5; +} + +.markdown-content :global(.image-error-text) { + font-size: 0.875rem; +} + +.markdown-content :global(.image-error-link) { + display: inline-flex; + align-items: center; + gap: 0.375rem; + padding: 0.5rem 1rem; + font-size: 0.875rem; + font-weight: 500; + color: var(--primary); + background: var(--background); + border: 1px solid var(--border); + border-radius: 0.375rem; + text-decoration: none; + transition: all 0.2s ease; +} + +.markdown-content :global(.image-error-link:hover) { + background: var(--muted); + border-color: var(--primary); +} + +/* Mermaid diagrams */ +.markdown-content :global(pre.mermaid) { + background: transparent; + border: none; + padding: 0; + text-align: center; + font-family: inherit; + cursor: pointer; + transition: opacity 0.15s ease; + position: relative; +} + +/* Hide mermaid code text until rendered - prevents flash */ +.markdown-content :global(pre.mermaid:not([data-mermaid-rendered])), +.markdown-content :global(pre.mermaid[data-mermaid-rendered]:not(:has(svg))) { + display: none; +} + +.markdown-content :global(pre.mermaid:hover) { + opacity: 0.85; +} + +.markdown-content :global(pre.mermaid svg) { + max-width: 90%; + margin: 0 auto; + height: auto; + display: block; + padding: 3rem 1rem; +} + +/* Mermaid block wrapper - matches code block styling */ +.markdown-content :global(.mermaid-block-wrapper) { + margin: 1.5rem 0; + border-radius: 0.75rem; + overflow: hidden; + border: 1px solid color-mix(in oklch, var(--border) 30%, transparent); + background: var(--code-background); + box-shadow: 0 1px 2px 0 rgb(0 0 0 / 0.05); + position: relative; + min-height: var(--min-message-height); + max-height: var(--max-message-height); +} + +.markdown-content:global(.dark) :global(.mermaid-block-wrapper) { + border-color: color-mix(in oklch, var(--border) 20%, transparent); +} + +.markdown-content :global(.mermaid-scroll-container) { + 
min-height: 350px; + max-height: var(--max-message-height); + overflow-y: auto; + overflow-x: auto; + display: flex; + align-items: center; + justify-content: center; + padding: 3rem 1rem 1rem; +} + +.full-height-code-blocks :global(.mermaid-block-wrapper) { + max-height: none; +} + +.full-height-code-blocks :global(.mermaid-scroll-container) { + max-height: none; + overflow-y: visible; +} + +/* Mermaid block uses same header styling as code blocks */ +.markdown-content :global(.mermaid-block-wrapper .code-block-header) { + display: flex; + justify-content: space-between; + align-items: center; + padding: 0.5rem 1rem 0; + font-size: 0.875rem; + position: absolute; + top: 0; + left: 0; + right: 0; +} + +.markdown-content :global(.mermaid-block-wrapper .code-block-actions) { + display: flex; + align-items: center; + gap: 0.5rem; +} + +/* Mermaid pre element - remove default margins */ +.markdown-content :global(.mermaid-block-wrapper pre.mermaid) { + background: transparent; + border: none; + padding: 0; + margin: 0; + text-align: center; +} + +/* Mermaid SVG should be bigger */ +.markdown-content :global(.mermaid-block-wrapper pre.mermaid svg) { + width: unset !important; + height: auto; + display: block; + padding: 3rem 1rem; +} + +/* Streaming mermaid block - empty preview box */ +.mermaid-streaming-block { + min-height: 300px; + display: flex; + align-items: center; + justify-content: center; +} + +.mermaid-loading-placeholder { + display: flex; + align-items: center; + justify-content: center; + padding: 3rem; + color: var(--muted-foreground); +} + +.mermaid-loading-text { + font-size: 0.875rem; + font-style: italic; +} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/markdown-handlers.ts b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/markdown-handlers.ts new file mode 100644 index 00000000..55440848 --- /dev/null +++ 
b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/markdown-handlers.ts @@ -0,0 +1,264 @@ +/** + * Event handler factories for markdown content components. + * Uses dependency injection pattern to avoid direct component state access. + */ + +import { copyCodeToClipboard, copyToClipboard } from '$lib/utils'; + +export interface PreviewState { + previewDialogOpen: boolean; + previewCode: string; + previewLanguage: string; + setPreviewDialogOpen: (open: boolean) => void; + setPreviewCode: (code: string) => void; + setPreviewLanguage: (lang: string) => void; +} + +export interface MermaidPreviewState { + mermaidPreviewOpen: boolean; + mermaidPreviewSvgHtml: string; + setMermaidPreviewOpen: (open: boolean) => void; + setMermaidPreviewSvgHtml: (html: string) => void; +} + +export interface RenderedBlocksState { + renderedBlocks: Array<{ id: string; html: string; contentHash?: string }>; + setRenderedBlocks: (blocks: Array<{ id: string; html: string; contentHash?: string }>) => void; +} + +/** + * Creates a click handler for copy buttons in code blocks. + * Copies the code content to clipboard. + */ +export function createHandleCopyClick() { + return async function handleCopyClick(event: Event) { + event.preventDefault(); + event.stopPropagation(); + + const target = event.currentTarget as HTMLButtonElement | null; + if (!target) return; + + const wrapper = target.closest('.code-block-wrapper'); + if (!wrapper) return; + + const codeElement = wrapper.querySelector('code[data-code-id]'); + if (!codeElement) return; + + const rawCode = codeElement.textContent ?? ''; + + try { + await copyCodeToClipboard(rawCode); + } catch (error) { + console.error('Failed to copy code:', error); + } + }; +} + +/** + * Creates a handler for preview dialog open state changes. + * Clears preview content when dialog is closed. 
+ */ +export function createHandlePreviewDialogOpenChange(previewState: PreviewState) { + return function handlePreviewDialogOpenChange(open: boolean) { + previewState.setPreviewDialogOpen(open); + + if (!open) { + previewState.setPreviewCode(''); + previewState.setPreviewLanguage('text'); + } + }; +} + +/** + * Creates a click handler for preview buttons within HTML code blocks. + * Opens a preview dialog with the rendered HTML content. + */ +export function createHandlePreviewClick(previewState: PreviewState) { + return async function handlePreviewClick(event: Event) { + event.preventDefault(); + event.stopPropagation(); + + const target = event.currentTarget as HTMLButtonElement | null; + if (!target) return; + + const wrapper = target.closest('.code-block-wrapper'); + if (!wrapper) return; + + const codeElement = wrapper.querySelector('code[data-code-id]'); + if (!codeElement) return; + + const rawCode = codeElement.textContent ?? ''; + const languageLabel = wrapper.querySelector('.code-language'); + const language = languageLabel?.textContent?.trim() || 'text'; + + previewState.setPreviewCode(rawCode); + previewState.setPreviewLanguage(language); + previewState.setPreviewDialogOpen(true); + }; +} + +/** + * Creates a click handler for mermaid block interactions. + * Handles copy, preview, and diagram click events via event delegation. 
+ */ +export function createHandleMermaidClick(mermaidState: MermaidPreviewState) { + return async function handleMermaidClick(event: MouseEvent) { + const target = event.target as HTMLElement; + + // Check if clicking on copy or preview button in mermaid block + const copyBtn = target.closest('.mermaid-block-wrapper .copy-code-btn'); + const previewBtn = target.closest('.mermaid-block-wrapper .preview-code-btn'); + + if (copyBtn || previewBtn) { + const wrapper = target.closest('.mermaid-block-wrapper'); + if (!wrapper) return; + + const preElement = wrapper.querySelector('pre.mermaid[data-mermaid-syntax]'); + if (!preElement) return; + + const mermaidSyntax = preElement.dataset.mermaidSyntax ?? ''; + + if (copyBtn) { + event.preventDefault(); + event.stopPropagation(); + try { + await copyToClipboard(mermaidSyntax); + } catch (error) { + console.error('Failed to copy mermaid syntax:', error); + } + return; + } + + if (previewBtn) { + event.preventDefault(); + event.stopPropagation(); + const svg = preElement.querySelector('svg'); + if (!svg) return; + mermaidState.setMermaidPreviewSvgHtml(svg.outerHTML); + mermaidState.setMermaidPreviewOpen(true); + return; + } + } + + // Otherwise, open preview when clicking on the mermaid diagram itself + const mermaidEl = target.closest('.mermaid'); + if (!mermaidEl) return; + + const svg = mermaidEl.querySelector('svg'); + if (!svg) return; + + mermaidState.setMermaidPreviewSvgHtml(svg.outerHTML); + mermaidState.setMermaidPreviewOpen(true); + }; +} + +/** + * Creates a handler for mermaid preview dialog open state changes. + * Cleans up SVG content when dialog is closed. 
+ */ +export function createHandleMermaidPreviewOpenChange(mermaidState: MermaidPreviewState) { + return function handleMermaidPreviewOpenChange(open: boolean) { + mermaidState.setMermaidPreviewOpen(open); + if (!open) { + mermaidState.setMermaidPreviewSvgHtml(''); + } + }; +} + +/** + * Creates an error handler for images that fail to load (e.g., CORS issues). + * Shows fallback UI for broken images. + */ +export function createHandleImageError( + renderedBlocksState: RenderedBlocksState, + IMAGE_NOT_ERROR_BOUND_SELECTOR: string, + DATA_ERROR_BOUND_ATTR: string, + BOOL_TRUE_STRING: string +) { + return async function handleImageError(event: Event) { + const img = event.target as HTMLImageElement; + if (!img) return; + + const blockId = img.closest('[data-block-id]')?.getAttribute('data-block-id'); + if (!blockId) return; + + const block = renderedBlocksState.renderedBlocks.find((b) => b.id === blockId); + if (!block) return; + + // Skip if already handled + if (img.dataset[DATA_ERROR_BOUND_ATTR] === BOOL_TRUE_STRING) return; + img.dataset[DATA_ERROR_BOUND_ATTR] = BOOL_TRUE_STRING; + + // Get the fallback HTML and replace the image + const fallbackHtml = `
+ ⚠️ + Failed to load image +
`; + + // Replace the img element with fallback in the block's HTML + const newHtml = block.html.replace(/img[^>]*src=["']([^"']*)[^>]*>/g, (match, src) => { + if (src === img.src) { + return fallbackHtml.replace('data-original-src=""', `data-original-src="${src}"`); + } + return match; + }); + + // Update the block + const newBlocks = renderedBlocksState.renderedBlocks.map((b) => + b.id === blockId ? { ...b, html: newHtml } : b + ); + renderedBlocksState.setRenderedBlocks(newBlocks); + }; +} + +/** + * Creates a function to set up code block action event listeners. + * Binds click handlers to copy and preview buttons within code blocks. + */ +export function createSetupCodeBlockActions( + handleCopyClick: (event: Event) => void, + handlePreviewClick: (event: Event) => void +) { + return function setupCodeBlockActions(containerRef: HTMLElement | null) { + if (!containerRef) return; + + const wrappers = containerRef.querySelectorAll('.code-block-wrapper'); + + for (const wrapper of wrappers) { + const copyButton = wrapper.querySelector('.copy-code-btn'); + const previewButton = wrapper.querySelector('.preview-code-btn'); + + if (copyButton && copyButton.dataset.listenerBound !== 'true') { + copyButton.dataset.listenerBound = 'true'; + copyButton.addEventListener('click', handleCopyClick); + } + + if (previewButton && previewButton.dataset.listenerBound !== 'true') { + previewButton.dataset.listenerBound = 'true'; + previewButton.addEventListener('click', handlePreviewClick); + } + } + }; +} + +/** + * Creates a function to set up image error handlers. + * Attaches error handlers to images to show fallback UI when loading fails. 
+ */ +export function createSetupImageErrorHandlers( + handleImageError: (event: Event) => void, + IMAGE_NOT_ERROR_BOUND_SELECTOR: string, + DATA_ERROR_BOUND_ATTR: string, + BOOL_TRUE_STRING: string +) { + return function setupImageErrorHandlers(containerRef: HTMLElement | null) { + if (!containerRef) return; + + const images = containerRef.querySelectorAll(IMAGE_NOT_ERROR_BOUND_SELECTOR); + + for (const img of images) { + img.dataset[DATA_ERROR_BOUND_ATTR] = BOOL_TRUE_STRING; + img.addEventListener('error', handleImageError); + } + }; +} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/markdown-utils.ts b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/markdown-utils.ts new file mode 100644 index 00000000..dfb56d53 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/markdown-utils.ts @@ -0,0 +1,84 @@ +/** + * Utility functions for markdown processing in MarkdownContent component. + */ + +import type { RootContent as HastRootContent } from 'hast'; + +/** + * Generates a unique identifier for a HAST node based on its position. + * Used for stable block identification during incremental rendering. + * @param node - The HAST root content node + * @param indexFallback - Fallback index if position is unavailable + * @returns Unique string identifier for the node + */ +export function getHastNodeId(node: HastRootContent, indexFallback: number): string { + const position = node.position; + + if (position?.start?.offset != null && position?.end?.offset != null) { + return `hast-${position.start.offset}-${position.end.offset}`; + } + + return `${node.type}-${indexFallback}`; +} + +/** + * Generates a hash for MDAST node based on its position. + * Used for cache lookup during incremental rendering. 
+ */ +export function getMdastNodeHash(node: unknown, index: number): string { + const n = node as { + type?: string; + position?: { start?: { offset?: number }; end?: { offset?: number } }; + }; + + if (n.position?.start?.offset != null && n.position?.end?.offset != null) { + return `${n.type}-${n.position.start.offset}-${n.position.end.offset}`; + } + + return `${n.type}-idx${index}`; +} + +/** + * Determines if the new content is an append (new content added to existing blocks). + * This is used to optimize cache reuse during streaming updates. + * + * @param newContent - The new markdown content + * @param previousContent - The previous markdown content to check against + * @returns true if the content appears to be an append operation + */ +export function isAppendMode(newContent: string, previousContent: string): boolean { + return previousContent.length > 0 && newContent.startsWith(previousContent); +} + +export interface CodeInfo { + rawCode: string; + language: string; +} + +/** + * Extracts code information from a button click target within a code block. + * @param target - The clicked button element + * @returns Object with rawCode and language, or null if extraction fails + */ +export function getCodeInfoFromTarget(target: HTMLElement): CodeInfo | null { + const wrapper = target.closest('.code-block-wrapper'); + + if (!wrapper) { + console.error('No wrapper found'); + return null; + } + + const codeElement = wrapper.querySelector('code[data-code-id]'); + + if (!codeElement) { + console.error('No code element found in wrapper'); + return null; + } + + const rawCode = codeElement.textContent ?? 
''; + + const languageLabel = wrapper.querySelector('.code-language'); + const language = languageLabel?.textContent?.trim() || 'text'; + + return { rawCode, language }; +} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/code-block-utils.ts b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/code-block-utils.ts new file mode 100644 index 00000000..73231546 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/code-block-utils.ts @@ -0,0 +1,151 @@ +/** + * Shared utilities for enhanced code blocks and mermaid diagram blocks. + * Contains common HAST element creation functions to avoid code duplication. + */ + +import type { Element, ElementContent } from 'hast'; +import { + CODE_BLOCK_HEADER_CLASS, + CODE_BLOCK_ACTIONS_CLASS, + CODE_LANGUAGE_CLASS, + COPY_CODE_BTN_CLASS, + PREVIEW_CODE_BTN_CLASS, + RELATIVE_CLASS, + COPY_ICON_SVG, + PREVIEW_ICON_SVG +} from '$lib/constants'; + +export interface BlockIdGenerator { + (id: number): string; +} + +/** + * Creates an icon element with the given SVG content. + */ +export function createIconElement(svg: string): Element { + return { + type: 'element', + tagName: 'span', + properties: {}, + children: [{ type: 'raw', value: svg } as unknown as ElementContent] + }; +} + +/** + * Creates a button element with icon. + */ +export function createButton( + className: string, + title: string, + iconSvg: string, + id: string, + idAttribute: string +): Element { + return { + type: 'element', + tagName: 'button', + properties: { + className: [className], + [idAttribute]: id, + title, + type: 'button' + }, + children: [createIconElement(iconSvg)] + }; +} + +/** + * Creates a copy button element. 
+ */ +export function createCopyButton(id: string, idAttribute: string, title: string = 'Copy'): Element { + return createButton(COPY_CODE_BTN_CLASS, title, COPY_ICON_SVG, id, idAttribute); +} + +/** + * Creates a preview button element. + */ +export function createPreviewButton( + id: string, + idAttribute: string, + title: string = 'Preview' +): Element { + return createButton(PREVIEW_CODE_BTN_CLASS, title, PREVIEW_ICON_SVG, id, idAttribute); +} + +/** + * Creates a block header with language label and action buttons. + */ +export function createBlockHeader( + language: string, + id: string, + idAttribute: string, + actions: Element[], + languageClassName: string = CODE_LANGUAGE_CLASS +): Element { + return { + type: 'element', + tagName: 'div', + properties: { className: [CODE_BLOCK_HEADER_CLASS] }, + children: [ + { + type: 'element', + tagName: 'span', + properties: { className: [languageClassName] }, + children: [{ type: 'text', value: language }] + }, + { + type: 'element', + tagName: 'div', + properties: { className: [CODE_BLOCK_ACTIONS_CLASS] }, + children: actions + } + ] + }; +} + +/** + * Creates a scroll container element. + */ +export function createScrollContainer(preElement: Element, scrollContainerClass: string): Element { + return { + type: 'element', + tagName: 'div', + properties: { className: [scrollContainerClass] }, + children: [preElement] + }; +} + +/** + * Creates a wrapper element with header and scroll container. + */ +export function createWrapper( + header: Element, + preElement: Element, + wrapperClass: string, + scrollContainerClass: string, + additionalAttributes?: Record +): Element { + return { + type: 'element', + tagName: 'div', + properties: { + className: [wrapperClass, RELATIVE_CLASS], + ...additionalAttributes + } as Element['properties'], + children: [header, createScrollContainer(preElement, scrollContainerClass)] + }; +} + +/** + * Generates a unique block ID using a global counter. 
+ */ +export function generateBlockId(prefix: string, windowKey: keyof Window): string { + if (typeof window !== 'undefined') { + const idx = window[windowKey] as number | undefined; + const next = (idx ?? 0) + 1; + (window as unknown as Record)[windowKey] = next; + return `${prefix}-${next}`; + } + // Fallback for SSR - use timestamp + random + return `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 7)}`; +} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/enhance-code-blocks.ts b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/enhance-code-blocks.ts new file mode 100644 index 00000000..b72e806b --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/enhance-code-blocks.ts @@ -0,0 +1,88 @@ +/** + * Rehype plugin to enhance code blocks with wrapper, header, and action buttons. + * + * Wraps
 elements with a container that includes:
+ * - Language label
+ * - Copy button
+ * - Preview button (for HTML code blocks)
+ *
+ * This operates directly on the HAST tree for better performance,
+ * avoiding the need to stringify and re-parse HTML.
+ */
+
+import type { Plugin } from 'unified';
+import type { Root, Element, ElementContent } from 'hast';
+import { visit } from 'unist-util-visit';
+import { CODE_BLOCK_SCROLL_CONTAINER_CLASS, CODE_BLOCK_WRAPPER_CLASS } from '$lib/constants';
+import {
+	createBlockHeader,
+	createCopyButton,
+	createPreviewButton,
+	createWrapper,
+	generateBlockId
+} from './code-block-utils';
+
+declare global {
+	interface Window {
+		idxCodeBlock?: number; // counter slot named by generateBlockId('code', 'idxCodeBlock') below
+	}
+}
+
+/** Returns the suffix of the first `language-*` class on the element, or 'text' if none. */
+function extractLanguage(codeElement: Element): string {
+	const classList = codeElement.properties?.className;
+	if (!Array.isArray(classList)) return 'text';
+
+	// Only string entries can carry the `language-` prefix; the first match wins.
+	const languageClass = classList.find(
+		(entry): entry is string => typeof entry === 'string' && entry.startsWith('language-')
+	);
+
+	return languageClass === undefined ? 'text' : languageClass.replace('language-', '');
+}
+
+/**
+ * Rehype plugin that decorates code blocks.
+ *
+ * Each <pre> element that has a direct <code> child gets a generated
+ * `data-code-id` on that <code> and is replaced, in its parent, by a wrapper
+ * containing a header (language label, copy button and, for HTML only, a
+ * preview button) and a scroll container holding the original <pre>.
+ */
+export const rehypeEnhanceCodeBlocks: Plugin<[], Root> = () => {
+	return (tree: Root) => {
+		visit(tree, 'element', (node: Element, index, parent) => {
+			const isPre = node.tagName === 'pre';
+			if (!isPre || !parent || index === undefined) return;
+
+			// Only <pre> elements with a direct <code> child are enhanced.
+			const code = node.children.find(
+				(child): child is Element => child.type === 'element' && child.tagName === 'code'
+			);
+			if (!code) return;
+
+			const lang = extractLanguage(code);
+			const blockId = generateBlockId('code', 'idxCodeBlock');
+
+			// Existing properties are kept; only the id attribute is added.
+			code.properties = { ...code.properties, 'data-code-id': blockId };
+
+			// Copy is always available; preview is offered for HTML blocks only.
+			const isHtml = lang.toLowerCase() === 'html';
+			const buttons: Element[] = [createCopyButton(blockId, 'data-code-id', 'Copy code')];
+			if (isHtml) {
+				buttons.push(createPreviewButton(blockId, 'data-code-id', 'Preview code'));
+			}
+
+			const header = createBlockHeader(lang, blockId, 'data-code-id', buttons);
+
+			// Swap the <pre> for the wrapper, which now holds the <pre> itself.
+			(parent.children as ElementContent[])[index] = createWrapper(
+				header,
+				node,
+				CODE_BLOCK_WRAPPER_CLASS,
+				CODE_BLOCK_SCROLL_CONTAINER_CLASS
+			);
+		});
+	};
+};
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/enhance-links.ts b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/enhance-links.ts
new file mode 100644
index 00000000..b5fbcbda
--- /dev/null
+++ b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/enhance-links.ts
@@ -0,0 +1,33 @@
+/**
+ * Rehype plugin to enhance links with security attributes.
+ *
+ * Adds target="_blank" and rel="noopener noreferrer" to all anchor elements,
+ * ensuring external links open in new tabs safely.
+ */
+
+import type { Plugin } from 'unified';
+import type { Root, Element } from 'hast';
+import { visit } from 'unist-util-visit';
+
+/**
+ * Rehype plugin that adds security attributes to all links.
+ * This plugin ensures external links open in new tabs safely by adding:
+ * - target="_blank"
+ * - rel="noopener noreferrer"
+ */
+export const rehypeEnhanceLinks: Plugin<[], Root> = () => {
+	return (tree: Root) => {
+		visit(tree, 'element', (anchor: Element) => {
+			// Skip non-anchors, and anchors whose href is missing or falsy.
+			if (anchor.tagName !== 'a') return;
+			const attrs = anchor.properties ?? {};
+			if (!attrs.href) return;
+
+			// Mutate the existing property bag in place; any previous target/rel is overwritten.
+			anchor.properties = Object.assign(attrs, {
+				target: '_blank',
+				rel: 'noopener noreferrer'
+			});
+		});
+	};
+};
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/enhance-mermaid-blocks.ts b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/enhance-mermaid-blocks.ts
new file mode 100644
index 00000000..ab24e782
--- /dev/null
+++ b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/enhance-mermaid-blocks.ts
@@ -0,0 +1,85 @@
+/**
+ * Rehype plugin to enhance mermaid diagram blocks with wrapper, header, and action buttons.
+ *
+ * Wraps <pre class="mermaid"> elements with a container that includes:
+ * - Language label ("mermaid")
+ * - Copy button (copies mermaid syntax to clipboard)
+ * - Preview button (opens fullscreen preview dialog)
+ *
+ * This operates directly on the HAST tree for better performance,
+ * avoiding the need to stringify and re-parse HTML.
+ */
+
+import type { Plugin } from 'unified';
+import type { Root, Element, ElementContent } from 'hast';
+import { visit } from 'unist-util-visit';
+import { MERMAID_WRAPPER_CLASS, MERMAID_SCROLL_CONTAINER_CLASS } from '$lib/constants';
+import {
+	createBlockHeader,
+	createCopyButton,
+	createPreviewButton,
+	createWrapper,
+	generateBlockId
+} from './code-block-utils';
+
+declare global {
+	interface Window { // merges an extra optional field into the global Window type
+		idxMermaidBlock?: number; // same name is passed to generateBlockId() in this file; presumably its counter slot - confirm in code-block-utils
+	}
+}
+
+/**
+ * Rehype plugin to enhance mermaid diagram blocks with wrapper, header, and action buttons.
+ * This plugin wraps <pre class="mermaid"> elements with a container that includes:
+ * - Language label ("mermaid")
+ * - Copy button
+ * - Preview button
+ */
+export const rehypeEnhanceMermaidBlocks: Plugin<[], Root> = () => {
+	return (tree: Root) => {
+		visit(tree, 'element', (pre: Element, position, container) => {
+			// Only <pre> elements that have a known slot in a parent can be swapped out.
+			if (!container || position === undefined || pre.tagName !== 'pre') return;
+
+			// Enhance only a <pre> whose className list contains the exact entry "mermaid".
+			const classes = pre.properties?.className;
+			if (!Array.isArray(classes)) return;
+			if (!classes.includes('mermaid')) return;
+
+			const attr = 'data-mermaid-id';
+			const blockId = generateBlockId('mermaid', 'idxMermaidBlock');
+
+			// Diagram source = values of the direct text children, concatenated in order.
+			// Non-text children contribute nothing.
+			let syntax = '';
+			for (const child of pre.children) {
+				if (child.type === 'text') syntax += child.value;
+			}
+
+			// Keep the raw syntax (for the copy action) and the id on the <pre> itself.
+			pre.properties = {
+				...pre.properties,
+				'data-mermaid-syntax': syntax,
+				[attr]: blockId
+			};
+
+			// Header labelled "mermaid" with a copy button and a preview button.
+			const buttons = [
+				createCopyButton(blockId, attr, 'Copy mermaid syntax'),
+				createPreviewButton(blockId, attr, 'Preview diagram')
+			];
+
+			const header = createBlockHeader('mermaid', blockId, attr, buttons);
+
+			// Put the wrapper (built from the header and the original <pre>) into the
+			// slot the <pre> occupied; the wrapper also receives the id attribute.
+			(container.children as ElementContent[])[position] = createWrapper(
+				header,
+				pre,
+				MERMAID_WRAPPER_CLASS,
+				MERMAID_SCROLL_CONTAINER_CLASS,
+				{ [attr]: blockId }
+			);
+		});
+	};
+};
diff --git a/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/mermaid-pre.ts b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/mermaid-pre.ts
new file mode 100644
index 00000000..e2270a65
--- /dev/null
+++ b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/mermaid-pre.ts
@@ -0,0 +1,67 @@
+import type { Plugin } from 'unified';
+import type { Root, Element, ElementContent, Text } from 'hast';
+import { visit } from 'unist-util-visit';
+
+/**
+ * Recursively extracts all text content from a HAST node.
+ * Handles nested elements (e.g., span wrappers from syntax highlighting).
+ */
+function extractText(node: ElementContent): string {
+	// Text nodes yield their value; any other non-element node yields nothing.
+	if (node.type !== 'element') return node.type === 'text' ? node.value : '';
+
+	// Elements yield the concatenated text of all their descendants, in order.
+	return (node.children ?? []).reduce((text, child) => text + extractText(child), '');
+}
+
+/**
+ * Rehype plugin to convert mermaid code blocks to <pre class="mermaid"> elements.
+ *
+ * Transforms:
+ *   <pre><code class="language-mermaid">graph TD; A-->B</code></pre>
+ * into:
+ *   <pre class="mermaid">graph TD; A-->B</pre>
+ * + * The mermaid library renders these client-side via mermaid.run(). + * + * Must run BEFORE rehypeEnhanceCodeBlocks so mermaid blocks are not wrapped + * with code block headers/buttons (they have no child, so they're skipped). + */ +export const rehypeMermaidPre: Plugin<[], Root> = () => { + return (tree: Root) => { + visit(tree, 'element', (node: Element, index, parent) => { + if (node.tagName !== 'pre' || !parent || index === undefined) return; + + const codeElement = node.children.find( + (child): child is Element => child.type === 'element' && child.tagName === 'code' + ); + + if (!codeElement) return; + + const className = codeElement.properties?.className; + if (!Array.isArray(className)) return; + + const isMermaid = className.some( + (cls) => typeof cls === 'string' && cls === 'language-mermaid' + ); + + if (!isMermaid) return; + + // Recursively extract text to handle nested spans from syntax highlighting + const diagramText = codeElement.children.map(extractText).join('').trim(); + + if (!diagramText) return; + + const mermaidPre: Element = { + type: 'element', + tagName: 'pre', + properties: { + className: ['mermaid'] + }, + children: [{ type: 'text', value: diagramText } as Text] + }; + + (parent.children as ElementContent[])[index] = mermaidPre; + }); + }; +}; diff --git a/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/rehype-rtl-support.ts b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/rehype-rtl-support.ts new file mode 100644 index 00000000..0a8b93ad --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/rehype-rtl-support.ts @@ -0,0 +1,28 @@ +/** + * Rehype plugin to provide comprehensive RTL support by adding dir="auto" + * to all text-containing elements. 
+ * + * This operates directly on the HAST tree, ensuring that all elements + * (including those not in a predefined list) receive the attribute. + */ + +import type { Plugin } from 'unified'; +import type { Root, Element } from 'hast'; +import { visit } from 'unist-util-visit'; + +/** + * Rehype plugin to add dir="auto" to all elements that have children. + * This provides bidirectional text support for mixed RTL/LTR content. + */ +export const rehypeRtlSupport: Plugin<[], Root> = () => { + return (tree: Root) => { + visit(tree, 'element', (node: Element) => { + if (node.children && node.children.length > 0) { + node.properties = { + ...node.properties, + dir: 'auto' + }; + } + }); + }; +}; diff --git a/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/resolve-attachment-images.ts b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/resolve-attachment-images.ts new file mode 100644 index 00000000..36e7a319 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/resolve-attachment-images.ts @@ -0,0 +1,34 @@ +import type { Root as HastRoot } from 'hast'; +import { visit } from 'unist-util-visit'; +import type { DatabaseMessageExtra, DatabaseMessageExtraImageFile } from '$lib/types/database'; +import { AttachmentType, UrlProtocol } from '$lib/enums'; + +/** + * Rehype plugin to resolve attachment image sources. + * Converts attachment names (e.g., "mcp-attachment-xxx.png") to base64 data URLs. 
+ */ +export function rehypeResolveAttachmentImages(options: { attachments?: DatabaseMessageExtra[] }) { + return (tree: HastRoot) => { + visit(tree, 'element', (node) => { + if (node.tagName === 'img' && node.properties?.src) { + const src = String(node.properties.src); + + // Skip data URLs and external URLs + if (src.startsWith(UrlProtocol.DATA) || src.startsWith(UrlProtocol.HTTP)) { + return; + } + + // Find matching attachment + const attachment = options.attachments?.find( + (a): a is DatabaseMessageExtraImageFile => + a.type === AttachmentType.IMAGE && a.name === src + ); + + // Replace with base64 URL if found + if (attachment?.base64Url) { + node.properties.src = attachment.base64Url; + } + } + }); + }; +} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/table-html-restorer.ts b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/table-html-restorer.ts new file mode 100644 index 00000000..bc5d0346 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/rehype/table-html-restorer.ts @@ -0,0 +1,181 @@ +/** + * Rehype plugin to restore limited HTML elements inside Markdown table cells. + * + * ## Problem + * The remark/rehype pipeline neutralizes inline HTML as literal text + * (remarkLiteralHtml) so that XML/HTML snippets in LLM responses display + * as-is instead of being rendered. This causes
and
    markup in + * table cells to show as plain text. + * + * ## Solution + * This plugin traverses the HAST post-conversion, parses whitelisted HTML + * patterns from text nodes, and replaces them with actual HAST element nodes + * that will be rendered as real HTML. + * + * ## Supported HTML + * - `
    ` / `
    ` / `
    ` - Line breaks (inline) + * - `
    • ...
    ` - Unordered lists (block) + * + * ## Key Implementation Details + * + * ### 1. Sibling Combination (Critical) + * The Markdown pipeline may fragment content across multiple text nodes and `
    ` + * elements. For example, `
    • a
    ` might arrive as: + * - Text: `"
      "` + * - Element: `
      ` + * - Text: `"
    • a
    "` + * + * We must combine consecutive text nodes and `
    ` elements into a single string + * before attempting to parse list markup. Without this, list detection fails. + * + * ### 2. visitParents for Deep Traversal + * Table cell content may be wrapped in intermediate elements (e.g., `

    ` tags). + * Using `visitParents` instead of direct child iteration ensures we find text + * nodes at any depth within the cell. + * + * ### 3. Reference Comparison for No-Op Detection + * When checking if `
    ` expansion changed anything, we compare: + * `expanded.length !== 1 || expanded[0] !== textNode` + * + * This catches both cases: + * - Multiple nodes created (text was split) + * - Single NEW node created (original had only `
    `, now it's an element) + * + * A simple `length > 1` check would miss the single `
    ` case. + * + * ### 4. Strict List Validation + * `parseList()` rejects malformed markup by checking for garbage text between + * `

  • ` elements. This prevents creating broken DOM from partial matches like + * `
      garbage
    • a
    `. + * + * ### 5. Newline Substitution for `
    ` in Combined String + * When combining siblings, existing `
    ` elements become `\n` in the combined + * string. This allows list content to span visual lines while still being parsed + * as a single unit. + * + * @example + * // Input Markdown: + * // | Feature | Notes | + * // |---------|-------| + * // | Multi-line | First
    Second | + * // | List |
    • A
    • B
    | + * // + * // Without this plugin:
    and
      render as literal text + * // With this plugin:
      becomes line break,
        becomes actual list + */ + +import type { Plugin } from 'unified'; +import type { Element, ElementContent, Root, Text } from 'hast'; +import { visit } from 'unist-util-visit'; +import { visitParents } from 'unist-util-visit-parents'; +import { BR_PATTERN, LIST_PATTERN, LI_PATTERN } from '$lib/constants'; + +/** + * Expands text containing `
        ` tags into an array of text nodes and br elements. + */ +function expandBrTags(value: string): ElementContent[] { + const matches = [...value.matchAll(BR_PATTERN)]; + if (!matches.length) return [{ type: 'text', value } as Text]; + + const result: ElementContent[] = []; + let cursor = 0; + + for (const m of matches) { + if (m.index! > cursor) { + result.push({ type: 'text', value: value.slice(cursor, m.index) } as Text); + } + result.push({ type: 'element', tagName: 'br', properties: {}, children: [] } as Element); + cursor = m.index! + m[0].length; + } + + if (cursor < value.length) { + result.push({ type: 'text', value: value.slice(cursor) } as Text); + } + + return result; +} + +/** + * Parses a `
        • ...
        ` string into a HAST element. + * Returns null if the markup is malformed or contains unexpected content. + */ +function parseList(value: string): Element | null { + const match = value.trim().match(LIST_PATTERN); + if (!match) return null; + + const body = match[1]; + const items: ElementContent[] = []; + let cursor = 0; + + for (const liMatch of body.matchAll(LI_PATTERN)) { + // Reject if there's non-whitespace between list items + if (body.slice(cursor, liMatch.index!).trim()) return null; + + items.push({ + type: 'element', + tagName: 'li', + properties: {}, + children: expandBrTags(liMatch[1] ?? '') + } as Element); + + cursor = liMatch.index! + liMatch[0].length; + } + + // Reject if no items found or trailing garbage exists + if (!items.length || body.slice(cursor).trim()) return null; + + return { type: 'element', tagName: 'ul', properties: {}, children: items } as Element; +} + +/** + * Processes a single table cell, restoring HTML elements from text content. + */ +function processCell(cell: Element) { + visitParents(cell, 'text', (textNode: Text, ancestors) => { + const parent = ancestors[ancestors.length - 1]; + if (!parent || parent.type !== 'element') return; + + const parentEl = parent as Element; + const siblings = parentEl.children as ElementContent[]; + const startIndex = siblings.indexOf(textNode as ElementContent); + if (startIndex === -1) return; + + // Combine consecutive text nodes and
        elements into one string + let combined = ''; + let endIndex = startIndex; + + for (let i = startIndex; i < siblings.length; i++) { + const sib = siblings[i]; + if (sib.type === 'text') { + combined += (sib as Text).value; + endIndex = i; + } else if (sib.type === 'element' && (sib as Element).tagName === 'br') { + combined += '\n'; + endIndex = i; + } else { + break; + } + } + + // Try parsing as list first (replaces entire combined range) + const list = parseList(combined); + if (list) { + siblings.splice(startIndex, endIndex - startIndex + 1, list); + return; + } + + // Otherwise, just expand
        tags in this text node + const expanded = expandBrTags(textNode.value); + if (expanded.length !== 1 || expanded[0] !== textNode) { + siblings.splice(startIndex, 1, ...expanded); + } + }); +} + +export const rehypeRestoreTableHtml: Plugin<[], Root> = () => (tree) => { + visit(tree, 'element', (node: Element) => { + if (node.tagName === 'td' || node.tagName === 'th') { + processCell(node); + } + }); +}; diff --git a/examples/server/webui_llamacpp/src/lib/markdown/literal-html.ts b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/remark/literal-html.ts similarity index 99% rename from examples/server/webui_llamacpp/src/lib/markdown/literal-html.ts rename to examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/remark/literal-html.ts index d4ace01a..c974d8b1 100644 --- a/examples/server/webui_llamacpp/src/lib/markdown/literal-html.ts +++ b/examples/server/webui_llamacpp/src/lib/components/app/content/MarkdownContent/plugins/remark/literal-html.ts @@ -1,7 +1,7 @@ import type { Plugin } from 'unified'; import { visit } from 'unist-util-visit'; import type { Break, Content, Paragraph, PhrasingContent, Root, Text } from 'mdast'; -import { LINE_BREAK, NBSP, PHRASE_PARENTS, TAB_AS_SPACES } from '$lib/constants/literal-html'; +import { LINE_BREAK, NBSP, PHRASE_PARENTS, TAB_AS_SPACES } from '$lib/constants'; /** * remark plugin that rewrites raw HTML nodes into plain-text equivalents. diff --git a/examples/server/webui_llamacpp/src/lib/components/app/content/MermaidPreview.svelte b/examples/server/webui_llamacpp/src/lib/components/app/content/MermaidPreview.svelte new file mode 100644 index 00000000..d4825889 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/content/MermaidPreview.svelte @@ -0,0 +1,126 @@ + + +
        + +
        + + {@html svgHtml} +
        + + +
        + + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/content/MermaidPreviewControls.svelte b/examples/server/webui_llamacpp/src/lib/components/app/content/MermaidPreviewControls.svelte new file mode 100644 index 00000000..bb3185f4 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/content/MermaidPreviewControls.svelte @@ -0,0 +1,74 @@ + + +
        +
        + + {Math.round(scale * 100)}% + +
        + + +
        + + +
        +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/content/SyntaxHighlightedCode.svelte b/examples/server/webui_llamacpp/src/lib/components/app/content/SyntaxHighlightedCode.svelte new file mode 100644 index 00000000..c4d1706b --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/content/SyntaxHighlightedCode.svelte @@ -0,0 +1,90 @@ + + +
        + +
        {@html highlightedHtml}
        +
        + + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/content/index.ts b/examples/server/webui_llamacpp/src/lib/components/app/content/index.ts new file mode 100644 index 00000000..5d2884bb --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/content/index.ts @@ -0,0 +1,98 @@ +/** + * + * CONTENT RENDERING + * + * Components for rendering rich content: markdown, code, and previews. + * + */ + +/** + * **MarkdownContent** - Rich markdown renderer + * + * Renders markdown content with syntax highlighting, LaTeX math, + * tables, links, and code blocks. Optimized for streaming with + * incremental block-based rendering. + * + * **Features:** + * - GFM (GitHub Flavored Markdown): tables, task lists, strikethrough + * - LaTeX math via KaTeX (`$inline$` and `$$block$$`) + * - Syntax highlighting (highlight.js) with language detection + * - Code copy buttons with click feedback + * - External links open in new tab with security attrs + * - Image attachment resolution from message extras + * - Dark/light theme support (auto-switching) + * - Streaming-optimized incremental rendering + * - Code preview dialog for large blocks + * + * @example + * ```svelte + * + * ``` + */ +export { default as MarkdownContent } from './MarkdownContent/MarkdownContent.svelte'; + +/** + * **SyntaxHighlightedCode** - Code syntax highlighting + * + * Renders code with syntax highlighting using highlight.js. + * Supports theme switching and scrollable containers. 
+ * + * **Features:** + * - Auto language detection with fallback + * - Dark/light theme auto-switching + * - Scrollable container with configurable max dimensions + * - Monospace font styling + * - Preserves whitespace and formatting + * + * @example + * ```svelte + * + * ``` + */ +export { default as SyntaxHighlightedCode } from './SyntaxHighlightedCode.svelte'; + +/** + * **CollapsibleContentBlock** - Expandable content card + * + * Reusable collapsible card with header, icon, and auto-scroll. + * Used for tool calls and reasoning blocks in chat messages. + * + * **Features:** + * - Collapsible content with smooth animation + * - Custom icon and title display + * - Optional subtitle/status text + * - Auto-scroll during streaming (pauses on user scroll) + * - Configurable max height with overflow scroll + * + * @example + * ```svelte + * + * {reasoningContent} + * + * ``` + */ +export { default as CollapsibleContentBlock } from './CollapsibleContentBlock.svelte'; + +/** + * **MermaidPreview** - Interactive Mermaid diagram viewer + * + * Renders Mermaid-generated SVG diagrams with zoom, pan, and fit-to-view controls. 
+ * + * **Features:** + * - Mouse wheel zoom in/out + * - Click-drag panning with pointer capture + * - Fit to view and reset view controls + * - Download as SVG + * - Responsive scaling with viewBox detection + * + * @example + * ```svelte + * + * ``` + */ +export { default as MermaidPreview } from './MermaidPreview.svelte'; diff --git a/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogChatAttachmentPreview.svelte b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogChatAttachmentPreview.svelte deleted file mode 100644 index ac70b8dc..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogChatAttachmentPreview.svelte +++ /dev/null @@ -1,78 +0,0 @@ - - - - - - {displayName} - - {displayType} - {#if displaySize} - • {formatFileSize(displaySize)} - {/if} - - - - - - diff --git a/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogChatAttachmentsPreview.svelte b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogChatAttachmentsPreview.svelte new file mode 100644 index 00000000..533301df --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogChatAttachmentsPreview.svelte @@ -0,0 +1,88 @@ + + + + + + + + + + + + + + + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogChatAttachmentsViewAll.svelte b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogChatAttachmentsViewAll.svelte deleted file mode 100644 index 8f6ca76d..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogChatAttachmentsViewAll.svelte +++ /dev/null @@ -1,51 +0,0 @@ - - - - - - - - - All Attachments ({totalCount}) - View and manage all attached files - - - - - - diff --git a/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogChatError.svelte b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogChatError.svelte index 8ecb5890..ff100531 100644 --- 
a/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogChatError.svelte +++ b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogChatError.svelte @@ -1,17 +1,19 @@ - - - - - - diff --git a/examples/server/webui_llamacpp/src/lib/components/app/misc/CodePreviewDialog.svelte b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogCodePreview.svelte similarity index 99% rename from examples/server/webui_llamacpp/src/lib/components/app/misc/CodePreviewDialog.svelte rename to examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogCodePreview.svelte index 702519f9..fe5d9b50 100644 --- a/examples/server/webui_llamacpp/src/lib/components/app/misc/CodePreviewDialog.svelte +++ b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogCodePreview.svelte @@ -25,6 +25,7 @@ function handleOpenChange(nextOpen: boolean) { open = nextOpen; + onOpenChange?.(nextOpen); } @@ -46,6 +47,7 @@ aria-label="Close preview" > + Close preview diff --git a/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogConfirmation.svelte b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogConfirmation.svelte index b5175a99..becc658d 100644 --- a/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogConfirmation.svelte +++ b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogConfirmation.svelte @@ -1,6 +1,7 @@ + + + + + + {#if includeSensitiveData} + + {:else} + + {/if} + Export Settings + + + + {#if includeSensitiveData} +

        + Warning: This export will include sensitive data such as API keys and MCP server custom + headers (e.g., authorization tokens). Do not share this file with anyone you don't + trust. +

        + {:else} +

        + Sensitive data (API keys, MCP server custom headers) will not be included in the export + to protect your credentials. +

        + {/if} +
        +
        + +
        + + + +
        + + + Cancel + + {#if includeSensitiveData} + Export Anyway + {:else} + Export Without Sensitive Data + {/if} + + +
        +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogFileUploadError.svelte b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogFileUploadError.svelte new file mode 100644 index 00000000..3bb2d357 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogFileUploadError.svelte @@ -0,0 +1,88 @@ + + + + + + + + + File Upload Error + + + Some files cannot be uploaded with the current model. + + + +
        + {#if fileErrorData.generallyUnsupported.length > 0} +
        +

        Unsupported File Types

        + +
        + {#each fileErrorData.generallyUnsupported as file (file.name)} +
        +

        + {file.name} +

        + +

        File type not supported

        +
        + {/each} +
        +
        + {/if} + + {#if fileErrorData.modalityUnsupported.length > 0} +
        +
        + {#each fileErrorData.modalityUnsupported as file (file.name)} +
        +

        + {file.name} +

        + +

        + {fileErrorData.modalityReasons[file.name] || 'Not supported by current model'} +

        +
        + {/each} +
        +
        + {/if} +
        + +
        +

        This model supports:

        + +

        + {fileErrorData.supportedTypes.join(', ')} +

        +
        + + + handleOpenChange(false)}>Got it + +
        +
        +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogMcpResourcePreview.svelte b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogMcpResourcePreview.svelte new file mode 100644 index 00000000..7bf28408 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogMcpResourcePreview.svelte @@ -0,0 +1,122 @@ + + + + + + {extra.name} + + +
        + {extra.uri} + + {#if serverName} + + · + {#if favicon} + { + (e.currentTarget as HTMLImageElement).style.display = 'none'; + }} + /> + {/if} + {serverName} + + {/if} + + {#if extra.mimeType} + {extra.mimeType} + {/if} +
        +
        +
        + +
        + + + +
        + +
        + {#if isImageResource(extra.mimeType, extra.uri) && extra.content} +
        + {extra.name} +
        + {:else if isCodeResource(extra.mimeType, extra.uri) && extra.content} + + {:else if extra.content} +
        {extra.content}
        + {:else} +
        No content available
        + {/if} +
        +
        +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogMcpResourcesBrowser.svelte b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogMcpResourcesBrowser.svelte new file mode 100644 index 00000000..eb162a55 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogMcpResourcesBrowser.svelte @@ -0,0 +1,394 @@ + + + + + + + + + MCP Resources + + {#if totalCount > 0} + ({totalCount}) + {/if} + + + + Browse and attach resources from connected MCP servers to your chat context. + + + +
        +
        + +
        + +
        + {#if selectedTemplate && !templatePreviewContent} +
        +
        + + + + {selectedTemplate.title || selectedTemplate.name} + +
        + + {#if selectedTemplate.description} +

        + {selectedTemplate.description} +

        + {/if} + +
        +

        + {selectedTemplate.uriTemplate} +

        +
        + + {#if templatePreviewLoading} +
        + +
        + {:else if templatePreviewError} +
        + {templatePreviewError} + + +
        + {:else} + + {/if} +
        + {:else if hasTemplateResult} + + + {:else if selectedResources.size === 1} + {@const allResources = getAllResourcesFlatInTreeOrder()} + {@const selectedResource = allResources.find((r) => selectedResources.has(r.uri))} + + + {:else if selectedResources.size > 1} +
        + {#each getAllResourcesFlatInTreeOrder() as resource (resource.uri)} + {#if selectedResources.has(resource.uri)} + + {/if} + {/each} +
        + {:else} +
        + Select a resource to preview +
        + {/if} +
        +
        + + + + + {#if hasTemplateResult} + + {:else} + + {/if} + +
        +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogMcpServerAddNew.svelte b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogMcpServerAddNew.svelte new file mode 100644 index 00000000..349f7e7f --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogMcpServerAddNew.svelte @@ -0,0 +1,88 @@ + + + + + + Add New Server + + +
        + (newServerUrl = v)} + onHeadersChange={(v) => (newServerHeaders = v)} + urlError={newServerUrl ? newServerUrlError : null} + id="new-server" + /> +
        + + + + + + +
        +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogMermaidPreview.svelte b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogMermaidPreview.svelte new file mode 100644 index 00000000..9cbeebc3 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogMermaidPreview.svelte @@ -0,0 +1,20 @@ + + + + + + + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogModelInformation.svelte b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogModelInformation.svelte new file mode 100644 index 00000000..5a10859a --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogModelInformation.svelte @@ -0,0 +1,270 @@ + + + + + + + + Model Information + + Current model details and capabilities + + +
        + {#if isLoadingModels || isLoadingRouterProps} +
        +
        Loading model information...
        +
        + {:else if firstModel} + {@const modelMeta = firstModel.meta} + + {#if serverProps} + + + + Model + + +
        + + {modelName} + + + +
        +
        +
        +
        + + + + File Path + + + + {serverProps.model_path} + + + + + + + + {#if serverProps?.default_generation_settings?.n_ctx} + + Context Size + + {formatNumber(serverProps.default_generation_settings.n_ctx)} tokens + + {:else} + + Context Size + + Not available + + {/if} + + + {#if modelMeta?.n_ctx_train} + + Training Context + + {formatNumber(modelMeta.n_ctx_train)} tokens + + {/if} + + + {#if modelMeta?.size} + + Model Size + + {formatFileSize(modelMeta.size)} + + {/if} + + + {#if modelMeta?.n_params} + + Parameters + + {formatParameters(modelMeta.n_params)} + + {/if} + + + {#if modelMeta?.n_embd} + + Embedding Size + + {formatNumber(modelMeta.n_embd)} + + {/if} + + + {#if modelMeta?.n_vocab} + + Vocabulary Size + + {formatNumber(modelMeta.n_vocab)} tokens + + {/if} + + + {#if modelMeta?.vocab_type} + + Vocabulary Type + {modelMeta.vocab_type} + + {/if} + + + + Parallel Slots + + {serverProps.total_slots} + + + + {#if modalities.length > 0} + + Modalities + + +
        + +
        +
        +
        + {/if} + + + + Build Info + + {serverProps.build_info} + + + + {#if serverProps.chat_template} + + Chat Template + + +
        +
        {serverProps.chat_template}
        +
        +
        +
        + {/if} +
        +
        + {/if} + {:else if !isLoadingModels} +
        +
        No model information available
        +
        + {/if} +
        +
        +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogModelNotAvailable.svelte b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogModelNotAvailable.svelte new file mode 100644 index 00000000..a6c20291 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/DialogModelNotAvailable.svelte @@ -0,0 +1,76 @@ + + + + + + + + Model Not Available + + + + The requested model could not be found. Select an available model to continue. + + + +
        +
        +

        + Requested: {modelName} +

        +
        + + {#if availableModels.length > 0} +
        +

        Select an available model:

        +
        + {#each availableModels as model (model)} + + {/each} +
        +
        + {/if} +
        + + + handleOpenChange(false)}>Cancel + +
        +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/dialogs/index.ts b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/index.ts new file mode 100644 index 00000000..29136308 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/dialogs/index.ts @@ -0,0 +1,508 @@ +/** + * + * DIALOGS + * + * Modal dialog components for the chat application. + * + * All dialogs use ShadCN Dialog or AlertDialog components for consistent + * styling, accessibility, and animation. They integrate with application + * stores for state management and data access. + * + */ + +/** + * **DialogMcpServerAddNew** - Add new MCP server dialog + * + * Modal dialog for adding a new MCP server with URL and optional headers. + * Validates URL format and integrates with mcpStore and conversationsStore. + */ +export { default as DialogMcpServerAddNew } from './DialogMcpServerAddNew.svelte'; + +/** + * **DialogExportSettings** - Settings export dialog with sensitive data warning + * + * Dialog for exporting settings with an option to include or exclude + * sensitive data (API keys, MCP server custom headers). Defaults to excluding + * sensitive data for security. User must explicitly opt-in to include them. 
+ * + * **Architecture:** + * - Uses ShadCN AlertDialog + * - Checkbox to toggle sensitive data inclusion (defaults to false) + * - Warning icon and message when sensitive data is included + * - Destructive variant for the action button when exporting with sensitive data + * + * **Features:** + * - Secure default: sensitive data excluded by default + * - User must explicitly opt-in to include sensitive data + * - Visual warning (ShieldOff icon) when sensitive data is included + * - Different action text based on sensitive data state + * + * @example + * ```svelte + * showExportSettings = false} + * /> + * ``` + */ +export { default as DialogExportSettings } from './DialogExportSettings.svelte'; + +/** + * + * CONFIRMATION DIALOGS + * + * Dialogs for user action confirmations. Use AlertDialog for blocking + * confirmations that require explicit user decision before proceeding. + * + */ + +/** + * **DialogConfirmation** - Generic confirmation dialog + * + * Reusable confirmation dialog with customizable title, description, + * and action buttons. Supports destructive action styling and custom icons. + * Used for delete confirmations, irreversible actions, and important decisions. + * + * **Architecture:** + * - Uses ShadCN AlertDialog + * - Supports variant styling (default, destructive) + * - Customizable button labels and callbacks + * + * **Features:** + * - Customizable title and description text + * - Destructive variant with red styling for dangerous actions + * - Custom icon support in header + * - Cancel and confirm button callbacks + * - Keyboard accessible (Escape to cancel, Enter to confirm) + * + * @example + * ```svelte + * showDelete = false} + * /> + * ``` + */ +export { default as DialogConfirmation } from './DialogConfirmation.svelte'; + +/** + * **DialogConversationTitleUpdate** - Conversation rename confirmation + * + * Confirmation dialog shown when editing the first user message in a conversation. 
+ * Asks user whether to update the conversation title to match the new message content. + * + * **Architecture:** + * - Uses ShadCN AlertDialog + * - Shows current vs proposed title comparison + * - Triggered by ChatMessages when first message is edited + * + * **Features:** + * - Side-by-side display of current and new title + * - "Keep Current Title" and "Update Title" action buttons + * - Styled title previews in muted background boxes + * + * @example + * ```svelte + * showTitleUpdate = false} + * /> + * ``` + */ +export { default as DialogConversationTitleUpdate } from './DialogConversationTitleUpdate.svelte'; + +/** + * + * CONTENT PREVIEW DIALOGS + * + * Dialogs for previewing and displaying content in full-screen or modal views. + * + */ + +/** + * **DialogCodePreview** - Full-screen code/HTML preview + * + * Full-screen dialog for previewing HTML or code in an isolated iframe. + * Used by MarkdownContent component for previewing rendered HTML blocks + * from code blocks in chat messages. + * + * **Architecture:** + * - Uses ShadCN Dialog with full viewport layout + * - Sandboxed iframe execution (allow-scripts only) + * - Clears content when closed for security + * + * **Features:** + * - Full viewport iframe preview + * - Sandboxed execution environment + * - Close button with mix-blend-difference for visibility over any content + * - Automatic content cleanup on close + * - Supports HTML preview with proper isolation + * + * @example + * ```svelte + * + * ``` + */ +export { default as DialogCodePreview } from './DialogCodePreview.svelte'; + +/** + * + * ATTACHMENT DIALOGS + * + * Dialogs for viewing and managing file attachments. Support both + * uploaded files (pending) and stored attachments (in messages). + * + */ + +/** + * **DialogChatAttachmentsPreview** - Unified attachment preview dialog + * + * Modal dialog for previewing file attachments. 
Automatically adapts to the + * number of items: shows a single file preview without carousel for one item, + * or a gallery with carousel navigation for multiple items. + * + * **Architecture:** + * - Wraps ChatAttachmentsPreview component in ShadCN Dialog + * - Accepts uploadedFiles and attachments arrays as data sources + * - Filters out MCP prompts and MCP resources from display + * + * **Features:** + * - Single item mode: direct preview without navigation controls + * - Multi-item mode: gallery with left/right arrows and thumbnail strip + * - File type aware preview (images, text, PDFs, audio) + * - File name and size/count display in header + * + * @example + * ```svelte + * + * + * ``` + */ +export { default as DialogChatAttachmentsPreview } from './DialogChatAttachmentsPreview.svelte'; + +/** + * + * ERROR & ALERT DIALOGS + * + * Dialogs for displaying errors, warnings, and alerts to users. + * Provide context about what went wrong and recovery options. + * + */ + +/** + * **DialogChatError** - Chat/generation error display + * + * Alert dialog for displaying chat and generation errors with context + * information. Supports different error types with appropriate styling + * and messaging. 
+ * + * **Architecture:** + * - Uses ShadCN AlertDialog for modal display + * - Differentiates between timeout and server errors + * - Shows context info when available (token counts) + * + * **Error Types:** + * - **timeout**: TCP timeout with timer icon, red destructive styling + * - **server**: Server error with warning icon, amber warning styling + * + * **Features:** + * - Type-specific icons (TimerOff for timeout, AlertTriangle for server) + * - Error message display in styled badge + * - Context info showing prompt tokens and context size + * - Close button to dismiss + * + * @example + * ```svelte + * + * ``` + */ +export { default as DialogChatError } from './DialogChatError.svelte'; + +/** + * **DialogEmptyFileAlert** - Empty file upload warning + * + * Alert dialog shown when user attempts to upload empty files. Lists the + * empty files that were detected and removed from attachments, with + * explanation of why empty files cannot be processed. + * + * **Architecture:** + * - Uses ShadCN AlertDialog for modal display + * - Receives list of empty file names from ChatScreen + * - Triggered during file upload validation + * + * **Features:** + * - FileX icon indicating file error + * - List of empty file names in monospace font + * - Explanation of what happened and why + * - Single "Got it" dismiss button + * + * @example + * ```svelte + * + * ``` + */ +export { default as DialogEmptyFileAlert } from './DialogEmptyFileAlert.svelte'; + +/** + * **DialogFileUploadError** - File upload compatibility error + * + * Alert dialog shown when files cannot be uploaded due to type incompatibility + * or model modality restrictions. Displays a categorized list of problematic + * files with explanations and shows which file types the current model supports. 
+ * + * **Architecture:** + * - Uses ShadCN AlertDialog for modal display + * - Receives structured file error data from ChatScreen + * - Triggered during file upload validation in processFiles() + * + * **Features:** + * - Categorized display: unsupported types vs modality restrictions + * - File name in monospace with contextual error messages + * - Summary of supported file types for the current model + * - Scrollable content area for large error lists + * - Single "Got it" dismiss button + * + * @example + * ```svelte + * + * ``` + */ +export { default as DialogFileUploadError } from './DialogFileUploadError.svelte'; + +/** + * **DialogModelNotAvailable** - Model unavailable error + * + * Alert dialog shown when the requested model (from URL params or selection) + * is not available on the server. Displays the requested model name and + * offers selection from available models. + * + * **Architecture:** + * - Uses ShadCN AlertDialog for modal display + * - Integrates with SvelteKit navigation for model switching + * - Receives available models list from modelsStore + * + * **Features:** + * - Warning icon with amber styling + * - Requested model name display in styled badge + * - Scrollable list of available models + * - Click model to navigate with updated URL params + * - Cancel button to dismiss without selection + * + * @example + * ```svelte + * + * ``` + */ +export { default as DialogModelNotAvailable } from './DialogModelNotAvailable.svelte'; + +/** + * + * DATA MANAGEMENT DIALOGS + * + * Dialogs for managing conversation data, including import/export + * and selection operations. + * + */ + +/** + * **DialogConversationSelection** - Conversation picker for import/export + * + * Dialog for selecting conversations during import or export operations. + * Displays list of conversations with checkboxes for multi-selection. + * Used by ChatSettingsImportExportTab for data management. 
+ * + * **Architecture:** + * - Wraps ConversationSelection component in ShadCN Dialog + * - Supports export mode (select from local) and import mode (select from file) + * - Resets selection state when dialog opens + * - High z-index to appear above settings dialog + * + * **Features:** + * - Multi-select with checkboxes + * - Conversation title and message count display + * - Select all / deselect all controls + * - Mode-specific descriptions (export vs import) + * - Cancel and confirm callbacks with selected conversations + * + * @example + * ```svelte + * showExportSelection = false} + * /> + * ``` + */ +export { default as DialogConversationSelection } from './DialogConversationSelection.svelte'; + +/** + * + * MODEL INFORMATION DIALOGS + * + * Dialogs for displaying model and server information. + * + */ + +/** + * **DialogModelInformation** - Model details display + * + * Dialog showing comprehensive information about the currently loaded model + * and server configuration. Displays model metadata, capabilities, and + * server settings in a structured table format. + * + * **Architecture:** + * - Uses ShadCN Dialog with wide layout for table display + * - Fetches data from serverStore (props) and modelsStore (metadata) + * - Auto-fetches models when dialog opens if not loaded + * + * **Information Displayed:** + * - **Model**: Name with copy button + * - **File Path**: Full path to model file with copy button + * - **Context Size**: Current context window size + * - **Training Context**: Original training context (if available) + * - **Model Size**: File size in human-readable format + * - **Parameters**: Parameter count (e.g., "7B", "70B") + * - **Embedding Size**: Embedding dimension + * - **Vocabulary Size**: Token vocabulary size + * - **Vocabulary Type**: Tokenizer type (BPE, etc.) 
+ * - **Parallel Slots**: Number of concurrent request slots + * - **Modalities**: Supported input types (text, vision, audio) + * - **Build Info**: Server build information + * - **Chat Template**: Full Jinja template in scrollable code block + * + * **Features:** + * - Copy buttons for model name and path + * - Modality badges with icons + * - Responsive table layout with container queries + * - Loading state while fetching model info + * - Scrollable chat template display + * + * @example + * ```svelte + * + * ``` + */ +export { default as DialogModelInformation } from './DialogModelInformation.svelte'; + +/** + * **DialogMcpResourcesBrowser** - MCP resources browser dialog + * + * Dialog for browsing and attaching MCP resources to chat context. + * Displays resources from connected MCP servers in a tree structure + * with preview panel and multi-select support. + * + * **Architecture:** + * - Uses ShadCN Dialog with two-panel layout + * - Left panel: McpResourcesBrowser with tree navigation + * - Right panel: McpResourcePreview for selected resource + * - Integrates with mcpStore for resource fetching and attachment + * + * **Features:** + * - Tree-based resource navigation by server and path + * - Single and multi-select with shift+click + * - Resource preview with content display + * - Quick attach button per resource + * - Batch attach for multiple selections + * + * @example + * ```svelte + * + * ``` + */ +export { default as DialogMcpResourcesBrowser } from './DialogMcpResourcesBrowser.svelte'; + +/** + * **DialogMcpResourcePreview** - MCP resource content preview + * + * Dialog for previewing the content of a stored MCP resource attachment. + * Displays the resource content with syntax highlighting for code, + * image rendering for images, and plain text for other content. 
+ * + * **Features:** + * - Syntax highlighted code preview + * - Image rendering for image resources + * - Copy to clipboard and download actions + * - Server name and favicon display + * - MIME type badge + * + * @example + * ```svelte + * + * ``` + */ +export { default as DialogMcpResourcePreview } from './DialogMcpResourcePreview.svelte'; + +/** + * **DialogMermaidPreview** - Full-screen Mermaid diagram preview with zoom and pan + * + * Full-screen dialog for previewing Mermaid diagrams with interactive controls. + * Supports mouse wheel zoom, drag-to-pan, and toolbar buttons for zoom in/out, + * fit to view, and reset. + * + * **Architecture:** + * - Uses UI dialog components (`Dialog.Root`, `Dialog.Overlay`, `Dialog.Content`) + * for consistent styling, animations, and accessibility + * - CSS transform-based zoom and pan (no external dependencies) + * - Pointer events for cross-device drag support (mouse + touch) + * - Wheel events for zoom-to-cursor functionality + * + * **Features:** + * - Scroll wheel zoom centered on cursor position + * - Click and drag to pan the diagram + * - Toolbar with zoom in, zoom out, fit to view, reset controls + * - Zoom percentage indicator + * - Keyboard accessible close button + * - Dark/light theme support + * + * @example + * ```svelte + * + * ``` + */ +export { default as DialogMermaidPreview } from './DialogMermaidPreview.svelte'; diff --git a/examples/server/webui_llamacpp/src/lib/components/app/forms/InputWithSuggestions.svelte b/examples/server/webui_llamacpp/src/lib/components/app/forms/InputWithSuggestions.svelte new file mode 100644 index 00000000..5d047c59 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/forms/InputWithSuggestions.svelte @@ -0,0 +1,78 @@ + + +
        + + + onInput(e.currentTarget.value)} + onkeydown={onKeydown} + onblur={onBlur} + onfocus={onFocus} + placeholder="Enter {name}" + autocomplete="off" + /> + + {#if isAutocompleteActive && suggestions.length > 0} +
        + {#each suggestions as suggestion, i (suggestion)} + + {/each} +
        + {/if} +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/forms/KeyValuePairs.svelte b/examples/server/webui_llamacpp/src/lib/components/app/forms/KeyValuePairs.svelte new file mode 100644 index 00000000..e0bd8d98 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/forms/KeyValuePairs.svelte @@ -0,0 +1,143 @@ + + +
        +
        + {#if sectionLabel} + + {sectionLabel} + {#if sectionLabelOptional} + (optional) + {/if} + + {/if} + + +
        + {#if pairs.length > 0} +
        + {#each pairs as pair, index (index)} +
        + updatePairKey(index, e.currentTarget.value)} + onblur={(e) => trimPairKey(index, e.currentTarget.value)} + class="flex-1" + /> + + + + +
        + {/each} +
        + {:else} +

        {emptyMessage}

        + {/if} +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/forms/SearchInput.svelte b/examples/server/webui_llamacpp/src/lib/components/app/forms/SearchInput.svelte new file mode 100644 index 00000000..19dd7e6a --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/forms/SearchInput.svelte @@ -0,0 +1,75 @@ + + +
        + + + + + {#if showClearButton} + + {/if} +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/forms/index.ts b/examples/server/webui_llamacpp/src/lib/components/app/forms/index.ts new file mode 100644 index 00000000..4cf56cdc --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/forms/index.ts @@ -0,0 +1,44 @@ +/** + * + * FORMS & INPUTS + * + * Form-related utility components. + * + */ + +/** + * **InputWithSuggestions** - Input field with autocomplete suggestions + * + * Text input with dropdown suggestions and keyboard navigation. + * Supports autocomplete functionality with suggestion loading. + * + * **Features:** + * - Autocomplete dropdown with suggestions + * - Keyboard navigation (arrow keys, enter) + * - Loading state for suggestions + * - Focus and blur handling + */ +export { default as InputWithSuggestions } from './InputWithSuggestions.svelte'; + +/** + * **KeyValuePairs** - Editable key-value list + * + * Dynamic list of key-value pairs with add/remove functionality. + * Used for HTTP headers, metadata, and configuration. + * + * **Features:** + * - Add new pairs with button + * - Remove individual pairs + * - Customizable placeholders and labels + * - Empty state message + * - Auto-resize value textarea + */ +export { default as KeyValuePairs } from './KeyValuePairs.svelte'; + +/** + * **SearchInput** - Search field with clear button + * + * Input field optimized for search with clear button and keyboard handling. + * Supports placeholder, autofocus, and change callbacks. 
+ */ +export { default as SearchInput } from './SearchInput.svelte'; diff --git a/examples/server/webui_llamacpp/src/lib/components/app/index.ts b/examples/server/webui_llamacpp/src/lib/components/app/index.ts index 54bd8d5a..4914c743 100644 --- a/examples/server/webui_llamacpp/src/lib/components/app/index.ts +++ b/examples/server/webui_llamacpp/src/lib/components/app/index.ts @@ -1,63 +1,12 @@ -// Chat - -export { default as ChatAttachmentPreview } from './chat/ChatAttachments/ChatAttachmentPreview.svelte'; -export { default as ChatAttachmentThumbnailFile } from './chat/ChatAttachments/ChatAttachmentThumbnailFile.svelte'; -export { default as ChatAttachmentThumbnailImage } from './chat/ChatAttachments/ChatAttachmentThumbnailImage.svelte'; -export { default as ChatAttachmentsList } from './chat/ChatAttachments/ChatAttachmentsList.svelte'; -export { default as ChatAttachmentsViewAll } from './chat/ChatAttachments/ChatAttachmentsViewAll.svelte'; - -export { default as ChatForm } from './chat/ChatForm/ChatForm.svelte'; -export { default as ChatFormActionFileAttachments } from './chat/ChatForm/ChatFormActions/ChatFormActionFileAttachments.svelte'; -export { default as ChatFormActionRecord } from './chat/ChatForm/ChatFormActions/ChatFormActionRecord.svelte'; -export { default as ChatFormActions } from './chat/ChatForm/ChatFormActions/ChatFormActions.svelte'; -export { default as ChatFormFileInputInvisible } from './chat/ChatForm/ChatFormFileInputInvisible.svelte'; -export { default as ChatFormHelperText } from './chat/ChatForm/ChatFormHelperText.svelte'; -export { default as ChatFormModelSelector } from './chat/ChatForm/ChatFormModelSelector.svelte'; -export { default as ChatFormTextarea } from './chat/ChatForm/ChatFormTextarea.svelte'; - -export { default as ChatMessage } from './chat/ChatMessages/ChatMessage.svelte'; -export { default as ChatMessages } from './chat/ChatMessages/ChatMessages.svelte'; -export { default as ChatMessageBranchingControls } from 
'./chat/ChatMessages/ChatMessageBranchingControls.svelte'; -export { default as ChatMessageThinkingBlock } from './chat/ChatMessages/ChatMessageThinkingBlock.svelte'; - -export { default as ChatScreen } from './chat/ChatScreen/ChatScreen.svelte'; -export { default as ChatScreenHeader } from './chat/ChatScreen/ChatScreenHeader.svelte'; -export { default as ChatScreenProcessingInfo } from './chat/ChatScreen/ChatScreenProcessingInfo.svelte'; -export { default as ChatScreenWarning } from './chat/ChatScreen/ChatScreenWarning.svelte'; - -export { default as ChatSettings } from './chat/ChatSettings/ChatSettings.svelte'; -export { default as ChatSettingsFooter } from './chat/ChatSettings/ChatSettingsFooter.svelte'; -export { default as ChatSettingsFields } from './chat/ChatSettings/ChatSettingsFields.svelte'; -export { default as ChatSettingsImportExportTab } from './chat/ChatSettings/ChatSettingsImportExportTab.svelte'; -export { default as ChatSettingsParameterSourceIndicator } from './chat/ChatSettings/ChatSettingsParameterSourceIndicator.svelte'; - -export { default as ChatSidebar } from './chat/ChatSidebar/ChatSidebar.svelte'; -export { default as ChatSidebarConversationItem } from './chat/ChatSidebar/ChatSidebarConversationItem.svelte'; -export { default as ChatSidebarSearch } from './chat/ChatSidebar/ChatSidebarSearch.svelte'; - -// Dialogs - -export { default as DialogChatAttachmentPreview } from './dialogs/DialogChatAttachmentPreview.svelte'; -export { default as DialogChatAttachmentsViewAll } from './dialogs/DialogChatAttachmentsViewAll.svelte'; -export { default as DialogChatError } from './dialogs/DialogChatError.svelte'; -export { default as DialogChatSettings } from './dialogs/DialogChatSettings.svelte'; -export { default as DialogConfirmation } from './dialogs/DialogConfirmation.svelte'; -export { default as DialogConversationSelection } from './dialogs/DialogConversationSelection.svelte'; -export { default as DialogConversationTitleUpdate } from 
'./dialogs/DialogConversationTitleUpdate.svelte'; -export { default as DialogEmptyFileAlert } from './dialogs/DialogEmptyFileAlert.svelte'; - -// Miscellanous - -export { default as ActionButton } from './misc/ActionButton.svelte'; -export { default as ActionDropdown } from './misc/ActionDropdown.svelte'; -export { default as ConversationSelection } from './misc/ConversationSelection.svelte'; -export { default as KeyboardShortcutInfo } from './misc/KeyboardShortcutInfo.svelte'; -export { default as MarkdownContent } from './misc/MarkdownContent.svelte'; -export { default as RemoveButton } from './misc/RemoveButton.svelte'; - -// Server - -export { default as ServerStatus } from './server/ServerStatus.svelte'; -export { default as ServerErrorSplash } from './server/ServerErrorSplash.svelte'; -export { default as ServerLoadingSplash } from './server/ServerLoadingSplash.svelte'; -export { default as ServerInfo } from './server/ServerInfo.svelte'; +export * from './actions'; +export * from './badges'; +export * from './chat'; +export * from './content'; +export * from './dialogs'; +export * from './forms'; +export * from './mcp'; +export * from './misc'; +export * from './settings'; +export * from './models'; +export * from './navigation'; +export * from './server'; diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpActiveServersAvatars.svelte b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpActiveServersAvatars.svelte new file mode 100644 index 00000000..2f732cfd --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpActiveServersAvatars.svelte @@ -0,0 +1,89 @@ + + +{#if !hasEnabledMcpServers} + +{:else if mcpFavicons.length > 0} + +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpCapabilitiesBadges.svelte b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpCapabilitiesBadges.svelte new file mode 100644 index 00000000..d17b24eb --- /dev/null +++ 
b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpCapabilitiesBadges.svelte @@ -0,0 +1,61 @@ + + +{#if capabilities} + {#if capabilities.server.tools} + + + + Tools + + {/if} + + {#if capabilities.server.resources} + + + + Resources + + {/if} + + {#if capabilities.server.prompts} + + + + Prompts + + {/if} + + {#if capabilities.server.logging} + + + + Logging + + {/if} + + {#if capabilities.server.completions} + + + + Completions + + {/if} + + {#if capabilities.server.tasks} + + + + Tasks + + {/if} +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpConnectionLogs.svelte b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpConnectionLogs.svelte new file mode 100644 index 00000000..305c9db3 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpConnectionLogs.svelte @@ -0,0 +1,81 @@ + + +{#if logs.length > 0} + +
        + + {#if isExpanded} + + {:else} + + {/if} + + Connection Log ({logs.length}) + + {#if connectionTimeMs !== undefined} + · Connected in {connectionTimeMs}ms + {/if} + +
        + + +
        + {#each logs as log (log.timestamp.getTime() + log.message)} + {@const IconComponent = getMcpLogLevelIcon(log.level)} + +
        + + {formatTime(log.timestamp)} + + + + + {log.message} +
        + + {#if log.details !== undefined} +
        + details + +
        +{formatLogDetails(log.details)}
        +
        + {/if} + {/each} +
        +
        +
        +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpLogo.svelte b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpLogo.svelte new file mode 100644 index 00000000..9f73db84 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpLogo.svelte @@ -0,0 +1,111 @@ + + + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourcePreview.svelte b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourcePreview.svelte new file mode 100644 index 00000000..55e1e20a --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourcePreview.svelte @@ -0,0 +1,174 @@ + + +
        + {#if !resource} +
        + + + Select a resource to preview +
        + {:else} +
        +
        +

        {resource.title || resource.name}

        + +

        {resource.uri}

        + + {#if resource.description} +

        {resource.description}

        + {/if} +
        + +
        + + + +
        +
        + +
        + {#if isLoading} +
        + +
        + {:else if error} +
        + + + {error} +
        + {:else if content} + {@const textContent = getResourceTextContent(content)} + {@const blobContent = getResourceBlobContent(content)} + + {#if textContent} +
        {textContent}
        + {/if} + + {#each blobContent as blob (blob.uri)} + {#if isImageMimeType(blob.mimeType ?? MimeTypeApplication.OCTET_STREAM)} + Resource content + {:else} +
        + + + Binary content ({blob.mimeType || 'unknown type'}) +
        + {/if} + {/each} + + {#if !textContent && blobContent.length === 0} +
        No content available
        + {/if} + {/if} +
        + + {#if resource.mimeType || resource.annotations} +
        + {#if resource.mimeType} + {resource.mimeType} + {/if} + + {#if resource.annotations?.priority !== undefined} + + Priority: {resource.annotations.priority} + + {/if} + + + Server: {resource.serverName} + +
        + {/if} + {/if} +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourceTemplateForm.svelte b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourceTemplateForm.svelte new file mode 100644 index 00000000..f6263251 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourceTemplateForm.svelte @@ -0,0 +1,171 @@ + + +
        + {#each variables as variable (variable.name)} + handleArgInput(variable.name, value)} + onKeydown={(e) => handleArgKeydown(e, variable.name)} + onBlur={() => handleArgBlur(variable.name)} + onFocus={() => handleArgFocus(variable.name)} + onSelectSuggestion={(value) => selectSuggestion(variable.name, value)} + /> + {/each} + + {#if isComplete} +
        +

        Resolved URI:

        + +

        {expandedUri}

        +
        + {/if} + +
        + + + +
        + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourcesBrowser/McpResourcesBrowser.svelte b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourcesBrowser/McpResourcesBrowser.svelte new file mode 100644 index 00000000..24538e8d --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourcesBrowser/McpResourcesBrowser.svelte @@ -0,0 +1,153 @@ + + +
        + (searchQuery = q)} + {searchQuery} + /> + +
        + {#if filteredResources.size === 0} + + {:else} + {#each [...filteredResources.entries()] as [serverName, serverRes] (serverName)} + toggleServer(serverName as string)} + onToggleFolder={toggleFolder} + {onSelect} + {onToggle} + {onTemplateSelect} + {searchQuery} + /> + {/each} + {/if} +
        +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourcesBrowser/McpResourcesBrowserEmptyState.svelte b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourcesBrowser/McpResourcesBrowserEmptyState.svelte new file mode 100644 index 00000000..4fb0c1e2 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourcesBrowser/McpResourcesBrowserEmptyState.svelte @@ -0,0 +1,15 @@ + + +
        + {#if isLoading} + Loading resources... + {:else} + No resources available + {/if} +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourcesBrowser/McpResourcesBrowserHeader.svelte b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourcesBrowser/McpResourcesBrowserHeader.svelte new file mode 100644 index 00000000..419654c1 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourcesBrowser/McpResourcesBrowserHeader.svelte @@ -0,0 +1,41 @@ + + +
        +
        + onSearch?.(value)} + /> + + +
        + +

        Available resources

        +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourcesBrowser/McpResourcesBrowserServerItem.svelte b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourcesBrowser/McpResourcesBrowserServerItem.svelte new file mode 100644 index 00000000..9acd101c --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourcesBrowser/McpResourcesBrowserServerItem.svelte @@ -0,0 +1,230 @@ + + +{#snippet renderTreeNode(node: ResourceTreeNode, depth: number, parentPath: string)} + {@const isFolder = !node.resource && node.children.size > 0} + {@const folderId = `${serverName}:${parentPath}/${node.name}`} + {@const isFolderExpanded = expandedFolders.has(folderId)} + + {#if isFolder} + {@const folderCount = countTreeResources(node)} + onToggleFolder(folderId)}> + + {#if isFolderExpanded} + + {:else} + + {/if} + + + + {node.name} + + ({folderCount}) + + + +
        + {#each sortTreeChildren( [...node.children.values()] ) as child (child.resource?.uri || `${serverName}:${parentPath}/${node.name}/${child.name}`)} + {@render renderTreeNode(child, depth + 1, `${parentPath}/${node.name}`)} + {/each} +
        +
        +
        + {:else if node.resource} + {@const resource = node.resource} + {@const ResourceIcon = getResourceIcon(resource.mimeType, resource.uri)} + {@const isSelected = isResourceSelected(resource)} + {@const resourceDisplayName = resource.title || getDisplayName(node.name)} + +
        + {#if onToggle} + + handleCheckboxChange(resource, checked === true)} + class="h-4 w-4" + /> + {/if} + + +
        + {/if} +{/snippet} + + + + {#if isExpanded} + + {:else} + + {/if} + + +
        + +
        + + + ({serverRes.resources.length} resource{serverRes.resources.length !== 1 + ? 's' + : ''}{#if hasTemplates}, {serverRes.templates.length} template{serverRes.templates + .length !== 1 + ? 's' + : ''}{/if}) + +
        + + {#if serverRes.loading} + + {/if} +
        + + +
        + {#if serverRes.error} +
        + Error: {serverRes.error} +
        + {:else if !hasContent} +
        No resources
        + {:else} + {#if hasResources} + {#each sortTreeChildren( [...resourceTree.children.values()] ) as child (child.resource?.uri || `${serverName}:${child.name}`)} + {@render renderTreeNode(child, 1, '')} + {/each} + {/if} + + {#if hasTemplates && onTemplateSelect} + {#if hasResources} +
        + {/if} + +
        + Templates +
        + + {#each templateInfos as template (template.uriTemplate)} + + {/each} + {/if} + {/if} +
        +
        +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourcesBrowser/mcp-resources-browser.ts b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourcesBrowser/mcp-resources-browser.ts new file mode 100644 index 00000000..804fa7fe --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpResourcesBrowser/mcp-resources-browser.ts @@ -0,0 +1,118 @@ +import type { MCPResource, MCPResourceInfo } from '$lib/types'; +import { parseResourcePath } from '$lib/utils'; + +export interface ResourceTreeNode { + name: string; + resource?: MCPResourceInfo; + children: Map; + isFiltered?: boolean; +} + +function resourceMatchesSearch(resource: MCPResource, query: string): boolean { + return ( + resource.title?.toLowerCase().includes(query) || resource.uri.toLowerCase().includes(query) + ); +} + +export function buildResourceTree( + resourceList: MCPResource[], + serverName: string, + searchQuery?: string +): ResourceTreeNode { + const root: ResourceTreeNode = { name: 'root', children: new Map() }; + + if (!searchQuery || !searchQuery.trim()) { + for (const resource of resourceList) { + const pathParts = parseResourcePath(resource.uri); + let current = root; + + for (let i = 0; i < pathParts.length - 1; i++) { + const part = pathParts[i]; + if (!current.children.has(part)) { + current.children.set(part, { name: part, children: new Map() }); + } + current = current.children.get(part)!; + } + + const fileName = pathParts[pathParts.length - 1] || resource.name; + current.children.set(resource.uri, { + name: fileName, + resource: { ...resource, serverName }, + children: new Map() + }); + } + + return root; + } + + const query = searchQuery.toLowerCase(); + + // Build tree with filtering + for (const resource of resourceList) { + if (!resourceMatchesSearch(resource, query)) continue; + + const pathParts = parseResourcePath(resource.uri); + let current = root; + + for (let i = 0; i < pathParts.length - 1; i++) { + const 
part = pathParts[i]; + if (!current.children.has(part)) { + current.children.set(part, { name: part, children: new Map(), isFiltered: true }); + } + current = current.children.get(part)!; + } + + const fileName = pathParts[pathParts.length - 1] || resource.name; + + current.children.set(resource.uri, { + name: fileName, + resource: { ...resource, serverName }, + children: new Map(), + isFiltered: true + }); + } + + function cleanupEmptyFolders(node: ResourceTreeNode): boolean { + if (node.resource) return true; + + const toDelete: string[] = []; + for (const [name, child] of node.children.entries()) { + if (!cleanupEmptyFolders(child)) { + toDelete.push(name); + } + } + + for (const name of toDelete) { + node.children.delete(name); + } + + return node.children.size > 0; + } + + cleanupEmptyFolders(root); + + return root; +} + +export function countTreeResources(node: ResourceTreeNode): number { + if (node.resource) return 1; + let count = 0; + + for (const child of node.children.values()) { + count += countTreeResources(child); + } + + return count; +} + +export function sortTreeChildren(children: ResourceTreeNode[]): ResourceTreeNode[] { + return children.sort((a, b) => { + const aIsFolder = !a.resource && a.children.size > 0; + const bIsFolder = !b.resource && b.children.size > 0; + + if (aIsFolder && !bIsFolder) return -1; + if (!aIsFolder && bIsFolder) return 1; + + return a.name.localeCompare(b.name); + }); +} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCard/McpServerCard.svelte b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCard/McpServerCard.svelte new file mode 100644 index 00000000..199cb145 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCard/McpServerCard.svelte @@ -0,0 +1,192 @@ + + + + {#if isEditing} + + {:else} + + + {#if isError && errorMessage} +

        {errorMessage}

        + {/if} + + {#if isConnected && serverInfo?.description} +

        + {serverInfo.description} +

        + {/if} + +
        + {#if showSkeleton} +
        +
        + + +
        +
        + + + +
        +
        + +
        +
        + + +
        +
        + {:else} + {#if isConnected && instructions} + + {/if} + + {#if tools.length > 0} + + {/if} + + {#if connectionLogs.length > 0} + + {/if} + {/if} +
        + +
        + {#if showSkeleton} + + {:else if protocolVersion} +
        + + Protocol version: {protocolVersion} + +
        + {/if} + + +
        + {/if} +
        + + (showDeleteDialog = open)} + onConfirm={onDelete} +/> diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCard/McpServerCardActions.svelte b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCard/McpServerCardActions.svelte new file mode 100644 index 00000000..6f137fa2 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCard/McpServerCardActions.svelte @@ -0,0 +1,40 @@ + + +
        + + + + + +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCard/McpServerCardDeleteDialog.svelte b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCard/McpServerCardDeleteDialog.svelte new file mode 100644 index 00000000..8f650148 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCard/McpServerCardDeleteDialog.svelte @@ -0,0 +1,36 @@ + + + + + + Delete Server + + + Are you sure you want to delete {displayName}? This action cannot be + undone. + + + + + Cancel + + + Delete + + + + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCard/McpServerCardEditForm.svelte b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCard/McpServerCardEditForm.svelte new file mode 100644 index 00000000..6727a900 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCard/McpServerCardEditForm.svelte @@ -0,0 +1,64 @@ + + +
        +

        Configure Server

        + + (editUrl = v)} + onHeadersChange={(v) => (editHeaders = v)} + onUseProxyChange={(v) => (editUseProxy = v)} + urlError={editUrl ? urlError : null} + id={serverId} + /> + +
        + + + +
        +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCard/McpServerCardHeader.svelte b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCard/McpServerCardHeader.svelte new file mode 100644 index 00000000..5544bcec --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCard/McpServerCardHeader.svelte @@ -0,0 +1,70 @@ + + +
        +
        +
        +
        + +
        + + {#if capabilities || transportType} +
        + {#if transportType} + {@const TransportIcon = MCP_TRANSPORT_ICONS[transportType]} + + {#if TransportIcon} + + {/if} + + {MCP_TRANSPORT_LABELS[transportType] || transportType} + + {/if} + + {#if capabilities} + + {/if} +
        + {/if} +
        + +
        + +
        +
        +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCard/McpServerCardToolsList.svelte b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCard/McpServerCardToolsList.svelte new file mode 100644 index 00000000..d0397c17 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCard/McpServerCardToolsList.svelte @@ -0,0 +1,47 @@ + + + + + {#if isExpanded} + + {:else} + + {/if} + + {toolsCount} tools available · Show details + + + +
        + {#each tools as tool (tool.name)} +
        + {tool.name} + + {#if tool.description} +

        {tool.description}

        + {/if} +
        + {/each} +
        +
        +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCardSkeleton.svelte b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCardSkeleton.svelte new file mode 100644 index 00000000..39a13728 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerCardSkeleton.svelte @@ -0,0 +1,34 @@ + + + +
        +
        + + + +
        + +
        + +
        + + + +
        + +
        + + +
        + + + +
        + + + +
        +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerForm.svelte b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerForm.svelte new file mode 100644 index 00000000..79738e30 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerForm.svelte @@ -0,0 +1,111 @@ + + +
        +
        + + + onUrlChange(e.currentTarget.value)} + class={urlError ? 'border-destructive' : ''} + /> + + {#if urlError} +

        {urlError}

        + {/if} + + {#if !isWebSocket && onUseProxyChange} + + {/if} +
        + + +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerIdentity.svelte b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerIdentity.svelte new file mode 100644 index 00000000..feafc5d8 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerIdentity.svelte @@ -0,0 +1,67 @@ + + + + {#if faviconUrl} + { + (e.currentTarget as HTMLImageElement).style.display = 'none'; + }} + /> + {/if} + + + + {#if showVersion && serverInfo?.version} + + + + {/if} + + {#if showWebsite && safeWebsiteUrl} + e.stopPropagation()} + > + + + {/if} + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerInfo.svelte b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerInfo.svelte new file mode 100644 index 00000000..aecae6e5 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/McpServerInfo.svelte @@ -0,0 +1,35 @@ + + +{#if instructions} + + + {#if isExpanded} + + {:else} + + {/if} + + Server instructions + + + +

        + {instructions} +

        +
        +
        +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/mcp/index.ts b/examples/server/webui_llamacpp/src/lib/components/app/mcp/index.ts new file mode 100644 index 00000000..3d30bb3b --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/mcp/index.ts @@ -0,0 +1,254 @@ +/** + * + * MCP (Model Context Protocol) + * + * Components for managing MCP server connections and displaying server status. + * MCP enables agentic workflows by connecting to external tool servers. + * + * The MCP system integrates with: + * - `mcpStore` for server CRUD operations and health checks + * - `conversationsStore` for per-conversation server enable/disable + * + */ + +/** + * **McpServersSettings** - MCP servers configuration section + * + * Settings section for configuring MCP server connections. + * Displays server cards with status, tools, and management actions. + * Used within the MCP tab of ChatSettings. + * + * **Architecture:** + * - Manages add server form state locally + * - Delegates server display to McpServerCard components + * - Integrates with mcpStore for server operations + * - Shows skeleton loading states during health checks + * + * **Features:** + * - Add new MCP servers by URL with validation + * - Server cards with connection status indicators + * - Health check status (connected/disconnected/error) + * - Tools list per server showing available capabilities + * - Enable/disable toggle per conversation + * - Edit/delete server actions + * - Skeleton loading states during connection + * - Empty state with helpful message + * + * @example + * ```svelte + * + * ``` + */ +export { default as McpServersSettings } from '../settings/SettingsMcpServers.svelte'; + +/** + * **McpActiveServersAvatars** - Active MCP servers indicator + * + * Compact avatar row showing favicons of active MCP servers. + * Displays up to 3 server icons with "+N" counter for additional servers. + * Clickable to open MCP settings dialog. 
+ * + * **Architecture:** + * - Filters servers by enabled status and health check + * - Fetches favicons from server URLs + * - Integrates with conversationsStore for per-chat server state + * + * **Features:** + * - Overlapping favicon avatars (max 3 visible) + * - "+N" counter for additional servers + * - Click handler for settings navigation + * - Disabled state support + * - Only shows healthy, enabled servers + * + * @example + * ```svelte + * showMcpSettings = true} + * /> + * ``` + */ +export { default as McpActiveServersAvatars } from './McpActiveServersAvatars.svelte'; + +/** + * **McpCapabilitiesBadges** - Server capabilities display + * + * Displays MCP server capabilities as colored badges. + * Shows which features the server supports (tools, resources, prompts, etc.). + * + * **Features:** + * - Tools badge (green) - server provides callable tools + * - Resources badge (blue) - server provides data resources + * - Prompts badge (purple) - server provides prompt templates + * - Logging badge (orange) - server supports logging + * - Completions badge (cyan) - server provides completions + * - Tasks badge (pink) - server supports task management + */ +export { default as McpCapabilitiesBadges } from './McpCapabilitiesBadges.svelte'; + +/** + * **McpConnectionLogs** - Connection log viewer + * + * Collapsible panel showing MCP server connection logs. + * Displays timestamped log entries with level-based styling. + * + * **Features:** + * - Collapsible log list with entry count + * - Connection time display in milliseconds + * - Log level icons and color coding + * - Scrollable log container with max height + * - Monospace font for log readability + */ +export { default as McpConnectionLogs } from './McpConnectionLogs.svelte'; + +/** + * **McpServerForm** - Server URL and headers input form + * + * Reusable form for entering MCP server connection details. + * Used in both add new server and edit server flows. 
+ * + * **Features:** + * - URL input with validation error display + * - Custom headers key-value pairs editor + * - Controlled component with change callbacks + * + * @example + * ```svelte + * serverUrl = v} + * onHeadersChange={(v) => serverHeaders = v} + * urlError={validationError} + * /> + * ``` + */ +export { default as McpServerForm } from './McpServerForm.svelte'; + +/** + * MCP protocol logo SVG component. Renders the official MCP icon + * with customizable size via class and style props. + */ +export { default as McpLogo } from './McpLogo.svelte'; + +/** + * + * SERVER CARD + * + * Components for displaying individual MCP server status and controls. + * McpServerCard is the main component, with sub-components for specific sections. + * + */ + +/** + * **McpServerCard** - Individual server display card + * + * Main component for displaying a single MCP server with all its details. + * Manages edit mode, delete confirmation, and health check actions. + * + * **Architecture:** + * - Composes header, tools list, logs, and actions sub-components + * - Manages local edit/delete state + * - Reads health state from mcpStore + * - Triggers health checks via mcpStore + * + * **Features:** + * - Server header with favicon, name, version, and toggle + * - Capabilities badges display + * - Tools list with descriptions + * - Connection logs viewer + * - Edit form for URL and headers + * - Delete confirmation dialog + * - Skeleton loading states + */ +export { default as McpServerCard } from './McpServerCard/McpServerCard.svelte'; + +/** Server card header with favicon, name, version badge, and enable toggle. */ +export { default as McpServerCardHeader } from './McpServerCard/McpServerCardHeader.svelte'; + +/** Action buttons row: edit, refresh, delete. */ +export { default as McpServerCardActions } from './McpServerCard/McpServerCardActions.svelte'; + +/** Collapsible tools list showing available server tools with descriptions. 
*/ +export { default as McpServerCardToolsList } from './McpServerCard/McpServerCardToolsList.svelte'; + +/** Inline edit form for server URL and custom headers. */ +export { default as McpServerCardEditForm } from './McpServerCard/McpServerCardEditForm.svelte'; + +/** Delete confirmation dialog with server name display. */ +export { default as McpServerCardDeleteDialog } from './McpServerCard/McpServerCardDeleteDialog.svelte'; + +/** Skeleton loading state for server card during health checks. */ +export { default as McpServerCardSkeleton } from './McpServerCardSkeleton.svelte'; + +/** + * **McpServerIdentity** - Server identity display (icon, name, version) + * + * Reusable headless component for displaying server name, favicon/icon, and version badge. + * Accepts all data via props with no store dependencies for predictable rendering. + * + * **Features:** + * - Server favicon/icon with fallback + * - Truncated display name with max-width + * - Optional version badge (v1.2.3) + * - Optional external link to server website + * + * @example + * ```svelte + * + * ``` + */ +export { default as McpServerIdentity } from './McpServerIdentity.svelte'; + +/** + * **McpServerInfo** - Server instructions display + * + * Collapsible panel showing server-provided instructions. + * Displays guidance text from the MCP server for users. + */ +export { default as McpServerInfo } from './McpServerInfo.svelte'; + +/** + * **McpResourcesBrowser** - MCP resources tree browser + * + * Tree view component showing resources grouped by server. + * Supports resource selection and quick attach actions. 
+ * + * **Features:** + * - Collapsible server sections + * - Resource icons based on MIME type + * - Resource selection highlighting + * - Quick attach button per resource + * - Refresh all resources action + * - Loading states per server + */ +export { default as McpResourcesBrowser } from './McpResourcesBrowser/McpResourcesBrowser.svelte'; + +/** + * **McpResourcePreview** - MCP resource content preview + * + * Preview panel showing resource content with metadata. + * Supports text and binary content display. + * + * **Features:** + * - Text content display with monospace formatting + * - Image preview for image MIME types + * - Copy to clipboard action + * - Download content action + * - Resource metadata display (MIME type, priority, server) + * - Loading and error states + */ +export { default as McpResourcePreview } from './McpResourcePreview.svelte'; + +/** + * **McpResourceTemplateForm** - MCP resource template variable form + * + * Form for filling in resource template variables with auto-completion + * via the Completions API. Shows live URI preview as variables are filled. + * + * **Features:** + * - Template variable input fields + * - Completions API integration for variable auto-complete + * - Live URI preview as variables are filled + * - Read resolved resource action + */ +export { default as McpResourceTemplateForm } from './McpResourceTemplateForm.svelte'; diff --git a/examples/server/webui_llamacpp/src/lib/components/app/misc/ActionButton.svelte b/examples/server/webui_llamacpp/src/lib/components/app/misc/ActionButton.svelte deleted file mode 100644 index 11c4679a..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/misc/ActionButton.svelte +++ /dev/null @@ -1,48 +0,0 @@ - - - - - - - - -

        {tooltip}

        -
        -
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/misc/CodeBlockActions.svelte b/examples/server/webui_llamacpp/src/lib/components/app/misc/CodeBlockActions.svelte new file mode 100644 index 00000000..fa12d1c6 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/misc/CodeBlockActions.svelte @@ -0,0 +1,33 @@ + + +
        + + + {#if showPreview} + onPreview!(code, language)} + /> + {/if} +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/misc/ConversationSelection.svelte b/examples/server/webui_llamacpp/src/lib/components/app/misc/ConversationSelection.svelte index e2095e08..db14fd63 100644 --- a/examples/server/webui_llamacpp/src/lib/components/app/misc/ConversationSelection.svelte +++ b/examples/server/webui_llamacpp/src/lib/components/app/misc/ConversationSelection.svelte @@ -1,8 +1,7 @@
        -
        - - - - - {#if searchQuery} - - {/if} -
        +
        @@ -165,15 +154,15 @@ {#each filteredConversations as conv (conv.id)} toggleConversation(conv.id, e.shiftKey)} + onclick={(event) => toggleConversation(conv.id, event.shiftKey)} > { - e.preventDefault(); - e.stopPropagation(); - toggleConversation(conv.id, e.shiftKey); + onclick={(event) => { + event.preventDefault(); + event.stopPropagation(); + toggleConversation(conv.id, event.shiftKey); }} /> diff --git a/examples/server/webui_llamacpp/src/lib/components/app/misc/HorizontalScrollCarousel.svelte b/examples/server/webui_llamacpp/src/lib/components/app/misc/HorizontalScrollCarousel.svelte new file mode 100644 index 00000000..06d0e3a0 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/misc/HorizontalScrollCarousel.svelte @@ -0,0 +1,94 @@ + + +
        + + +
        + {@render children?.()} +
        + + +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/misc/KeyboardShortcutInfo.svelte b/examples/server/webui_llamacpp/src/lib/components/app/misc/KeyboardShortcutInfo.svelte index 5b7522fe..da55abda 100644 --- a/examples/server/webui_llamacpp/src/lib/components/app/misc/KeyboardShortcutInfo.svelte +++ b/examples/server/webui_llamacpp/src/lib/components/app/misc/KeyboardShortcutInfo.svelte @@ -11,7 +11,9 @@ let baseClasses = 'px-1 pointer-events-none inline-flex select-none items-center gap-0.5 font-sans text-md font-medium opacity-0 transition-opacity -my-1'; - let variantClasses = variant === 'destructive' ? 'text-destructive' : 'text-muted-foreground'; + let variantClasses = $derived( + variant === 'destructive' ? 'text-destructive' : 'text-muted-foreground' + ); diff --git a/examples/server/webui_llamacpp/src/lib/components/app/misc/MarkdownContent.svelte b/examples/server/webui_llamacpp/src/lib/components/app/misc/MarkdownContent.svelte deleted file mode 100644 index 7e83d30f..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/misc/MarkdownContent.svelte +++ /dev/null @@ -1,769 +0,0 @@ - - -
        - - {@html processedHtml} -
        - - - - diff --git a/examples/server/webui_llamacpp/src/lib/components/app/misc/RemoveButton.svelte b/examples/server/webui_llamacpp/src/lib/components/app/misc/RemoveButton.svelte deleted file mode 100644 index 17368551..00000000 --- a/examples/server/webui_llamacpp/src/lib/components/app/misc/RemoveButton.svelte +++ /dev/null @@ -1,26 +0,0 @@ - - - diff --git a/examples/server/webui_llamacpp/src/lib/components/app/misc/TruncatedText.svelte b/examples/server/webui_llamacpp/src/lib/components/app/misc/TruncatedText.svelte new file mode 100644 index 00000000..a6b7cb48 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/misc/TruncatedText.svelte @@ -0,0 +1,49 @@ + + +{#if isTruncated && showTooltip} + + + + {text} + + + + +

        {text}

        +
        +
        +{:else} + + {text} + +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/misc/index.ts b/examples/server/webui_llamacpp/src/lib/components/app/misc/index.ts new file mode 100644 index 00000000..64b76fb7 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/misc/index.ts @@ -0,0 +1,53 @@ +/** + * + * MISC + * + * Miscellaneous utility components. + * + */ + +/** + * **ConversationSelection** - Multi-select conversation picker + * + * List of conversations with checkboxes for multi-selection. + * Used in import/export dialogs for selecting conversations. + * + * **Features:** + * - Search/filter conversations by name + * - Select all / deselect all controls + * - Shift-click for range selection + * - Message count display per conversation + * - Mode-specific UI (export vs import) + */ +export { default as ConversationSelection } from './ConversationSelection.svelte'; + +/** + * Horizontal scrollable carousel with navigation arrows. + * Used for displaying items in a horizontally scrollable container + * with left/right navigation buttons that appear on hover. + */ +export { default as HorizontalScrollCarousel } from './HorizontalScrollCarousel.svelte'; + +/** + * **TruncatedText** - Text with ellipsis and tooltip + * + * Displays text with automatic truncation and full content in tooltip. + * Useful for long names or paths in constrained spaces. + */ +export { default as TruncatedText } from './TruncatedText.svelte'; + +/** + * **KeyboardShortcutInfo** - Keyboard shortcut hint display + * + * Displays keyboard shortcut hints (e.g., "⌘ + Enter"). + * Supports special keys like shift, cmd, and custom text. + */ +export { default as KeyboardShortcutInfo } from './KeyboardShortcutInfo.svelte'; + +/** + * **CodeBlockActions** - Actions bar for code blocks (copy, preview) + * + * Displays copy-to-clipboard and preview buttons for code blocks. + * Preview button is shown only for HTML code blocks. 
+ */ +export { default as CodeBlockActions } from './CodeBlockActions.svelte'; diff --git a/examples/server/webui_llamacpp/src/lib/components/app/models/ModelBadge.svelte b/examples/server/webui_llamacpp/src/lib/components/app/models/ModelBadge.svelte new file mode 100644 index 00000000..cc1d1848 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/models/ModelBadge.svelte @@ -0,0 +1,60 @@ + + +{#snippet badgeContent()} + + {#snippet icon()} + + {/snippet} + + {#if model} + + {/if} + + {#if showCopyIcon} + + {/if} + +{/snippet} + +{#if shouldShow} + {#if showTooltip} + + + {@render badgeContent()} + + + + {onclick ? 'Click for model details' : model} + + + {:else} + {@render badgeContent()} + {/if} +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/models/ModelId.svelte b/examples/server/webui_llamacpp/src/lib/components/app/models/ModelId.svelte new file mode 100644 index 00000000..f566b55e --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/models/ModelId.svelte @@ -0,0 +1,82 @@ + + +{#if resolvedShowRaw} + +{:else} + + + {#if !hideOrgName && parsed.orgName}{parsed.orgName}/{/if}{displayName} + + + {#if parsed.params} + + {parsed.params}{parsed.activatedParams ? `-${parsed.activatedParams}` : ''} + + {/if} + + {#if parsed.quantization && !resolvedHideQuantization} + + {parsed.quantization} + + {/if} + + {#if primaryAlias} + {#if primaryAlias !== parsed.modelName} + {parsed.modelName ?? 
modelId} + {/if} + {:else if uniqueAliases.length > 1} + {#each uniqueAliases as alias (alias)} + {alias} + {/each} + {/if} + + {#if uniqueTags.length > 0 && !resolvedHideTags} + {#each uniqueTags as tag (tag)} + {tag} + {/each} + {/if} + +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/models/ModelsSelectorDropdown.svelte b/examples/server/webui_llamacpp/src/lib/components/app/models/ModelsSelectorDropdown.svelte new file mode 100644 index 00000000..0f1fba88 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/models/ModelsSelectorDropdown.svelte @@ -0,0 +1,290 @@ + + +
        + {#if ms.loading && ms.options.length === 0 && ms.isRouter} +
        + + + Loading models… +
        + {:else if ms.options.length === 0 && ms.isRouter} + {#if currentModel} + + + + {:else} +

        No models available.

        + {/if} + {:else} + {@const selectedOption = ms.getDisplayOption()} + + {#if ms.isRouter} + + + + + {#if selectedOption} + + + + {#snippet child({ props })} + + {/snippet} + + + +

        {selectedOption.model}

        +
        +
        + {:else} + Select model + {/if} + + {#if ms.updating || ms.isLoadingModel} + + {:else} + + {/if} +
        + + + ms.setSearchTerm(v)} + placeholder="Search models..." + onSearchKeyDown={handleSearchKeyDown} + emptyMessage="No models found." + isEmpty={ms.filteredOptions.length === 0 && ms.isCurrentModelInCache} + > +
        + {#if !ms.isCurrentModelInCache && currentModel} + + + {/if} + + {#if ms.filteredOptions.length === 0} +

        No models found.

        + {/if} + + {#snippet modelOption(item: ModelItem, hideOrgName: boolean)} + {@const { option, flatIndex } = item} + {@const isSelected = currentModel === option.model || ms.activeId === option.id} + {@const isHighlighted = flatIndex === highlightedIndex} + {@const isFav = ms.isFavorite(option.model)} + + (highlightedIndex = flatIndex)} + onKeyDown={(event) => { + if (event.key === KeyboardKey.ENTER || event.key === KeyboardKey.SPACE) { + event.preventDefault(); + ms.handleSelect(option.id); + } + }} + /> + {/snippet} + + +
        +
        +
        +
        + {:else} + + {/if} + {/if} +
        + +{#if ms.showModelDialog} + ms.setShowModelDialog(v)} + modelId={ms.infoModelId} + /> +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/models/ModelsSelectorList.svelte b/examples/server/webui_llamacpp/src/lib/components/app/models/ModelsSelectorList.svelte new file mode 100644 index 00000000..61a4cf0f --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/models/ModelsSelectorList.svelte @@ -0,0 +1,72 @@ + + +{#snippet defaultOption(item: ModelItem, hideOrgName: boolean)} + {@const { option } = item} + {@const isSelected = currentModel === option.model || activeId === option.id} + {@const isFav = modelsStore.favoriteModelIds.has(option.model)} + + {}} + onKeyDown={() => {}} + /> +{/snippet} + +{#if groups.loaded.length > 0} +

        Loaded models

        + {#each groups.loaded as item (`loaded-${item.option.id}`)} + {@render render(item, false)} + {/each} +{/if} + +{#if groups.favorites.length > 0} +

        Favorite models

        + {#each groups.favorites as item (`fav-${item.option.id}`)} + {@render render(item, true)} + {/each} +{/if} + +{#if groups.available.length > 0} +

        Available models

        + {#each groups.available as group (group.orgName)} + {#if group.orgName} +

        {group.orgName}

        + {/if} + {#each group.items as item (item.option.id)} + {@render render(item, true)} + {/each} + {/each} +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/models/ModelsSelectorOption.svelte b/examples/server/webui_llamacpp/src/lib/components/app/models/ModelsSelectorOption.svelte new file mode 100644 index 00000000..d103d4b6 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/models/ModelsSelectorOption.svelte @@ -0,0 +1,181 @@ + + +
        onSelect(option.id)} + onmouseenter={onMouseEnter} + onkeydown={onKeyDown} +> + + +
        + + +
        e.stopPropagation()} + > + {#if isFav} + modelsStore.toggleFavorite(option.model)} + /> + {:else} + modelsStore.toggleFavorite(option.model)} + /> + {/if} + + + {#if isLoaded && onInfoClick} + onInfoClick(option.model)} + /> + {/if} +
        + + {#if isLoading} + + {:else if isFailed} +
        + + + +
        + {:else if isSleeping} +
        + + + +
        + {:else if isLoaded} +
        + + + +
        + {:else} +
        + + + +
        + {/if} +
        +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/models/ModelsSelectorSheet.svelte b/examples/server/webui_llamacpp/src/lib/components/app/models/ModelsSelectorSheet.svelte new file mode 100644 index 00000000..2ddbf240 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/models/ModelsSelectorSheet.svelte @@ -0,0 +1,189 @@ + + +
        + {#if ms.loading && ms.options.length === 0 && ms.isRouter} +
        + + Loading models… +
        + {:else if ms.options.length === 0 && ms.isRouter} +

        No models available.

        + {:else} + {@const selectedOption = ms.getDisplayOption()} + + {#if ms.isRouter} + + + + + + Select Model + + + Choose a model to use for the conversation + + + +
        +
        + ms.setSearchTerm(v)} + /> +
        + +
        + {#if !ms.isCurrentModelInCache && currentModel} + +
        + {/if} + + {#if ms.filteredOptions.length === 0} +

        No models found.

        + {/if} + + +
        +
        +
        +
        + {:else} + + {/if} + {/if} +
        + +{#if ms.showModelDialog} + ms.setShowModelDialog(v)} + modelId={ms.infoModelId} + /> +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/models/index.ts b/examples/server/webui_llamacpp/src/lib/components/app/models/index.ts new file mode 100644 index 00000000..3ac6ecb6 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/models/index.ts @@ -0,0 +1,111 @@ +/** + * + * MODELS + * + * Components for model selection and display. Supports two server modes: + * - **Single model mode**: Server runs with one model, selector shows model info + * - **Router mode**: Server runs with multiple models, selector enables switching + * + * Integrates with modelsStore for model data and serverStore for mode detection. + * + */ + +/** + * **ModelsSelectorDropdown** - Model selection dropdown (desktop) + * + * Dropdown for selecting AI models with status indicators, + * search, and model information display. Adapts UI based on server mode. + * + * **Architecture:** + * - Uses DropdownMenuSearchable for model list + * - Integrates with modelsStore for model options and selection + * - Detects router vs single mode from serverStore + * - Opens DialogModelInformation for model details + * + * **Features:** + * - Searchable model list with keyboard navigation + * - Model status indicators (loading/ready/error/updating) + * - Model capabilities badges (vision, tools, etc.) 
+ * - Current/active model highlighting + * - Model information dialog on info button click + * - Router mode: shows all available models with status + * - Single mode: shows current model name only + * - Loading/updating skeleton states + * - Global selection support for form integration + * + * @example + * ```svelte + * updateModel(id)} + * useGlobalSelection + * /> + * ``` + */ +export { default as ModelsSelectorDropdown } from './ModelsSelectorDropdown.svelte'; + +/** + * **ModelsSelectorList** - Grouped model options list + * + * Renders grouped model options (loaded, favorites, available) with section + * headers and org subgroups. Shared between ModelsSelectorDropdown and ModelsSelectorSheet + * to avoid template duplication. + * + * Accepts an optional `renderOption` snippet to customize how each option is + * rendered (e.g., to add keyboard navigation or highlighting). + */ +export { default as ModelsSelectorList } from './ModelsSelectorList.svelte'; + +/** + * **ModelsSelectorOption** - Single model option row + * + * Renders a single model option with selection state, favorite toggle, + * load/unload actions, status indicators, and an info button. + * Used inside ModelsSelectorList or directly in custom render snippets. + */ +export { default as ModelsSelectorOption } from './ModelsSelectorOption.svelte'; + +/** + * **ModelsSelectorSheet** - Mobile model selection sheet + * + * Bottom sheet variant of ModelsSelectorDropdown optimized for touch interaction + * on mobile devices. Same functionality as ModelsSelectorDropdown but uses Sheet UI + * instead of DropdownMenu. + */ +export { default as ModelsSelectorSheet } from './ModelsSelectorSheet.svelte'; + +/** * **ModelBadge** - Model name display badge + * + * Compact badge showing current model name with package icon. + * Only visible in single model mode. Supports tooltip and copy functionality. 
+ * + * **Architecture:** + * - Reads model name from modelsStore or prop + * - Checks server mode from serverStore + * - Uses BadgeInfo for consistent styling + * + * **Features:** + * - Optional copy to clipboard button + * - Optional tooltip with model details + * - Click handler for model info dialog + * - Only renders in model mode (not router) + * + * @example + * ```svelte + * showModelInfo = true} + * showTooltip + * showCopyIcon + * /> + * ``` + */ +export { default as ModelBadge } from './ModelBadge.svelte'; + +/** + * **ModelId** - Parsed model identifier display + * + * Displays a model ID with optional org name, parameter badges, quantization, + * aliases, and tags. Supports raw mode to show the unprocessed model name. + * Respects the user's `showRawModelNames` setting. + */ +export { default as ModelId } from './ModelId.svelte'; diff --git a/examples/server/webui_llamacpp/src/lib/components/app/models/utils.ts b/examples/server/webui_llamacpp/src/lib/components/app/models/utils.ts new file mode 100644 index 00000000..ae1f511e --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/models/utils.ts @@ -0,0 +1,75 @@ +import { SvelteMap } from 'svelte/reactivity'; +import type { ModelOption } from '$lib/types/models'; + +export interface ModelItem { + option: ModelOption; + flatIndex: number; +} + +export interface OrgGroup { + orgName: string | null; + items: ModelItem[]; +} + +export interface GroupedModelOptions { + loaded: ModelItem[]; + favorites: ModelItem[]; + available: OrgGroup[]; +} + +export function filterModelOptions(options: ModelOption[], searchTerm: string): ModelOption[] { + const term = searchTerm.trim().toLowerCase(); + if (!term) return options; + + return options.filter( + (option) => + option.model.toLowerCase().includes(term) || + option.name?.toLowerCase().includes(term) || + option.aliases?.some((alias: string) => alias.toLowerCase().includes(term)) || + option.tags?.some((tag: string) => 
tag.toLowerCase().includes(term)) + ); +} + +export function groupModelOptions( + filteredOptions: ModelOption[], + favoriteIds: Set<string>, + isModelLoaded: (model: string) => boolean +): GroupedModelOptions { + // Loaded models + const loaded: ModelItem[] = []; + for (let i = 0; i < filteredOptions.length; i++) { + if (isModelLoaded(filteredOptions[i].model)) { + loaded.push({ option: filteredOptions[i], flatIndex: i }); + } + } + + // Favorites (excluding loaded) + const loadedModelIds = new Set(loaded.map((item) => item.option.model)); + const favorites: ModelItem[] = []; + for (let i = 0; i < filteredOptions.length; i++) { + if ( + favoriteIds.has(filteredOptions[i].model) && + !loadedModelIds.has(filteredOptions[i].model) + ) { + favorites.push({ option: filteredOptions[i], flatIndex: i }); + } + } + + // Available models grouped by org (excluding loaded and favorites) + const available: OrgGroup[] = []; + const orgGroups = new SvelteMap<string, ModelItem[]>(); + for (let i = 0; i < filteredOptions.length; i++) { + const option = filteredOptions[i]; + if (loadedModelIds.has(option.model) || favoriteIds.has(option.model)) continue; + + const key = option.parsedId?.orgName ?? 
''; + if (!orgGroups.has(key)) orgGroups.set(key, []); + orgGroups.get(key)!.push({ option, flatIndex: i }); + } + + for (const [orgName, items] of orgGroups) { + available.push({ orgName: orgName || null, items }); + } + + return { loaded, favorites, available }; +} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/navigation/DesktopIconStrip.svelte b/examples/server/webui_llamacpp/src/lib/components/app/navigation/DesktopIconStrip.svelte new file mode 100644 index 00000000..e92b9528 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/navigation/DesktopIconStrip.svelte @@ -0,0 +1,84 @@ + + + + + + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/misc/ActionDropdown.svelte b/examples/server/webui_llamacpp/src/lib/components/app/navigation/DropdownMenuActions.svelte similarity index 95% rename from examples/server/webui_llamacpp/src/lib/components/app/misc/ActionDropdown.svelte rename to examples/server/webui_llamacpp/src/lib/components/app/navigation/DropdownMenuActions.svelte index da29e258..83d856d1 100644 --- a/examples/server/webui_llamacpp/src/lib/components/app/misc/ActionDropdown.svelte +++ b/examples/server/webui_llamacpp/src/lib/components/app/navigation/DropdownMenuActions.svelte @@ -2,7 +2,6 @@ import * as DropdownMenu from '$lib/components/ui/dropdown-menu'; import * as Tooltip from '$lib/components/ui/tooltip'; import { KeyboardShortcutInfo } from '$lib/components/app'; - import { TOOLTIP_DELAY_DURATION } from '$lib/constants/tooltip-config'; import type { Component } from 'svelte'; interface ActionItem { @@ -40,7 +39,7 @@ onclick={(e) => e.stopPropagation()} > {#if triggerTooltip} - + {@render iconComponent(triggerIcon, 'h-3 w-3')} {triggerTooltip} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/navigation/DropdownMenuSearchable.svelte b/examples/server/webui_llamacpp/src/lib/components/app/navigation/DropdownMenuSearchable.svelte new file mode 100644 index 00000000..3bd68d3b 
--- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/navigation/DropdownMenuSearchable.svelte @@ -0,0 +1,50 @@ + + +
        + +
        + +
        + {@render children()} + + {#if isEmpty} +
        {emptyMessage}
        + {/if} +
        + +{#if footer} + + + {@render footer()} +{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/navigation/SidebarNavigation/SidebarNavigation.svelte b/examples/server/webui_llamacpp/src/lib/components/app/navigation/SidebarNavigation/SidebarNavigation.svelte new file mode 100644 index 00000000..ddaf4d5b --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/navigation/SidebarNavigation/SidebarNavigation.svelte @@ -0,0 +1,299 @@ + + +
        + + +
        + +

        + {APP_NAME} +

        +
        + + +
        + + +
        + + + {#if (filteredConversations.length > 0 && isSearchModeActive) || !isSearchModeActive} + + {isSearchModeActive ? 'Search results' : 'Recent conversations'} + + {/if} + + + + {#each conversationTree as { conversation, depth } (conversation.id)} + + + + {/each} + + {#if conversationTree.length === 0} +
        +

        + {searchQuery.length > 0 + ? 'No results found' + : isSearchModeActive + ? 'Start typing to see results' + : 'No conversations yet'} +

        +
        + {/if} +
        +
        +
        +
        +
        + + { + showDeleteDialog = false; + selectedConversation = null; + }} +> + {#if selectedConversationHasDescendants} +
        + + + +
        + {/if} +
        + + { + showEditDialog = false; + selectedConversation = null; + }} + onKeydown={(event) => { + if (event.key === 'Enter') { + event.preventDefault(); + event.stopImmediatePropagation(); + handleConfirmEdit(); + } + }} +> + + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/navigation/SidebarNavigation/SidebarNavigationActions.svelte b/examples/server/webui_llamacpp/src/lib/components/app/navigation/SidebarNavigation/SidebarNavigationActions.svelte new file mode 100644 index 00000000..f0d63970 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/navigation/SidebarNavigation/SidebarNavigationActions.svelte @@ -0,0 +1,96 @@ + + +{#snippet itemIcon(IconComponent: Component)} + +{/snippet} + +
        + {#if isSearchModeActive} + e.key === 'Escape' && handleSearchModeDeactivate()} + placeholder="Search conversations..." + {isCancelAlwaysVisible} + /> + {:else} + {#each SIDEBAR_ACTIONS_ITEMS as item (item.route)} + {#if !item.route} + + {:else} + + {/if} + {/each} + {/if} +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSidebar/ChatSidebarConversationItem.svelte b/examples/server/webui_llamacpp/src/lib/components/app/navigation/SidebarNavigation/SidebarNavigationConversationItem.svelte similarity index 50% rename from examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSidebar/ChatSidebarConversationItem.svelte rename to examples/server/webui_llamacpp/src/lib/components/app/navigation/SidebarNavigation/SidebarNavigationConversationItem.svelte index 51692c8c..dad8d954 100644 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSidebar/ChatSidebarConversationItem.svelte +++ b/examples/server/webui_llamacpp/src/lib/components/app/navigation/SidebarNavigation/SidebarNavigationConversationItem.svelte @@ -1,31 +1,46 @@ + + diff --git a/examples/server/webui_llamacpp/src/lib/components/app/navigation/index.ts b/examples/server/webui_llamacpp/src/lib/components/app/navigation/index.ts new file mode 100644 index 00000000..d4ca9145 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/navigation/index.ts @@ -0,0 +1,138 @@ +/** + * + * NAVIGATION & MENUS + * + * Components for dropdown menus and action selection. + * + */ + +/** + * **DropdownMenuSearchable** - Searchable content for dropdown menus + * + * Renders a search input with filtered content area, empty state, and optional footer. + * Designed to be injected into any dropdown container (DropdownMenu.Content, + * DropdownMenu.SubContent, etc.) without providing its own Root. + * + * **Features:** + * - Search/filter input + * - Keyboard navigation support + * - Custom content and footer via snippets + * - Empty state message + * + * @example + * ```svelte + * + * ... 
+ * + * + * {#each items as item}{/each} + * + * + * + * ``` + */ +export { default as DropdownMenuSearchable } from './DropdownMenuSearchable.svelte'; + +/** + * **DropdownMenuActions** - Multi-action dropdown menu + * + * Dropdown menu for multiple action options with icons and shortcuts. + * Supports destructive variants and keyboard shortcut hints. + * + * **Features:** + * - Configurable trigger icon with tooltip + * - Action items with icons and labels + * - Destructive variant styling + * - Keyboard shortcut display + * - Separator support between groups + * + * @example + * ```svelte + * + * ``` + */ +export { default as DropdownMenuActions } from './DropdownMenuActions.svelte'; + +/** + * **DesktopIconStrip** - Fixed icon strip for desktop sidebar + * + * Vertical icon strip shown on desktop when the sidebar is collapsed. + * Contains navigation shortcuts for new chat, search, MCP, import/export, and settings. + */ +export { default as DesktopIconStrip } from './DesktopIconStrip.svelte'; + +/** + * **SidebarNavigation** - Sidebar with actions menu and conversation list + * + * Collapsible sidebar displaying conversation history with search and + * management actions. Integrates with ShadCN sidebar component for + * consistent styling and mobile responsiveness. 
+ * + * **Architecture:** + * - Uses ShadCN Sidebar.* components for structure + * - Fetches conversations from conversationsStore + * - Manages search state and filtered results locally + * - Handles conversation CRUD operations via conversationsStore + * + * **Navigation:** + * - Click conversation to navigate to `/chat/[id]` + * - New chat button navigates to `/` (root) + * - Active conversation highlighted based on route params + * + * **Conversation Management:** + * - Right-click or menu button for context menu + * - Rename: Opens inline edit dialog + * - Delete: Shows confirmation with conversation preview + * - Delete All: Removes all conversations with confirmation + * + * **Features:** + * - Search/filter conversations by title + * - Conversation list with message previews (first message truncated) + * - Active conversation highlighting + * - Mobile-responsive collapse/expand via ShadCN sidebar + * - New chat button in header + * - Settings button opens DialogChatSettings + * + * **Exported API:** + * - `activateSearchMode()` - Focus search input programmatically + * - `editActiveConversation()` - Open rename dialog for current conversation + * + * @example + * ```svelte + * + * ``` + */ +export { default as SidebarNavigation } from './SidebarNavigation/SidebarNavigation.svelte'; + +/** + * Action buttons for sidebar header. Contains new chat button, settings button, + * and delete all conversations button. Manages dialog states for settings and + * delete confirmation. + */ +export { default as SidebarNavigationActions } from './SidebarNavigation/SidebarNavigationActions.svelte'; + +/** + * Single conversation item in sidebar. Displays conversation title (truncated), + * last message preview, and timestamp. Shows context menu on right-click with + * rename and delete options. Highlights when active (matches current route). + * Handles click to navigate and keyboard accessibility. 
+ */ +export { default as SidebarNavigationConversationItem } from './SidebarNavigation/SidebarNavigationConversationItem.svelte'; + +/** + * Search input for filtering conversations in sidebar. Filters conversation + * list by title as user types. Shows clear button when query is not empty. + * Integrated into sidebar header with proper styling. + */ +export { default as SidebarNavigationSearch } from './SidebarNavigation/SidebarNavigationSearch.svelte'; diff --git a/examples/server/webui_llamacpp/src/lib/components/app/server/ServerErrorSplash.svelte b/examples/server/webui_llamacpp/src/lib/components/app/server/ServerErrorSplash.svelte index af142e32..4da0d1dd 100644 --- a/examples/server/webui_llamacpp/src/lib/components/app/server/ServerErrorSplash.svelte +++ b/examples/server/webui_llamacpp/src/lib/components/app/server/ServerErrorSplash.svelte @@ -1,12 +1,16 @@ - -{#if props} -
        - {#if model} - - - - {model} - - {/if} - -
        - {#if props.default_generation_settings.n_ctx} - - ctx: {props.default_generation_settings.n_ctx.toLocaleString()} - - {/if} - - {#if modalities.length > 0} - {#each modalities as modality (modality)} - - {#if modality === 'vision'} - - {:else if modality === 'audio'} - - {/if} - - {modality} - - {/each} - {/if} -
        -
        -{/if} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/server/ServerLoadingSplash.svelte b/examples/server/webui_llamacpp/src/lib/components/app/server/ServerLoadingSplash.svelte index 505325d6..95fa61e9 100644 --- a/examples/server/webui_llamacpp/src/lib/components/app/server/ServerLoadingSplash.svelte +++ b/examples/server/webui_llamacpp/src/lib/components/app/server/ServerLoadingSplash.svelte @@ -8,8 +8,7 @@ message?: string; } - let { class: className = '', message = 'Initializing connection to llama.cpp server...' }: Props = - $props(); + let { class: className = '', message = 'Initializing connection to server...' }: Props = $props();
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/server/ServerStatus.svelte b/examples/server/webui_llamacpp/src/lib/components/app/server/ServerStatus.svelte index f04c954d..86a962de 100644 --- a/examples/server/webui_llamacpp/src/lib/components/app/server/ServerStatus.svelte +++ b/examples/server/webui_llamacpp/src/lib/components/app/server/ServerStatus.svelte @@ -2,7 +2,8 @@ import { AlertTriangle, Server } from '@lucide/svelte'; import { Badge } from '$lib/components/ui/badge'; import { Button } from '$lib/components/ui/button'; - import { serverProps, serverLoading, serverError, modelName } from '$lib/stores/server.svelte'; + import { serverProps, serverLoading, serverError } from '$lib/stores/server.svelte'; + import { singleModelName } from '$lib/stores/models.svelte'; interface Props { class?: string; @@ -13,7 +14,7 @@ let error = $derived(serverError()); let loading = $derived(serverLoading()); - let model = $derived(modelName()); + let model = $derived(singleModelName()); let serverData = $derived(serverProps()); function getStatusColor() { @@ -47,7 +48,7 @@ {model || 'Unknown Model'} - {#if serverData.default_generation_settings.n_ctx} + {#if serverData?.default_generation_settings?.n_ctx} ctx: {serverData.default_generation_settings.n_ctx.toLocaleString()} diff --git a/examples/server/webui_llamacpp/src/lib/components/app/server/index.ts b/examples/server/webui_llamacpp/src/lib/components/app/server/index.ts new file mode 100644 index 00000000..39ac5b48 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/server/index.ts @@ -0,0 +1,80 @@ +/** + * + * SERVER + * + * Components for displaying server connection state and handling + * connection errors. Integrates with serverStore for state management. + * + */ + +/** + * **ServerStatus** - Server connection status indicator + * + * Compact status display showing connection state, model name, + * and context size. Used in headers and loading screens. 
+ * + * **Architecture:** + * - Reads state from serverStore (props, loading, error) + * - Displays model name from modelsStore + * + * **Features:** + * - Status dot: green (connected), yellow (connecting), red (error), gray (unknown) + * - Status text label + * - Model name badge with icon + * - Context size badge + * - Optional error action button + * + * @example + * ```svelte + * + * ``` + */ +export { default as ServerStatus } from './ServerStatus.svelte'; + +/** + * **ServerErrorSplash** - Full-screen connection error display + * + * Blocking error screen shown when server connection fails. + * Provides retry options and API key input for authentication errors. + * + * **Architecture:** + * - Detects access denied errors for API key flow + * - Validates API key against server before saving + * - Integrates with settingsStore for API key persistence + * + * **Features:** + * - Error message display with icon + * - Retry connection button with loading state + * - API key input for authentication errors + * - API key validation with success/error feedback + * - Troubleshooting section with server start commands + * - Animated transitions for UI elements + * + * @example + * ```svelte + * + * ``` + */ +export { default as ServerErrorSplash } from './ServerErrorSplash.svelte'; + +/** + * **ServerLoadingSplash** - Full-screen loading display + * + * Shown during initial server connection. Displays loading animation + * with ServerStatus component for real-time connection state. 
+ * + * **Features:** + * - Animated server icon + * - Customizable loading message + * - Embedded ServerStatus for live updates + * + * @example + * ```svelte + * + * ``` + */ +export { default as ServerLoadingSplash } from './ServerLoadingSplash.svelte'; diff --git a/examples/server/webui_llamacpp/src/lib/components/app/settings/SettingsChat/SettingsChat.svelte b/examples/server/webui_llamacpp/src/lib/components/app/settings/SettingsChat/SettingsChat.svelte new file mode 100644 index 00000000..69a120b7 --- /dev/null +++ b/examples/server/webui_llamacpp/src/lib/components/app/settings/SettingsChat/SettingsChat.svelte @@ -0,0 +1,179 @@ + + +
        +
        + section.slug === activeSlug} + getHref={getSectionHref ?? + ((section: SettingsSection) => RouterService.settings(section.slug))} + /> + + section.slug === activeSlug} + getHref={getSectionHref ?? + ((section: SettingsSection) => RouterService.settings(section.slug))} + bind:this={mobileHeader} + /> + +
        +
        +
        +
        + +

        {currentSection.title}

        +
        + + {#if currentSection.title === SETTINGS_SECTION_TITLES.TOOLS} + + {:else if currentSection.title === SETTINGS_SECTION_TITLES.IMPORT_EXPORT} + + {:else if currentSection.fields} +
        + +
        + {/if} +
        + +
        +

        Settings are saved in browser's localStorage

        +
        +
        + + +
        +
        +
        diff --git a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSettings/ChatSettingsFields.svelte b/examples/server/webui_llamacpp/src/lib/components/app/settings/SettingsChat/SettingsChatFields.svelte similarity index 60% rename from examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSettings/ChatSettingsFields.svelte rename to examples/server/webui_llamacpp/src/lib/components/app/settings/SettingsChat/SettingsChatFields.svelte index 8834e3e3..7c1c5c89 100644 --- a/examples/server/webui_llamacpp/src/lib/components/app/chat/ChatSettings/ChatSettingsFields.svelte +++ b/examples/server/webui_llamacpp/src/lib/components/app/settings/SettingsChat/SettingsChatFields.svelte @@ -5,11 +5,13 @@ import Label from '$lib/components/ui/label/label.svelte'; import * as Select from '$lib/components/ui/select'; import { Textarea } from '$lib/components/ui/textarea'; - import { SETTING_CONFIG_DEFAULT, SETTING_CONFIG_INFO } from '$lib/constants/settings-config'; - import { supportsVision } from '$lib/stores/server.svelte'; - import { getParameterInfo, resetParameterToServerDefault } from '$lib/stores/settings.svelte'; - import { ParameterSyncService } from '$lib/services/parameter-sync'; - import { ChatSettingsParameterSourceIndicator } from '$lib/components/app'; + import { SETTING_CONFIG_INFO, SETTINGS_KEYS } from '$lib/constants'; + import { SettingsFieldType } from '$lib/enums/settings.enums'; + import { settingsStore } from '$lib/stores/settings.svelte'; + import { serverStore } from '$lib/stores/server.svelte'; + import { modelsStore, selectedModelName, propsCacheVersion } from '$lib/stores/models.svelte'; + import { normalizeFloatingPoint } from '$lib/utils/precision'; + import { SettingsChatParameterSourceIndicator } from '$lib/components/app/settings'; import type { Component } from 'svelte'; interface Props { @@ -21,35 +23,42 @@ let { fields, localConfig, onConfigChange, onThemeChange }: Props = $props(); - // Helper function to get 
parameter source info for syncable parameters - function getParameterSourceInfo(key: string) { - if (!ParameterSyncService.canSyncParameter(key)) { - return null; - } + let currentModelParams = $derived.by(() => { + propsCacheVersion(); - return getParameterInfo(key); - } + if (serverStore.isRouterMode) { + const currentModelName = selectedModelName(); + + if (currentModelName) { + const currentModelProps = modelsStore.getModelProps(currentModelName); + + return (currentModelProps?.default_generation_settings?.params ?? {}) as Record< + string, + unknown + >; + } + } + return (serverStore.defaultParams ?? {}) as Record<string, unknown>; + }); {#each fields as field (field.key)}
        - {#if field.type === 'input'} - {@const paramInfo = getParameterSourceInfo(field.key)} + {#if field.type === SettingsFieldType.INPUT} {@const currentValue = String(localConfig[field.key] ?? '')} - {@const propsDefault = paramInfo?.serverDefault} + {@const serverDefault = currentModelParams[field.key]} {@const isCustomRealTime = (() => { - if (!paramInfo || propsDefault === undefined) return false; + if (serverDefault == null) return false; + if (currentValue === '') return false; - // Apply same rounding logic for real-time comparison - const inputValue = currentValue; - const numericInput = parseFloat(inputValue); + const numericInput = parseFloat(currentValue); const normalizedInput = !isNaN(numericInput) ? Math.round(numericInput * 1000000) / 1000000 - : inputValue; + : currentValue; const normalizedDefault = - typeof propsDefault === 'number' - ? Math.round(propsDefault * 1000000) / 1000000 - : propsDefault; + typeof serverDefault === 'number' + ? Math.round(serverDefault * 1000000) / 1000000 + : serverDefault; return normalizedInput !== normalizedDefault; })()} @@ -63,29 +72,31 @@ {/if} {#if isCustomRealTime} - + {/if}
        -
        +
        { // Update local config immediately for real-time badge feedback onConfigChange(field.key, e.currentTarget.value); }} - placeholder={`Default: ${SETTING_CONFIG_DEFAULT[field.key] ?? 'none'}`} + placeholder={currentModelParams[field.key] != null + ? `Default: ${normalizeFloatingPoint(currentModelParams[field.key])}` + : ''} class="w-full {isCustomRealTime ? 'pr-8' : ''}" /> {#if isCustomRealTime}
        {#if field.help || SETTING_CONFIG_INFO[field.key]}

        - {field.help || SETTING_CONFIG_INFO[field.key]} + {@html field.help || SETTING_CONFIG_INFO[field.key]}

        {/if} - {:else if field.type === 'textarea'} - + {/if}