diff --git a/tools/ui/src/app.d.ts b/tools/ui/src/app.d.ts
index a7583eec59..5264e5cc4d 100644
--- a/tools/ui/src/app.d.ts
+++ b/tools/ui/src/app.d.ts
@@ -19,6 +19,10 @@ import type {
ApiErrorResponse,
ApiLlamaCppServerProps,
ApiModelDataEntry,
+ ApiModelLoadStage,
+ ApiModelsSseProgress,
+ ApiModelsSseData,
+ ApiModelsSseEvent,
ApiModelListResponse,
ApiProcessingState,
ApiRouterModelMeta,
@@ -52,6 +56,7 @@ import type {
// Model types
ModelModalities,
ModelOption,
+ ModelLoadProgress,
// Settings types
SettingsChatServiceOptions,
SettingsConfigValue,
@@ -83,6 +88,10 @@ declare global {
ApiErrorResponse,
ApiLlamaCppServerProps,
ApiModelDataEntry,
+ ApiModelLoadStage,
+ ApiModelsSseProgress,
+ ApiModelsSseData,
+ ApiModelsSseEvent,
ApiModelListResponse,
ApiProcessingState,
ApiRouterModelMeta,
@@ -120,6 +129,7 @@ declare global {
// Model types
ModelModalities,
ModelOption,
+ ModelLoadProgress,
// Settings types
SettingsChatServiceOptions,
SettingsConfigValue,
diff --git a/tools/ui/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageAssistant/ChatMessageAssistant.svelte b/tools/ui/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageAssistant/ChatMessageAssistant.svelte
index 4c74206f1b..2272eaedb3 100644
--- a/tools/ui/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageAssistant/ChatMessageAssistant.svelte
+++ b/tools/ui/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageAssistant/ChatMessageAssistant.svelte
@@ -10,7 +10,7 @@
import { getMessageEditContext } from '$lib/contexts';
import { useProcessingState } from '$lib/hooks/use-processing-state.svelte';
import { isLoading, isChatStreaming } from '$lib/stores/chat.svelte';
- import { copyToClipboard, deriveAgenticSections } from '$lib/utils';
+ import { copyToClipboard, deriveAgenticSections, modelLoadProgressText } from '$lib/utils';
import { AgenticSectionType } from '$lib/enums';
import { REASONING_TAGS } from '$lib/constants/agentic';
import { tick } from 'svelte';
@@ -185,6 +185,13 @@
let hasNoContent = $derived(!message?.content?.trim());
let isActivelyProcessing = $derived(isCurrentlyLoading || isStreaming);
+ // during a router auto-load the message has no model yet, so target the selected one
+ let loadTargetModel = $derived(message.model ?? modelsStore.selectedModelName);
+ let modelLoadProgress = $derived(
+ isRouter && loadTargetModel ? modelsStore.getLoadProgress(loadTargetModel) : null
+ );
+ let modelLoadingText = $derived(modelLoadProgressText(modelLoadProgress));
+
let showProcessingInfoTop = $derived(
message?.role === MessageRole.ASSISTANT &&
isActivelyProcessing &&
@@ -220,7 +227,8 @@
- {processingState.getPromptProgressText() ??
+ {modelLoadingText ??
+ processingState.getPromptProgressText() ??
processingState.getProcessingMessage() ??
'Processing...'}
@@ -252,7 +260,8 @@
- {processingState.getPromptProgressText() ??
+ {modelLoadingText ??
+ processingState.getPromptProgressText() ??
processingState.getProcessingMessage() ??
'Processing...'}
diff --git a/tools/ui/src/lib/components/app/models/ModelsSelectorOption.svelte b/tools/ui/src/lib/components/app/models/ModelsSelectorOption.svelte
index fef1490f37..f2a024d31d 100644
--- a/tools/ui/src/lib/components/app/models/ModelsSelectorOption.svelte
+++ b/tools/ui/src/lib/components/app/models/ModelsSelectorOption.svelte
@@ -13,6 +13,7 @@
import type { ModelOption } from '$lib/types/models';
import { ServerModelStatus } from '$lib/enums';
import { modelsStore, routerModels } from '$lib/stores/models.svelte';
+ import { modelLoadFraction, modelLoadProgressText } from '$lib/utils';
interface Props {
option: ModelOption;
@@ -50,11 +51,15 @@
(serverStatus === ServerModelStatus.LOADED || isSleeping) && !isOperationInProgress
);
let isLoading = $derived(serverStatus === ServerModelStatus.LOADING || isOperationInProgress);
+
+ let loadProgress = $derived(isLoading ? modelsStore.getLoadProgress(option.model) : null);
+ let loadPercent = $derived(Math.round(modelLoadFraction(loadProgress) * 100));
+ let loadTitle = $derived(modelLoadProgressText(loadProgress));
onSelect(option.id)}
onmouseenter={onMouseEnter}
@@ -188,4 +194,15 @@
{/if}
+
+ {#if isLoading}
+
+ {/if}
diff --git a/tools/ui/src/lib/constants/api-endpoints.ts b/tools/ui/src/lib/constants/api-endpoints.ts
index 9eb6c74e75..a410905057 100644
--- a/tools/ui/src/lib/constants/api-endpoints.ts
+++ b/tools/ui/src/lib/constants/api-endpoints.ts
@@ -1,7 +1,8 @@
export const API_MODELS = {
LIST: '/v1/models',
LOAD: '/models/load',
- UNLOAD: '/models/unload'
+ UNLOAD: '/models/unload',
+ SSE: '/models/sse'
};
// chat completion routes, the control route drives realtime inference (e.g. end reasoning)
diff --git a/tools/ui/src/lib/constants/index.ts b/tools/ui/src/lib/constants/index.ts
index c51d84cdc2..4993ab647a 100644
--- a/tools/ui/src/lib/constants/index.ts
+++ b/tools/ui/src/lib/constants/index.ts
@@ -37,6 +37,8 @@ export * from './mcp-form';
export * from './mcp-resource';
export * from './message-export';
export * from './model-id';
+export * from './model-loading';
+export * from './sse';
export * from './precision';
export * from './processing-info';
export * from './pwa';
diff --git a/tools/ui/src/lib/constants/model-loading.ts b/tools/ui/src/lib/constants/model-loading.ts
new file mode 100644
index 0000000000..a55ba708b1
--- /dev/null
+++ b/tools/ui/src/lib/constants/model-loading.ts
@@ -0,0 +1,14 @@
+/**
+ * Labels shown while a model loads, keyed by the stage reported on /models/sse.
+ */
+export const MODEL_LOAD_STAGE_LABELS: Record<ApiModelLoadStage, string> = {
+ text_model: 'Loading weights',
+ spec_model: 'Loading draft',
+ mmproj_model: 'Loading projector'
+};
+
+/**
+ * Share of the progress bar reserved for each load phase after text_model.
+ * text_model fills the rest, so a plain model reaches 100% on its own.
+ */
+export const MODEL_LOAD_TAIL_SHARE = 0.1;
diff --git a/tools/ui/src/lib/constants/sse.ts b/tools/ui/src/lib/constants/sse.ts
new file mode 100644
index 0000000000..0eb4b6edee
--- /dev/null
+++ b/tools/ui/src/lib/constants/sse.ts
@@ -0,0 +1,16 @@
+/**
+ * Server-sent events wire format, shared by the chat stream and the
+ * /models/sse status feed (text/event-stream).
+ */
+
+// blank line that separates two events
+export const SSE_RECORD_SEPARATOR = '\n\n';
+
+// line break between the lines of a single event
+export const SSE_LINE_SEPARATOR = '\n';
+
+// data field prefix; the value follows after an optional space
+export const SSE_DATA_PREFIX = 'data:';
+
+// end-of-stream marker sent on the chat completion stream
+export const SSE_DONE_MARKER = '[DONE]';
diff --git a/tools/ui/src/lib/enums/index.ts b/tools/ui/src/lib/enums/index.ts
index 449e4f90a9..811744fd9a 100644
--- a/tools/ui/src/lib/enums/index.ts
+++ b/tools/ui/src/lib/enums/index.ts
@@ -54,7 +54,7 @@ export {
export { ModelModality } from './model.enums';
-export { ServerRole, ServerModelStatus } from './server.enums';
+export { ServerRole, ServerModelStatus, ServerModelsSseEventType } from './server.enums';
export { ParameterSource, SyncableParameterType, SettingsFieldType } from './settings.enums';
diff --git a/tools/ui/src/lib/enums/server.enums.ts b/tools/ui/src/lib/enums/server.enums.ts
index c9d599c52b..446af84be7 100644
--- a/tools/ui/src/lib/enums/server.enums.ts
+++ b/tools/ui/src/lib/enums/server.enums.ts
@@ -19,3 +19,17 @@ export enum ServerModelStatus {
SLEEPING = 'sleeping',
FAILED = 'failed'
}
+
+/**
+ * Event types on the /models/sse model status feed (ROUTER mode); the value
+ * discriminates each broadcast record. Names match the events emitted by the
+ * C++ server in tools/server/server-models.cpp.
+ */
+export enum ServerModelsSseEventType {
+ STATUS_CHANGE = 'status_change',
+ MODEL_STATUS = 'model_status',
+ STATUS_UPDATE = 'status_update',
+ MODELS_RELOAD = 'models_reload',
+ MODEL_REMOVE = 'model_remove',
+ DOWNLOAD_PROGRESS = 'download_progress'
+}
diff --git a/tools/ui/src/lib/services/chat.service.ts b/tools/ui/src/lib/services/chat.service.ts
index 70844f57ee..9001c9572f 100644
--- a/tools/ui/src/lib/services/chat.service.ts
+++ b/tools/ui/src/lib/services/chat.service.ts
@@ -10,7 +10,10 @@ import {
SETTINGS_KEYS,
API_CHAT,
API_SLOTS,
- CONTROL_ACTION
+ CONTROL_ACTION,
+ SSE_LINE_SEPARATOR,
+ SSE_DATA_PREFIX,
+ SSE_DONE_MARKER
} from '$lib/constants';
import {
AttachmentType,
@@ -18,8 +21,7 @@ import {
FileTypeAudio,
MessageRole,
MimeTypeAudio,
- ReasoningFormat,
- UrlProtocol
+ ReasoningFormat
} from '$lib/enums';
import type {
ApiChatMessageContentPart,
@@ -642,15 +644,15 @@ export class ChatService {
if (abortSignal?.aborted) break;
chunk += decoder.decode(value, { stream: true });
- const lines = chunk.split('\n');
+ const lines = chunk.split(SSE_LINE_SEPARATOR);
chunk = lines.pop() || '';
for (const line of lines) {
if (abortSignal?.aborted) break;
- if (line.startsWith(UrlProtocol.DATA)) {
- const data = line.slice(6);
- if (data === '[DONE]') {
+ if (line.startsWith(SSE_DATA_PREFIX)) {
+ const data = line.slice(SSE_DATA_PREFIX.length).trim();
+ if (data === SSE_DONE_MARKER) {
streamFinished = true;
continue;
diff --git a/tools/ui/src/lib/stores/models.svelte.ts b/tools/ui/src/lib/stores/models.svelte.ts
index 1990ba6049..2ce450d423 100644
--- a/tools/ui/src/lib/stores/models.svelte.ts
+++ b/tools/ui/src/lib/stores/models.svelte.ts
@@ -1,6 +1,7 @@
+import { base } from '$app/paths';
import { SvelteMap, SvelteSet } from 'svelte/reactivity';
import { toast } from 'svelte-sonner';
-import { ServerModelStatus, ModelModality } from '$lib/enums';
+import { ServerModelStatus, ServerModelsSseEventType, ModelModality } from '$lib/enums';
import { ModelsService } from '$lib/services/models.service';
import { PropsService } from '$lib/services/props.service';
import { serverStore, isRouterMode } from '$lib/stores/server.svelte';
@@ -8,11 +9,15 @@ import {
detectThinkingSupport,
detectThinkingSupportWithReason
} from '$lib/utils/chat-template-thinking-detector';
-import { TTLCache } from '$lib/utils';
+import { TTLCache, getAuthHeaders } from '$lib/utils';
import {
MODEL_PROPS_CACHE_TTL_MS,
MODEL_PROPS_CACHE_MAX_ENTRIES,
- FAVORITE_MODELS_LOCALSTORAGE_KEY
+ FAVORITE_MODELS_LOCALSTORAGE_KEY,
+ API_MODELS,
+ SSE_RECORD_SEPARATOR,
+ SSE_LINE_SEPARATOR,
+ SSE_DATA_PREFIX
} from '$lib/constants';
import { conversationsStore } from '$lib/stores/conversations.svelte';
@@ -55,6 +60,15 @@ class ModelsStore {
private modelUsage = $state