From 099b579acb9f7fd0eefcbb2198fd453b00c6e787 Mon Sep 17 00:00:00 2001 From: Pascal Date: Mon, 22 Jun 2026 15:55:30 +0200 Subject: [PATCH] ui: model status and load progress via /models/sse feed (#24878) * ui: model status and load progress via /models/sse feed * ui: centralize SSE wire-format delimiters into shared constants for the chat and /models/sse parsers * ui: type /models/sse event names as a ServerModelsSseEventType enum Address review from allozaur --- tools/ui/src/app.d.ts | 10 + .../ChatMessageAssistant.svelte | 15 +- .../app/models/ModelsSelectorOption.svelte | 19 +- tools/ui/src/lib/constants/api-endpoints.ts | 3 +- tools/ui/src/lib/constants/index.ts | 2 + tools/ui/src/lib/constants/model-loading.ts | 14 + tools/ui/src/lib/constants/sse.ts | 16 + tools/ui/src/lib/enums/index.ts | 2 +- tools/ui/src/lib/enums/server.enums.ts | 14 + tools/ui/src/lib/services/chat.service.ts | 16 +- tools/ui/src/lib/stores/models.svelte.ts | 280 +++++++++++++++--- tools/ui/src/lib/types/api.d.ts | 48 ++- tools/ui/src/lib/types/index.ts | 11 +- tools/ui/src/lib/types/models.d.ts | 13 +- tools/ui/src/lib/utils/index.ts | 3 + tools/ui/src/lib/utils/progress.ts | 43 +++ tools/ui/src/routes/+layout.svelte | 14 + 17 files changed, 466 insertions(+), 57 deletions(-) create mode 100644 tools/ui/src/lib/constants/model-loading.ts create mode 100644 tools/ui/src/lib/constants/sse.ts create mode 100644 tools/ui/src/lib/utils/progress.ts diff --git a/tools/ui/src/app.d.ts b/tools/ui/src/app.d.ts index a7583eec59..5264e5cc4d 100644 --- a/tools/ui/src/app.d.ts +++ b/tools/ui/src/app.d.ts @@ -19,6 +19,10 @@ import type { ApiErrorResponse, ApiLlamaCppServerProps, ApiModelDataEntry, + ApiModelLoadStage, + ApiModelsSseProgress, + ApiModelsSseData, + ApiModelsSseEvent, ApiModelListResponse, ApiProcessingState, ApiRouterModelMeta, @@ -52,6 +56,7 @@ import type { // Model types ModelModalities, ModelOption, + ModelLoadProgress, // Settings types SettingsChatServiceOptions, 
SettingsConfigValue, @@ -83,6 +88,10 @@ declare global { ApiErrorResponse, ApiLlamaCppServerProps, ApiModelDataEntry, + ApiModelLoadStage, + ApiModelsSseProgress, + ApiModelsSseData, + ApiModelsSseEvent, ApiModelListResponse, ApiProcessingState, ApiRouterModelMeta, @@ -120,6 +129,7 @@ declare global { // Model types ModelModalities, ModelOption, + ModelLoadProgress, // Settings types SettingsChatServiceOptions, SettingsConfigValue, diff --git a/tools/ui/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageAssistant/ChatMessageAssistant.svelte b/tools/ui/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageAssistant/ChatMessageAssistant.svelte index 4c74206f1b..2272eaedb3 100644 --- a/tools/ui/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageAssistant/ChatMessageAssistant.svelte +++ b/tools/ui/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageAssistant/ChatMessageAssistant.svelte @@ -10,7 +10,7 @@ import { getMessageEditContext } from '$lib/contexts'; import { useProcessingState } from '$lib/hooks/use-processing-state.svelte'; import { isLoading, isChatStreaming } from '$lib/stores/chat.svelte'; - import { copyToClipboard, deriveAgenticSections } from '$lib/utils'; + import { copyToClipboard, deriveAgenticSections, modelLoadProgressText } from '$lib/utils'; import { AgenticSectionType } from '$lib/enums'; import { REASONING_TAGS } from '$lib/constants/agentic'; import { tick } from 'svelte'; @@ -185,6 +185,13 @@ let hasNoContent = $derived(!message?.content?.trim()); let isActivelyProcessing = $derived(isCurrentlyLoading || isStreaming); + // during a router auto-load the message has no model yet, so target the selected one + let loadTargetModel = $derived(message.model ?? modelsStore.selectedModelName); + let modelLoadProgress = $derived( + isRouter && loadTargetModel ? 
modelsStore.getLoadProgress(loadTargetModel) : null + ); + let modelLoadingText = $derived(modelLoadProgressText(modelLoadProgress)); + let showProcessingInfoTop = $derived( message?.role === MessageRole.ASSISTANT && isActivelyProcessing && @@ -220,7 +227,8 @@
- {processingState.getPromptProgressText() ?? + {modelLoadingText ?? + processingState.getPromptProgressText() ?? processingState.getProcessingMessage() ?? 'Processing...'} @@ -252,7 +260,8 @@
- {processingState.getPromptProgressText() ?? + {modelLoadingText ?? + processingState.getPromptProgressText() ?? processingState.getProcessingMessage() ?? 'Processing...'} diff --git a/tools/ui/src/lib/components/app/models/ModelsSelectorOption.svelte b/tools/ui/src/lib/components/app/models/ModelsSelectorOption.svelte index fef1490f37..f2a024d31d 100644 --- a/tools/ui/src/lib/components/app/models/ModelsSelectorOption.svelte +++ b/tools/ui/src/lib/components/app/models/ModelsSelectorOption.svelte @@ -13,6 +13,7 @@ import type { ModelOption } from '$lib/types/models'; import { ServerModelStatus } from '$lib/enums'; import { modelsStore, routerModels } from '$lib/stores/models.svelte'; + import { modelLoadFraction, modelLoadProgressText } from '$lib/utils'; interface Props { option: ModelOption; @@ -50,11 +51,15 @@ (serverStatus === ServerModelStatus.LOADED || isSleeping) && !isOperationInProgress ); let isLoading = $derived(serverStatus === ServerModelStatus.LOADING || isOperationInProgress); + + let loadProgress = $derived(isLoading ? modelsStore.getLoadProgress(option.model) : null); + let loadPercent = $derived(Math.round(modelLoadFraction(loadProgress) * 100)); + let loadTitle = $derived(modelLoadProgressText(loadProgress));
onSelect(option.id)} onmouseenter={onMouseEnter} @@ -188,4 +194,15 @@
{/if}
+ + {#if isLoading} +
+
+
+ {/if}
diff --git a/tools/ui/src/lib/constants/api-endpoints.ts b/tools/ui/src/lib/constants/api-endpoints.ts index 9eb6c74e75..a410905057 100644 --- a/tools/ui/src/lib/constants/api-endpoints.ts +++ b/tools/ui/src/lib/constants/api-endpoints.ts @@ -1,7 +1,8 @@ export const API_MODELS = { LIST: '/v1/models', LOAD: '/models/load', - UNLOAD: '/models/unload' + UNLOAD: '/models/unload', + SSE: '/models/sse' }; // chat completion routes, the control route drives realtime inference (e.g. end reasoning) diff --git a/tools/ui/src/lib/constants/index.ts b/tools/ui/src/lib/constants/index.ts index c51d84cdc2..4993ab647a 100644 --- a/tools/ui/src/lib/constants/index.ts +++ b/tools/ui/src/lib/constants/index.ts @@ -37,6 +37,8 @@ export * from './mcp-form'; export * from './mcp-resource'; export * from './message-export'; export * from './model-id'; +export * from './model-loading'; +export * from './sse'; export * from './precision'; export * from './processing-info'; export * from './pwa'; diff --git a/tools/ui/src/lib/constants/model-loading.ts b/tools/ui/src/lib/constants/model-loading.ts new file mode 100644 index 0000000000..a55ba708b1 --- /dev/null +++ b/tools/ui/src/lib/constants/model-loading.ts @@ -0,0 +1,14 @@ +/** + * Labels shown while a model loads, keyed by the stage reported on /models/sse. + */ +export const MODEL_LOAD_STAGE_LABELS: Record = { + text_model: 'Loading weights', + spec_model: 'Loading draft', + mmproj_model: 'Loading projector' +}; + +/** + * Share of the bar reserved for each load phase after text_model. + * text_model fills the rest, so a plain model reaches 100% on its own. + */ +export const MODEL_LOAD_TAIL_SHARE = 0.1; diff --git a/tools/ui/src/lib/constants/sse.ts b/tools/ui/src/lib/constants/sse.ts new file mode 100644 index 0000000000..0eb4b6edee --- /dev/null +++ b/tools/ui/src/lib/constants/sse.ts @@ -0,0 +1,16 @@ +/** + * Server-sent events wire format, shared by the chat stream and the + * /models/sse status feed (text/event-stream). 
+ */ + +// blank line between two events +export const SSE_RECORD_SEPARATOR = '\n\n'; + +// line break inside an event +export const SSE_LINE_SEPARATOR = '\n'; + +// data field prefix, the value follows after an optional space +export const SSE_DATA_PREFIX = 'data:'; + +// end-of-stream marker on the chat completion stream +export const SSE_DONE_MARKER = '[DONE]'; diff --git a/tools/ui/src/lib/enums/index.ts b/tools/ui/src/lib/enums/index.ts index 449e4f90a9..811744fd9a 100644 --- a/tools/ui/src/lib/enums/index.ts +++ b/tools/ui/src/lib/enums/index.ts @@ -54,7 +54,7 @@ export { export { ModelModality } from './model.enums'; -export { ServerRole, ServerModelStatus } from './server.enums'; +export { ServerRole, ServerModelStatus, ServerModelsSseEventType } from './server.enums'; export { ParameterSource, SyncableParameterType, SettingsFieldType } from './settings.enums'; diff --git a/tools/ui/src/lib/enums/server.enums.ts b/tools/ui/src/lib/enums/server.enums.ts index c9d599c52b..446af84be7 100644 --- a/tools/ui/src/lib/enums/server.enums.ts +++ b/tools/ui/src/lib/enums/server.enums.ts @@ -19,3 +19,17 @@ export enum ServerModelStatus { SLEEPING = 'sleeping', FAILED = 'failed' } + +/** + * /models/sse event type enum - discriminates the records broadcast on the + * model status feed in ROUTER mode. Matches the event names emitted by + * tools/server/server-models.cpp from the C++ server. 
+ */ +export enum ServerModelsSseEventType { + STATUS_CHANGE = 'status_change', + MODEL_STATUS = 'model_status', + STATUS_UPDATE = 'status_update', + MODELS_RELOAD = 'models_reload', + MODEL_REMOVE = 'model_remove', + DOWNLOAD_PROGRESS = 'download_progress' +} diff --git a/tools/ui/src/lib/services/chat.service.ts b/tools/ui/src/lib/services/chat.service.ts index 70844f57ee..9001c9572f 100644 --- a/tools/ui/src/lib/services/chat.service.ts +++ b/tools/ui/src/lib/services/chat.service.ts @@ -10,7 +10,10 @@ import { SETTINGS_KEYS, API_CHAT, API_SLOTS, - CONTROL_ACTION + CONTROL_ACTION, + SSE_LINE_SEPARATOR, + SSE_DATA_PREFIX, + SSE_DONE_MARKER } from '$lib/constants'; import { AttachmentType, @@ -18,8 +21,7 @@ import { FileTypeAudio, MessageRole, MimeTypeAudio, - ReasoningFormat, - UrlProtocol + ReasoningFormat } from '$lib/enums'; import type { ApiChatMessageContentPart, @@ -642,15 +644,15 @@ export class ChatService { if (abortSignal?.aborted) break; chunk += decoder.decode(value, { stream: true }); - const lines = chunk.split('\n'); + const lines = chunk.split(SSE_LINE_SEPARATOR); chunk = lines.pop() || ''; for (const line of lines) { if (abortSignal?.aborted) break; - if (line.startsWith(UrlProtocol.DATA)) { - const data = line.slice(6); - if (data === '[DONE]') { + if (line.startsWith(SSE_DATA_PREFIX)) { + const data = line.slice(SSE_DATA_PREFIX.length).trim(); + if (data === SSE_DONE_MARKER) { streamFinished = true; continue; diff --git a/tools/ui/src/lib/stores/models.svelte.ts b/tools/ui/src/lib/stores/models.svelte.ts index 1990ba6049..2ce450d423 100644 --- a/tools/ui/src/lib/stores/models.svelte.ts +++ b/tools/ui/src/lib/stores/models.svelte.ts @@ -1,6 +1,7 @@ +import { base } from '$app/paths'; import { SvelteMap, SvelteSet } from 'svelte/reactivity'; import { toast } from 'svelte-sonner'; -import { ServerModelStatus, ModelModality } from '$lib/enums'; +import { ServerModelStatus, ServerModelsSseEventType, ModelModality } from '$lib/enums'; import { 
ModelsService } from '$lib/services/models.service'; import { PropsService } from '$lib/services/props.service'; import { serverStore, isRouterMode } from '$lib/stores/server.svelte'; @@ -8,11 +9,15 @@ import { detectThinkingSupport, detectThinkingSupportWithReason } from '$lib/utils/chat-template-thinking-detector'; -import { TTLCache } from '$lib/utils'; +import { TTLCache, getAuthHeaders } from '$lib/utils'; import { MODEL_PROPS_CACHE_TTL_MS, MODEL_PROPS_CACHE_MAX_ENTRIES, - FAVORITE_MODELS_LOCALSTORAGE_KEY + FAVORITE_MODELS_LOCALSTORAGE_KEY, + API_MODELS, + SSE_RECORD_SEPARATOR, + SSE_LINE_SEPARATOR, + SSE_DATA_PREFIX } from '$lib/constants'; import { conversationsStore } from '$lib/stores/conversations.svelte'; @@ -55,6 +60,15 @@ class ModelsStore { private modelUsage = $state>>(new Map()); private modelLoadingStates = new SvelteMap(); + // /models/sse feed state, the single source of truth for status and load progress + private statusAbort: AbortController | null = null; + private statusReaderActive = false; + private loadProgress = new SvelteMap(); + private statusWaiters = new Map< + string, + { target: ServerModelStatus; resolve: () => void; reject: (e: Error) => void } + >(); + favoriteModelIds = $state>(this.loadFavoritesFromStorage()); /** @@ -626,49 +640,218 @@ class ModelsStore { * */ - /** - * WORKAROUND: Polling for model status after load/unload operations. - * - * Currently, `/models/load` and `/models/unload` return success before - * the operation actually completes on the server. - * - * TODO: Remove polling once llama-server properly waits for the operation - * to complete before returning success. - */ - - private static readonly STATUS_POLL_INTERVAL = 500; + // reconnect delay after the feed drops or the server is not ready yet + private static readonly SSE_RECONNECT_MS = 1000; /** - * Poll for expected model status after load/unload operation. - * Keeps polling until the model reaches the expected status or fails. 
+ * Open the /models/sse feed and keep it live with auto reconnect. + * Idempotent and router mode only. The feed drives status and progress, + * so it replaces any post-operation polling. */ - private async pollForModelStatus( - modelId: string, - expectedStatus: ServerModelStatus - ): Promise { - let attempt = 0; - while (true) { - await this.fetchRouterModels(); + subscribeStatus(): void { + if (this.statusReaderActive) return; + if (!isRouterMode()) return; - const currentStatus = this.getModelStatus(modelId); - if (currentStatus === expectedStatus) return; + this.statusReaderActive = true; + this.statusAbort = new AbortController(); + void this.runStatusReader(this.statusAbort.signal); + } - if (currentStatus === ServerModelStatus.FAILED) { - throw new Error( - `Model failed to ${expectedStatus === ServerModelStatus.LOADED ? 'load' : 'unload'}` - ); + /** + * Close the /models/sse feed and drop transient progress. + */ + unsubscribeStatus(): void { + this.statusReaderActive = false; + this.statusAbort?.abort(); + this.statusAbort = null; + this.loadProgress.clear(); + } + + /** + * Current load progress for a model, or null when not loading. + */ + getLoadProgress(modelId: string): ModelLoadProgress | null { + return this.loadProgress.get(modelId) ?? null; + } + + /** + * Read the feed and reconnect until unsubscribed. Splits the byte stream + * into SSE records on the blank line boundary. 
+ */ + private async runStatusReader(signal: AbortSignal): Promise { + const decoder = new TextDecoder(); + + while (!signal.aborted) { + try { + const response = await fetch(`${base}${API_MODELS.SSE}`, { + headers: getAuthHeaders(), + signal + }); + + if (response.ok && response.body) { + const reader = response.body.getReader(); + let buffer = ''; + + while (!signal.aborted) { + const { value, done } = await reader.read(); + if (done) break; + + buffer += decoder.decode(value, { stream: true }); + + let boundary = buffer.indexOf(SSE_RECORD_SEPARATOR); + while (boundary !== -1) { + this.handleStatusRecord(buffer.slice(0, boundary)); + buffer = buffer.slice(boundary + SSE_RECORD_SEPARATOR.length); + boundary = buffer.indexOf(SSE_RECORD_SEPARATOR); + } + } + } + } catch { + // network drop or abort falls through to the reconnect delay } - if ( - expectedStatus === ServerModelStatus.LOADED && - currentStatus === ServerModelStatus.UNLOADED && - attempt > 2 - ) { - throw new Error('Model was unloaded unexpectedly during loading'); - } + if (signal.aborted) return; - attempt++; - await new Promise((resolve) => setTimeout(resolve, ModelsStore.STATUS_POLL_INTERVAL)); + await new Promise((resolve) => setTimeout(resolve, ModelsStore.SSE_RECONNECT_MS)); + } + } + + /** + * Parse one SSE record. The payload rides in the data lines as a JSON + * envelope that carries its own model, event and data fields. + */ + private handleStatusRecord(record: string): void { + const payload = record + .split(SSE_LINE_SEPARATOR) + .filter((line) => line.startsWith(SSE_DATA_PREFIX)) + .map((line) => line.slice(SSE_DATA_PREFIX.length).trim()) + .join(SSE_LINE_SEPARATOR); + + if (payload.length === 0) return; + + let envelope: ApiModelsSseEvent; + try { + envelope = JSON.parse(payload); + } catch { + return; + } + + this.applyStatusEvent(envelope); + } + + /** + * Route one feed record by event kind. 
Only the status_* events carry a + * status payload, models_reload triggers a list refresh, model_remove drops + * the row, download_* belong to the download surface, not here. + */ + private applyStatusEvent(event: ApiModelsSseEvent): void { + switch (event.event) { + case ServerModelsSseEventType.STATUS_CHANGE: + case ServerModelsSseEventType.MODEL_STATUS: + case ServerModelsSseEventType.STATUS_UPDATE: + this.applyModelStatus(event); + break; + case ServerModelsSseEventType.MODELS_RELOAD: + void this.fetchRouterModels(); + break; + case ServerModelsSseEventType.MODEL_REMOVE: + this.removeRouterModel(event.model); + break; + case ServerModelsSseEventType.DOWNLOAD_PROGRESS: + break; + } + } + + /** + * Apply a status envelope: update the model row, track or clear progress, + * settle any pending load or unload awaiter. + */ + private applyModelStatus(event: ApiModelsSseEvent): void { + const model = event.model; + const data = event.data; + if (!model || !data?.status) return; + + const status = data.status; + + this.setRouterModelStatus(model, status); + + if (status === ServerModelStatus.LOADING) { + if (data.progress) this.loadProgress.set(model, data.progress); + } else { + this.loadProgress.delete(model); + } + + if (status === ServerModelStatus.LOADED) { + void this.updateModelModalities(model); + } + + const failed = + status === ServerModelStatus.FAILED || + (status === ServerModelStatus.UNLOADED && (data.exit_code ?? 0) !== 0); + + if (failed) { + this.rejectStatus(model, new Error(`Model failed: ${this.toDisplayName(model)}`)); + return; + } + + this.settleStatus(model, status); + } + + /** + * Drop a model row reported gone by the feed and settle its awaiters. 
+ */ + private removeRouterModel(modelId: string): void { + if (this.routerModels.findIndex((m) => m.id === modelId) === -1) return; + + this.routerModels = this.routerModels.filter((m) => m.id !== modelId); + this.loadProgress.delete(modelId); + this.rejectStatus(modelId, new Error(`Model removed: ${this.toDisplayName(modelId)}`)); + } + + /** + * Update one model row status in place, reassigning to trigger reactivity. + */ + private setRouterModelStatus(modelId: string, status: ServerModelStatus): void { + const idx = this.routerModels.findIndex((m) => m.id === modelId); + if (idx === -1) return; + + const current = this.routerModels[idx]; + if (current.status.value === status) return; + + const next = [...this.routerModels]; + next[idx] = { ...current, status: { ...current.status, value: status } }; + this.routerModels = next; + } + + /** + * Register an awaiter that resolves when the feed reports target status. + * One operation runs per model at a time, so one awaiter per model is kept. + */ + private waitForStatus(modelId: string, target: ServerModelStatus): Promise { + return new Promise((resolve, reject) => { + this.statusWaiters.set(modelId, { target, resolve, reject }); + }); + } + + /** + * Resolve and drop the awaiter when the model reaches its target status. + */ + private settleStatus(modelId: string, status: ServerModelStatus): void { + const waiter = this.statusWaiters.get(modelId); + if (waiter && waiter.target === status) { + this.statusWaiters.delete(modelId); + waiter.resolve(); + } + } + + /** + * Reject and drop the awaiter for a model. 
+ */ + private rejectStatus(modelId: string, error: Error): void { + const waiter = this.statusWaiters.get(modelId); + if (waiter) { + this.statusWaiters.delete(modelId); + waiter.reject(error); } } @@ -679,12 +862,18 @@ class ModelsStore { this.modelLoadingStates.set(modelId, true); this.error = null; + // the feed drives completion, so it must be live before the request + this.subscribeStatus(); + + const reachedLoaded = this.waitForStatus(modelId, ServerModelStatus.LOADED); + reachedLoaded.catch(() => {}); + try { await ModelsService.load(modelId); - await this.pollForModelStatus(modelId, ServerModelStatus.LOADED); - await this.updateModelModalities(modelId); + await reachedLoaded; toast.success(`Model loaded: ${this.toDisplayName(modelId)}`); } catch (error) { + this.rejectStatus(modelId, error instanceof Error ? error : new Error('load failed')); this.error = error instanceof Error ? error.message : 'Failed to load model'; toast.error(`Failed to load model: ${this.toDisplayName(modelId)}`); throw error; @@ -700,11 +889,17 @@ class ModelsStore { this.modelLoadingStates.set(modelId, true); this.error = null; + this.subscribeStatus(); + + const reachedUnloaded = this.waitForStatus(modelId, ServerModelStatus.UNLOADED); + reachedUnloaded.catch(() => {}); + try { await ModelsService.unload(modelId); - await this.pollForModelStatus(modelId, ServerModelStatus.UNLOADED); + await reachedUnloaded; toast.info(`Model unloaded: ${this.toDisplayName(modelId)}`); } catch (error) { + this.rejectStatus(modelId, error instanceof Error ? error : new Error('unload failed')); this.error = error instanceof Error ? 
error.message : 'Failed to unload model'; toast.error(`Failed to unload model: ${this.toDisplayName(modelId)}`); throw error; @@ -783,6 +978,9 @@ class ModelsStore { } clear(): void { + this.unsubscribeStatus(); + this.statusWaiters.forEach((waiter) => waiter.reject(new Error('Models store cleared'))); + this.statusWaiters.clear(); this.models = []; this.routerModels = []; this.loading = false; diff --git a/tools/ui/src/lib/types/api.d.ts b/tools/ui/src/lib/types/api.d.ts index f620d67351..2a2524d002 100644 --- a/tools/ui/src/lib/types/api.d.ts +++ b/tools/ui/src/lib/types/api.d.ts @@ -1,4 +1,10 @@ -import type { ContentPartType, FileTypeAudio, ServerModelStatus, ServerRole } from '$lib/enums'; +import type { + ContentPartType, + FileTypeAudio, + ServerModelStatus, + ServerModelsSseEventType, + ServerRole +} from '$lib/enums'; import type { ChatMessagePromptProgress, ChatRole } from './chat'; export type AudioInputFormat = FileTypeAudio.WAV | FileTypeAudio.MP3; @@ -96,6 +102,46 @@ export interface ApiModelDataEntry { meta?: Record | null; } +/** + * Load stage reported by the /models/sse feed, in load order. + */ +export type ApiModelLoadStage = 'text_model' | 'spec_model' | 'mmproj_model'; + +/** + * Load progress snapshot: the full ordered stage plan, the active stage, + * and its fractional value (0.0 -> 1.0). + */ +export interface ApiModelsSseProgress { + stages: ApiModelLoadStage[]; + current: ApiModelLoadStage; + value: number; +} + +/** + * Status payload carried by a /models/sse envelope. + * exit_code appears on unload. + */ +export interface ApiModelsSseData { + status: ServerModelStatus; + progress?: ApiModelsSseProgress; + exit_code?: number; +} + +/** + * Event kind multiplexed on the /models/sse feed. + * Only the status_* events carry a status payload, models_reload signals a + * full list refresh, model_remove drops a row, download_* drive download UI. + */ +/** + * One /models/sse record. 
event discriminates the kind, model names the + * target instance, data carries the status payload when present. + */ +export interface ApiModelsSseEvent { + model: string; + event: ServerModelsSseEventType; + data: ApiModelsSseData; +} + export interface ApiModelDetails { name: string; model: string; diff --git a/tools/ui/src/lib/types/index.ts b/tools/ui/src/lib/types/index.ts index c5f9488981..9b0b118045 100644 --- a/tools/ui/src/lib/types/index.ts +++ b/tools/ui/src/lib/types/index.ts @@ -11,6 +11,10 @@ export type { ApiChatMessageData, ApiModelStatus, ApiModelDataEntry, + ApiModelLoadStage, + ApiModelsSseProgress, + ApiModelsSseData, + ApiModelsSseEvent, ApiModelDetails, ApiModelListResponse, ApiLlamaCppServerProps, @@ -70,7 +74,12 @@ export type { } from './database'; // Model types -export type { ModelModalities, ModelOption, ModalityCapabilities } from './models'; +export type { + ModelModalities, + ModelOption, + ModelLoadProgress, + ModalityCapabilities +} from './models'; // Settings types export type { diff --git a/tools/ui/src/lib/types/models.d.ts b/tools/ui/src/lib/types/models.d.ts index 51069599d7..b32c16f6f2 100644 --- a/tools/ui/src/lib/types/models.d.ts +++ b/tools/ui/src/lib/types/models.d.ts @@ -1,4 +1,4 @@ -import type { ApiModelDataEntry, ApiModelDetails } from '$lib/types/api'; +import type { ApiModelDataEntry, ApiModelDetails, ApiModelLoadStage } from '$lib/types/api'; export interface ModelModalities { vision: boolean; @@ -20,6 +20,17 @@ export interface ModelOption { tags?: string[]; } +/** + * Ephemeral UI-only load progress for one model instance. + * Lives only while a load runs, driven by the /models/sse feed. + * No entry exists until the feed reports progress, current is the active stage. 
+ */ +export interface ModelLoadProgress { + stages: ApiModelLoadStage[]; + current: ApiModelLoadStage; + value: number; +} + export interface ParsedModelId { raw: string; orgName: string | null; diff --git a/tools/ui/src/lib/utils/index.ts b/tools/ui/src/lib/utils/index.ts index 637db8812c..61b9932d3f 100644 --- a/tools/ui/src/lib/utils/index.ts +++ b/tools/ui/src/lib/utils/index.ts @@ -44,6 +44,9 @@ export { buildProxiedUrl, buildProxiedHeaders } from './cors-proxy'; // URL utilities export { extractRootDomain, sanitizeExternalUrl } from './url'; +// Progress helpers +export { modelLoadFraction, modelLoadProgressText } from './progress'; + // Conversation utilities export { createMessageCountMap, getMessageCount } from './conversation-utils'; diff --git a/tools/ui/src/lib/utils/progress.ts b/tools/ui/src/lib/utils/progress.ts new file mode 100644 index 0000000000..4d7e223882 --- /dev/null +++ b/tools/ui/src/lib/utils/progress.ts @@ -0,0 +1,43 @@ +/** + * Model load progress helpers for the /models/sse surfaces + * (selector row and chat message). + */ + +import { MODEL_LOAD_STAGE_LABELS, MODEL_LOAD_TAIL_SHARE } from '$lib/constants'; + +/** + * Human label for a model load stage. + */ +export function modelLoadStageLabel(stage: ApiModelLoadStage): string { + return MODEL_LOAD_STAGE_LABELS[stage]; +} + +/** + * Overall load fraction (0.0 -> 1.0) across the declared stage plan. + * text_model fills [0, 1 - tail], each later phase owns one tail slice. 
+ */ +export function modelLoadFraction(progress: ModelLoadProgress | null): number { + if (!progress) return 0; + + const { stages, current, value } = progress; + const tailCount = Math.max(stages.length - 1, 0); + const textCeiling = 1 - tailCount * MODEL_LOAD_TAIL_SHARE; + const idx = stages.indexOf(current); + + if (idx <= 0) { + return value * textCeiling; + } + + return textCeiling + (idx - 1 + value) * MODEL_LOAD_TAIL_SHARE; +} + +/** + * Single line describing load progress: active stage label and overall percent. + * Returns null when there is no progress to show. + */ +export function modelLoadProgressText(progress: ModelLoadProgress | null): string | null { + if (!progress) return null; + + const label = modelLoadStageLabel(progress.current); + return `${label} ${Math.round(modelLoadFraction(progress) * 100)}%`; +} diff --git a/tools/ui/src/routes/+layout.svelte b/tools/ui/src/routes/+layout.svelte index fdba9a9d37..1269692a78 100644 --- a/tools/ui/src/routes/+layout.svelte +++ b/tools/ui/src/routes/+layout.svelte @@ -230,6 +230,20 @@ } }); + // Live model status and load progress via the /models/sse feed (router mode) + $effect(() => { + if (!browser) return; + if (!isRouterMode()) return; + + untrack(() => { + modelsStore.subscribeStatus(); + }); + + return () => { + modelsStore.unsubscribeStatus(); + }; + }); + // Background MCP server health checks on app load // Fetch enabled servers from settings and run health checks in background $effect(() => {