mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-27 23:50:20 -05:00
ui: model status and load progress via /models/sse feed (#24878)
* ui: model status and load progress via /models/sse feed * ui: centralize SSE wire-format delimiters into shared constants for the chat and /models/sse parsers * ui: type /models/sse event names as a ServerModelsSseEventType enum Address review from allozaur
This commit is contained in:
parent
f8cc15f163
commit
099b579acb
10
tools/ui/src/app.d.ts
vendored
10
tools/ui/src/app.d.ts
vendored
@ -19,6 +19,10 @@ import type {
|
||||
ApiErrorResponse,
|
||||
ApiLlamaCppServerProps,
|
||||
ApiModelDataEntry,
|
||||
ApiModelLoadStage,
|
||||
ApiModelsSseProgress,
|
||||
ApiModelsSseData,
|
||||
ApiModelsSseEvent,
|
||||
ApiModelListResponse,
|
||||
ApiProcessingState,
|
||||
ApiRouterModelMeta,
|
||||
@ -52,6 +56,7 @@ import type {
|
||||
// Model types
|
||||
ModelModalities,
|
||||
ModelOption,
|
||||
ModelLoadProgress,
|
||||
// Settings types
|
||||
SettingsChatServiceOptions,
|
||||
SettingsConfigValue,
|
||||
@ -83,6 +88,10 @@ declare global {
|
||||
ApiErrorResponse,
|
||||
ApiLlamaCppServerProps,
|
||||
ApiModelDataEntry,
|
||||
ApiModelLoadStage,
|
||||
ApiModelsSseProgress,
|
||||
ApiModelsSseData,
|
||||
ApiModelsSseEvent,
|
||||
ApiModelListResponse,
|
||||
ApiProcessingState,
|
||||
ApiRouterModelMeta,
|
||||
@ -120,6 +129,7 @@ declare global {
|
||||
// Model types
|
||||
ModelModalities,
|
||||
ModelOption,
|
||||
ModelLoadProgress,
|
||||
// Settings types
|
||||
SettingsChatServiceOptions,
|
||||
SettingsConfigValue,
|
||||
|
||||
@ -10,7 +10,7 @@
|
||||
import { getMessageEditContext } from '$lib/contexts';
|
||||
import { useProcessingState } from '$lib/hooks/use-processing-state.svelte';
|
||||
import { isLoading, isChatStreaming } from '$lib/stores/chat.svelte';
|
||||
import { copyToClipboard, deriveAgenticSections } from '$lib/utils';
|
||||
import { copyToClipboard, deriveAgenticSections, modelLoadProgressText } from '$lib/utils';
|
||||
import { AgenticSectionType } from '$lib/enums';
|
||||
import { REASONING_TAGS } from '$lib/constants/agentic';
|
||||
import { tick } from 'svelte';
|
||||
@ -185,6 +185,13 @@
|
||||
let hasNoContent = $derived(!message?.content?.trim());
|
||||
let isActivelyProcessing = $derived(isCurrentlyLoading || isStreaming);
|
||||
|
||||
// during a router auto-load the message has no model yet, so target the selected one
|
||||
let loadTargetModel = $derived(message.model ?? modelsStore.selectedModelName);
|
||||
let modelLoadProgress = $derived(
|
||||
isRouter && loadTargetModel ? modelsStore.getLoadProgress(loadTargetModel) : null
|
||||
);
|
||||
let modelLoadingText = $derived(modelLoadProgressText(modelLoadProgress));
|
||||
|
||||
let showProcessingInfoTop = $derived(
|
||||
message?.role === MessageRole.ASSISTANT &&
|
||||
isActivelyProcessing &&
|
||||
@ -220,7 +227,8 @@
|
||||
<div class="mt-6 w-full max-w-[48rem]" in:fade>
|
||||
<div class="processing-container">
|
||||
<span class="processing-text">
|
||||
{processingState.getPromptProgressText() ??
|
||||
{modelLoadingText ??
|
||||
processingState.getPromptProgressText() ??
|
||||
processingState.getProcessingMessage() ??
|
||||
'Processing...'}
|
||||
</span>
|
||||
@ -252,7 +260,8 @@
|
||||
<div class="mt-4 w-full max-w-[48rem]" in:fade>
|
||||
<div class="processing-container">
|
||||
<span class="processing-text">
|
||||
{processingState.getPromptProgressText() ??
|
||||
{modelLoadingText ??
|
||||
processingState.getPromptProgressText() ??
|
||||
processingState.getProcessingMessage() ??
|
||||
'Processing...'}
|
||||
</span>
|
||||
|
||||
@ -13,6 +13,7 @@
|
||||
import type { ModelOption } from '$lib/types/models';
|
||||
import { ServerModelStatus } from '$lib/enums';
|
||||
import { modelsStore, routerModels } from '$lib/stores/models.svelte';
|
||||
import { modelLoadFraction, modelLoadProgressText } from '$lib/utils';
|
||||
|
||||
interface Props {
|
||||
option: ModelOption;
|
||||
@ -50,11 +51,15 @@
|
||||
(serverStatus === ServerModelStatus.LOADED || isSleeping) && !isOperationInProgress
|
||||
);
|
||||
let isLoading = $derived(serverStatus === ServerModelStatus.LOADING || isOperationInProgress);
|
||||
|
||||
let loadProgress = $derived(isLoading ? modelsStore.getLoadProgress(option.model) : null);
|
||||
let loadPercent = $derived(Math.round(modelLoadFraction(loadProgress) * 100));
|
||||
let loadTitle = $derived(modelLoadProgressText(loadProgress));
|
||||
</script>
|
||||
|
||||
<div
|
||||
class={[
|
||||
'group flex w-full items-center gap-2 rounded-sm p-2 text-left text-sm transition focus:outline-none',
|
||||
'group relative flex w-full items-center gap-2 rounded-sm p-2 text-left text-sm transition focus:outline-none',
|
||||
'cursor-pointer hover:bg-muted focus:bg-muted',
|
||||
(isSelected || isHighlighted) && 'bg-accent text-accent-foreground',
|
||||
!(isSelected || isHighlighted) && 'hover:bg-accent hover:text-accent-foreground',
|
||||
@ -62,6 +67,7 @@
|
||||
]}
|
||||
role="option"
|
||||
aria-selected={isSelected || isHighlighted}
|
||||
title={loadTitle}
|
||||
tabindex="0"
|
||||
onclick={() => onSelect(option.id)}
|
||||
onmouseenter={onMouseEnter}
|
||||
@ -188,4 +194,15 @@
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
{#if isLoading}
|
||||
<div
|
||||
class="pointer-events-none absolute inset-x-0 bottom-0 h-0.5 overflow-hidden rounded-b-sm bg-muted"
|
||||
>
|
||||
<div
|
||||
class="h-full bg-primary transition-[width] duration-200 ease-out"
|
||||
style="width: {loadPercent}%"
|
||||
></div>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
@ -1,7 +1,8 @@
|
||||
export const API_MODELS = {
|
||||
LIST: '/v1/models',
|
||||
LOAD: '/models/load',
|
||||
UNLOAD: '/models/unload'
|
||||
UNLOAD: '/models/unload',
|
||||
SSE: '/models/sse'
|
||||
};
|
||||
|
||||
// chat completion routes, the control route drives realtime inference (e.g. end reasoning)
|
||||
|
||||
@ -37,6 +37,8 @@ export * from './mcp-form';
|
||||
export * from './mcp-resource';
|
||||
export * from './message-export';
|
||||
export * from './model-id';
|
||||
export * from './model-loading';
|
||||
export * from './sse';
|
||||
export * from './precision';
|
||||
export * from './processing-info';
|
||||
export * from './pwa';
|
||||
|
||||
14
tools/ui/src/lib/constants/model-loading.ts
Normal file
14
tools/ui/src/lib/constants/model-loading.ts
Normal file
@ -0,0 +1,14 @@
|
||||
/**
 * Caption displayed for each load stage that the /models/sse feed can report.
 * Keys are the stage identifiers, values are the text shown while that stage runs.
 */
export const MODEL_LOAD_STAGE_LABELS: Record<ApiModelLoadStage, string> = {
	text_model: 'Loading weights',
	spec_model: 'Loading draft',
	mmproj_model: 'Loading projector'
};
|
||||
|
||||
/**
 * Portion of the progress bar set aside for every load stage that follows
 * text_model. text_model takes whatever is left over, so a model without
 * extra stages fills the whole bar by itself.
 */
export const MODEL_LOAD_TAIL_SHARE = 0.1;
|
||||
16
tools/ui/src/lib/constants/sse.ts
Normal file
16
tools/ui/src/lib/constants/sse.ts
Normal file
@ -0,0 +1,16 @@
|
||||
/**
 * Wire-format tokens of a text/event-stream response. Both the chat
 * completion stream parser and the /models/sse status feed parser use them.
 */

/** Separates two lines that belong to the same event. */
export const SSE_LINE_SEPARATOR = '\n';

/** Separates two events: an empty line, i.e. two consecutive line breaks. */
export const SSE_RECORD_SEPARATOR = '\n\n';

/** Prefix of a data field; the value comes after it, optionally preceded by a space. */
export const SSE_DATA_PREFIX = 'data:';

/** Payload that marks the end of the chat completion stream. */
export const SSE_DONE_MARKER = '[DONE]';
|
||||
@ -54,7 +54,7 @@ export {
|
||||
|
||||
export { ModelModality } from './model.enums';
|
||||
|
||||
export { ServerRole, ServerModelStatus } from './server.enums';
|
||||
export { ServerRole, ServerModelStatus, ServerModelsSseEventType } from './server.enums';
|
||||
|
||||
export { ParameterSource, SyncableParameterType, SettingsFieldType } from './settings.enums';
|
||||
|
||||
|
||||
@ -19,3 +19,17 @@ export enum ServerModelStatus {
|
||||
SLEEPING = 'sleeping',
|
||||
FAILED = 'failed'
|
||||
}
|
||||
|
||||
/**
 * Event names used on the /models/sse model status feed (ROUTER mode).
 * Each value is the event name as emitted by the C++ server in
 * tools/server/server-models.cpp, so the strings must stay in sync with it.
 */
export enum ServerModelsSseEventType {
	/** Status event for one model. */
	STATUS_CHANGE = 'status_change',
	/** Status event for one model. */
	MODEL_STATUS = 'model_status',
	/** Status event for one model. */
	STATUS_UPDATE = 'status_update',
	/** The model list should be fetched again. */
	MODELS_RELOAD = 'models_reload',
	/** A model was removed from the list. */
	MODEL_REMOVE = 'model_remove',
	/** Download progress report. */
	DOWNLOAD_PROGRESS = 'download_progress'
}
|
||||
|
||||
@ -10,7 +10,10 @@ import {
|
||||
SETTINGS_KEYS,
|
||||
API_CHAT,
|
||||
API_SLOTS,
|
||||
CONTROL_ACTION
|
||||
CONTROL_ACTION,
|
||||
SSE_LINE_SEPARATOR,
|
||||
SSE_DATA_PREFIX,
|
||||
SSE_DONE_MARKER
|
||||
} from '$lib/constants';
|
||||
import {
|
||||
AttachmentType,
|
||||
@ -18,8 +21,7 @@ import {
|
||||
FileTypeAudio,
|
||||
MessageRole,
|
||||
MimeTypeAudio,
|
||||
ReasoningFormat,
|
||||
UrlProtocol
|
||||
ReasoningFormat
|
||||
} from '$lib/enums';
|
||||
import type {
|
||||
ApiChatMessageContentPart,
|
||||
@ -642,15 +644,15 @@ export class ChatService {
|
||||
if (abortSignal?.aborted) break;
|
||||
|
||||
chunk += decoder.decode(value, { stream: true });
|
||||
const lines = chunk.split('\n');
|
||||
const lines = chunk.split(SSE_LINE_SEPARATOR);
|
||||
chunk = lines.pop() || '';
|
||||
|
||||
for (const line of lines) {
|
||||
if (abortSignal?.aborted) break;
|
||||
|
||||
if (line.startsWith(UrlProtocol.DATA)) {
|
||||
const data = line.slice(6);
|
||||
if (data === '[DONE]') {
|
||||
if (line.startsWith(SSE_DATA_PREFIX)) {
|
||||
const data = line.slice(SSE_DATA_PREFIX.length).trim();
|
||||
if (data === SSE_DONE_MARKER) {
|
||||
streamFinished = true;
|
||||
|
||||
continue;
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
import { base } from '$app/paths';
|
||||
import { SvelteMap, SvelteSet } from 'svelte/reactivity';
|
||||
import { toast } from 'svelte-sonner';
|
||||
import { ServerModelStatus, ModelModality } from '$lib/enums';
|
||||
import { ServerModelStatus, ServerModelsSseEventType, ModelModality } from '$lib/enums';
|
||||
import { ModelsService } from '$lib/services/models.service';
|
||||
import { PropsService } from '$lib/services/props.service';
|
||||
import { serverStore, isRouterMode } from '$lib/stores/server.svelte';
|
||||
@ -8,11 +9,15 @@ import {
|
||||
detectThinkingSupport,
|
||||
detectThinkingSupportWithReason
|
||||
} from '$lib/utils/chat-template-thinking-detector';
|
||||
import { TTLCache } from '$lib/utils';
|
||||
import { TTLCache, getAuthHeaders } from '$lib/utils';
|
||||
import {
|
||||
MODEL_PROPS_CACHE_TTL_MS,
|
||||
MODEL_PROPS_CACHE_MAX_ENTRIES,
|
||||
FAVORITE_MODELS_LOCALSTORAGE_KEY
|
||||
FAVORITE_MODELS_LOCALSTORAGE_KEY,
|
||||
API_MODELS,
|
||||
SSE_RECORD_SEPARATOR,
|
||||
SSE_LINE_SEPARATOR,
|
||||
SSE_DATA_PREFIX
|
||||
} from '$lib/constants';
|
||||
|
||||
import { conversationsStore } from '$lib/stores/conversations.svelte';
|
||||
@ -55,6 +60,15 @@ class ModelsStore {
|
||||
private modelUsage = $state<Map<string, SvelteSet<string>>>(new Map());
|
||||
private modelLoadingStates = new SvelteMap<string, boolean>();
|
||||
|
||||
// /models/sse feed state, the single source of truth for status and load progress
|
||||
private statusAbort: AbortController | null = null;
|
||||
private statusReaderActive = false;
|
||||
private loadProgress = new SvelteMap<string, ModelLoadProgress>();
|
||||
private statusWaiters = new Map<
|
||||
string,
|
||||
{ target: ServerModelStatus; resolve: () => void; reject: (e: Error) => void }
|
||||
>();
|
||||
|
||||
favoriteModelIds = $state<Set<string>>(this.loadFavoritesFromStorage());
|
||||
|
||||
/**
|
||||
@ -626,49 +640,218 @@ class ModelsStore {
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* WORKAROUND: Polling for model status after load/unload operations.
|
||||
*
|
||||
* Currently, `/models/load` and `/models/unload` return success before
|
||||
* the operation actually completes on the server.
|
||||
*
|
||||
* TODO: Remove polling once llama-server properly waits for the operation
|
||||
* to complete before returning success.
|
||||
*/
|
||||
|
||||
private static readonly STATUS_POLL_INTERVAL = 500;
|
||||
// reconnect delay after the feed drops or the server is not ready yet
|
||||
private static readonly SSE_RECONNECT_MS = 1000;
|
||||
|
||||
/**
|
||||
* Poll for expected model status after load/unload operation.
|
||||
* Keeps polling until the model reaches the expected status or fails.
|
||||
* Open the /models/sse feed and keep it live with auto reconnect.
|
||||
* Idempotent and router mode only. The feed drives status and progress,
|
||||
* so it replaces any post-operation polling.
|
||||
*/
|
||||
private async pollForModelStatus(
|
||||
modelId: string,
|
||||
expectedStatus: ServerModelStatus
|
||||
): Promise<void> {
|
||||
let attempt = 0;
|
||||
while (true) {
|
||||
await this.fetchRouterModels();
|
||||
subscribeStatus(): void {
|
||||
if (this.statusReaderActive) return;
|
||||
if (!isRouterMode()) return;
|
||||
|
||||
const currentStatus = this.getModelStatus(modelId);
|
||||
if (currentStatus === expectedStatus) return;
|
||||
this.statusReaderActive = true;
|
||||
this.statusAbort = new AbortController();
|
||||
void this.runStatusReader(this.statusAbort.signal);
|
||||
}
|
||||
|
||||
if (currentStatus === ServerModelStatus.FAILED) {
|
||||
throw new Error(
|
||||
`Model failed to ${expectedStatus === ServerModelStatus.LOADED ? 'load' : 'unload'}`
|
||||
);
|
||||
/**
 * Stop the /models/sse reader: flag it inactive, abort the request that is
 * in flight (if any) and forget every per-model load progress entry.
 */
unsubscribeStatus(): void {
	this.statusReaderActive = false;

	const controller = this.statusAbort;
	if (controller) {
		controller.abort();
	}
	this.statusAbort = null;

	this.loadProgress.clear();
}
|
||||
|
||||
/**
 * Look up the load progress recorded for a model.
 *
 * @param modelId - model to look up
 * @returns the stored progress, or null when none is recorded for the model
 */
getLoadProgress(modelId: string): ModelLoadProgress | null {
	const entry = this.loadProgress.get(modelId);
	if (entry === undefined || entry === null) return null;

	return entry;
}
|
||||
|
||||
/**
|
||||
* Read the feed and reconnect until unsubscribed. Splits the byte stream
|
||||
* into SSE records on the blank line boundary.
|
||||
*/
|
||||
private async runStatusReader(signal: AbortSignal): Promise<void> {
|
||||
const decoder = new TextDecoder();
|
||||
|
||||
while (!signal.aborted) {
|
||||
try {
|
||||
const response = await fetch(`${base}${API_MODELS.SSE}`, {
|
||||
headers: getAuthHeaders(),
|
||||
signal
|
||||
});
|
||||
|
||||
if (response.ok && response.body) {
|
||||
const reader = response.body.getReader();
|
||||
let buffer = '';
|
||||
|
||||
while (!signal.aborted) {
|
||||
const { value, done } = await reader.read();
|
||||
if (done) break;
|
||||
|
||||
buffer += decoder.decode(value, { stream: true });
|
||||
|
||||
let boundary = buffer.indexOf(SSE_RECORD_SEPARATOR);
|
||||
while (boundary !== -1) {
|
||||
this.handleStatusRecord(buffer.slice(0, boundary));
|
||||
buffer = buffer.slice(boundary + SSE_RECORD_SEPARATOR.length);
|
||||
boundary = buffer.indexOf(SSE_RECORD_SEPARATOR);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// network drop or abort falls through to the reconnect delay
|
||||
}
|
||||
|
||||
if (
|
||||
expectedStatus === ServerModelStatus.LOADED &&
|
||||
currentStatus === ServerModelStatus.UNLOADED &&
|
||||
attempt > 2
|
||||
) {
|
||||
throw new Error('Model was unloaded unexpectedly during loading');
|
||||
}
|
||||
if (signal.aborted) return;
|
||||
|
||||
attempt++;
|
||||
await new Promise((resolve) => setTimeout(resolve, ModelsStore.STATUS_POLL_INTERVAL));
|
||||
await new Promise((resolve) => setTimeout(resolve, ModelsStore.SSE_RECONNECT_MS));
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parse one SSE record and hand it to applyStatusEvent.
 *
 * The payload is the concatenation of the record's data lines and is
 * expected to be a JSON object (the envelope) carrying its own model, event
 * and data fields. Records without data lines, with malformed JSON, or whose
 * JSON is not an object are ignored.
 *
 * @param record - text of a single record, without the trailing blank line
 */
private handleStatusRecord(record: string): void {
	const payload = record
		.split(SSE_LINE_SEPARATOR)
		.filter((line) => line.startsWith(SSE_DATA_PREFIX))
		.map((line) => line.slice(SSE_DATA_PREFIX.length).trim())
		.join(SSE_LINE_SEPARATOR);

	if (payload.length === 0) return;

	let parsed: unknown;
	try {
		parsed = JSON.parse(payload);
	} catch {
		// malformed JSON: skip this record, keep the feed alive
		return;
	}

	// JSON.parse also accepts null, primitives and arrays. Reading `.event`
	// on null would throw, and that error would surface in the stream reader
	// as if the connection had dropped, so reject non-objects here.
	if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) return;

	this.applyStatusEvent(parsed as ApiModelsSseEvent);
}
|
||||
|
||||
/**
 * Dispatch one feed record according to its event kind:
 * status_change / model_status / status_update go to applyModelStatus,
 * models_reload refetches the router model list, model_remove removes the
 * row. download_progress and any other kind are ignored here.
 */
private applyStatusEvent(event: ApiModelsSseEvent): void {
	const kind = event.event;

	const carriesStatus =
		kind === ServerModelsSseEventType.STATUS_CHANGE ||
		kind === ServerModelsSseEventType.MODEL_STATUS ||
		kind === ServerModelsSseEventType.STATUS_UPDATE;

	if (carriesStatus) {
		this.applyModelStatus(event);
		return;
	}

	if (kind === ServerModelsSseEventType.MODELS_RELOAD) {
		void this.fetchRouterModels();
		return;
	}

	if (kind === ServerModelsSseEventType.MODEL_REMOVE) {
		this.removeRouterModel(event.model);
	}
}
|
||||
|
||||
/**
 * Apply a status envelope to the store.
 *
 * In order: writes the new status onto the model row, records progress
 * while the model is loading (and drops it for any other status), refreshes
 * modalities once the model is loaded, then rejects the pending awaiter on
 * failure or resolves it when its target status is reached. Envelopes
 * without a model name or a status are ignored.
 */
private applyModelStatus(event: ApiModelsSseEvent): void {
	const { model, data } = event;
	if (!model || !data?.status) return;

	const { status, progress, exit_code: exitCode } = data;

	this.setRouterModelStatus(model, status);

	if (status !== ServerModelStatus.LOADING) {
		this.loadProgress.delete(model);
	} else if (progress) {
		this.loadProgress.set(model, progress);
	}

	if (status === ServerModelStatus.LOADED) {
		void this.updateModelModalities(model);
	}

	// an unload that reports a non-zero exit code counts as a failure too
	const exitedWithError = status === ServerModelStatus.UNLOADED && (exitCode ?? 0) !== 0;

	if (status !== ServerModelStatus.FAILED && !exitedWithError) {
		this.settleStatus(model, status);
		return;
	}

	this.rejectStatus(model, new Error(`Model failed: ${this.toDisplayName(model)}`));
}
|
||||
|
||||
/**
 * Remove a model that the feed reported as gone: drop its row and its load
 * progress, and reject its pending awaiter. Does nothing when the model is
 * not in the router model list.
 */
private removeRouterModel(modelId: string): void {
	const isListed = this.routerModels.some((entry) => entry.id === modelId);
	if (!isListed) return;

	this.routerModels = this.routerModels.filter((entry) => entry.id !== modelId);
	this.loadProgress.delete(modelId);

	const reason = new Error(`Model removed: ${this.toDisplayName(modelId)}`);
	this.rejectStatus(modelId, reason);
}
|
||||
|
||||
/**
 * Set the status value of one model row. The list is replaced by a new
 * array so the assignment triggers reactivity. Unknown models and unchanged
 * statuses leave the list untouched.
 */
private setRouterModelStatus(modelId: string, status: ServerModelStatus): void {
	const position = this.routerModels.findIndex((entry) => entry.id === modelId);
	if (position === -1) return;

	const existing = this.routerModels[position];
	if (existing.status.value === status) return;

	const updated = { ...existing, status: { ...existing.status, value: status } };
	this.routerModels = this.routerModels.map((entry, i) => (i === position ? updated : entry));
}
|
||||
|
||||
/**
 * Create a promise that settles through the status feed: it resolves once
 * the model is reported with the target status. Only one awaiter is stored
 * per model (one operation runs per model at a time), so registering again
 * for the same model replaces the stored entry.
 */
private waitForStatus(modelId: string, target: ServerModelStatus): Promise<void> {
	return new Promise<void>((resolve, reject) => {
		const waiter = { target, resolve, reject };
		this.statusWaiters.set(modelId, waiter);
	});
}
|
||||
|
||||
/**
 * Resolve the model's awaiter if the reported status is the one it waits
 * for, and remove it from the registry. Any other status leaves it pending.
 */
private settleStatus(modelId: string, status: ServerModelStatus): void {
	const pending = this.statusWaiters.get(modelId);
	if (!pending || pending.target !== status) return;

	this.statusWaiters.delete(modelId);
	pending.resolve();
}
|
||||
|
||||
/**
 * Reject the model's awaiter with the given error and remove it from the
 * registry. Does nothing when no awaiter is registered for the model.
 */
private rejectStatus(modelId: string, error: Error): void {
	const pending = this.statusWaiters.get(modelId);
	if (!pending) return;

	this.statusWaiters.delete(modelId);
	pending.reject(error);
}
|
||||
|
||||
@ -679,12 +862,18 @@ class ModelsStore {
|
||||
this.modelLoadingStates.set(modelId, true);
|
||||
this.error = null;
|
||||
|
||||
// the feed drives completion, so it must be live before the request
|
||||
this.subscribeStatus();
|
||||
|
||||
const reachedLoaded = this.waitForStatus(modelId, ServerModelStatus.LOADED);
|
||||
reachedLoaded.catch(() => {});
|
||||
|
||||
try {
|
||||
await ModelsService.load(modelId);
|
||||
await this.pollForModelStatus(modelId, ServerModelStatus.LOADED);
|
||||
await this.updateModelModalities(modelId);
|
||||
await reachedLoaded;
|
||||
toast.success(`Model loaded: ${this.toDisplayName(modelId)}`);
|
||||
} catch (error) {
|
||||
this.rejectStatus(modelId, error instanceof Error ? error : new Error('load failed'));
|
||||
this.error = error instanceof Error ? error.message : 'Failed to load model';
|
||||
toast.error(`Failed to load model: ${this.toDisplayName(modelId)}`);
|
||||
throw error;
|
||||
@ -700,11 +889,17 @@ class ModelsStore {
|
||||
this.modelLoadingStates.set(modelId, true);
|
||||
this.error = null;
|
||||
|
||||
this.subscribeStatus();
|
||||
|
||||
const reachedUnloaded = this.waitForStatus(modelId, ServerModelStatus.UNLOADED);
|
||||
reachedUnloaded.catch(() => {});
|
||||
|
||||
try {
|
||||
await ModelsService.unload(modelId);
|
||||
await this.pollForModelStatus(modelId, ServerModelStatus.UNLOADED);
|
||||
await reachedUnloaded;
|
||||
toast.info(`Model unloaded: ${this.toDisplayName(modelId)}`);
|
||||
} catch (error) {
|
||||
this.rejectStatus(modelId, error instanceof Error ? error : new Error('unload failed'));
|
||||
this.error = error instanceof Error ? error.message : 'Failed to unload model';
|
||||
toast.error(`Failed to unload model: ${this.toDisplayName(modelId)}`);
|
||||
throw error;
|
||||
@ -783,6 +978,9 @@ class ModelsStore {
|
||||
}
|
||||
|
||||
clear(): void {
|
||||
this.unsubscribeStatus();
|
||||
this.statusWaiters.forEach((waiter) => waiter.reject(new Error('Models store cleared')));
|
||||
this.statusWaiters.clear();
|
||||
this.models = [];
|
||||
this.routerModels = [];
|
||||
this.loading = false;
|
||||
|
||||
48
tools/ui/src/lib/types/api.d.ts
vendored
48
tools/ui/src/lib/types/api.d.ts
vendored
@ -1,4 +1,10 @@
|
||||
import type { ContentPartType, FileTypeAudio, ServerModelStatus, ServerRole } from '$lib/enums';
|
||||
import type {
|
||||
ContentPartType,
|
||||
FileTypeAudio,
|
||||
ServerModelStatus,
|
||||
ServerModelsSseEventType,
|
||||
ServerRole
|
||||
} from '$lib/enums';
|
||||
import type { ChatMessagePromptProgress, ChatRole } from './chat';
|
||||
|
||||
export type AudioInputFormat = FileTypeAudio.WAV | FileTypeAudio.MP3;
|
||||
@ -96,6 +102,46 @@ export interface ApiModelDataEntry {
|
||||
meta?: Record<string, unknown> | null;
|
||||
}
|
||||
|
||||
/**
 * Stage names the /models/sse feed reports while a model loads,
 * listed in load order.
 */
export type ApiModelLoadStage = 'text_model' | 'spec_model' | 'mmproj_model';
|
||||
|
||||
/**
 * Snapshot of a model load in progress.
 */
export interface ApiModelsSseProgress {
	/** Full stage plan of the load, in order. */
	stages: ApiModelLoadStage[];
	/** Stage that is currently active. */
	current: ApiModelLoadStage;
	/** Fractional progress of the active stage (0.0 -> 1.0). */
	value: number;
}
|
||||
|
||||
/**
 * Status payload of a /models/sse envelope.
 */
export interface ApiModelsSseData {
	/** Status reported for the model. */
	status: ServerModelStatus;
	/** Load progress snapshot, when the record includes one. */
	progress?: ApiModelsSseProgress;
	/** Exit code; appears on unload. */
	exit_code?: number;
}
|
||||
|
||||
/**
 * One /models/sse record.
 *
 * `event` discriminates the kind of record multiplexed on the feed. Only
 * the status_* events carry a status payload; models_reload signals a full
 * list refresh, model_remove drops a row and download_* drive the download UI.
 */
export interface ApiModelsSseEvent {
	/** Name of the model instance the record targets. */
	model: string;
	/** Kind of record. */
	event: ServerModelsSseEventType;
	/** Status payload. Optional because only some event kinds carry one. */
	data?: ApiModelsSseData;
}
|
||||
|
||||
export interface ApiModelDetails {
|
||||
name: string;
|
||||
model: string;
|
||||
|
||||
@ -11,6 +11,10 @@ export type {
|
||||
ApiChatMessageData,
|
||||
ApiModelStatus,
|
||||
ApiModelDataEntry,
|
||||
ApiModelLoadStage,
|
||||
ApiModelsSseProgress,
|
||||
ApiModelsSseData,
|
||||
ApiModelsSseEvent,
|
||||
ApiModelDetails,
|
||||
ApiModelListResponse,
|
||||
ApiLlamaCppServerProps,
|
||||
@ -70,7 +74,12 @@ export type {
|
||||
} from './database';
|
||||
|
||||
// Model types
|
||||
export type { ModelModalities, ModelOption, ModalityCapabilities } from './models';
|
||||
export type {
|
||||
ModelModalities,
|
||||
ModelOption,
|
||||
ModelLoadProgress,
|
||||
ModalityCapabilities
|
||||
} from './models';
|
||||
|
||||
// Settings types
|
||||
export type {
|
||||
|
||||
13
tools/ui/src/lib/types/models.d.ts
vendored
13
tools/ui/src/lib/types/models.d.ts
vendored
@ -1,4 +1,4 @@
|
||||
import type { ApiModelDataEntry, ApiModelDetails } from '$lib/types/api';
|
||||
import type { ApiModelDataEntry, ApiModelDetails, ApiModelLoadStage } from '$lib/types/api';
|
||||
|
||||
export interface ModelModalities {
|
||||
vision: boolean;
|
||||
@ -20,6 +20,17 @@ export interface ModelOption {
|
||||
tags?: string[];
|
||||
}
|
||||
|
||||
/**
 * Ephemeral UI-only load progress for one model instance.
 * Lives only while a load runs, driven by the /models/sse feed.
 * All three fields are required: an entry describes a stage that is
 * already active.
 */
export interface ModelLoadProgress {
	/** Ordered stage plan of the load. */
	stages: ApiModelLoadStage[];
	/** Stage that is currently active. */
	current: ApiModelLoadStage;
	/** Fractional progress of the active stage (0.0 -> 1.0). */
	value: number;
}
|
||||
|
||||
export interface ParsedModelId {
|
||||
raw: string;
|
||||
orgName: string | null;
|
||||
|
||||
@ -44,6 +44,9 @@ export { buildProxiedUrl, buildProxiedHeaders } from './cors-proxy';
|
||||
// URL utilities
|
||||
export { extractRootDomain, sanitizeExternalUrl } from './url';
|
||||
|
||||
// Progress helpers
|
||||
export { modelLoadFraction, modelLoadProgressText } from './progress';
|
||||
|
||||
// Conversation utilities
|
||||
export { createMessageCountMap, getMessageCount } from './conversation-utils';
|
||||
|
||||
|
||||
43
tools/ui/src/lib/utils/progress.ts
Normal file
43
tools/ui/src/lib/utils/progress.ts
Normal file
@ -0,0 +1,43 @@
|
||||
/**
|
||||
* Model load progress helpers for the /models/sse surfaces
|
||||
* (selector row and chat message).
|
||||
*/
|
||||
|
||||
import { MODEL_LOAD_STAGE_LABELS, MODEL_LOAD_TAIL_SHARE } from '$lib/constants';
|
||||
|
||||
/**
 * Look up the display label of a model load stage.
 *
 * @param stage - stage identifier
 * @returns the entry of MODEL_LOAD_STAGE_LABELS for that stage
 */
export function modelLoadStageLabel(stage: ApiModelLoadStage): string {
	const label = MODEL_LOAD_STAGE_LABELS[stage];

	return label;
}
|
||||
|
||||
/**
 * Overall load fraction (0.0 -> 1.0) across the declared stage plan.
 *
 * The first stage fills [0, 1 - tail], where tail is MODEL_LOAD_TAIL_SHARE
 * times the number of later stages; each later stage owns one tail slice.
 * A current stage that is not part of the plan is treated like the first one.
 *
 * The progress value originates from the server feed, so it is sanitized:
 * a non-finite value counts as 0, the value is clamped to [0, 1], and so is
 * the result. Callers can therefore use the result directly as a width.
 *
 * @param progress - progress snapshot, or null when nothing is loading
 * @returns fraction in [0, 1]; 0 when progress is null
 */
export function modelLoadFraction(progress: ModelLoadProgress | null): number {
	if (!progress) return 0;

	const clampUnit = (x: number): number => Math.min(Math.max(x, 0), 1);

	const { stages, current, value } = progress;
	const stageValue = Number.isFinite(value) ? clampUnit(value) : 0;

	const tailCount = Math.max(stages.length - 1, 0);
	const textCeiling = 1 - tailCount * MODEL_LOAD_TAIL_SHARE;
	const idx = stages.indexOf(current);

	const fraction =
		idx <= 0
			? stageValue * textCeiling
			: textCeiling + (idx - 1 + stageValue) * MODEL_LOAD_TAIL_SHARE;

	return clampUnit(fraction);
}
|
||||
|
||||
/**
 * Build the one-line progress caption: the label of the active stage
 * followed by the overall percentage, rounded to an integer.
 *
 * @param progress - progress snapshot, or null
 * @returns the caption, or null when there is no progress to show
 */
export function modelLoadProgressText(progress: ModelLoadProgress | null): string | null {
	if (!progress) return null;

	const stageLabel = modelLoadStageLabel(progress.current);
	const percent = Math.round(modelLoadFraction(progress) * 100);

	return `${stageLabel} ${percent}%`;
}
|
||||
@ -230,6 +230,20 @@
|
||||
}
|
||||
});
|
||||
|
||||
// Live model status and load progress via the /models/sse feed (router mode)
|
||||
$effect(() => {
|
||||
if (!browser) return;
|
||||
if (!isRouterMode()) return;
|
||||
|
||||
untrack(() => {
|
||||
modelsStore.subscribeStatus();
|
||||
});
|
||||
|
||||
return () => {
|
||||
modelsStore.unsubscribeStatus();
|
||||
};
|
||||
});
|
||||
|
||||
// Background MCP server health checks on app load
|
||||
// Fetch enabled servers from settings and run health checks in background
|
||||
$effect(() => {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user