tentative Metal support

This commit is contained in:
Piotr Wilkin 2026-05-19 11:52:22 +02:00
parent 28ef941775
commit 2bfe4ff9ca
7 changed files with 496 additions and 0 deletions

View File

@ -36,6 +36,32 @@ void ggml_metal_set_abort_callback (ggml_metal_t ctx, ggml_abort_callback abort
bool ggml_metal_supports_family (ggml_metal_t ctx, int family);
void ggml_metal_capture_next_compute(ggml_metal_t ctx);
//
// Profiling
//
// Opaque profiler state, owned by the C++ backend layer (ggml-metal.cpp). Holds the std::vector of
// records returned via the ggml_backend_profiler interface. ggml_metal_t keeps a borrowed pointer
// so that graph_compute can push records when sampling is active.
// The layout is deliberately hidden from C / Objective-C code; use the bridge functions below.
struct ggml_metal_profiler_state;
// Inject (or clear, with NULL) the profiler state pointer. Called once at backend init.
// The context only stores the pointer; it does not take ownership of `state`.
void ggml_metal_set_profiler_state(ggml_metal_t ctx, struct ggml_metal_profiler_state * state);
// Bridge function implemented in ggml-metal.cpp. Used by graph_compute (in .m) to push records.
// Appends one op record for `node` spanning [start_ns, end_ns]. A NULL `state` or `node` is ignored.
void ggml_metal_profiler_push_record(
struct ggml_metal_profiler_state * state,
const struct ggml_tensor * node,
uint64_t start_ns,
uint64_t end_ns);
// Query whether the injected profiler state is currently enabled.
// (Avoids exposing the C++ struct layout to the .m file.)
// Returns false when `state` is NULL.
bool ggml_metal_profiler_is_enabled(struct ggml_metal_profiler_state * state);
// Query the split-id currently set on the profiler state.
// Returns -1 when `state` is NULL.
int ggml_metal_profiler_get_split_id(struct ggml_metal_profiler_state * state);
#ifdef __cplusplus
}
#endif

View File

@ -6,6 +6,7 @@
#import "ggml-metal-impl.h"
#import "ggml-metal-common.h"
#import "ggml-metal-ops.h"
#import "ggml-profiler.h"
#import <Foundation/Foundation.h>
@ -79,6 +80,19 @@ struct ggml_metal {
// error state - set when a command buffer fails during synchronize
// once set, graph_compute will return GGML_STATUS_FAILED until the backend is recreated
bool has_error;
// Profiling
// Borrowed; owned by the C++ backend layer (ggml-metal.cpp). NULL when profiling is unavailable.
struct ggml_metal_profiler_state * profiler_state;
// Per-graph-compute scratch state populated when profiling is active for the current invocation.
// Lifetime: from start of ggml_metal_graph_compute until its end.
bool profiler_active;
ggml_metal_sample_buf_t profiler_sample_buf;
size_t profiler_total_slots;
uint64_t profiler_cpu_anchor_ns;
uint64_t profiler_gpu_anchor_ns;
struct ggml_metal_op_sample_slot * profiler_slot_map; // size = gf->n_nodes
};
ggml_metal_t ggml_metal_init(ggml_metal_device_t dev) {
@ -450,6 +464,34 @@ enum ggml_status ggml_metal_graph_compute(ggml_metal_t ctx, struct ggml_cgraph *
// keep the memory wired
ggml_metal_device_rsets_keep_alive(ctx->dev);
// Decide whether profiling is active for this invocation. Activation is sticky for the whole
// call: allocate sample buffer + slot map up front so the encode_async block can use them.
ctx->profiler_active = false;
if (ctx->profiler_state != NULL && ggml_metal_profiler_is_enabled(ctx->profiler_state)) {
if (ggml_metal_device_supports_profiling(ctx->dev) && gf->n_nodes > 0) {
const size_t total_slots = 2 * (size_t) gf->n_nodes;
ctx->profiler_sample_buf = ggml_metal_device_create_sample_buf(ctx->dev, total_slots);
if (ctx->profiler_sample_buf != NULL) {
ctx->profiler_total_slots = total_slots;
ctx->profiler_slot_map = (struct ggml_metal_op_sample_slot *) calloc(
(size_t) gf->n_nodes, sizeof(struct ggml_metal_op_sample_slot));
if (ctx->profiler_slot_map != NULL) {
// Mark all entries unused (node_idx < 0).
for (int i = 0; i < gf->n_nodes; ++i) {
ctx->profiler_slot_map[i].node_idx = -1;
}
ggml_metal_device_sample_timestamps(ctx->dev,
&ctx->profiler_cpu_anchor_ns,
&ctx->profiler_gpu_anchor_ns);
ctx->profiler_active = true;
} else {
ggml_metal_sample_buf_free(ctx->profiler_sample_buf);
ctx->profiler_sample_buf = NULL;
}
}
}
}
// submit the ggml compute graph to the GPU by creating command buffers and encoding the ops in them
// the first n_nodes_0 are encoded and submitted for processing directly by the calling thread
// while these nodes are processing, we start n_cb threads to enqueue the rest of the nodes
@ -609,6 +651,66 @@ enum ggml_status ggml_metal_graph_compute(ggml_metal_t ctx, struct ggml_cgraph *
ctx->capture_started = false;
}
// Profiling drain: wait for all command buffers to complete, resolve timestamps, push records.
// This forces a synchronous wait; Vulkan does the same when its profiler is active.
if (ctx->profiler_active) {
{
id<MTLCommandBuffer> cmd_buf = ctx->cmd_bufs[n_cb].obj;
if (cmd_buf) {
[cmd_buf waitUntilCompleted];
}
}
for (int i = 0; i < n_cb; ++i) {
id<MTLCommandBuffer> cmd_buf = ctx->cmd_bufs[i].obj;
if (cmd_buf) {
// Ensure cmd_bufs that were not auto-enqueued get committed.
if ([cmd_buf status] == MTLCommandBufferStatusNotEnqueued) {
[cmd_buf commit];
}
[cmd_buf waitUntilCompleted];
}
}
uint64_t * ns = (uint64_t *) calloc(ctx->profiler_total_slots, sizeof(uint64_t));
if (ns != NULL) {
ggml_metal_sample_buf_resolve(ctx->profiler_sample_buf,
/*base=*/0,
ctx->profiler_total_slots,
ctx->profiler_cpu_anchor_ns,
ctx->profiler_gpu_anchor_ns,
ns);
for (int i = 0; i < gf->n_nodes; ++i) {
const struct ggml_metal_op_sample_slot * slot = &ctx->profiler_slot_map[i];
if (slot->node_idx < 0) {
continue;
}
if (slot->slot_start >= ctx->profiler_total_slots ||
slot->slot_end >= ctx->profiler_total_slots) {
continue;
}
const uint64_t t0 = ns[slot->slot_start];
const uint64_t t1 = ns[slot->slot_end];
if (t0 == 0 || t1 == 0 || t1 < t0) {
continue;
}
ggml_metal_profiler_push_record(ctx->profiler_state,
ggml_graph_node(gf, slot->node_idx),
t0,
t1);
}
free(ns);
}
free(ctx->profiler_slot_map);
ctx->profiler_slot_map = NULL;
ggml_metal_sample_buf_free(ctx->profiler_sample_buf);
ctx->profiler_sample_buf = NULL;
ctx->profiler_total_slots = 0;
ctx->profiler_active = false;
}
}
return GGML_STATUS_SUCCESS;
@ -660,6 +762,10 @@ ggml_metal_event_t ggml_metal_get_ev_cpy(ggml_metal_t ctx) {
return ctx->ev_cpy;
}
// Store the profiler-state pointer on the context; passing NULL clears it.
// The pointer is borrowed: the context does not free `state`.
void ggml_metal_set_profiler_state(ggml_metal_t ctx, struct ggml_metal_profiler_state * state) {
ctx->profiler_state = state;
}
void ggml_metal_set_n_cb(ggml_metal_t ctx, int n_cb) {
if (ctx->n_cb != n_cb) {
ctx->n_cb = MIN(n_cb, GGML_METAL_MAX_COMMAND_BUFFERS);
@ -704,6 +810,17 @@ void ggml_metal_set_n_cb(ggml_metal_t ctx, int n_cb) {
ctx->debug_graph,
ctx->debug_fusion);
if (ctx->profiler_active) {
// Base slot for this command buffer is 2 * (first graph-node index it processes).
// The encoder uses one (start, end) pair per encoded op group; we over-allocate so
// that empty/filtered nodes simply leave gaps in the sample buffer.
const size_t base_slot = 2 * (size_t) idx_start;
ggml_metal_op_enable_profiling(ctx_op,
ctx->profiler_sample_buf,
base_slot,
ctx->profiler_slot_map);
}
for (int idx = 0; idx < ggml_metal_op_n_nodes(ctx_op); ++idx) {
const int res = ggml_metal_op_encode(ctx_op, idx);
if (res == 0) {

View File

@ -91,6 +91,17 @@ void ggml_metal_encoder_memory_barrier(ggml_metal_encoder_t encoder);
void ggml_metal_encoder_end_encoding(ggml_metal_encoder_t encoder);
//
// MTLCounterSampleBuffer wrapper (used by the profiler)
//
// Opaque handle to a counter sample buffer; create with ggml_metal_device_create_sample_buf()
// and release with ggml_metal_sample_buf_free().
typedef struct ggml_metal_sample_buf * ggml_metal_sample_buf_t;
// Insert a GPU timestamp sample on the encoder at the given slot.
// Caller must ensure (a) the encoder belongs to a command buffer using a counter sample buffer that
// supports MTLCounterSamplingPointAtDispatchBoundary, and (b) `index` is unique within the buffer.
// `index` is the slot inside `buf` that receives the sample.
void ggml_metal_encoder_sample_timestamp(ggml_metal_encoder_t encoder, ggml_metal_sample_buf_t buf, size_t index);
//
// MTLLibrary wrapper
//
@ -292,6 +303,35 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
const struct ggml_metal_device_props * ggml_metal_device_get_props(ggml_metal_device_t dev);
//
// Profiling helpers
//
// Returns true if the device supports MTLCounterSamplingPointAtDispatchBoundary on compute encoders
// AND exposes the MTLCommonCounterSetTimestamp counter set.
bool ggml_metal_device_supports_profiling(ggml_metal_device_t dev);
// Allocate a counter sample buffer with `sample_count` slots backed by the timestamp counter set.
// Returns NULL on failure (e.g. unsupported device) or when `sample_count` is 0.
// The returned handle must be released with ggml_metal_sample_buf_free().
ggml_metal_sample_buf_t ggml_metal_device_create_sample_buf(ggml_metal_device_t dev, size_t sample_count);
// Release a sample buffer created by ggml_metal_device_create_sample_buf(). NULL is accepted.
void ggml_metal_sample_buf_free(ggml_metal_sample_buf_t buf);
// Capture a correlated pair of CPU/GPU timestamps. Both outputs are expressed in nanoseconds:
// the raw Mach-absolute tick values are converted internally via mach_timebase_info.
// Either output pointer may be NULL. On OS versions without the sampling API both outputs are 0.
// Used to anchor the GPU timestamp domain to the CPU clock returned by ggml_profiler_time_ns().
void ggml_metal_device_sample_timestamps(ggml_metal_device_t dev, uint64_t * cpu_ns, uint64_t * gpu_ns);
// Resolve `count` consecutive samples starting at `base` into nanosecond timestamps anchored against
// `cpu_anchor_ns` / `gpu_anchor_ns` (obtained from ggml_metal_device_sample_timestamps).
// `out_ns` must hold at least `count` uint64_t entries.
// Samples that cannot be resolved, or that hold MTLCounterErrorValue, are reported as 0.
void ggml_metal_sample_buf_resolve(ggml_metal_sample_buf_t buf,
size_t base,
size_t count,
uint64_t cpu_anchor_ns,
uint64_t gpu_anchor_ns,
uint64_t * out_ns);
//
// device buffers
//

View File

@ -8,6 +8,7 @@
#include <Metal/Metal.h>
#include <stdatomic.h>
#include <mach/mach_time.h>
#ifndef TARGET_OS_VISION
#define TARGET_OS_VISION 0
@ -517,6 +518,21 @@ void ggml_metal_encoder_end_encoding(ggml_metal_encoder_t encoder) {
[encoder->obj endEncoding];
}
//
// MTLCounterSampleBuffer wrapper
//
// Thin wrapper pairing a Metal counter sample buffer with its capacity.
struct ggml_metal_sample_buf {
// underlying Metal counter sample buffer; released in ggml_metal_sample_buf_free()
id<MTLCounterSampleBuffer> obj;
// number of sample slots requested when the buffer was created
size_t sample_count;
};
// Encode a GPU counter sample into slot `index` of `buf` on `encoder`.
//
// The sample is requested with a barrier (withBarrier:YES). Profiling is best-effort, so an
// invalid request (NULL encoder/buffer, or `index` outside the buffer's capacity) is skipped
// instead of being handed to Metal; the corresponding slot is simply never written by this call.
// On OS versions older than macOS 11.0 / iOS 14.0 the call is a no-op.
void ggml_metal_encoder_sample_timestamp(ggml_metal_encoder_t encoder, ggml_metal_sample_buf_t buf, size_t index) {
    if (encoder == NULL || buf == NULL) {
        return;
    }

    // never ask Metal to write past the end of the sample buffer
    if (index >= buf->sample_count) {
        return;
    }

    if (@available(macOS 11.0, iOS 14.0, *)) {
        [encoder->obj sampleCountersInBuffer:buf->obj atSampleIndex:index withBarrier:YES];
    }
}
struct ggml_metal_device {
id<MTLDevice> mtl_device;
@ -1341,6 +1357,139 @@ const struct ggml_metal_device_props * ggml_metal_device_get_props(ggml_metal_de
return &dev->props;
}
//
// Profiling helpers
//
// Find the device counter set whose name equals MTLCommonCounterSetTimestamp.
// Returns nil when no such set is exposed, or when the OS is older than macOS 10.15 / iOS 14.0.
static id<MTLCounterSet> ggml_metal_device_get_timestamp_counter_set(ggml_metal_device_t dev) {
    id<MTLCounterSet> found = nil;

    if (@available(macOS 10.15, iOS 14.0, *)) {
        NSArray<id<MTLCounterSet>> * all_sets = [dev->mtl_device counterSets];
        const NSUInteger n_sets = [all_sets count];

        // stop at the first match, in array order
        for (NSUInteger i = 0; i < n_sets && found == nil; ++i) {
            id<MTLCounterSet> candidate = [all_sets objectAtIndex:i];
            if ([[candidate name] isEqualToString:MTLCommonCounterSetTimestamp]) {
                found = candidate;
            }
        }
    }

    return found;
}
// Report whether per-dispatch GPU timestamp sampling can be used on `dev`.
// Requires both dispatch-boundary counter sampling and a timestamp counter set;
// always false on OS versions older than macOS 11.0 / iOS 14.0.
bool ggml_metal_device_supports_profiling(ggml_metal_device_t dev) {
    if (@available(macOS 11.0, iOS 14.0, *)) {
        const BOOL at_dispatch =
            [dev->mtl_device supportsCounterSampling:MTLCounterSamplingPointAtDispatchBoundary];

        // the counter-set lookup only runs when dispatch-boundary sampling is available
        return at_dispatch && ggml_metal_device_get_timestamp_counter_set(dev) != nil;
    }

    return false;
}
// Create a counter sample buffer with `sample_count` timestamp slots on `dev`.
//
// The buffer uses shared storage and the device's timestamp counter set.
// Returns NULL when `sample_count` is 0, when the device exposes no timestamp counter set,
// when Metal refuses to create the buffer, when the wrapper allocation fails, or when the
// OS is older than macOS 10.15 / iOS 14.0.
// The caller owns the result and must release it with ggml_metal_sample_buf_free().
ggml_metal_sample_buf_t ggml_metal_device_create_sample_buf(ggml_metal_device_t dev, size_t sample_count) {
    if (sample_count == 0) {
        return NULL;
    }

    if (@available(macOS 10.15, iOS 14.0, *)) {
        id<MTLCounterSet> cs = ggml_metal_device_get_timestamp_counter_set(dev);
        if (cs == nil) {
            return NULL;
        }

        MTLCounterSampleBufferDescriptor * desc = [[MTLCounterSampleBufferDescriptor alloc] init];
        desc.counterSet  = cs;
        desc.label       = @"ggml-metal-profiler";
        desc.storageMode = MTLStorageModeShared;
        desc.sampleCount = sample_count;

        NSError * error = nil;
        id<MTLCounterSampleBuffer> sb = [dev->mtl_device newCounterSampleBufferWithDescriptor:desc error:&error];
        [desc release];

        if (sb == nil) {
            GGML_LOG_WARN("%s: failed to create counter sample buffer: %s\n", __func__,
                          error ? [[error localizedDescription] UTF8String] : "unknown");
            return NULL;
        }

        ggml_metal_sample_buf_t res = calloc(1, sizeof(*res));
        if (res == NULL) {
            // do not leak the Metal object when the wrapper cannot be allocated
            GGML_LOG_WARN("%s: failed to allocate sample buffer wrapper\n", __func__);
            [sb release];
            return NULL;
        }

        res->obj          = sb;
        res->sample_count = sample_count;

        return res;
    }

    return NULL;
}
// Release the wrapped Metal sample buffer and free the wrapper itself.
// Passing NULL is allowed and does nothing.
void ggml_metal_sample_buf_free(ggml_metal_sample_buf_t buf) {
    if (buf != NULL) {
        [buf->obj release];
        free(buf);
    }
}
// Sample a correlated CPU/GPU timestamp pair from the device and return both in nanoseconds.
//
// cpu_ns, gpu_ns: optional outputs (either may be NULL).
// Both outputs are 0 when the OS is older than macOS 10.15 / iOS 14.0 or when the Mach timebase
// cannot be obtained.
//
// The timebase is queried into a local on every call rather than cached in an unsynchronised
// static, so concurrent callers cannot observe a half-initialised value, and a failed query can
// never lead to a division by zero.
void ggml_metal_device_sample_timestamps(ggml_metal_device_t dev, uint64_t * cpu_ns, uint64_t * gpu_ns) {
    uint64_t cpu_res = 0;
    uint64_t gpu_res = 0;

    if (@available(macOS 10.15, iOS 14.0, *)) {
        mach_timebase_info_data_t tb = {0, 0};
        mach_timebase_info(&tb);

        if (tb.denom != 0) {
            MTLTimestamp cpu_t = 0;
            MTLTimestamp gpu_t = 0;
            [dev->mtl_device sampleTimestamps:&cpu_t gpuTimestamp:&gpu_t];

            // Both values are treated as Mach-absolute ticks and scaled to nanoseconds.
            // NOTE(review): confirm the GPU timestamp shares the CPU timebase on all target GPUs.
            // 128-bit intermediate avoids overflow in ticks * numer.
            cpu_res = (uint64_t)((__uint128_t) cpu_t * tb.numer / tb.denom);
            gpu_res = (uint64_t)((__uint128_t) gpu_t * tb.numer / tb.denom);
        }
    }

    if (cpu_ns) {
        *cpu_ns = cpu_res;
    }
    if (gpu_ns) {
        *gpu_ns = gpu_res;
    }
}
// Resolve `count` samples starting at slot `base` of `buf` into CPU-anchored nanosecond timestamps.
//
// buf           : sample buffer to read (may be NULL)
// base, count   : slot range to resolve; must lie inside the buffer
// cpu_anchor_ns : CPU time of the anchor pair, in nanoseconds
// gpu_anchor_ns : GPU time of the anchor pair, in nanoseconds
// out_ns        : receives `count` entries; nothing is written when it is NULL or `count` is 0
//
// An entry of 0 means "no usable sample". Every failure path (NULL buffer, range outside the
// buffer, failed resolve, unavailable timebase, unsupported OS, MTLCounterErrorValue, or a
// timestamp that would fall before time 0 once anchored) yields 0 for the affected entries, so
// the caller never reads stale data from `out_ns`.
void ggml_metal_sample_buf_resolve(ggml_metal_sample_buf_t buf,
                                   size_t     base,
                                   size_t     count,
                                   uint64_t   cpu_anchor_ns,
                                   uint64_t   gpu_anchor_ns,
                                   uint64_t * out_ns) {
    if (out_ns == NULL || count == 0) {
        return;
    }

    // start from "all samples dropped"; only successfully resolved slots are overwritten below
    for (size_t i = 0; i < count; ++i) {
        out_ns[i] = 0;
    }

    if (buf == NULL) {
        return;
    }

    // overflow-safe check that [base, base + count) lies inside the buffer
    if (base > buf->sample_count || count > buf->sample_count - base) {
        GGML_LOG_WARN("%s: sample range [%zu, +%zu) exceeds buffer capacity %zu\n", __func__,
                      base, count, buf->sample_count);
        return;
    }

    if (@available(macOS 10.15, iOS 14.0, *)) {
        NSData * data = [buf->obj resolveCounterRange:NSMakeRange(base, count)];
        if (data == nil || [data length] < count * sizeof(MTLCounterResultTimestamp)) {
            return;
        }

        // queried locally (not cached in a static) so concurrent callers cannot race on it
        mach_timebase_info_data_t tb = {0, 0};
        mach_timebase_info(&tb);
        if (tb.denom == 0) {
            return;
        }

        const MTLCounterResultTimestamp * src = (const MTLCounterResultTimestamp *) [data bytes];

        for (size_t i = 0; i < count; ++i) {
            // MTLCounterErrorValue marks a slot without a valid sample; leave it at 0
            if (src[i].timestamp == MTLCounterErrorValue) {
                continue;
            }

            const uint64_t gpu_ns = (uint64_t)((__uint128_t) src[i].timestamp * tb.numer / tb.denom);

            // shift from the GPU time domain to the CPU clock using the anchor pair
            if (gpu_ns >= gpu_anchor_ns) {
                out_ns[i] = cpu_anchor_ns + (gpu_ns - gpu_anchor_ns);
            } else {
                const uint64_t before = gpu_anchor_ns - gpu_ns;
                // a sample further before the anchor than the anchor's own CPU time would wrap
                out_ns[i] = before <= cpu_anchor_ns ? cpu_anchor_ns - before : 0;
            }
        }
    }
}
//
// device buffers
//

View File

@ -77,6 +77,11 @@ struct ggml_metal_op {
return ggml_graph_node(gf, idxs[i]);
}
// Translate encoder-local position `i` into the graph node index stored in idxs.
int node_global_idx(int i) const {
    assert(i >= 0);
    assert(i < (int) idxs.size());

    const int graph_idx = idxs[i];
    return graph_idx;
}
bool can_fuse(int i0, const ggml_op * ops, int n_ops) const {
assert(use_fusion);
assert(i0 >= 0 && i0 < n_nodes());
@ -100,6 +105,12 @@ struct ggml_metal_op {
int debug_graph;
int debug_fusion;
// Profiling: when sample_buf is non-null, ggml_metal_op_encode brackets each impl call with
// two timestamp samples. The (node_idx, slot_start, slot_end) tuple is recorded in slot_map.
ggml_metal_sample_buf_t sample_buf = nullptr;
size_t next_slot = 0;
ggml_metal_op_sample_slot * slot_map = nullptr;
private:
ggml_cgraph * gf;
@ -144,6 +155,16 @@ int ggml_metal_op_n_nodes(ggml_metal_op_t ctx) {
return ctx->n_nodes();
}
// Arm timestamp sampling on an op encoder.
// Stores the sample buffer, the slot map that receives one entry per encoded op group,
// and the first sample slot this encoder will use.
void ggml_metal_op_enable_profiling(
        ggml_metal_op_t ctx,
        ggml_metal_sample_buf_t sample_buf,
        size_t slot_base,
        ggml_metal_op_sample_slot * slot_map) {
    // where samples are written, and where their bookkeeping goes
    ctx->slot_map   = slot_map;
    ctx->sample_buf = sample_buf;

    // the next sample taken by this encoder lands here
    ctx->next_slot  = slot_base;
}
static bool ggml_metal_op_concurrency_reset(ggml_metal_op_t ctx) {
if (!ctx->mem_ranges) {
return true;
@ -500,12 +521,31 @@ int ggml_metal_op_encode(ggml_metal_op_t ctx, int idx) {
ggml_metal_encoder_debug_group_push(ctx->enc, ggml_op_desc(ctx->node(idx)));
}
const size_t slot_start = ctx->next_slot;
const bool profiling = (ctx->sample_buf != nullptr && ctx->slot_map != nullptr);
if (profiling) {
ggml_metal_encoder_sample_timestamp(ctx->enc, ctx->sample_buf, slot_start);
ctx->next_slot++;
}
int res = ggml_metal_op_encode_impl(ctx, idx);
if (idx + res > ctx->n_nodes()) {
GGML_ABORT("fusion error: nodes spanning multiple encoders have been fused. this indicates a bug in the fusion logic %s",
"https://github.com/ggml-org/llama.cpp/pull/14849");
}
if (profiling) {
const size_t slot_end = ctx->next_slot;
ggml_metal_encoder_sample_timestamp(ctx->enc, ctx->sample_buf, slot_end);
ctx->next_slot++;
const int gidx = ctx->node_global_idx(idx);
ctx->slot_map[gidx].node_idx = gidx;
ctx->slot_map[gidx].n_fused = res;
ctx->slot_map[gidx].slot_start = slot_start;
ctx->slot_map[gidx].slot_end = slot_end;
}
if (ctx->use_capture) {
ggml_metal_encoder_debug_group_pop(ctx->enc);
}

View File

@ -26,6 +26,29 @@ int ggml_metal_op_n_nodes(ggml_metal_op_t ctx);
int ggml_metal_op_encode(ggml_metal_op_t ctx, int idx);
//
// Profiling: per-op timestamp sampling
//
// Bookkeeping entry recorded for each encoded op group: which graph node it belongs to and
// which two sample-buffer slots bracket it.
// node_idx < 0 marks an unused slot.
struct ggml_metal_op_sample_slot {
// graph node index of the encoded op, or a negative value when the entry is unused
int node_idx;
// value returned by the encode call for this op
// (presumably the number of graph nodes consumed, > 1 when ops were fused - confirm)
int n_fused;
// sample-buffer slot holding the timestamp taken before the op was encoded
size_t slot_start;
// sample-buffer slot holding the timestamp taken after the op was encoded
size_t slot_end;
};
// Enable profiling on this op encoder. `slot_base` is the first index in `sample_buf` available
// to this encoder; `slot_map` is an array (size = number of graph nodes) into which
// the encoder will record one entry per non-empty encoded op group.
// Entries in `slot_map` are indexed by graph node index, and each encoded op group uses two
// consecutive sample slots (start, end) counted up from `slot_base`.
// Must be called before any ggml_metal_op_encode().
void ggml_metal_op_enable_profiling(
ggml_metal_op_t ctx,
ggml_metal_sample_buf_t sample_buf,
size_t slot_base,
struct ggml_metal_op_sample_slot * slot_map);
//
// available ops:
//

View File

@ -2,6 +2,7 @@
#include "ggml-impl.h"
#include "ggml-backend-impl.h"
#include "ggml-profiler.h"
#include "ggml-metal-device.h"
#include "ggml-metal-context.h"
@ -9,6 +10,7 @@
#include <mutex>
#include <string>
#include <vector>
#define GGML_METAL_NAME "MTL"
#define GGML_METAL_MAX_DEVICES 16
@ -20,6 +22,59 @@ static int g_devices = 1;
// forward declaration
static bool ggml_backend_buffer_is_metal(ggml_backend_buffer_t buffer);
//
// Profiler state (mirrors the per-backend profiler state from the Vulkan integration)
//
// Owned by ggml_backend_metal_init / device_init_backend; lifetime is the backend's lifetime.
// The Metal context holds a borrowed pointer and calls into the bridge functions below to push
// records during graph_compute.
//
// Per-backend profiler state: an on/off switch, the split id stamped onto new records,
// and the records collected so far.
struct ggml_metal_profiler_state {
    bool enabled  = false; // sampling is requested only while this is true
    int  split_id = -1;    // copied into each pushed record; -1 when unset

    std::vector<ggml_profile_record> records;

    // Drop all collected records and return the split id to its unset value.
    // The `enabled` flag is left untouched.
    void reset() {
        split_id = -1;
        records.clear();
    }
};
extern "C" {
// Append one op record for `node`, spanning [start_ns, end_ns], to the profiler state.
// Does nothing when `state` or `node` is null.
void ggml_metal_profiler_push_record(
        ggml_metal_profiler_state * state,
        const ggml_tensor * node,
        uint64_t start_ns,
        uint64_t end_ns) {
    if (state == nullptr || node == nullptr) {
        return;
    }

    // value-initialize so that any field not assigned below (or by the helper) is zero
    // instead of indeterminate when the record is copied into the vector
    ggml_profile_record rec = {};

    rec.type       = GGML_PROFILE_EVENT_OP;
    rec.name       = ggml_op_name(node->op);
    rec.backend_id = -1; // not known at this layer
    rec.split_id   = state->split_id;
    rec.start_ns   = start_ns;
    rec.end_ns     = end_ns;
    rec.bytes      = ggml_nbytes(node);
    rec.extra      = nullptr;

    // let the shared helper fill in the tensor-derived fields
    ggml_profile_record_from_tensor(&rec, node);

    state->records.push_back(rec);
}
// True only when a state object exists and its `enabled` flag is set.
bool ggml_metal_profiler_is_enabled(ggml_metal_profiler_state * state) {
    if (state == nullptr) {
        return false;
    }

    return state->enabled;
}
// Current split id of the profiler state, or -1 when no state is given.
int ggml_metal_profiler_get_split_id(ggml_metal_profiler_state * state) {
    if (state == nullptr) {
        return -1;
    }

    return state->split_id;
}
} // extern "C"
////////////////////////////////////////////////////////////////////////////////
// backend interface
////////////////////////////////////////////////////////////////////////////////
@ -589,6 +644,48 @@ static ggml_guid_t ggml_backend_metal_guid(void) {
return &guid;
}
// Create a fresh profiler state, hand a borrowed pointer to the Metal context so that
// graph_compute can push records into it, and install a ggml_backend_profiler on `backend`
// whose callbacks operate on that state.
static void ggml_backend_metal_register_profiler(ggml_backend_t backend, ggml_metal_t ctx) {
    ggml_metal_profiler_state * prof_state = new ggml_metal_profiler_state();

    ggml_metal_set_profiler_state(ctx, prof_state);

    // toggle sampling; turning it off also discards the collected records
    static const auto on_enable = [](void * context, bool enable) {
        ggml_metal_profiler_state * st = static_cast<ggml_metal_profiler_state *>(context);
        st->enabled = enable;
        if (enable) {
            return;
        }
        st->reset();
    };

    // discard collected records and the current split id
    static const auto on_reset = [](void * context) {
        static_cast<ggml_metal_profiler_state *>(context)->reset();
    };

    // remember the split id to stamp onto subsequently pushed records
    static const auto on_set_split_id = [](void * context, int split_id) {
        static_cast<ggml_metal_profiler_state *>(context)->split_id = split_id;
    };

    // expose the record array without copying; returns the number of records
    static const auto on_get_records = [](void * context, const ggml_profile_record ** out) -> int {
        ggml_metal_profiler_state * st = static_cast<ggml_metal_profiler_state *>(context);
        *out = st->records.data();
        return (int) st->records.size();
    };

    // destroy the state object
    static const auto on_free = [](void * context) {
        delete static_cast<ggml_metal_profiler_state *>(context);
    };

    ggml_backend_profiler * profiler = new ggml_backend_profiler {
        /* .context      = */ prof_state,
        /* .enable       = */ on_enable,
        /* .reset        = */ on_reset,
        /* .set_split_id = */ on_set_split_id,
        /* .get_records  = */ on_get_records,
        /* .free_context = */ on_free,
    };

    ggml_backend_set_profiler(backend, profiler);
}
ggml_backend_t ggml_backend_metal_init(void) {
ggml_backend_dev_t dev = ggml_backend_reg_dev_get(ggml_backend_metal_reg(), 0);
ggml_metal_device_t ctx_dev = (ggml_metal_device_t)dev->context;
@ -611,6 +708,8 @@ ggml_backend_t ggml_backend_metal_init(void) {
ggml_backend_metal_set_n_cb(backend, 1);
ggml_backend_metal_register_profiler(backend, ctx);
return backend;
}
@ -706,6 +805,8 @@ static ggml_backend_t ggml_backend_metal_device_init_backend(ggml_backend_dev_t
ggml_backend_metal_set_n_cb(backend, 1);
ggml_backend_metal_register_profiler(backend, ctx);
return backend;
GGML_UNUSED(params);