ggml : add alloc_buffer_n to buffer type interface

Add alloc_buffer_n method to ggml_backend_buffer_type_i
interface, with a public API ggml_backend_buft_alloc_buffer_n.

- Default implementation in ggml-backend.cpp handles multi-buffer
  splitting and tensor allocation via ggml_tallocr
- Meta buffer type provides custom implementation that creates
  per-device sub-contexts and delegates to simple buffer types
- ggml_backend_alloc_ctx_tensors_from_buft now collects tensors
  into a list and delegates to the new API
- Remove temporary ggml_backend_meta_alloc_ctx_tensors_from_buft
- Add NULL alloc_buffer_n to all existing buffer type
  interfaces (cpu, metal, openvino, hexagon, webgpu, zdnn, virtgpu, repack)

Assisted-by: llama.cpp:local pi
This commit is contained in:
Georgi Gerganov 2026-05-25 16:50:13 +03:00
parent e3cab403bf
commit e0d7afdf74
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735
22 changed files with 350 additions and 287 deletions

View File

@ -36,6 +36,7 @@ extern "C" {
GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer_n(ggml_backend_buffer_type_t buft, struct ggml_tensor ** tensors, int n_tensors);
GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
GGML_API size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor);

View File

@ -1116,133 +1116,61 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
// utils
static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
for (size_t i = 0; i < *n_buffers; i++) {
ggml_backend_buffer_free((*buffers)[i]);
}
free(*buffers);
}
static bool alloc_tensor_range(struct ggml_context * ctx,
struct ggml_tensor * first, struct ggml_tensor * last,
ggml_backend_buffer_type_t buft, size_t size,
ggml_backend_buffer_t ** buffers, size_t * n_buffers) {
ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
if (buffer == NULL) {
GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
free_buffers(buffers, n_buffers);
return false;
}
*buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
(*buffers)[(*n_buffers)++] = buffer;
struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);
for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
enum ggml_status status = GGML_STATUS_SUCCESS;
if (t->data == NULL) {
if (t->view_src == NULL) {
status = ggml_tallocr_alloc(&tallocr, t);
} else if (t->buffer == NULL) {
status = ggml_backend_view_init(t);
}
} else {
if (t->view_src != NULL && t->buffer == NULL) {
// view of a pre-allocated tensor
status = ggml_backend_view_init(t);
}
}
if (status != GGML_STATUS_SUCCESS) {
GGML_LOG_ERROR("%s: failed to initialize tensor %s\n", __func__, t->name);
free_buffers(buffers, n_buffers);
return false;
}
}
return true;
}
static ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft_impl(
struct ggml_context * ctx, ggml_backend_buffer_type_t buft, size_t * nbytes_total, bool no_alloc) {
struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
GGML_ASSERT(ggml_get_no_alloc(ctx) == true);
// collect tensors into a list
int n_tensors = 0;
for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
n_tensors++;
}
if (n_tensors == 0) {
return NULL;
}
struct ggml_tensor ** tensors = (struct ggml_tensor **) malloc(n_tensors * sizeof(struct ggml_tensor *));
int i = 0;
for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
tensors[i++] = t;
}
ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer_n(buft, tensors, n_tensors);
free(tensors);
return buffer;
}
ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
return ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft);
}
size_t ggml_backend_alloc_ctx_tensors_from_buft_size(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
GGML_ASSERT(ggml_get_no_alloc(ctx) == true);
size_t alignment = ggml_backend_buft_get_alignment(buft);
size_t max_size = ggml_backend_buft_get_max_size(buft);
ggml_backend_buffer_t * buffers = NULL;
size_t n_buffers = 0;
*nbytes_total = 0;
size_t nbytes_total = 0;
size_t cur_buf_size = 0;
struct ggml_tensor * first = ggml_get_first_tensor(ctx);
for (struct ggml_tensor * t = first; t != NULL; t = ggml_get_next_tensor(ctx, t)) {
for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
size_t this_size = 0;
if (t->data == NULL && t->view_src == NULL) {
this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
}
if (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size) {
// allocate tensors in the current buffer
if (!no_alloc && !alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
return NULL;
}
first = t;
*nbytes_total += cur_buf_size;
nbytes_total += cur_buf_size;
cur_buf_size = this_size;
} else {
cur_buf_size += this_size;
}
}
nbytes_total += cur_buf_size;
// allocate remaining tensors
if (cur_buf_size > 0) {
*nbytes_total += cur_buf_size;
if (!no_alloc && !alloc_tensor_range(ctx, first, NULL, buft, cur_buf_size, &buffers, &n_buffers)) {
return NULL;
}
}
if (no_alloc) {
return NULL;
}
if (n_buffers == 0) {
#ifndef NDEBUG
GGML_LOG_DEBUG("%s: all tensors in the context are already allocated\n", __func__);
#endif
GGML_ASSERT(!buffers);
return NULL;
}
ggml_backend_buffer_t buffer;
if (n_buffers == 1) {
buffer = buffers[0];
} else {
buffer = ggml_backend_multi_buffer_alloc_buffer(buffers, n_buffers);
}
if (buffers) {
free(buffers); // can be NULL if context is empty or no_alloc
}
return buffer;
}
size_t ggml_backend_alloc_ctx_tensors_from_buft_size(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
size_t nbytes_total = 0;
ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*no_alloc=*/ true);
GGML_ASSERT(!buf);
return nbytes_total;
}
ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
size_t nbytes_total = 0;
if (ggml_backend_buft_is_meta(buft)) {
return ggml_backend_meta_alloc_ctx_tensors_from_buft(ctx, buft);
}
return ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*no_alloc =*/ false);
}
ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) {
return ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_get_default_buffer_type(backend));
}

View File

@ -8,7 +8,7 @@
extern "C" {
#endif
#define GGML_BACKEND_API_VERSION 2
#define GGML_BACKEND_API_VERSION 3
//
// Backend buffer type
@ -18,6 +18,8 @@ extern "C" {
const char * (*get_name) (ggml_backend_buffer_type_t buft);
// allocate a buffer of this type
ggml_backend_buffer_t (*alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size);
// (optional) allocate tensors from a list into a buffer of this type (defaults to alloc_buffer + linear allocator)
ggml_backend_buffer_t (*alloc_buffer_n)(ggml_backend_buffer_type_t buft, struct ggml_tensor ** tensors, int n_tensors);
// tensor alignment
size_t (*get_alignment) (ggml_backend_buffer_type_t buft);
// (optional) max buffer size that can be allocated (defaults to SIZE_MAX)
@ -95,9 +97,6 @@ extern "C" {
GGML_API size_t ggml_backend_meta_n_backends (ggml_backend_t meta_backend);
GGML_API ggml_backend_t ggml_backend_meta_simple_backend(ggml_backend_t meta_backend, size_t index);
// temporary workaround to statically allocate tensors from a context in a deduplicated way:
GGML_API struct ggml_backend_buffer * ggml_backend_meta_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
//
// Backend (stream)
//

View File

@ -288,6 +288,8 @@ static ggml_backend_buffer_type_t ggml_backend_meta_buft_simple_buft(ggml_backen
static ggml_backend_buffer_t ggml_backend_meta_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size);
static ggml_backend_buffer_t ggml_backend_meta_buffer_type_alloc_buffer_n(ggml_backend_buffer_type_t buft, ggml_tensor ** tensors, int n_tensors);
static size_t ggml_backend_meta_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
const size_t n_simple_bufts = ggml_backend_meta_buft_n_bufts(buft);
size_t max_alignment = 1;
@ -329,12 +331,13 @@ static bool ggml_backend_meta_buffer_type_is_host(ggml_backend_buffer_type_t buf
}
static const struct ggml_backend_buffer_type_i ggml_backend_meta_buffer_type_iface = {
/* .get_name = */ ggml_backend_meta_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_meta_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_meta_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_meta_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_meta_buffer_type_get_alloc_size,
/* .is_host = */ ggml_backend_meta_buffer_type_is_host,
/* .get_name = */ ggml_backend_meta_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_meta_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ ggml_backend_meta_buffer_type_alloc_buffer_n,
/* .get_alignment = */ ggml_backend_meta_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_meta_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_meta_buffer_type_get_alloc_size,
/* .is_host = */ ggml_backend_meta_buffer_type_is_host,
};
bool ggml_backend_buft_is_meta(ggml_backend_buffer_type_t buft) {
@ -1517,17 +1520,17 @@ static ggml_backend_buffer_t ggml_backend_meta_buffer_type_alloc_buffer(ggml_bac
return ggml_backend_buffer_init(buft, ggml_backend_meta_buffer_iface, buf_ctx, max_size);
}
struct ggml_backend_buffer * ggml_backend_meta_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
static ggml_backend_buffer_t ggml_backend_meta_buffer_type_alloc_buffer_n(ggml_backend_buffer_type_t buft, ggml_tensor ** tensors, int n_tensors) {
const size_t n_simple_bufts = ggml_backend_meta_buft_n_bufts(buft);
constexpr size_t compute_headroom = 16; // Maximum number of views per statically allocated tensor that can be created between evals.
const ggml_init_params params_static = {
/*.mem_size =*/ ggml_get_mem_size(ctx),
/*.mem_size =*/ n_tensors * ggml_tensor_overhead(),
/*.mem_buffer =*/ nullptr,
/*.no_alloc =*/ true,
};
const ggml_init_params params_compute = {
/*.mem_size =*/ compute_headroom*ggml_get_mem_size(ctx),
/*.mem_size =*/ compute_headroom * n_tensors * ggml_tensor_overhead(),
/*.mem_buffer =*/ nullptr,
/*.no_alloc =*/ true,
};
@ -1539,7 +1542,8 @@ struct ggml_backend_buffer * ggml_backend_meta_alloc_ctx_tensors_from_buft(struc
ggml_backend_meta_buffer_context * meta_buf_ctx = new ggml_backend_meta_buffer_context(stc_static, stc_compute_0, stc_compute_1, bufs);
ggml_backend_buffer_t meta_buf = ggml_backend_buffer_init(buft, ggml_backend_meta_buffer_iface, meta_buf_ctx, 0);
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
for (int i = 0; i < n_tensors; i++) {
ggml_tensor * t = tensors[i];
t->buffer = meta_buf;
ggml_backend_meta_buffer_init_tensor_impl(meta_buf_ctx->stc_static, t);
t->data = (void *) 0x2000000000000000; // FIXME

View File

@ -44,6 +44,107 @@ ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t
return buft->iface.alloc_buffer(buft, size);
}
// default implementation of alloc_buffer_n
// allocates tensors from a list into one or more buffers of the given type
static ggml_backend_buffer_t ggml_backend_buft_alloc_buffer_n_default(ggml_backend_buffer_type_t buft, struct ggml_tensor ** tensors, int n_tensors) {
size_t alignment = ggml_backend_buft_get_alignment(buft);
size_t max_size = ggml_backend_buft_get_max_size(buft);
ggml_backend_buffer_t * buffers = NULL;
size_t n_buffers = 0;
size_t cur_buf_size = 0;
int first = 0;
for (int i = 0; i <= n_tensors; i++) {
size_t this_size = 0;
if (i < n_tensors) {
struct ggml_tensor * t = tensors[i];
if (t->data == NULL && t->view_src == NULL) {
this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
}
}
// flush the current buffer if adding this tensor would exceed max_size, or if we are at the end
bool should_flush = (i == n_tensors) || (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size);
if (should_flush && cur_buf_size > 0) {
// allocate the buffer with the computed size for this range
ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, cur_buf_size);
if (buffer == NULL) {
for (size_t b = 0; b < n_buffers; b++) {
ggml_backend_buffer_free(buffers[b]);
}
free(buffers);
return NULL;
}
struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);
// allocate tensors in the current buffer
bool ok = true;
for (int j = first; j < i; j++) {
struct ggml_tensor * t = tensors[j];
if (t->data == NULL) {
if (t->view_src == NULL) {
if (ggml_tallocr_alloc(&tallocr, t) != GGML_STATUS_SUCCESS) {
ok = false;
break;
}
} else if (t->buffer == NULL) {
if (ggml_backend_view_init(t) != GGML_STATUS_SUCCESS) {
ok = false;
break;
}
}
} else {
if (t->view_src != NULL && t->buffer == NULL) {
// view of a pre-allocated tensor
if (ggml_backend_view_init(t) != GGML_STATUS_SUCCESS) {
ok = false;
break;
}
}
}
}
if (!ok) {
for (size_t b = 0; b < n_buffers; b++) {
ggml_backend_buffer_free(buffers[b]);
}
ggml_backend_buffer_free(buffer);
free(buffers);
return NULL;
}
buffers = (ggml_backend_buffer_t *) realloc(buffers, sizeof(ggml_backend_buffer_t) * (n_buffers + 1));
buffers[n_buffers++] = buffer;
cur_buf_size = 0;
first = i;
} else if (i < n_tensors) {
cur_buf_size += this_size;
}
}
if (n_buffers == 0) {
free(buffers);
return NULL;
}
ggml_backend_buffer_t result;
if (n_buffers == 1) {
result = buffers[0];
} else {
result = ggml_backend_multi_buffer_alloc_buffer(buffers, n_buffers);
}
free(buffers);
return result;
}
// Allocate the tensors of a list into a buffer of type `buft`.
// The buffer type's own alloc_buffer_n hook is used when it is set; otherwise the call
// falls back to the default implementation.
ggml_backend_buffer_t ggml_backend_buft_alloc_buffer_n(ggml_backend_buffer_type_t buft, struct ggml_tensor ** tensors, int n_tensors) {
    GGML_ASSERT(buft);

    // alloc_buffer_n is an optional interface member
    if (!buft->iface.alloc_buffer_n) {
        return ggml_backend_buft_alloc_buffer_n_default(buft, tensors, n_tensors);
    }

    return buft->iface.alloc_buffer_n(buft, tensors, n_tensors);
}
size_t ggml_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) {
GGML_ASSERT(buft);
return buft->iface.get_alignment(buft);
@ -2328,12 +2429,13 @@ static bool ggml_backend_cpu_buffer_type_is_host(ggml_backend_buffer_type_t buft
ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void) {
static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type = {
/* .iface = */ {
/* .get_name = */ ggml_backend_cpu_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
/* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
/* .get_name = */ ggml_backend_cpu_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
/* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
},
/* .device = */ NULL, // FIXME ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
/* .context = */ NULL,
@ -2351,12 +2453,13 @@ static const char * ggml_backend_cpu_buffer_from_ptr_type_get_name(ggml_backend_
static ggml_backend_buffer_type_t ggml_backend_cpu_buffer_from_ptr_type(void) {
static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type = {
/* .iface = */ {
/* .get_name = */ ggml_backend_cpu_buffer_from_ptr_type_get_name,
/* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
/* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
/* .get_name = */ ggml_backend_cpu_buffer_from_ptr_type_get_name,
/* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
/* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
},
/* .device = */ NULL, // FIXME ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
/* .context = */ NULL,

View File

@ -1595,12 +1595,13 @@ static bool ggml_backend_cann_buffer_type_is_host(ggml_backend_buffer_type_t buf
* memory for CANN buffer types in the GGML backend.
*/
static const ggml_backend_buffer_type_i ggml_backend_cann_buffer_type_interface = {
/* .get_name = */ ggml_backend_cann_buffer_type_name,
/* .alloc_buffer = */ ggml_backend_cann_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_cann_buffer_type_get_alignment,
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ ggml_backend_cann_buffer_type_get_alloc_size,
/* .is_host = */ ggml_backend_cann_buffer_type_is_host,
/* .get_name = */ ggml_backend_cann_buffer_type_name,
/* .alloc_buffer = */ ggml_backend_cann_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_cann_buffer_type_get_alignment,
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ ggml_backend_cann_buffer_type_get_alloc_size,
/* .is_host = */ ggml_backend_cann_buffer_type_is_host,
};
/**
@ -1742,12 +1743,13 @@ static ggml_backend_buffer_t ggml_backend_cann_host_buffer_type_alloc_buffer(ggm
ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() {
static struct ggml_backend_buffer_type ggml_backend_cann_buffer_type_host = {
/* .iface = */ {
/* .get_name = */ ggml_backend_cann_host_buffer_type_name,
/* .alloc_buffer = */ ggml_backend_cann_host_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
/* .get_name = */ ggml_backend_cann_host_buffer_type_name,
/* .alloc_buffer = */ ggml_backend_cann_host_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
},
/* .device = */
ggml_backend_reg_dev_get(ggml_backend_cann_reg(), 0),

View File

@ -228,12 +228,13 @@ static bool ggml_amx_init() {
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type() {
static struct ggml_backend_buffer_type ggml_backend_buffer_type_amx = {
/* .iface = */ {
/* .get_name = */ ggml_backend_amx_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_amx_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_amx_buffer_type_get_alignment,
/* .get_max_size = */ nullptr, // defaults to SIZE_MAX
/* .get_alloc_size = */ ggml_backend_amx_buffer_type_get_alloc_size,
/* .is_host = */ nullptr,
/* .get_name = */ ggml_backend_amx_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_amx_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ nullptr,
/* .get_alignment = */ ggml_backend_amx_buffer_type_get_alignment,
/* .get_max_size = */ nullptr, // defaults to SIZE_MAX
/* .get_alloc_size = */ ggml_backend_amx_buffer_type_get_alloc_size,
/* .is_host = */ nullptr,
},
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
/* .context = */ new ggml::cpu::amx::extra_buffer_type(),

View File

@ -40,12 +40,13 @@ static ggml_backend_buffer_t ggml_backend_cpu_hbm_buffer_type_alloc_buffer(ggml_
ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void) {
static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_hbm = {
/* .iface = */ {
/* .get_name = */ ggml_backend_cpu_hbm_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
/* .get_max_size = */ nullptr, // defaults to SIZE_MAX
/* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes
/* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
/* .get_name = */ ggml_backend_cpu_hbm_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ nullptr,
/* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
/* .get_max_size = */ nullptr, // defaults to SIZE_MAX
/* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes
/* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
},
/* .context = */ nullptr,
};

View File

@ -1506,12 +1506,13 @@ ggml_backend_buffer_type_t ggml_backend_cpu_kleidiai_buffer_type(void) {
static ggml::cpu::kleidiai::extra_buffer_type ctx;
static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_kleidiai = {
/* .iface = */ {
/* .get_name = */ ggml_backend_cpu_kleidiai_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_cpu_kleidiai_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_cpu_kleidiai_buffer_type_get_alignment,
/* .get_max_size = */ nullptr, // defaults to SIZE_MAX
/* .get_alloc_size = */ ggml_backend_cpu_kleidiai_buffer_type_get_alloc_size,
/* .is_host = */ nullptr,
/* .get_name = */ ggml_backend_cpu_kleidiai_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_cpu_kleidiai_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ nullptr,
/* .get_alignment = */ ggml_backend_cpu_kleidiai_buffer_type_get_alignment,
/* .get_max_size = */ nullptr, // defaults to SIZE_MAX
/* .get_alloc_size = */ ggml_backend_cpu_kleidiai_buffer_type_get_alloc_size,
/* .is_host = */ nullptr,
},
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
/* .context = */ &ctx,

View File

@ -4821,12 +4821,13 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
ggml_backend_buffer_type_t ggml_backend_cpu_repack_buffer_type(void) {
static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_repack = {
/* .iface = */ {
/* .get_name = */ ggml_backend_cpu_repack_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_cpu_repack_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_cpu_repack_buffer_type_get_alignment,
/* .get_max_size = */ nullptr, // defaults to SIZE_MAX
/* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes
/* .is_host = */ nullptr,
/* .get_name = */ ggml_backend_cpu_repack_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_cpu_repack_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ nullptr,
/* .get_alignment = */ ggml_backend_cpu_repack_buffer_type_get_alignment,
/* .get_max_size = */ nullptr, // defaults to SIZE_MAX
/* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes
/* .is_host = */ nullptr,
},
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
/* .context = */ new ggml::cpu::repack::extra_buffer_type(),

View File

@ -1648,12 +1648,13 @@ ggml_backend_buffer_type_t ggml_backend_cpu_riscv64_spacemit_buffer_type(void) {
static ggml_backend_buffer_type ggml_backend_cpu_buffer_type_riscv64_spacemit = {
/* .iface = */
{
/* .get_name = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_get_alignment,
/* .get_max_size = */ nullptr,
/* .get_alloc_size = */ ggml_backend_cpu_riscv64_spacemit_nbytes,
/* .is_host = */ nullptr,
/* .get_name = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_get_alignment,
/* .get_max_size = */ nullptr,
/* .get_alloc_size = */ ggml_backend_cpu_riscv64_spacemit_nbytes,
/* .is_host = */ nullptr,
},
/* .device = */
ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),

View File

@ -844,12 +844,13 @@ static size_t ggml_backend_cuda_buffer_type_get_alloc_size(ggml_backend_buffer_t
}
static const ggml_backend_buffer_type_i ggml_backend_cuda_buffer_type_interface = {
/* .get_name = */ ggml_backend_cuda_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_cuda_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_cuda_buffer_type_get_alignment,
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ ggml_backend_cuda_buffer_type_get_alloc_size,
/* .is_host = */ NULL,
/* .get_name = */ ggml_backend_cuda_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_cuda_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_cuda_buffer_type_get_alignment,
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ ggml_backend_cuda_buffer_type_get_alloc_size,
/* .is_host = */ NULL,
};
ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device) {
@ -1163,12 +1164,13 @@ static bool ggml_backend_cuda_split_buffer_type_is_host(ggml_backend_buffer_type
}
static const ggml_backend_buffer_type_i ggml_backend_cuda_split_buffer_type_interface = {
/* .get_name = */ ggml_backend_cuda_split_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_cuda_split_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_cuda_split_buffer_type_get_alignment,
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ ggml_backend_cuda_split_buffer_type_get_alloc_size,
/* .is_host = */ ggml_backend_cuda_split_buffer_type_is_host,
/* .get_name = */ ggml_backend_cuda_split_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_cuda_split_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_cuda_split_buffer_type_get_alignment,
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ ggml_backend_cuda_split_buffer_type_get_alloc_size,
/* .is_host = */ ggml_backend_cuda_split_buffer_type_is_host,
};
// Communication context for multi-GPU AllReduce during tensor parallelism.
@ -1568,12 +1570,13 @@ static ggml_backend_buffer_t ggml_backend_cuda_host_buffer_type_alloc_buffer(ggm
ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type() {
static struct ggml_backend_buffer_type ggml_backend_cuda_buffer_type_host = {
/* .iface = */ {
/* .get_name = */ ggml_backend_cuda_host_buffer_type_name,
/* .alloc_buffer = */ ggml_backend_cuda_host_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
/* .get_name = */ ggml_backend_cuda_host_buffer_type_name,
/* .alloc_buffer = */ ggml_backend_cuda_host_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
},
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cuda_reg(), 0),
/* .context = */ nullptr,

View File

@ -1766,21 +1766,23 @@ static bool ggml_backend_hexagon_repack_buffer_type_is_host(ggml_backend_buffer_
}
static ggml_backend_buffer_type_i ggml_backend_hexagon_buffer_type_interface = {
/* .get_name = */ ggml_backend_hexagon_buffer_type_name,
/* .alloc_buffer = */ ggml_backend_hexagon_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_hexagon_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_hexagon_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_hexagon_buffer_type_get_alloc_size,
/* .is_host = */ ggml_backend_hexagon_buffer_type_is_host,
/* .get_name = */ ggml_backend_hexagon_buffer_type_name,
/* .alloc_buffer = */ ggml_backend_hexagon_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_hexagon_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_hexagon_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_hexagon_buffer_type_get_alloc_size,
/* .is_host = */ ggml_backend_hexagon_buffer_type_is_host,
};
static ggml_backend_buffer_type_i ggml_backend_hexagon_repack_buffer_type_interface = {
/* .get_name = */ ggml_backend_hexagon_buffer_type_name,
/* .alloc_buffer = */ ggml_backend_hexagon_repack_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_hexagon_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_hexagon_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_hexagon_buffer_type_get_alloc_size,
/* .is_host = */ ggml_backend_hexagon_repack_buffer_type_is_host,
/* .get_name = */ ggml_backend_hexagon_buffer_type_name,
/* .alloc_buffer = */ ggml_backend_hexagon_repack_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_hexagon_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_hexagon_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_hexagon_buffer_type_get_alloc_size,
/* .is_host = */ ggml_backend_hexagon_repack_buffer_type_is_host,
};
struct ggml_hexagon_opbatch {

View File

@ -300,12 +300,13 @@ static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_shared(int devi
ggml_backend_buffer_type buft = {
/* .iface = */ {
/* .get_name = */ ggml_backend_metal_buffer_type_shared_get_name,
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_shared_alloc_buffer,
/* .get_alignment = */ ggml_backend_metal_buffer_type_shared_get_alignment,
/* .get_max_size = */ ggml_backend_metal_buffer_type_shared_get_max_size,
/* .get_alloc_size = */ ggml_backend_metal_buffer_type_shared_get_alloc_size,
/* .is_host = */ ggml_backend_metal_buffer_type_shared_is_host,
/* .get_name = */ ggml_backend_metal_buffer_type_shared_get_name,
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_shared_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_metal_buffer_type_shared_get_alignment,
/* .get_max_size = */ ggml_backend_metal_buffer_type_shared_get_max_size,
/* .get_alloc_size = */ ggml_backend_metal_buffer_type_shared_get_alloc_size,
/* .is_host = */ ggml_backend_metal_buffer_type_shared_is_host,
},
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i),
/* .context = */ raw_ctx,
@ -375,12 +376,13 @@ static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_private(int dev
ggml_backend_buffer_type buft = {
/* .iface = */ {
/* .get_name = */ ggml_backend_metal_buffer_type_private_get_name,
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_private_alloc_buffer,
/* .get_alignment = */ ggml_backend_metal_buffer_type_private_get_alignment,
/* .get_max_size = */ ggml_backend_metal_buffer_type_private_get_max_size,
/* .get_alloc_size = */ ggml_backend_metal_buffer_type_private_get_alloc_size,
/* .is_host = */ ggml_backend_metal_buffer_type_private_is_host,
/* .get_name = */ ggml_backend_metal_buffer_type_private_get_name,
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_private_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_metal_buffer_type_private_get_alignment,
/* .get_max_size = */ ggml_backend_metal_buffer_type_private_get_max_size,
/* .get_alloc_size = */ ggml_backend_metal_buffer_type_private_get_alloc_size,
/* .is_host = */ ggml_backend_metal_buffer_type_private_is_host,
},
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i),
/* .context = */ raw_ctx,
@ -453,12 +455,13 @@ static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_mapped(int devi
// https://github.com/ggml-org/llama.cpp/pull/15832#discussion_r2333177099
// Buffer type descriptor for the Metal "mapped" buffer type.
// Values are positional (the /* .name = */ labels are comments only), so a new
// interface slot has to be added at the matching position in every table.
// The .iface entries are listed twice here: the first six are the table before
// the change, the following seven are the table after it, with
// .alloc_buffer_n = NULL inserted between .alloc_buffer and .get_alignment.
// NOTE(review): NULL presumably selects the generic alloc_buffer_n path in
// ggml-backend.cpp - confirm against ggml_backend_buft_alloc_buffer_n.
// The initializer continues past the end of this excerpt.
ggml_backend_buffer_type buft = {
/* .iface = */ {
// before the change: six slots, no .alloc_buffer_n
/* .get_name = */ ggml_backend_metal_buffer_type_mapped_get_name,
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_mapped_alloc_buffer,
/* .get_alignment = */ ggml_backend_metal_buffer_type_mapped_get_alignment,
/* .get_max_size = */ ggml_backend_metal_buffer_type_mapped_get_max_size,
/* .get_alloc_size = */ ggml_backend_metal_buffer_type_mapped_get_alloc_size,
/* .is_host = */ ggml_backend_metal_buffer_type_mapped_is_host,
// after the change: seven slots, .alloc_buffer_n added as the third entry
/* .get_name = */ ggml_backend_metal_buffer_type_mapped_get_name,
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_mapped_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_metal_buffer_type_mapped_get_alignment,
/* .get_max_size = */ ggml_backend_metal_buffer_type_mapped_get_max_size,
/* .get_alloc_size = */ ggml_backend_metal_buffer_type_mapped_get_alloc_size,
/* .is_host = */ ggml_backend_metal_buffer_type_mapped_is_host,
},
// device is looked up in the Metal registry by index i
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i),
/* .context = */ raw_ctx,

View File

@ -8298,12 +8298,13 @@ static bool ggml_backend_opencl_buffer_type_supports_backend(ggml_backend_buffer
}
// Buffer-type interface table for the OpenCL backend.
// Entries are positional; the /* .name = */ labels are comments, so the value
// order must follow the field order of ggml_backend_buffer_type_i.
// The table body is shown twice: the first six entries are the version before
// the change (hunk header: 12 lines), the last seven are the version after it
// (13 lines), which adds .alloc_buffer_n = NULL after .alloc_buffer.
// .get_alloc_size and .is_host are NULL in both versions.
// NOTE(review): a NULL .alloc_buffer_n presumably defers to the generic
// implementation in ggml-backend.cpp - confirm there.
static ggml_backend_buffer_type_i ggml_backend_opencl_buffer_type_interface = {
// before the change: six slots, no .alloc_buffer_n
/* .get_name = */ ggml_backend_opencl_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_opencl_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_opencl_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_opencl_buffer_type_get_max_size,
/* .get_alloc_size = */ NULL,
/* .is_host = */ NULL,
// after the change: seven slots, .alloc_buffer_n added as the third entry
/* .get_name = */ ggml_backend_opencl_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_opencl_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_opencl_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_opencl_buffer_type_get_max_size,
/* .get_alloc_size = */ NULL,
/* .is_host = */ NULL,
};
//

View File

@ -475,12 +475,13 @@ static size_t ggml_backend_openvino_buffer_type_get_alloc_size(ggml_backend_buff
}
// Buffer-type interface table for OpenVINO device buffers.
// Entries are positional (the /* .name = */ labels are comments only).
// The body appears twice: six entries as they were before the change, then
// seven entries after it, with .alloc_buffer_n inserted after .alloc_buffer.
// .is_host is a null pointer in both versions.
// NOTE(review): the new slot is written as NULL while .is_host in the same
// table uses nullptr; both denote a null function pointer, but the spelling
// is inconsistent within this table.
// NOTE(review): a null .alloc_buffer_n presumably defers to the generic
// implementation in ggml-backend.cpp - confirm there.
static const ggml_backend_buffer_type_i ggml_backend_openvino_buffer_type_interface = {
// before the change: six slots, no .alloc_buffer_n
/* .get_name = */ ggml_backend_openvino_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_openvino_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_openvino_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_openvino_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_openvino_buffer_type_get_alloc_size,
/* .is_host = */ nullptr,
// after the change: seven slots, .alloc_buffer_n added as the third entry
/* .get_name = */ ggml_backend_openvino_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_openvino_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_openvino_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_openvino_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_openvino_buffer_type_get_alloc_size,
/* .is_host = */ nullptr,
};
// Get buffer type for a specific device
@ -530,12 +531,13 @@ static bool ggml_backend_openvino_host_buffer_type_is_host(ggml_backend_buffer_t
}
// Buffer-type interface table for OpenVINO host buffers.
// Only .get_name and .is_host use host-specific functions; .alloc_buffer,
// .get_alignment, .get_max_size and .get_alloc_size reuse the
// ggml_backend_openvino_buffer_type_* functions.
// Entries are positional (the /* .name = */ labels are comments only).
// The body appears twice: six entries from before the change, then seven
// entries after it, with .alloc_buffer_n = NULL inserted after .alloc_buffer.
// NOTE(review): a NULL .alloc_buffer_n presumably defers to the generic
// implementation in ggml-backend.cpp - confirm there.
static const ggml_backend_buffer_type_i ggml_backend_openvino_host_buffer_type_interface = {
// before the change: six slots, no .alloc_buffer_n
/* .get_name = */ ggml_backend_openvino_host_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_openvino_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_openvino_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_openvino_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_openvino_buffer_type_get_alloc_size,
/* .is_host = */ ggml_backend_openvino_host_buffer_type_is_host,
// after the change: seven slots, .alloc_buffer_n added as the third entry
/* .get_name = */ ggml_backend_openvino_host_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_openvino_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_openvino_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_openvino_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_openvino_buffer_type_get_alloc_size,
/* .is_host = */ ggml_backend_openvino_host_buffer_type_is_host,
};
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_openvino_host_buffer_type(int device) {

View File

@ -629,12 +629,13 @@ static size_t ggml_backend_rpc_buffer_type_get_alloc_size(ggml_backend_buffer_ty
}
// Buffer-type interface table for the RPC backend.
// Entries are positional (the /* .name = */ labels are comments only).
// The body appears twice: the first six entries are the table before the
// change, the last seven are the table after it, where .alloc_buffer_n = NULL
// is inserted after .alloc_buffer. .is_host is NULL in both versions.
// NOTE(review): a NULL .alloc_buffer_n presumably defers to the generic
// implementation in ggml-backend.cpp - confirm there.
static ggml_backend_buffer_type_i ggml_backend_rpc_buffer_type_interface = {
// before the change: six slots, no .alloc_buffer_n
/* .get_name = */ ggml_backend_rpc_buffer_type_name,
/* .alloc_buffer = */ ggml_backend_rpc_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_rpc_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_rpc_get_max_size,
/* .get_alloc_size = */ ggml_backend_rpc_buffer_type_get_alloc_size,
/* .is_host = */ NULL,
// after the change: seven slots, .alloc_buffer_n added as the third entry
/* .get_name = */ ggml_backend_rpc_buffer_type_name,
/* .alloc_buffer = */ ggml_backend_rpc_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_rpc_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_rpc_get_max_size,
/* .get_alloc_size = */ ggml_backend_rpc_buffer_type_get_alloc_size,
/* .is_host = */ NULL,
};
static const char * ggml_backend_rpc_name(ggml_backend_t backend) {

View File

@ -809,12 +809,13 @@ static size_t ggml_backend_sycl_buffer_type_get_alloc_size(ggml_backend_buffer_t
}
// Buffer-type interface table for regular SYCL device buffers.
// Entries are positional (the /* .name = */ labels are comments only).
// The body appears twice: six entries from before the change, then seven
// entries after it, with .alloc_buffer_n = NULL inserted after .alloc_buffer.
// .is_host is NULL in both versions.
// NOTE(review): a NULL .alloc_buffer_n presumably defers to the generic
// implementation in ggml-backend.cpp - confirm there.
static const ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
// before the change: six slots, no .alloc_buffer_n
/* .get_name = */ ggml_backend_sycl_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_sycl_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_sycl_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_sycl_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_sycl_buffer_type_get_alloc_size,
/* .is_host = */ NULL,
// after the change: seven slots, .alloc_buffer_n added as the third entry
/* .get_name = */ ggml_backend_sycl_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_sycl_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_sycl_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_sycl_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_sycl_buffer_type_get_alloc_size,
/* .is_host = */ NULL,
};
ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) {
@ -1244,12 +1245,13 @@ static bool ggml_backend_sycl_split_buffer_type_is_host(ggml_backend_buffer_type
}
// Buffer-type interface table for SYCL split buffers.
// Entries are positional (the /* .name = */ labels are comments only).
// The body appears twice: six entries from before the change, then seven
// entries after it, with .alloc_buffer_n = NULL inserted after .alloc_buffer.
// .get_max_size is NULL in both versions (see the existing inline note on
// its default).
// NOTE(review): a NULL .alloc_buffer_n presumably defers to the generic
// implementation in ggml-backend.cpp - confirm there.
static ggml_backend_buffer_type_i ggml_backend_sycl_split_buffer_type_interface = {
// before the change: six slots, no .alloc_buffer_n
/* .get_name = */ ggml_backend_sycl_split_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_sycl_split_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_sycl_split_buffer_type_get_alignment,
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ ggml_backend_sycl_split_buffer_type_get_alloc_size,
/* .is_host = */ ggml_backend_sycl_split_buffer_type_is_host,
// after the change: seven slots, .alloc_buffer_n added as the third entry
/* .get_name = */ ggml_backend_sycl_split_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_sycl_split_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_sycl_split_buffer_type_get_alignment,
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ ggml_backend_sycl_split_buffer_type_get_alloc_size,
/* .is_host = */ ggml_backend_sycl_split_buffer_type_is_host,
};
ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split) {
@ -1339,12 +1341,13 @@ ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type() {
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_host_buffer_type\n");
// Buffer type descriptor for SYCL host buffers.
// .get_alignment, .get_alloc_size and .is_host are taken from the CPU buffer
// type's interface; .get_name and .alloc_buffer are SYCL-host specific.
// Entries are positional (the /* .name = */ labels are comments only).
// The .iface entries appear twice: six entries from before the change, then
// seven entries after it, with .alloc_buffer_n = NULL inserted after
// .alloc_buffer.
// NOTE(review): a NULL .alloc_buffer_n presumably defers to the generic
// implementation in ggml-backend.cpp - confirm there.
// The initializer continues past the end of this excerpt.
static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_type_host = {
/* .iface = */ {
// before the change: six slots, no .alloc_buffer_n
/* .get_name = */ ggml_backend_sycl_host_buffer_type_name,
/* .alloc_buffer = */ ggml_backend_sycl_host_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
/* .get_max_size = */ NULL, // TODO: return device.maxBufferLength
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
// after the change: seven slots, .alloc_buffer_n added as the third entry
/* .get_name = */ ggml_backend_sycl_host_buffer_type_name,
/* .alloc_buffer = */ ggml_backend_sycl_host_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
/* .get_max_size = */ NULL, // TODO: return device.maxBufferLength
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
},
// bound to device index 0 of the SYCL registry
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_sycl_reg(), 0),
/* .context = */ nullptr,

View File

@ -63,19 +63,21 @@ static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buff
}
// Buffer-type interface table for remoting buffers that the backend allocates
// itself (.alloc_buffer is set).
// Entries are positional (the /* .name = */ labels are comments only).
// The body appears twice: six entries from before the change, then seven
// entries after it, with .alloc_buffer_n = NULL inserted after .alloc_buffer.
// .is_host is NULL in both versions.
// NOTE(review): a NULL .alloc_buffer_n presumably defers to the generic
// implementation in ggml-backend.cpp - confirm there.
const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface = {
// before the change: six slots, no .alloc_buffer_n
/* .get_name = */ ggml_backend_remoting_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_remoting_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_remoting_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_remoting_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size,
/* .is_host = */ NULL,
// after the change: seven slots, .alloc_buffer_n added as the third entry
/* .get_name = */ ggml_backend_remoting_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_remoting_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_remoting_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_remoting_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size,
/* .is_host = */ NULL,
};
// Buffer-type interface table for remoting "from_ptr" buffers.
// Unlike the table with a real .alloc_buffer, this one leaves .alloc_buffer
// NULL; the other function slots use the same
// ggml_backend_remoting_buffer_type_* functions.
// Entries are positional (the /* .name = */ labels are comments only).
// The body appears twice: six entries from before the change, then seven
// entries after it, with .alloc_buffer_n = NULL inserted after .alloc_buffer.
// NOTE(review): with both .alloc_buffer and .alloc_buffer_n NULL, confirm how
// ggml_backend_buft_alloc_buffer_n behaves for this buffer type.
const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_from_ptr_type_interface = {
// before the change: six slots, no .alloc_buffer_n
/* .get_name = */ ggml_backend_remoting_buffer_type_get_name,
/* .alloc_buffer = */ NULL,
/* .get_alignment = */ ggml_backend_remoting_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_remoting_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size,
/* .is_host = */ NULL,
// after the change: seven slots, .alloc_buffer_n added as the third entry
/* .get_name = */ ggml_backend_remoting_buffer_type_get_name,
/* .alloc_buffer = */ NULL,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_remoting_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_remoting_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size,
/* .is_host = */ NULL,
};

View File

@ -286,12 +286,13 @@ static size_t ggml_backend_vk_buffer_type_get_alignment(ggml_backend_buffer_type
static size_t ggml_backend_vk_buffer_type_get_max_size(ggml_backend_buffer_type_t buft);
static size_t ggml_backend_vk_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor);
// Buffer-type interface table for Vulkan device buffers.
// Entries are positional (the /* .name = */ labels are comments only).
// The body appears twice: six entries from before the change, then seven
// entries after it, with .alloc_buffer_n = NULL inserted after .alloc_buffer.
// .is_host is NULL in both versions.
// NOTE(review): a NULL .alloc_buffer_n presumably defers to the generic
// implementation in ggml-backend.cpp - confirm there.
static ggml_backend_buffer_type_i ggml_backend_vk_buffer_type_interface = {
// before the change: six slots, no .alloc_buffer_n
/* .get_name = */ ggml_backend_vk_buffer_type_name,
/* .alloc_buffer = */ ggml_backend_vk_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_vk_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_vk_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_vk_buffer_type_get_alloc_size,
/* .is_host = */ NULL,
// after the change: seven slots, .alloc_buffer_n added as the third entry
/* .get_name = */ ggml_backend_vk_buffer_type_name,
/* .alloc_buffer = */ ggml_backend_vk_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_vk_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_vk_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_vk_buffer_type_get_alloc_size,
/* .is_host = */ NULL,
};
class vk_memory_logger;
@ -14916,12 +14917,13 @@ static size_t ggml_backend_vk_host_buffer_type_get_max_size(ggml_backend_buffer_
ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
// Buffer type descriptor for Vulkan host buffers.
// .get_alloc_size and .is_host are taken from the CPU buffer type's interface;
// the remaining function slots are Vulkan-host specific.
// Entries are positional (the /* .name = */ labels are comments only).
// The .iface entries appear twice: six entries from before the change, then
// seven entries after it, with .alloc_buffer_n (nullptr) inserted after
// .alloc_buffer.
// NOTE(review): a null .alloc_buffer_n presumably defers to the generic
// implementation in ggml-backend.cpp - confirm there.
// The initializer continues past the end of this excerpt.
static struct ggml_backend_buffer_type ggml_backend_vk_buffer_type_host = {
/* .iface = */ {
// before the change: six slots, no .alloc_buffer_n
/* .get_name = */ ggml_backend_vk_host_buffer_type_name,
/* .alloc_buffer = */ ggml_backend_vk_host_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_vk_host_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_vk_host_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
// after the change: seven slots, .alloc_buffer_n added as the third entry
/* .get_name = */ ggml_backend_vk_host_buffer_type_name,
/* .alloc_buffer = */ ggml_backend_vk_host_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ nullptr,
/* .get_alignment = */ ggml_backend_vk_host_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_vk_host_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
},
// bound to device index 0 of the Vulkan registry
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_vk_reg(), 0),
/* .context = */ nullptr,

View File

@ -4010,6 +4010,7 @@ static ggml_backend_buffer_type_t ggml_backend_webgpu_device_get_buffer_type(ggm
/* .iface = */ {
/* .get_name = */ ggml_backend_webgpu_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_webgpu_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_webgpu_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_webgpu_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_webgpu_buffer_type_get_alloc_size,

View File

@ -385,6 +385,7 @@ ggml_backend_buffer_type_t ggml_backend_zdnn_buffer_type(void) {
/* .iface = */ {
/* .get_name = */ ggml_backend_zdnn_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_zdnn_buffer_type_alloc_buffer,
/* .alloc_buffer_n = */ NULL,
/* .get_alignment = */ ggml_backend_zdnn_buffer_type_get_alignment,
/* .get_max_size = */ NULL,
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes