From 0ffdf509ab007674032677181825ab10aeacb383 Mon Sep 17 00:00:00 2001
From: mb8565 <mabovermobile@gmail.com>
Date: Fri, 26 Jun 2026 01:46:26 -0500
Subject: [PATCH] ggml : fix set_rows CPU crash when the destination is F32
 (#2038)

The CPU `set_rows` kernel for F32 sources fetches `type_traits[dst->type].from_float`
and calls it for every scattered row. F32 has no `from_float` entry (it is NULL in
`type_traits`), so any `set_rows` into an F32 destination calls a NULL function pointer
and segfaults. Other destination types work because they all have a real `from_float`.

Repro (CPU backend, standalone ggml graph):

    dst = new_tensor_2d(F32, 8, 6)   // F32 destination
    src = new_tensor_2d(F32, 8, 4)
    idx = new_tensor_1d(I64, 4)      // {0,2,4,5}
    out = ggml_set_rows(dst, src, idx)
    // ggml_backend_graph_compute(cpu, ...) -> SIGSEGV on current main

When the destination is F32, copy the row with `memcpy` instead of going through
`from_float`. The I32 and I64 index branches both get the same treatment. An assert
guards the remaining case (a non-F32 dst with a NULL `from_float`), so a future
unsupported type aborts with a clear assertion failure instead of a NULL-call segfault.

I ran a normal model after this and it still decodes fine (DeepSeek-V2-Lite-Q4_K_M,
CPU, coherent output). The non-F32 path is functionally unchanged: it still calls
`from_float` with the same arguments, now behind a `dst_is_f32` check. On the F32
path you pay one `memcpy` per row in place of the indirect call.

Co-authored-by: local-llm <local-llm@local-llm-R740.cruvis.org>
Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
---
 ggml/src/ggml.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index f3b59e9e..19ba0436 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -19007,6 +19007,11 @@ static void ggml_compute_forward_set_rows_f32(
     const int64_t ir1 = MIN(ir0 + dr, nr);
 
     ggml_from_float_t const from_float = type_traits[dst->type].from_float;
+    // F32 has no from_float entry in type_traits (it is NULL), so set_rows into an F32
+    // destination would call a NULL function pointer and crash. Handle F32 dst with a
+    // direct float copy. Hit by graphs that scatter F32 rows into an F32 base.
+    const bool dst_is_f32 = (dst->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst_is_f32 || from_float);
 
     if (src1->type == GGML_TYPE_I64) {
         for (int64_t i03 = 0; i03 < ne03; ++i03) {
@@ -19020,8 +19025,10 @@ static void ggml_compute_forward_set_rows_f32(
 
                     GGML_ASSERT(i1 >= 0 && i1 < ne1);
 
-                    from_float((const float *) ((char *) src0->data +  i*nb01 + i02*nb02 + i03*nb03),
-                            ((char *)  dst->data + i1*nb1  + i02*nb2  + i03*nb3), nc);
+                    const float * src_row = (const float *) ((char *) src0->data + i*nb01 + i02*nb02 + i03*nb03);
+                    char *        dst_row = (char *) dst->data + i1*nb1 + i02*nb2 + i03*nb3;
+                    if (dst_is_f32) memcpy(dst_row, src_row, nc*sizeof(float));
+                    else            from_float(src_row, dst_row, nc);
                 }
             }
         }
@@ -19038,8 +19045,10 @@ static void ggml_compute_forward_set_rows_f32(
 
                     GGML_ASSERT(i1 >= 0 && i1 < ne1);
 
-                    from_float((const float *) ((char *) src0->data +  i*nb01 + i02*nb02 + i03*nb03),
-                            ((char *)  dst->data + i1*nb1  + i02*nb2  + i03*nb3), nc);
+                    const float * src_row = (const float *) ((char *) src0->data + i*nb01 + i02*nb02 + i03*nb03);
+                    char *        dst_row = (char *) dst->data + i1*nb1 + i02*nb2 + i03*nb3;
+                    if (dst_is_f32) memcpy(dst_row, src_row, nc*sizeof(float));
+                    else            from_float(src_row, dst_row, nc);
                 }
             }
         }