mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
Fix minor GGML discrepancies (#2016)
* fix: wrong stride in batched quantized add1 (nb0 -> nb3)

  ggml_compute_forward_add1_q_f32 used i3*nb0 (element stride) instead of i3*nb3 (batch stride) for the destination row pointer. This causes all add1 operations with quantized types and batch > 1 to write to wrong memory locations. The src0 pointer on the line above correctly uses nb03.

* fix: wrong dimension limits in dup_f16 non-contiguous path

  The destination index wrapping in ggml_compute_forward_dup_f16 used source dimensions (ne00/ne01/ne02/ne03) instead of destination dimensions (ne0/ne1/ne2/ne3). While source and destination shapes are currently identical for dup, using the wrong variables is incorrect by design.

* fix: wrong dimension limits in dup_bf16 non-contiguous path

  Same fix as the dup_f16 path: destination index wrapping used source dimensions (ne00/ne01/ne02/ne03) instead of destination dimensions (ne0/ne1/ne2/ne3). Copy-paste error from the contiguous path.

* fix: ACC work size uses src[1] instead of src[0]

  The dequantization work buffer for quantized ACC was sized using src[1]->ne[0] instead of src[0]->ne[0]. Since src[0] is the tensor being dequantized, its dimensions should determine the buffer size.

* fix: missing work size for SOFT_CAP_MAX and ROPE_BACK

  Both ops dereference params->wdata in their forward functions but had no work size allocation (cur = 0), causing a NULL pointer dereference when any thread attempted to use wdata.

* fix: wrong dim in sum_rows_f32 dimension decomposition

  The i1 computation (line 14404) used ne01*ne0 (= ne01*1) instead of ne01*ne02 for the i3 term in the flat row index formula. When ne02 > 1 (batched 2D inputs), this causes wrong memory access and corrupted results.
This commit is contained in:
parent
8686ea708b
commit
7cacf28eec
@ -11415,13 +11415,13 @@ static void ggml_compute_forward_dup_f16(
|
|||||||
|
|
||||||
memcpy(dst_ptr, src0_ptr, sizeof(ggml_fp16_t));
|
memcpy(dst_ptr, src0_ptr, sizeof(ggml_fp16_t));
|
||||||
|
|
||||||
if (++i10 == ne00) {
|
if (++i10 == ne0) {
|
||||||
i10 = 0;
|
i10 = 0;
|
||||||
if (++i11 == ne01) {
|
if (++i11 == ne1) {
|
||||||
i11 = 0;
|
i11 = 0;
|
||||||
if (++i12 == ne02) {
|
if (++i12 == ne2) {
|
||||||
i12 = 0;
|
i12 = 0;
|
||||||
if (++i13 == ne03) {
|
if (++i13 == ne3) {
|
||||||
i13 = 0;
|
i13 = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -11719,13 +11719,13 @@ static void ggml_compute_forward_dup_bf16(
|
|||||||
|
|
||||||
memcpy(dst_ptr, src0_ptr, sizeof(ggml_bf16_t));
|
memcpy(dst_ptr, src0_ptr, sizeof(ggml_bf16_t));
|
||||||
|
|
||||||
if (++i10 == ne00) {
|
if (++i10 == ne0) {
|
||||||
i10 = 0;
|
i10 = 0;
|
||||||
if (++i11 == ne01) {
|
if (++i11 == ne1) {
|
||||||
i11 = 0;
|
i11 = 0;
|
||||||
if (++i12 == ne02) {
|
if (++i12 == ne2) {
|
||||||
i12 = 0;
|
i12 = 0;
|
||||||
if (++i13 == ne03) {
|
if (++i13 == ne3) {
|
||||||
i13 = 0;
|
i13 = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -13341,7 +13341,7 @@ static void ggml_compute_forward_add1_q_f32(
|
|||||||
const int i1 = (ir - i3*ne2*ne1 - i2*ne1);
|
const int i1 = (ir - i3*ne2*ne1 - i2*ne1);
|
||||||
|
|
||||||
void * src0_row = (void *) ((char *) src0->data + (i1*nb01 + i2*nb02 + i3*nb03));
|
void * src0_row = (void *) ((char *) src0->data + (i1*nb01 + i2*nb02 + i3*nb03));
|
||||||
void * dst_row = (void *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb0 ));
|
void * dst_row = (void *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3 ));
|
||||||
|
|
||||||
assert(ne0 % 32 == 0);
|
assert(ne0 % 32 == 0);
|
||||||
|
|
||||||
@ -14401,7 +14401,7 @@ static void ggml_compute_forward_sum_rows_f32(
|
|||||||
for (int ir = first_row; ir < last_row; ++ir) {
|
for (int ir = first_row; ir < last_row; ++ir) {
|
||||||
int i3 = ir / (ne01*ne02);
|
int i3 = ir / (ne01*ne02);
|
||||||
int i2 = (ir - i3*ne01*ne02)/ne01;
|
int i2 = (ir - i3*ne01*ne02)/ne01;
|
||||||
int i1 = ir - i3*ne01*ne0 - i2*ne01;
|
int i1 = ir - i3*ne01*ne02 - i2*ne01;
|
||||||
const float * src_row = (const float *)((const char *)src0->data + i1*nb01 + i2*nb02 + i3*nb03);
|
const float * src_row = (const float *)((const char *)src0->data + i1*nb01 + i2*nb02 + i3*nb03);
|
||||||
float * dst_row = ( float *)(( char *)dst->data + i1*nb1 + i2*nb2 + i3*nb3);
|
float * dst_row = ( float *)(( char *)dst->data + i1*nb1 + i2*nb2 + i3*nb3);
|
||||||
float row_sum = 0;
|
float row_sum = 0;
|
||||||
@ -26632,7 +26632,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
|
|||||||
case GGML_OP_ACC:
|
case GGML_OP_ACC:
|
||||||
{
|
{
|
||||||
if (ggml_is_quantized(node->src[0]->type)) {
|
if (ggml_is_quantized(node->src[0]->type)) {
|
||||||
cur = ggml_type_size(GGML_TYPE_F32) * node->src[1]->ne[0] * n_tasks;
|
cur = ggml_type_size(GGML_TYPE_F32) * node->src[0]->ne[0] * n_tasks;
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
case GGML_OP_MUL_MAT:
|
case GGML_OP_MUL_MAT:
|
||||||
@ -26692,7 +26692,9 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
|
|||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
case GGML_OP_SOFT_MAX:
|
case GGML_OP_SOFT_MAX:
|
||||||
|
case GGML_OP_SOFT_CAP_MAX:
|
||||||
case GGML_OP_ROPE:
|
case GGML_OP_ROPE:
|
||||||
|
case GGML_OP_ROPE_BACK:
|
||||||
{
|
{
|
||||||
cur = ggml_type_size(GGML_TYPE_F32) * node->ne[0] * n_tasks;
|
cur = ggml_type_size(GGML_TYPE_F32) * node->ne[0] * n_tasks;
|
||||||
} break;
|
} break;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user