diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp index 4cc4a4a16a..3d942ba4fe 100644 --- a/src/llama-graph.cpp +++ b/src/llama-graph.cpp @@ -1873,9 +1873,9 @@ ggml_tensor * llm_graph_context::build_inp_embd(ggml_tensor * tok_embd) const { res->t_inp_embd = cur; // For Granite architecture - // NOTE: Only apply scale to token inputs. Raw embeddings are assumed to be - // multimodal inputs that should not be scaled. - if (ubatch.token && hparams.f_embedding_scale != 0.0f) { + // NOTE: For deepstack models, only apply scale to token inputs (i.e. text-only input). + // Raw embeddings are assumed to be multimodal inputs that should not be scaled. + if (hparams.f_embedding_scale != 0.0f && (ubatch.token || hparams.n_deepstack_layers == 0)) { if (!ggml_is_contiguous(cur)) { cur = ggml_cont(ctx0, cur); } diff --git a/tools/mtmd/tests.sh b/tools/mtmd/tests.sh index 83416fb272..5da48d61bf 100755 --- a/tools/mtmd/tests.sh +++ b/tools/mtmd/tests.sh @@ -91,7 +91,6 @@ add_test_vision "ggml-org/LightOnOCR-1B-1025-GGUF:Q8_0" add_test_vision "ggml-org/DeepSeek-OCR-GGUF:Q8_0" -p "Free OCR." --chat-template deepseek-ocr add_test_vision "ggml-org/dots.ocr-GGUF:Q8_0" -p "OCR" add_test_vision "ggml-org/HunyuanOCR-GGUF:Q8_0" -p "OCR" -add_test_vision "ggml-org/HunyuanVL-4B-GGUF:Q8_0" add_test_vision "ggml-org/gemma-4-E2B-it-GGUF:Q8_0" --jinja add_test_audio "ggml-org/ultravox-v0_5-llama-3_2-1b-GGUF:Q8_0"