From 44dbe8c5218829be1603a08dee8847f4d0bca323 Mon Sep 17 00:00:00 2001
From: samuraieng <89817709+samuraieng@users.noreply.github.com>
Date: Fri, 8 May 2026 06:10:29 +0900
Subject: [PATCH] model: Support sarashina2.2-vision-3b model (#22103)

---
Review notes (this area is not part of the commit message):

* The line structure of this patch was restored from a copy that had been
  collapsed onto a single line. Indentation of the added and context lines
  was reconstructed from Python syntax; NOTE(review): confirm the context
  lines against the target tree before applying.
* Sarashina2VLTextModel is registered for "Sarashina2VisionForCausalLM" and
  sets model_arch to gguf.MODEL_ARCH.LLAMA. Its filter_tensors() strips one
  leading "llm." prefix from the tensor name, returns None for names that
  start with "norm.", and otherwise defers to the parent filter_tensors().
* Sarashina2VLVisionModel is registered for the same architecture string and,
  after the parent __init__, sets global_config['model_type'] to "qwen2_vl".
* get_model_architecture() now returns the top-level arch unchanged for
  ModelType.TEXT when arch is "Sarashina2VisionForCausalLM", in addition to
  the existing "StepVLForConditionalGeneration" case.

 convert_hf_to_gguf.py | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index fb1f5dd447..fb9bc23610 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -13684,6 +13684,27 @@ class DotsOCRVisionModel(MmprojModel):
         yield from super().modify_tensors(data_torch, name, bid)
 
 
+@ModelBase.register("Sarashina2VisionForCausalLM")
+class Sarashina2VLTextModel(LlamaModel):
+    model_arch = gguf.MODEL_ARCH.LLAMA
+
+    @classmethod
+    def filter_tensors(cls, item: tuple[str, Callable[[], Tensor]]) -> tuple[str, Callable[[], Tensor]] | None:
+        name, gen = item
+        if name.startswith("llm."):
+            name = name.replace("llm.", "", 1)
+        elif name.startswith("norm."):
+            return None
+        return super().filter_tensors((name, gen))
+
+
+@ModelBase.register("Sarashina2VisionForCausalLM")
+class Sarashina2VLVisionModel(Qwen2VLVisionModel):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.global_config['model_type'] = "qwen2_vl"
+
+
 ###### CONVERSION LOGIC ######
 
 
@@ -13940,7 +13961,7 @@ def get_model_architecture(hparams: dict[str, Any], model_type: ModelType) -> st
     # Step3-VL keeps text config under text_config but uses a custom top-level architecture.
     # For text conversion we route to a dedicated text-only class.
     # TODO: refactor this later to avoid adding exception here
-    if model_type == ModelType.TEXT and arch == "StepVLForConditionalGeneration":
+    if model_type == ModelType.TEXT and arch in ("StepVLForConditionalGeneration", "Sarashina2VisionForCausalLM"):
         return arch
 
     # if "architectures" is found in the sub-config, use that instead