diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 336e843323..e660361094 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -1064,7 +1064,7 @@ class TextModel(ModelBase): # Skip multimodal tensors if name.startswith(("mlp", "vit.", "vpm.", "siglip2.", "conformer.", "merger.", "resampler.", "sound_encoder.", "sound_projection.")) \ - or "visual." in name or "audio." in name or "talker." in name \ + or "visual." in name or "vision." in name or "audio." in name or "talker." in name \ or "vision_" in name or "audio_" in name or "sam_model" in name \ or "token2wav." in name or "code2wav." in name \ or "projector." in name or "pre_mm_projector_norm" in name \