mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-27 23:50:20 -05:00
convert : support Step3.7-Flash (#23845)
* feat: support step3.7 * fix: register Step-3.7 BPE pre-tokenizer hash * delete fromjson * register step3.7 arch to Step35Model * drop vit projector in base filter * Apply suggestion from @CISC Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> * restore blank line --------- Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
parent
4f3a4beb8d
commit
f7a0777a5c
@ -215,6 +215,7 @@ TEXT_MODEL_MAP: dict[str, str] = {
|
||||
"Starcoder2ForCausalLM": "starcoder",
|
||||
"Step3p5ForCausalLM": "step3",
|
||||
"StepVLForConditionalGeneration": "step3",
|
||||
"Step3p7ForConditionalGeneration": "step3",
|
||||
"T5EncoderModel": "t5",
|
||||
"T5ForConditionalGeneration": "t5",
|
||||
"T5WithLMHeadModel": "t5",
|
||||
@ -283,6 +284,7 @@ MMPROJ_MODEL_MAP: dict[str, str] = {
|
||||
"Sarashina2VisionForCausalLM": "sarashina2",
|
||||
"SmolVLMForConditionalGeneration": "smolvlm",
|
||||
"StepVLForConditionalGeneration": "step3",
|
||||
"Step3p7ForConditionalGeneration": "step3",
|
||||
"UltravoxModel": "ultravox",
|
||||
"VoxtralForConditionalGeneration": "ultravox",
|
||||
"YoutuVLForConditionalGeneration": "youtuvl",
|
||||
|
||||
@ -2593,7 +2593,7 @@ def get_model_architecture(hparams: dict[str, Any], model_type: ModelType) -> st
|
||||
# Step3-VL keeps text config under text_config but uses a custom top-level architecture.
|
||||
# For text conversion we route to a dedicated text-only class.
|
||||
# TODO: refactor this later to avoid adding exception here
|
||||
if model_type == ModelType.TEXT and arch in ("StepVLForConditionalGeneration", "Sarashina2VisionForCausalLM", "Exaone4_5_ForConditionalGeneration"):
|
||||
if model_type == ModelType.TEXT and arch in ("StepVLForConditionalGeneration", "Sarashina2VisionForCausalLM", "Exaone4_5_ForConditionalGeneration", "Step3p7ForConditionalGeneration"):
|
||||
return arch
|
||||
|
||||
# if "architectures" is found in the sub-config, use that instead
|
||||
|
||||
@ -15,7 +15,7 @@ from .base import MmprojModel, ModelBase, TextModel, _MISTRAL_COMMON_DATASET_MEA
|
||||
from .qwen import Qwen3Model
|
||||
|
||||
|
||||
@ModelBase.register("StepVLForConditionalGeneration")
|
||||
@ModelBase.register("StepVLForConditionalGeneration", "Step3p7ForConditionalGeneration")
|
||||
class Step3VLVisionModel(MmprojModel):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
@ -95,7 +95,7 @@ class Step3VLTextModel(Qwen3Model):
|
||||
model_arch = gguf.MODEL_ARCH.QWEN3
|
||||
|
||||
|
||||
@ModelBase.register("Step3p5ForCausalLM")
|
||||
@ModelBase.register("Step3p5ForCausalLM", "Step3p7ForConditionalGeneration")
|
||||
class Step35Model(TextModel):
|
||||
model_arch = gguf.MODEL_ARCH.STEP35
|
||||
|
||||
@ -203,11 +203,23 @@ class Step35Model(TextModel):
|
||||
if isinstance(rope_theta, list):
|
||||
rope_theta = rope_theta[0]
|
||||
base = float(rope_theta)
|
||||
if (dim := self.hparams.get("head_dim")) is None:
|
||||
dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
|
||||
dim = int(dim)
|
||||
|
||||
freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
|
||||
if (storage_dim := self.hparams.get("head_dim")) is None:
|
||||
storage_dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
|
||||
storage_dim = int(storage_dim)
|
||||
|
||||
# Llama 3 factors apply only to the rotary dims used by full_attention layers
|
||||
# (partial_rotary_factor * head_dim). Remaining slots are padded with 1.0 so
|
||||
# sliding_attention layers remain unaffected. set_gguf_parameters already
|
||||
# guarantees at least one full_attention layer.
|
||||
layer_types = (self.hparams.get("layer_types") or [])[: self.block_count]
|
||||
partial_rotary_factors = (self.hparams.get("partial_rotary_factors") or [])[: self.block_count]
|
||||
full_attention_factor = next(
|
||||
float(f) for lt, f in zip(layer_types, partial_rotary_factors) if lt == "full_attention"
|
||||
)
|
||||
rotary_dim = int(storage_dim * full_attention_factor)
|
||||
|
||||
freqs = 1.0 / (base ** (torch.arange(0, rotary_dim, 2, dtype=torch.float32) / rotary_dim))
|
||||
|
||||
factor = float(rope_params.get("factor", 8.0))
|
||||
low_freq_factor = float(rope_params.get("low_freq_factor", 1.0))
|
||||
@ -228,4 +240,8 @@ class Step35Model(TextModel):
|
||||
smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
|
||||
rope_factors.append(1.0 / ((1.0 - smooth) / factor + smooth))
|
||||
|
||||
# Pad to head_dim/2 with 1.0 so non-scaled layers remain neutral.
|
||||
if len(rope_factors) < storage_dim // 2:
|
||||
rope_factors.extend([1.0] * (storage_dim // 2 - len(rope_factors)))
|
||||
|
||||
yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32))
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user