model : Add LFM2.5-ColBERT-350M and LFM2.5-Embedding-350M (#24913)

* model : Add LFM2.5-ColBERT-350M and LFM2.5-Embedding-350M

* Restore LFM2 models in README.md
This commit is contained in:
Tarek Dakhran 2026-06-24 08:49:46 +02:00 committed by GitHub
parent ac4105d68b
commit 88636e178f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 28 additions and 8 deletions

View File

@ -142,7 +142,9 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
- [x] [GigaChat-20B-A3B](https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct) - [x] [GigaChat-20B-A3B](https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct)
- [X] [Trillion-7B-preview](https://huggingface.co/trillionlabs/Trillion-7B-preview) - [X] [Trillion-7B-preview](https://huggingface.co/trillionlabs/Trillion-7B-preview)
- [x] [Ling models](https://huggingface.co/collections/inclusionAI/ling-67c51c85b34a7ea0aba94c32) - [x] [Ling models](https://huggingface.co/collections/inclusionAI/ling-67c51c85b34a7ea0aba94c32)
- [x] [LFM2 models](https://huggingface.co/collections/LiquidAI/lfm2-686d721927015b2ad73eaa38) - [x] [Liquid LFM2 models](https://huggingface.co/collections/LiquidAI/lfm2)
- [x] [Liquid LFM2.5 models](https://huggingface.co/collections/LiquidAI/lfm25)
- [x] [Liquid Nanos](https://huggingface.co/collections/LiquidAI/liquid-nanos)
- [x] [Hunyuan models](https://huggingface.co/collections/tencent/hunyuan-dense-model-6890632cda26b19119c9c5e7) - [x] [Hunyuan models](https://huggingface.co/collections/tencent/hunyuan-dense-model-6890632cda26b19119c9c5e7)
- [x] [BailingMoeV2 (Ring/Ling 2.0) models](https://huggingface.co/collections/inclusionAI/ling-v2-68bf1dd2fc34c306c1fa6f86) - [x] [BailingMoeV2 (Ring/Ling 2.0) models](https://huggingface.co/collections/inclusionAI/ling-v2-68bf1dd2fc34c306c1fa6f86)
- [x] [Mellum models](https://huggingface.co/JetBrains/models?search=mellum) - [x] [Mellum models](https://huggingface.co/JetBrains/models?search=mellum)

View File

@ -124,6 +124,7 @@ TEXT_MODEL_MAP: dict[str, str] = {
"LLaDAModelLM": "llada", "LLaDAModelLM": "llada",
"LLaMAForCausalLM": "llama", "LLaMAForCausalLM": "llama",
"Lfm25AudioTokenizer": "lfm2", "Lfm25AudioTokenizer": "lfm2",
"Lfm2BidirectionalModel": "lfm2",
"Lfm2ForCausalLM": "lfm2", "Lfm2ForCausalLM": "lfm2",
"Lfm2Model": "lfm2", "Lfm2Model": "lfm2",
"Lfm2MoeForCausalLM": "lfm2", "Lfm2MoeForCausalLM": "lfm2",

View File

@ -64,11 +64,17 @@ class LFM2Model(TextModel):
yield from super().modify_tensors(data_torch, name, bid) yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("Lfm2Model") @ModelBase.register("Lfm2Model", "Lfm2BidirectionalModel")
class LFM2ColBertModel(LFM2Model): class LFM2ColBertModel(LFM2Model):
model_arch = gguf.MODEL_ARCH.LFM2 model_arch = gguf.MODEL_ARCH.LFM2
dense_tensor_name = "dense_2" dense_tensor_name = "dense_2"
def set_gguf_parameters(self):
super().set_gguf_parameters()
if self.hf_arch == "Lfm2BidirectionalModel":
self.gguf_writer.add_causal_attention(False)
self._try_set_pooling_type()
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
if not name.startswith(self.dense_tensor_name): if not name.startswith(self.dense_tensor_name):
name = "model." + name name = "model." + name
@ -76,10 +82,11 @@ class LFM2ColBertModel(LFM2Model):
yield from super().modify_tensors(data_torch, name, bid) yield from super().modify_tensors(data_torch, name, bid)
def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]: def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
# dense tensor is stored in a separate safetensors file # optional dense tensor is stored in a separate safetensors file
from safetensors.torch import load_file from safetensors.torch import load_file
tensors_file = self.dir_model / "1_Dense" / "model.safetensors" tensors_file = self.dir_model / "1_Dense" / "model.safetensors"
assert tensors_file.is_file() if not tensors_file.is_file():
return
tensor = load_file(tensors_file)["linear.weight"] tensor = load_file(tensors_file)["linear.weight"]
self.gguf_writer.add_embedding_length_out(tensor.shape[0]) self.gguf_writer.add_embedding_length_out(tensor.shape[0])
yield f"{self.dense_tensor_name}.weight", tensor.clone() yield f"{self.dense_tensor_name}.weight", tensor.clone()

View File

@ -190,7 +190,15 @@ llama_model_lfm2::graph<iswa>::graph(const llama_model & model, const llm_graph_
auto * conv_rs = build_rs(inp_recr, conv_state, hparams.n_embd_r(), n_seqs); auto * conv_rs = build_rs(inp_recr, conv_state, hparams.n_embd_r(), n_seqs);
auto * conv = ggml_reshape_3d(ctx0, conv_rs, d_conv, hparams.n_embd, n_seqs); auto * conv = ggml_reshape_3d(ctx0, conv_rs, d_conv, hparams.n_embd, n_seqs);
bx = ggml_concat(ctx0, conv, bx, 0); // causal prepends the state, non-causal pads symmetrically for a centered window
if (hparams.causal_attn) {
bx = ggml_concat(ctx0, conv, bx, 0);
} else {
const int64_t pad = (hparams.n_shortconv_l_cache - 1) / 2;
auto * left = ggml_cont(ctx0,
ggml_view_3d(ctx0, conv, pad, hparams.n_embd, n_seqs, conv->nb[1], conv->nb[2], (d_conv - pad) * conv->nb[0]));
bx = ggml_pad_ext(ctx0, ggml_concat(ctx0, left, bx, 0), 0, pad, 0, 0, 0, 0, 0, 0);
}
GGML_ASSERT(bx->ne[0] > conv->ne[0]); GGML_ASSERT(bx->ne[0] > conv->ne[0]);
// last d_conv columns are the new conv state // last d_conv columns are the new conv state
@ -266,10 +274,12 @@ llama_model_lfm2::graph<iswa>::graph(const llama_model & model, const llm_graph_
cb(cur, "result_norm", -1); cb(cur, "result_norm", -1);
res->t_embd = cur; res->t_embd = cur;
cur = build_lora_mm(model.output, cur, model.output_s); if (!cparams.embeddings) {
cb(cur, "result_output", -1); cur = build_lora_mm(model.output, cur, model.output_s);
cb(cur, "result_output", -1);
res->t_logits = cur; res->t_logits = cur;
}
ggml_build_forward_expand(gf, cur); ggml_build_forward_expand(gf, cur);
} }