mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-27 23:50:20 -05:00
convert : update mtp related help (#23334)
* update mtp related help * remove outdated experimental text
This commit is contained in:
parent
ac76808e4d
commit
b7393a4d19
@ -115,15 +115,15 @@ def parse_args() -> argparse.Namespace:
|
||||
)
|
||||
parser.add_argument(
|
||||
"--mmproj", action="store_true",
|
||||
help="(Experimental) Export multimodal projector (mmproj) for vision models. This will only work on some vision models. A prefix 'mmproj-' will be added to the output file name.",
|
||||
help="Export multimodal projector (mmproj) for vision models. This will only work on some vision models. An 'mmproj-' prefix will be added to the output file name.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--mtp", action="store_true",
|
||||
help="(Experimental) Export only the multi-token prediction (MTP) head as a separate GGUF, suitable for use as a speculative draft. Output file name will get a '-MTP' suffix.",
|
||||
help="Export only the multi-token prediction (MTP) head as a separate GGUF, suitable for use as a speculative draft. An 'mtp-' prefix will be added to the output file name.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-mtp", action="store_true",
|
||||
help="(Experimental) Exclude the multi-token prediction (MTP) head from the converted GGUF. Pair with --mtp on a second run to publish trunk and MTP as two files. Note: the split form duplicates embeddings, so the bundled default is more space-efficient overall.",
|
||||
help="Exclude the multi-token prediction (MTP) head from the converted GGUF. Pair with --mtp on a second run to publish trunk and MTP as two files. Note: the split form duplicates embeddings, but even though the bundled default is more space-efficient overall, this allows differing quantization which may be more performant.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--mistral-format", action="store_true",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user