mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-27 23:50:20 -05:00
convert : minor fixes for numpy 2.x (#23571)
This commit is contained in:
parent
63248fc3e3
commit
5d246a792d
@@ -1308,7 +1308,8 @@ def do_dump_model(model_plus: ModelPlus) -> None:
|
|||||||
|
|
||||||
def main(args_in: list[str] | None = None) -> None:
|
def main(args_in: list[str] | None = None) -> None:
|
||||||
output_choices = ["f32", "f16"]
|
output_choices = ["f32", "f16"]
|
||||||
if np.uint32(1) == np.uint32(1).newbyteorder("<"):
|
dummy_val = np.uint32(1)
|
||||||
|
if dummy_val == dummy_val.view(dummy_val.dtype.newbyteorder("<")):
|
||||||
# We currently only support Q8_0 output on little endian systems.
|
# We currently only support Q8_0 output on little endian systems.
|
||||||
output_choices.append("q8_0")
|
output_choices.append("q8_0")
|
||||||
parser = argparse.ArgumentParser(description="Convert a LLaMA model to a GGML compatible file")
|
parser = argparse.ArgumentParser(description="Convert a LLaMA model to a GGML compatible file")
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizati
|
|||||||
# This is faster than np.vectorize and np.apply_along_axis because it works on more than one row at a time
|
# This is faster than np.vectorize and np.apply_along_axis because it works on more than one row at a time
|
||||||
def _apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray, otype: DTypeLike, oshape: tuple[int, ...]) -> np.ndarray:
|
def _apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray, otype: DTypeLike, oshape: tuple[int, ...]) -> np.ndarray:
|
||||||
rows = arr.reshape((-1, arr.shape[-1]))
|
rows = arr.reshape((-1, arr.shape[-1]))
|
||||||
|
assert len(rows.shape)
|
||||||
osize = 1
|
osize = 1
|
||||||
for dim in oshape:
|
for dim in oshape:
|
||||||
osize *= dim
|
osize *= dim
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user