mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-27 23:50:20 -05:00
* Update to OV 2026.2.1, Make OV release packages self-contained * Update to OV 2026.2.1, Make OV release packages self-contained * OpenVINO Backend: Remove compute_op_type hardcoded sets (#222) * OpenVINO Backend: Remove compute_op_type hardcoded sets * revert get_op_type removal * OpenVINO backend: enable softmax with sink input * OpenVINO backend: opt mul_mat_id convert process for large size * OpenVINO backend: Modify add_id to support 2D/4D * OpenVINO Backend: Add glu_swiglu_oai * PR review: fix paths * PR review: fix path consistency --------- Co-authored-by: Mostafa <mostafas.main.email@gmail.com> Co-authored-by: Xuejun <Xuejun.Zhai@intel.com>
77 lines
2.9 KiB
C++
77 lines
2.9 KiB
C++
#include "../node_context.h"
|
|
#include "../op_table.h"
|
|
#include "../utils.h"
|
|
|
|
#include <memory>
|
|
#include <openvino/core/node.hpp>
|
|
#include <openvino/core/node_output.hpp>
|
|
#include <openvino/op/add.hpp>
|
|
#include <openvino/op/constant.hpp>
|
|
#include <openvino/op/convert.hpp>
|
|
#include <openvino/op/gather.hpp>
|
|
#include <openvino/op/reshape.hpp>
|
|
#include <openvino/op/shape_of.hpp>
|
|
|
|
namespace ov {
|
|
namespace frontend {
|
|
namespace ggml {
|
|
namespace op {
|
|
|
|
static ov::Output<ov::Node> reshape_add_id_input_to_2d(const ov::Output<ov::Node> & input,
|
|
const ov::PartialShape & input_shape,
|
|
const std::vector<int> & dims) {
|
|
const auto actual_shape = input.get_partial_shape();
|
|
if (actual_shape.rank().is_static() && actual_shape.rank().get_length() == 2) {
|
|
return input;
|
|
}
|
|
|
|
if (input_shape.rank().is_static() && input_shape.rank().get_length() == 2) {
|
|
return input;
|
|
}
|
|
|
|
auto shape = std::make_shared<ov::op::v3::ShapeOf>(input, ov::element::i64);
|
|
return std::make_shared<ov::op::v1::Reshape>(input, get_dimensions(shape, dims), false);
|
|
}
|
|
|
|
OutputVector translate_add_id(const NodeContext & context) {
|
|
num_inputs_check(context, 3, 3);
|
|
|
|
auto input = process_view_input_new(context, 0);
|
|
auto bias = process_view_input_new(context, 1);
|
|
auto ids = process_view_input_new(context, 2);
|
|
|
|
// OpenVINO uses reversed GGML dimensions:
|
|
// input: [1, n_token, n_used, n_embd]
|
|
// bias: [1, 1, n_expert, n_embd]
|
|
// ids: [1, 1, n_token, n_used]
|
|
// Model bias constants may already be stored as [n_expert, n_embd].
|
|
bias = reshape_add_id_input_to_2d(bias, context.get_input_shape(1), {2, 3});
|
|
ids = reshape_add_id_input_to_2d(ids, context.get_input_shape(2), {2, 3});
|
|
|
|
if (ids.get_element_type() != ov::element::i32 && ids.get_element_type() != ov::element::i64) {
|
|
ids = std::make_shared<ov::op::v0::Convert>(ids, ov::element::i32);
|
|
}
|
|
|
|
auto gather_axis = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{}, {0});
|
|
ov::Output<ov::Node> selected_bias = std::make_shared<ov::op::v8::Gather>(bias, ids, gather_axis);
|
|
selected_bias = std::make_shared<ov::op::v1::Reshape>(
|
|
selected_bias, std::make_shared<ov::op::v3::ShapeOf>(input, ov::element::i64), false);
|
|
|
|
if (selected_bias.get_element_type() != input.get_element_type()) {
|
|
selected_bias = std::make_shared<ov::op::v0::Convert>(selected_bias, input.get_element_type());
|
|
}
|
|
|
|
ov::Output<ov::Node> res = std::make_shared<ov::op::v1::Add>(input, selected_bias);
|
|
const auto output_type = context.get_output_type();
|
|
if (res.get_element_type() != output_type) {
|
|
res = std::make_shared<ov::op::v0::Convert>(res, output_type);
|
|
}
|
|
|
|
return rename_outputs_with_suffix({res}, context.get_name());
|
|
}
|
|
|
|
} // namespace op
|
|
} // namespace ggml
|
|
} // namespace frontend
|
|
} // namespace ov
|