mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-27 23:50:20 -05:00
server: add "X-Accel-Buffering": "no" header to streaming endpoints (#24774)
* server: add "X-Accel-Buffering": "no" header to streaming endpoints This header tells Nginx (as a reverse proxy) to NOT buffer responses. (only affects streaming endpoints) Without it, Nginx will break streaming with certain applications (notably the Pi coding harness).
This commit is contained in:
parent
a6b3260a42
commit
40f3aafc45
@ -492,6 +492,8 @@ using server_http_req_ptr = std::unique_ptr<server_http_req>;
|
||||
static void process_handler_response(server_http_req_ptr && request, server_http_res_ptr & response, httplib::Response & res) {
|
||||
if (response->is_stream()) {
|
||||
res.status = response->status;
|
||||
// Tell Nginx to not buffer any streamed response
|
||||
response->headers["X-Accel-Buffering"] = "no";
|
||||
set_headers(res, response->headers);
|
||||
const std::string content_type = response->content_type;
|
||||
// convert to shared_ptr as both chunked_content_provider() and on_complete() need to use it
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user