LongLeCE · pull · Feb 23, 2026 · Feb 23, 2026 · Feb 23, 2026
diff --git a/scripts/sync_vendor.py b/scripts/sync_vendor.py
@@ -5,7 +5,7 @@
 import sys
 import subprocess
 
-HTTPLIB_VERSION = "refs/tags/v0.33.1"
+HTTPLIB_VERSION = "refs/tags/v0.34.0"
 
 vendor = {
     "https://github.com/nlohmann/json/releases/latest/download/json.hpp":     "vendor/nlohmann/json.hpp",

diff --git a/tests/test-backend-sampler.cpp b/tests/test-backend-sampler.cpp
@@ -361,7 +361,7 @@ static void test_backend_temp_sampling(const test_params & params) {
             GGML_ASSERT(false && "Failed to decode token");
         }
 
-        // Verfify sequence 0
+        // Verify sequence 0
         {
             int32_t batch_idx = test_ctx.idx_for_seq(0);
             int n_logits = llama_get_sampled_logits_count_ith(test_ctx.ctx.get(), batch_idx);
@@ -379,7 +379,7 @@ static void test_backend_temp_sampling(const test_params & params) {
         }
 
 
-        // Verfify sequence 1
+        // Verify sequence 1
         {
             int32_t batch_idx = test_ctx.idx_for_seq(1);
 
@@ -395,7 +395,7 @@ static void test_backend_temp_sampling(const test_params & params) {
         }
     }
 
-    // lambda to testing non-positive temperature values.
+    // lambda for testing non-positive temperature values.
     auto test_argmax_temp = [&](float temp) {
         printf("\nTesting temperature = %.1f\n", temp);
 
@@ -454,7 +454,7 @@ static void test_backend_temp_ext_sampling(const test_params & params) {
         }
     }
 
-    // lambda to testing non-positive temp/delta/exponent values.
+    // lambda for testing non-positive temp/delta/exponent values.
     auto test_argmax_temp = [&](float temp, float delta, float exponent) {
         printf("\nTesting temperature = %.1f, delta = %1.f, exponent = %1.f\n", temp, delta, exponent);
 
@@ -530,7 +530,7 @@ static void test_backend_min_p_sampling(const test_params & params) {
     printf("min-p cpu sampled token id:%d, string: '%s'\n", token, token_str.c_str());
     GGML_ASSERT(token >= 0 && token < test_ctx.n_vocab);
 
-    // Decode and sampler 10 more tokens
+    // Decode and sample 10 more tokens
     for (int i = 0; i < 10; i++) {
         int32_t loop_idx = test_ctx.idx_for_seq(seq_id);
         llama_token token = llama_sampler_sample(chain.get(), test_ctx.ctx.get(), loop_idx);
@@ -582,7 +582,7 @@ static void test_backend_top_p_sampling(const test_params & params) {
     printf("top-p cpu sampled token id:%d, string: '%s'\n", token, token_str.c_str());
     GGML_ASSERT(token >= 0 && token < test_ctx.n_vocab);
 
-    // Decode and sampler 10 more tokens
+    // Decode and sample 10 more tokens
     for (int i = 0; i < 10; i++) {
         int32_t loop_idx = test_ctx.idx_for_seq(seq_id);
         llama_token token = llama_sampler_sample(chain.get(), test_ctx.ctx.get(), loop_idx);
@@ -619,7 +619,7 @@ static void test_backend_multi_sequence_sampling(const test_params & params) {
         GGML_ASSERT(false && "Failed to decode token");
     }
 
-    // Verfiy sequence 0
+    // Verify sequence 0
     {
         int32_t batch_idx = test_ctx.idx_for_seq(0);
         llama_token token = llama_get_sampled_token_ith(test_ctx.ctx.get(), batch_idx);
@@ -763,7 +763,7 @@ static void test_backend_logit_bias_sampling(const test_params & params) {
     printf("backend logit bias sampling test PASSED\n");
 }
 
-// This test verifies that it is possible to have two different backend sampler,
+// This test verifies that it is possible to have two different backend samplers,
 // one that uses the backend dist sampler, and another that uses CPU dist sampler.
 static void test_backend_mixed_sampling(const test_params & params) {
     struct llama_sampler_chain_params chain_params_0 = llama_sampler_chain_default_params();
@@ -791,7 +791,7 @@ static void test_backend_mixed_sampling(const test_params & params) {
         GGML_ASSERT(false && "Failed to decode token");
     }
 
-    // Verfiy sequence 0 that used the dist backend sampler.
+    // Verify sequence 0 that used the dist backend sampler.
     {
         int32_t batch_idx = test_ctx.idx_for_seq(0);
         llama_token token = llama_get_sampled_token_ith(test_ctx.ctx.get(), batch_idx);
@@ -802,7 +802,7 @@ static void test_backend_mixed_sampling(const test_params & params) {
         //GGML_ASSERT(llama_get_sampled_logits_count_ith(test_ctx.ctx.get(), batch_idx) == 0);
     }
 
-    // Verfiy sequence 1 that used the top-k backend sampler.
+    // Verify sequence 1 that used the top-k backend sampler.
     {
         int32_t batch_idx = test_ctx.idx_for_seq(1);
         float * logits = llama_get_sampled_logits_ith(test_ctx.ctx.get(), batch_idx);
@@ -934,7 +934,7 @@ static void test_backend_cpu_mixed_batch(const test_params & params) {
         // samplers.
         llama_set_sampler(test_ctx.ctx.get(), 0, nullptr);
 
-        // Create a CPU sampler and verify we can sampler from it.
+        // Create a CPU sampler and verify we can sample from it.
         struct llama_sampler_chain_params chain_params = llama_sampler_chain_default_params();
         llama_sampler_ptr chain(llama_sampler_chain_init(chain_params));
         llama_sampler_chain_add(chain.get(), llama_sampler_init_greedy());

diff --git a/vendor/cpp-httplib/httplib.cpp b/vendor/cpp-httplib/httplib.cpp
@@ -1660,6 +1660,7 @@ class SocketStream final : public Stream {
   bool is_readable() const override;
   bool wait_readable() const override;
   bool wait_writable() const override;
+  bool is_peer_alive() const override;
   ssize_t read(char *ptr, size_t size) override;
   ssize_t write(const char *ptr, size_t size) override;
   void get_remote_ip_and_port(std::string &ip, int &port) const override;
@@ -3313,10 +3314,10 @@ bool write_content_with_progress(Stream &strm,
     return ok;
   };
 
-  data_sink.is_writable = [&]() -> bool { return strm.wait_writable(); };
+  data_sink.is_writable = [&]() -> bool { return strm.is_peer_alive(); };
 
   while (offset < end_offset && !is_shutting_down()) {
-    if (!strm.wait_writable()) {
+    if (!strm.wait_writable() || !strm.is_peer_alive()) {
       error = Error::Write;
       return false;
     } else if (!content_provider(offset, end_offset - offset, data_sink)) {
@@ -3328,6 +3329,11 @@ bool write_content_with_progress(Stream &strm,
     }
   }
 
+  if (offset < end_offset) { // exited due to is_shutting_down(), not completion
+    error = Error::Write;
+    return false;
+  }
+
   error = Error::Success;
   return true;
 }
@@ -3367,20 +3373,21 @@ write_content_without_length(Stream &strm,
     return ok;
   };
 
-  data_sink.is_writable = [&]() -> bool { return strm.wait_writable(); };
+  data_sink.is_writable = [&]() -> bool { return strm.is_peer_alive(); };
 
   data_sink.done = [&](void) { data_available = false; };
 
   while (data_available && !is_shutting_down()) {
-    if (!strm.wait_writable()) {
+    if (!strm.wait_writable() || !strm.is_peer_alive()) {
       return false;
     } else if (!content_provider(offset, 0, data_sink)) {
       return false;
     } else if (!ok) {
       return false;
     }
   }
-  return true;
+  return !data_available; // true only if done() was called, false if shutting
+                          // down
 }
 
 template <typename T, typename U>
@@ -3416,7 +3423,7 @@ write_content_chunked(Stream &strm, const ContentProvider &content_provider,
     return ok;
   };
 
-  data_sink.is_writable = [&]() -> bool { return strm.wait_writable(); };
+  data_sink.is_writable = [&]() -> bool { return strm.is_peer_alive(); };
 
   auto done_with_trailer = [&](const Headers *trailer) {
     if (!ok) { return; }
@@ -3466,7 +3473,7 @@ write_content_chunked(Stream &strm, const ContentProvider &content_provider,
   };
 
   while (data_available && !is_shutting_down()) {
-    if (!strm.wait_writable()) {
+    if (!strm.wait_writable() || !strm.is_peer_alive()) {
       error = Error::Write;
       return false;
     } else if (!content_provider(offset, 0, data_sink)) {
@@ -3478,6 +3485,11 @@ write_content_chunked(Stream &strm, const ContentProvider &content_provider,
     }
   }
 
+  if (data_available) { // exited due to is_shutting_down(), not done()
+    error = Error::Write;
+    return false;
+  }
+
   error = Error::Success;
   return true;
 }
@@ -4646,6 +4658,7 @@ class SSLSocketStream final : public Stream {
   bool is_readable() const override;
   bool wait_readable() const override;
   bool wait_writable() const override;
+  bool is_peer_alive() const override;
   ssize_t read(char *ptr, size_t size) override;
   ssize_t write(const char *ptr, size_t size) override;
   void get_remote_ip_and_port(std::string &ip, int &port) const override;
@@ -6069,8 +6082,11 @@ bool SocketStream::wait_readable() const {
 }
 
 bool SocketStream::wait_writable() const {
-  return select_write(sock_, write_timeout_sec_, write_timeout_usec_) > 0 &&
-         is_socket_alive(sock_);
+  return select_write(sock_, write_timeout_sec_, write_timeout_usec_) > 0;
+}
+
+bool SocketStream::is_peer_alive() const {
+  return detail::is_socket_alive(sock_);
 }
 
 ssize_t SocketStream::read(char *ptr, size_t size) {
@@ -6401,7 +6417,11 @@ bool SSLSocketStream::wait_readable() const {
 
 bool SSLSocketStream::wait_writable() const {
   return select_write(sock_, write_timeout_sec_, write_timeout_usec_) > 0 &&
-         is_socket_alive(sock_) && !tls::is_peer_closed(session_, sock_);
+         !tls::is_peer_closed(session_, sock_);
+}
+
+bool SSLSocketStream::is_peer_alive() const {
+  return !tls::is_peer_closed(session_, sock_);
 }
 
 ssize_t SSLSocketStream::read(char *ptr, size_t size) {
@@ -6925,35 +6945,33 @@ bool Server::write_response_core(Stream &strm, bool close_connection,
   if (post_routing_handler_) { post_routing_handler_(req, res); }
 
   // Response line and headers
-  {
-    detail::BufferStream bstrm;
-    if (!detail::write_response_line(bstrm, res.status)) { return false; }
-    if (header_writer_(bstrm, res.headers) <= 0) { return false; }
+  detail::BufferStream bstrm;
+  if (!detail::write_response_line(bstrm, res.status)) { return false; }
+  if (header_writer_(bstrm, res.headers) <= 0) { return false; }
 
-    // Flush buffer
-    auto &data = bstrm.get_buffer();
-    detail::write_data(strm, data.data(), data.size());
+  // Combine small body with headers to reduce write syscalls
+  if (req.method != "HEAD" && !res.body.empty() && !res.content_provider_) {
+    bstrm.write(res.body.data(), res.body.size());
   }
 
-  // Body
+  // Log before writing to avoid race condition with client-side code that
+  // accesses logger-captured data immediately after receiving the response.
+  output_log(req, res);
+
+  // Flush buffer
+  auto &data = bstrm.get_buffer();
+  if (!detail::write_data(strm, data.data(), data.size())) { return false; }
+
+  // Streaming body
   auto ret = true;
-  if (req.method != "HEAD") {
-    if (!res.body.empty()) {
-      if (!detail::write_data(strm, res.body.data(), res.body.size())) {
-        ret = false;
-      }
-    } else if (res.content_provider_) {
-      if (write_content_with_provider(strm, req, res, boundary, content_type)) {
-        res.content_provider_success_ = true;
-      } else {
-        ret = false;
-      }
+  if (req.method != "HEAD" && res.content_provider_) {
+    if (write_content_with_provider(strm, req, res, boundary, content_type)) {
+      res.content_provider_success_ = true;
+    } else {
+      ret = false;
     }
   }
 
-  // Log
-  output_log(req, res);
-
   return ret;
 }
 

diff --git a/vendor/cpp-httplib/httplib.h b/vendor/cpp-httplib/httplib.h
@@ -8,8 +8,8 @@
 #ifndef CPPHTTPLIB_HTTPLIB_H
 #define CPPHTTPLIB_HTTPLIB_H
 
-#define CPPHTTPLIB_VERSION "0.33.1"
-#define CPPHTTPLIB_VERSION_NUM "0x002101"
+#define CPPHTTPLIB_VERSION "0.34.0"
+#define CPPHTTPLIB_VERSION_NUM "0x002200"
 
 /*
  * Platform compatibility check
@@ -1038,6 +1038,32 @@ make_file_provider(const std::string &name, const std::string &filepath,
   return fdp;
 }
 
+inline std::pair<size_t, ContentProvider>
+make_file_body(const std::string &filepath) {
+  std::ifstream f(filepath, std::ios::binary | std::ios::ate);
+  if (!f) { return {0, ContentProvider{}}; }
+  auto size = static_cast<size_t>(f.tellg());
+
+  ContentProvider provider = [filepath](size_t offset, size_t length,
+                                        DataSink &sink) -> bool {
+    std::ifstream f(filepath, std::ios::binary);
+    if (!f) { return false; }
+    f.seekg(static_cast<std::streamoff>(offset));
+    if (!f.good()) { return false; }
+    char buf[8192];
+    while (length > 0) {
+      auto to_read = (std::min)(sizeof(buf), length);
+      f.read(buf, static_cast<std::streamsize>(to_read));
+      auto n = static_cast<size_t>(f.gcount());
+      if (n == 0) { break; }
+      if (!sink.write(buf, n)) { return false; }
+      length -= n;
+    }
+    return true;
+  };
+  return {size, std::move(provider)};
+}
+
 using ContentReceiverWithProgress = std::function<bool(
     const char *data, size_t data_length, size_t offset, size_t total_length)>;
 
@@ -1352,6 +1378,7 @@ class Stream {
   virtual bool is_readable() const = 0;
   virtual bool wait_readable() const = 0;
   virtual bool wait_writable() const = 0;
+  virtual bool is_peer_alive() const { return wait_writable(); }
 
   virtual ssize_t read(char *ptr, size_t size) = 0;
   virtual ssize_t write(const char *ptr, size_t size) = 0;