Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion scripts/sync_vendor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import sys
import subprocess

HTTPLIB_VERSION = "refs/tags/v0.33.1"
HTTPLIB_VERSION = "refs/tags/v0.34.0"

vendor = {
"https://github.com/nlohmann/json/releases/latest/download/json.hpp": "vendor/nlohmann/json.hpp",
Expand Down
22 changes: 11 additions & 11 deletions tests/test-backend-sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ static void test_backend_temp_sampling(const test_params & params) {
GGML_ASSERT(false && "Failed to decode token");
}

// Verfify sequence 0
// Verify sequence 0
{
int32_t batch_idx = test_ctx.idx_for_seq(0);
int n_logits = llama_get_sampled_logits_count_ith(test_ctx.ctx.get(), batch_idx);
Expand All @@ -379,7 +379,7 @@ static void test_backend_temp_sampling(const test_params & params) {
}


// Verfify sequence 1
// Verify sequence 1
{
int32_t batch_idx = test_ctx.idx_for_seq(1);

Expand All @@ -395,7 +395,7 @@ static void test_backend_temp_sampling(const test_params & params) {
}
}

// lambda to testing non-positive temperature values.
// lambda for testing non-positive temperature values.
auto test_argmax_temp = [&](float temp) {
printf("\nTesting temperature = %.1f\n", temp);

Expand Down Expand Up @@ -454,7 +454,7 @@ static void test_backend_temp_ext_sampling(const test_params & params) {
}
}

// lambda to testing non-positive temp/delta/exponent values.
// lambda for testing non-positive temp/delta/exponent values.
auto test_argmax_temp = [&](float temp, float delta, float exponent) {
printf("\nTesting temperature = %.1f, delta = %1.f, exponent = %1.f\n", temp, delta, exponent);

Expand Down Expand Up @@ -530,7 +530,7 @@ static void test_backend_min_p_sampling(const test_params & params) {
printf("min-p cpu sampled token id:%d, string: '%s'\n", token, token_str.c_str());
GGML_ASSERT(token >= 0 && token < test_ctx.n_vocab);

// Decode and sampler 10 more tokens
// Decode and sample 10 more tokens
for (int i = 0; i < 10; i++) {
int32_t loop_idx = test_ctx.idx_for_seq(seq_id);
llama_token token = llama_sampler_sample(chain.get(), test_ctx.ctx.get(), loop_idx);
Expand Down Expand Up @@ -582,7 +582,7 @@ static void test_backend_top_p_sampling(const test_params & params) {
printf("top-p cpu sampled token id:%d, string: '%s'\n", token, token_str.c_str());
GGML_ASSERT(token >= 0 && token < test_ctx.n_vocab);

// Decode and sampler 10 more tokens
// Decode and sample 10 more tokens
for (int i = 0; i < 10; i++) {
int32_t loop_idx = test_ctx.idx_for_seq(seq_id);
llama_token token = llama_sampler_sample(chain.get(), test_ctx.ctx.get(), loop_idx);
Expand Down Expand Up @@ -619,7 +619,7 @@ static void test_backend_multi_sequence_sampling(const test_params & params) {
GGML_ASSERT(false && "Failed to decode token");
}

// Verfiy sequence 0
// Verify sequence 0
{
int32_t batch_idx = test_ctx.idx_for_seq(0);
llama_token token = llama_get_sampled_token_ith(test_ctx.ctx.get(), batch_idx);
Expand Down Expand Up @@ -763,7 +763,7 @@ static void test_backend_logit_bias_sampling(const test_params & params) {
printf("backend logit bias sampling test PASSED\n");
}

// This test verifies that it is possible to have two different backend sampler,
// This test verifies that it is possible to have two different backend samplers,
// one that uses the backend dist sampler, and another that uses CPU dist sampler.
static void test_backend_mixed_sampling(const test_params & params) {
struct llama_sampler_chain_params chain_params_0 = llama_sampler_chain_default_params();
Expand Down Expand Up @@ -791,7 +791,7 @@ static void test_backend_mixed_sampling(const test_params & params) {
GGML_ASSERT(false && "Failed to decode token");
}

// Verfiy sequence 0 that used the dist backend sampler.
// Verify sequence 0 that used the dist backend sampler.
{
int32_t batch_idx = test_ctx.idx_for_seq(0);
llama_token token = llama_get_sampled_token_ith(test_ctx.ctx.get(), batch_idx);
Expand All @@ -802,7 +802,7 @@ static void test_backend_mixed_sampling(const test_params & params) {
//GGML_ASSERT(llama_get_sampled_logits_count_ith(test_ctx.ctx.get(), batch_idx) == 0);
}

// Verfiy sequence 1 that used the top-k backend sampler.
// Verify sequence 1 that used the top-k backend sampler.
{
int32_t batch_idx = test_ctx.idx_for_seq(1);
float * logits = llama_get_sampled_logits_ith(test_ctx.ctx.get(), batch_idx);
Expand Down Expand Up @@ -934,7 +934,7 @@ static void test_backend_cpu_mixed_batch(const test_params & params) {
// samplers.
llama_set_sampler(test_ctx.ctx.get(), 0, nullptr);

// Create a CPU sampler and verify we can sampler from it.
// Create a CPU sampler and verify we can sample from it.
struct llama_sampler_chain_params chain_params = llama_sampler_chain_default_params();
llama_sampler_ptr chain(llama_sampler_chain_init(chain_params));
llama_sampler_chain_add(chain.get(), llama_sampler_init_greedy());
Expand Down
82 changes: 50 additions & 32 deletions vendor/cpp-httplib/httplib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1660,6 +1660,7 @@ class SocketStream final : public Stream {
bool is_readable() const override;
bool wait_readable() const override;
bool wait_writable() const override;
bool is_peer_alive() const override;
ssize_t read(char *ptr, size_t size) override;
ssize_t write(const char *ptr, size_t size) override;
void get_remote_ip_and_port(std::string &ip, int &port) const override;
Expand Down Expand Up @@ -3313,10 +3314,10 @@ bool write_content_with_progress(Stream &strm,
return ok;
};

data_sink.is_writable = [&]() -> bool { return strm.wait_writable(); };
data_sink.is_writable = [&]() -> bool { return strm.is_peer_alive(); };

while (offset < end_offset && !is_shutting_down()) {
if (!strm.wait_writable()) {
if (!strm.wait_writable() || !strm.is_peer_alive()) {
error = Error::Write;
return false;
} else if (!content_provider(offset, end_offset - offset, data_sink)) {
Expand All @@ -3328,6 +3329,11 @@ bool write_content_with_progress(Stream &strm,
}
}

if (offset < end_offset) { // exited due to is_shutting_down(), not completion
error = Error::Write;
return false;
}

error = Error::Success;
return true;
}
Expand Down Expand Up @@ -3367,20 +3373,21 @@ write_content_without_length(Stream &strm,
return ok;
};

data_sink.is_writable = [&]() -> bool { return strm.wait_writable(); };
data_sink.is_writable = [&]() -> bool { return strm.is_peer_alive(); };

data_sink.done = [&](void) { data_available = false; };

while (data_available && !is_shutting_down()) {
if (!strm.wait_writable()) {
if (!strm.wait_writable() || !strm.is_peer_alive()) {
return false;
} else if (!content_provider(offset, 0, data_sink)) {
return false;
} else if (!ok) {
return false;
}
}
return true;
return !data_available; // true only if done() was called, false if shutting
// down
}

template <typename T, typename U>
Expand Down Expand Up @@ -3416,7 +3423,7 @@ write_content_chunked(Stream &strm, const ContentProvider &content_provider,
return ok;
};

data_sink.is_writable = [&]() -> bool { return strm.wait_writable(); };
data_sink.is_writable = [&]() -> bool { return strm.is_peer_alive(); };

auto done_with_trailer = [&](const Headers *trailer) {
if (!ok) { return; }
Expand Down Expand Up @@ -3466,7 +3473,7 @@ write_content_chunked(Stream &strm, const ContentProvider &content_provider,
};

while (data_available && !is_shutting_down()) {
if (!strm.wait_writable()) {
if (!strm.wait_writable() || !strm.is_peer_alive()) {
error = Error::Write;
return false;
} else if (!content_provider(offset, 0, data_sink)) {
Expand All @@ -3478,6 +3485,11 @@ write_content_chunked(Stream &strm, const ContentProvider &content_provider,
}
}

if (data_available) { // exited due to is_shutting_down(), not done()
error = Error::Write;
return false;
}

error = Error::Success;
return true;
}
Expand Down Expand Up @@ -4646,6 +4658,7 @@ class SSLSocketStream final : public Stream {
bool is_readable() const override;
bool wait_readable() const override;
bool wait_writable() const override;
bool is_peer_alive() const override;
ssize_t read(char *ptr, size_t size) override;
ssize_t write(const char *ptr, size_t size) override;
void get_remote_ip_and_port(std::string &ip, int &port) const override;
Expand Down Expand Up @@ -6069,8 +6082,11 @@ bool SocketStream::wait_readable() const {
}

bool SocketStream::wait_writable() const {
return select_write(sock_, write_timeout_sec_, write_timeout_usec_) > 0 &&
is_socket_alive(sock_);
return select_write(sock_, write_timeout_sec_, write_timeout_usec_) > 0;
}

bool SocketStream::is_peer_alive() const {
return detail::is_socket_alive(sock_);
}

ssize_t SocketStream::read(char *ptr, size_t size) {
Expand Down Expand Up @@ -6401,7 +6417,11 @@ bool SSLSocketStream::wait_readable() const {

bool SSLSocketStream::wait_writable() const {
return select_write(sock_, write_timeout_sec_, write_timeout_usec_) > 0 &&
is_socket_alive(sock_) && !tls::is_peer_closed(session_, sock_);
!tls::is_peer_closed(session_, sock_);
}

bool SSLSocketStream::is_peer_alive() const {
return !tls::is_peer_closed(session_, sock_);
}

ssize_t SSLSocketStream::read(char *ptr, size_t size) {
Expand Down Expand Up @@ -6925,35 +6945,33 @@ bool Server::write_response_core(Stream &strm, bool close_connection,
if (post_routing_handler_) { post_routing_handler_(req, res); }

// Response line and headers
{
detail::BufferStream bstrm;
if (!detail::write_response_line(bstrm, res.status)) { return false; }
if (header_writer_(bstrm, res.headers) <= 0) { return false; }
detail::BufferStream bstrm;
if (!detail::write_response_line(bstrm, res.status)) { return false; }
if (header_writer_(bstrm, res.headers) <= 0) { return false; }

// Flush buffer
auto &data = bstrm.get_buffer();
detail::write_data(strm, data.data(), data.size());
// Combine small body with headers to reduce write syscalls
if (req.method != "HEAD" && !res.body.empty() && !res.content_provider_) {
bstrm.write(res.body.data(), res.body.size());
}

// Body
// Log before writing to avoid race condition with client-side code that
// accesses logger-captured data immediately after receiving the response.
output_log(req, res);

// Flush buffer
auto &data = bstrm.get_buffer();
if (!detail::write_data(strm, data.data(), data.size())) { return false; }

// Streaming body
auto ret = true;
if (req.method != "HEAD") {
if (!res.body.empty()) {
if (!detail::write_data(strm, res.body.data(), res.body.size())) {
ret = false;
}
} else if (res.content_provider_) {
if (write_content_with_provider(strm, req, res, boundary, content_type)) {
res.content_provider_success_ = true;
} else {
ret = false;
}
if (req.method != "HEAD" && res.content_provider_) {
if (write_content_with_provider(strm, req, res, boundary, content_type)) {
res.content_provider_success_ = true;
} else {
ret = false;
}
}

// Log
output_log(req, res);

return ret;
}

Expand Down
31 changes: 29 additions & 2 deletions vendor/cpp-httplib/httplib.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
#ifndef CPPHTTPLIB_HTTPLIB_H
#define CPPHTTPLIB_HTTPLIB_H

#define CPPHTTPLIB_VERSION "0.33.1"
#define CPPHTTPLIB_VERSION_NUM "0x002101"
#define CPPHTTPLIB_VERSION "0.34.0"
#define CPPHTTPLIB_VERSION_NUM "0x002200"

/*
* Platform compatibility check
Expand Down Expand Up @@ -1038,6 +1038,32 @@ make_file_provider(const std::string &name, const std::string &filepath,
return fdp;
}

inline std::pair<size_t, ContentProvider>
make_file_body(const std::string &filepath) {
std::ifstream f(filepath, std::ios::binary | std::ios::ate);
if (!f) { return {0, ContentProvider{}}; }
auto size = static_cast<size_t>(f.tellg());

ContentProvider provider = [filepath](size_t offset, size_t length,
DataSink &sink) -> bool {
std::ifstream f(filepath, std::ios::binary);
if (!f) { return false; }
f.seekg(static_cast<std::streamoff>(offset));
if (!f.good()) { return false; }
char buf[8192];
while (length > 0) {
auto to_read = (std::min)(sizeof(buf), length);
f.read(buf, static_cast<std::streamsize>(to_read));
auto n = static_cast<size_t>(f.gcount());
if (n == 0) { break; }
if (!sink.write(buf, n)) { return false; }
length -= n;
}
return true;
};
return {size, std::move(provider)};
}

using ContentReceiverWithProgress = std::function<bool(
const char *data, size_t data_length, size_t offset, size_t total_length)>;

Expand Down Expand Up @@ -1352,6 +1378,7 @@ class Stream {
virtual bool is_readable() const = 0;
virtual bool wait_readable() const = 0;
virtual bool wait_writable() const = 0;
virtual bool is_peer_alive() const { return wait_writable(); }

virtual ssize_t read(char *ptr, size_t size) = 0;
virtual ssize_t write(const char *ptr, size_t size) = 0;
Expand Down
Loading