From fd67524e07bb486a6f1cdc40ee9642555f9c5848 Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Wed, 11 Feb 2026 14:50:53 -0800 Subject: [PATCH 01/25] initial work --- .../consomme/src/dns_resolver/dns_tcp.rs | 299 ++++++++++++++++++ .../consomme/src/dns_resolver/mod.rs | 21 +- .../consomme/src/dns_resolver/windows/mod.rs | 6 +- .../net/net_consomme/consomme/src/lib.rs | 3 + .../net/net_consomme/consomme/src/tcp.rs | 188 +++++++++-- .../net/net_consomme/consomme/src/udp.rs | 3 +- 6 files changed, 497 insertions(+), 23 deletions(-) create mode 100644 vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs new file mode 100644 index 0000000000..e6eef628d6 --- /dev/null +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -0,0 +1,299 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! DNS over TCP handler for consomme. +//! +//! Implements DNS TCP framing per RFC 1035 §4.2.2: each DNS message is +//! preceded by a 2-byte big-endian length prefix. This module intercepts +//! TCP connections to the gateway on port 53 and resolves queries using +//! the shared `DnsBackend`. + +use super::DnsBackend; +use super::DnsFlow; +use super::DnsRequest; +use super::DnsResponse; +use mesh_channel_core::Receiver; +use std::collections::VecDeque; +use std::sync::Arc; +use std::task::Context; +use std::task::Poll; + +/// Maximum DNS message size over TCP (2-byte length field can represent up to 65535). +const MAX_DNS_TCP_MESSAGE_SIZE: usize = 65535; + +pub struct DnsTcpHandler { + backend: Arc, + receiver: Receiver, + flow: DnsFlow, + /// Data received from the guest, accumulating DNS TCP framed messages. + rx_buf: Vec, + /// Length-prefixed DNS responses waiting to be sent to the guest. + tx_buf: VecDeque, + /// The guest has sent FIN; no more data will arrive. + guest_fin: bool, +} + +impl DnsTcpHandler { + pub fn new(backend: Arc, flow: DnsFlow) -> Self { + let receiver = Receiver::new(); + Self { + backend, + receiver, + flow, + rx_buf: Vec::new(), + tx_buf: VecDeque::new(), + guest_fin: false, + } + } + + /// Feed data received from the guest into the handler. + /// Extracts complete DNS messages and submits them for resolution. + pub fn ingest(&mut self, data: &[u8]) { + // Limit rx_buf growth to prevent unbounded memory use from a + // guest that sends a large length prefix but trickles data slowly. + let remaining_capacity = MAX_DNS_TCP_MESSAGE_SIZE.saturating_sub(self.rx_buf.len()); + let accepted = data.len().min(remaining_capacity); + if accepted > 0 { + self.rx_buf.extend_from_slice(&data[..accepted]); + } + if accepted < data.len() { + tracelimit::warn_ratelimited!( + dropped = data.len() - accepted, + "DNS TCP rx_buf full, dropping excess data" + ); + } + self.extract_and_submit_queries(); + } + + /// Parse the rx buffer for complete DNS TCP-framed messages + /// (2-byte big-endian length prefix + payload) and submit each query. + fn extract_and_submit_queries(&mut self) { + loop { + if self.rx_buf.len() < 2 { + break; + } + let msg_len = u16::from_be_bytes([self.rx_buf[0], self.rx_buf[1]]) as usize; + if msg_len == 0 || msg_len > MAX_DNS_TCP_MESSAGE_SIZE { + // Malformed: discard the length prefix and try to resync. + self.rx_buf.drain(..2); + continue; + } + if self.rx_buf.len() < 2 + msg_len { + // Incomplete message; wait for more data. + break; + } + // Include the 2-byte TCP length prefix in the query data + // passed to the backend, as platform-specific resolvers + // expect the full TCP-framed message. + let query = self.rx_buf[0..2 + msg_len].to_vec(); + self.rx_buf.drain(..2 + msg_len); + + let request = DnsRequest { + flow: self.flow.clone(), + dns_query: &query, + }; + self.backend.query(&request, self.receiver.sender()); + } + } + + /// Poll for completed DNS responses and length-prefix them into the + /// transmit buffer. + pub fn poll_responses(&mut self, cx: &mut Context<'_>) { + loop { + match self.receiver.poll_recv(cx) { + Poll::Ready(Ok(response)) => { + let len = response.response_data.len() as u16; + self.tx_buf.extend(&len.to_be_bytes()); + self.tx_buf.extend(&response.response_data); + } + Poll::Ready(Err(_)) | Poll::Pending => break, + } + } + } + + /// Drain available response data into the provided buffer. + /// Returns the number of bytes written. + pub fn drain_tx(&mut self, buf: &mut [u8]) -> usize { + let n = buf.len().min(self.tx_buf.len()); + for (dst, src) in buf[..n].iter_mut().zip(self.tx_buf.drain(..n)) { + *dst = src; + } + n + } + + pub fn has_pending_tx(&self) -> bool { + !self.tx_buf.is_empty() + } + + pub fn set_guest_fin(&mut self) { + self.guest_fin = true; + } + + /// Returns true when the guest has sent FIN and all responses have + /// been flushed, so the server side can send FIN too. + pub fn should_close(&self) -> bool { + self.guest_fin && self.tx_buf.is_empty() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + /// A test DNS backend that echoes the query back as the response. + struct EchoBackend; + + impl DnsBackend for EchoBackend { + fn query( + &self, + request: &DnsRequest<'_>, + response_sender: mesh_channel_core::Sender, + ) { + response_sender.send(DnsResponse { + flow: request.flow.clone(), + response_data: request.dns_query.to_vec(), + }); + } + } + + fn test_flow() -> DnsFlow { + use smoltcp::wire::EthernetAddress; + use smoltcp::wire::IpAddress; + use smoltcp::wire::Ipv4Address; + DnsFlow { + src_addr: IpAddress::Ipv4(Ipv4Address::new(10, 0, 0, 2)), + dst_addr: IpAddress::Ipv4(Ipv4Address::new(10, 0, 0, 1)), + src_port: 12345, + dst_port: 53, + gateway_mac: EthernetAddress([0x52, 0x55, 10, 0, 0, 1]), + client_mac: EthernetAddress([0, 0, 0, 0, 1, 0]), + transport: crate::dns_resolver::DnsTransport::Tcp, + } + } + + fn make_tcp_dns_message(payload: &[u8]) -> Vec { + let len = payload.len() as u16; + let mut msg = len.to_be_bytes().to_vec(); + msg.extend_from_slice(payload); + msg + } + + #[test] + fn single_query_response() { + let backend = Arc::new(EchoBackend); + let mut handler = DnsTcpHandler::new(backend, test_flow()); + + // 20-byte fake DNS query (> 12-byte header minimum) + let query = vec![ + 0xAB, 0xCD, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x77, + 0x77, 0x77, 0x03, 0x63, 0x6F, 0x6D, + ]; + let msg = make_tcp_dns_message(&query); + + handler.ingest(&msg); + + let waker = std::task::Waker::from(Arc::new(NoopWaker)); + let mut cx = Context::from_waker(&waker); + handler.poll_responses(&mut cx); + + assert!(handler.has_pending_tx()); + + let mut buf = vec![0u8; 256]; + let n = handler.drain_tx(&mut buf); + // The echo backend returns the full TCP-framed query (2-byte prefix + DNS payload). + // poll_responses then wraps that in another 2-byte length prefix for transmission. + let echoed_len = 2 + query.len(); // length prefix + DNS payload + assert_eq!(n, 2 + echoed_len); // tx framing prefix + echoed data + assert_eq!( + u16::from_be_bytes([buf[0], buf[1]]) as usize, + echoed_len + ); + // The echoed data should be the original TCP-framed message. + assert_eq!(&buf[2..2 + 2], &(query.len() as u16).to_be_bytes()); + assert_eq!(&buf[4..n], &query[..]); + } + + #[test] + fn partial_message_buffering() { + let backend = Arc::new(EchoBackend); + let mut handler = DnsTcpHandler::new(backend, test_flow()); + + let query = vec![ + 0xAB, 0xCD, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x66, + 0x6F, 0x6F, + ]; + let msg = make_tcp_dns_message(&query); + + // Feed just the length prefix + handler.ingest(&msg[..2]); + + let waker = std::task::Waker::from(Arc::new(NoopWaker)); + let mut cx = Context::from_waker(&waker); + handler.poll_responses(&mut cx); + assert!(!handler.has_pending_tx()); + + // Feed the rest + handler.ingest(&msg[2..]); + handler.poll_responses(&mut cx); + assert!(handler.has_pending_tx()); + } + + #[test] + fn multiple_queries_in_one_write() { + let backend = Arc::new(EchoBackend); + let mut handler = DnsTcpHandler::new(backend, test_flow()); + + let q1 = vec![ + 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x61, + 0x61, 0x61, + ]; + let q2 = vec![ + 0x00, 0x02, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x62, + 0x62, 0x62, + ]; + let mut combined = make_tcp_dns_message(&q1); + combined.extend(make_tcp_dns_message(&q2)); + + handler.ingest(&combined); + + let waker = std::task::Waker::from(Arc::new(NoopWaker)); + let mut cx = Context::from_waker(&waker); + handler.poll_responses(&mut cx); + + let mut buf = vec![0u8; 512]; + let n = handler.drain_tx(&mut buf); + // Each echoed response includes the 2-byte TCP prefix + DNS payload, + // then poll_responses adds another 2-byte tx framing prefix. + let per_response = 2 + (2 + q1.len()); // tx prefix + (tcp prefix + DNS payload) + assert_eq!(n, 2 * per_response); + } + + #[test] + fn should_close_after_fin_and_drain() { + let backend = Arc::new(EchoBackend); + let mut handler = DnsTcpHandler::new(backend, test_flow()); + + let query = vec![ + 0xAB, 0xCD, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x78, + 0x78, 0x78, + ]; + handler.ingest(&make_tcp_dns_message(&query)); + handler.set_guest_fin(); + + let waker = std::task::Waker::from(Arc::new(NoopWaker)); + let mut cx = Context::from_waker(&waker); + handler.poll_responses(&mut cx); + + assert!(!handler.should_close()); + + let mut buf = vec![0u8; 256]; + handler.drain_tx(&mut buf); + + assert!(handler.should_close()); + } + + struct NoopWaker; + impl std::task::Wake for NoopWaker { + fn wake(self: Arc) {} + } +} diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/mod.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/mod.rs index d3bae3265f..c04965a783 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/mod.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/mod.rs @@ -6,11 +6,14 @@ use mesh_channel_core::Receiver; use mesh_channel_core::Sender; use smoltcp::wire::EthernetAddress; use smoltcp::wire::IpAddress; +use std::sync::Arc; use std::task::Context; use std::task::Poll; use crate::DropReason; +pub mod dns_tcp; + #[cfg(unix)] mod unix; @@ -19,6 +22,13 @@ mod windows; static DNS_HEADER_SIZE: usize = 12; +/// Transport protocol for a DNS query. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DnsTransport { + Udp, + Tcp, +} + #[derive(Debug, Clone)] pub struct DnsFlow { pub src_addr: IpAddress, @@ -27,6 +37,7 @@ pub struct DnsFlow { pub dst_port: u16, pub gateway_mac: EthernetAddress, pub client_mac: EthernetAddress, + pub transport: DnsTransport, } #[derive(Debug, Clone)] @@ -49,7 +60,7 @@ pub(crate) trait DnsBackend: Send + Sync { #[derive(Inspect)] pub struct DnsResolver { #[inspect(skip)] - backend: Box, + backend: Arc, #[inspect(skip)] receiver: Receiver, pending_requests: usize, @@ -70,7 +81,7 @@ impl DnsResolver { let receiver = Receiver::new(); Ok(Self { - backend: Box::new(WindowsDnsResolverBackend::new()?), + backend: Arc::new(WindowsDnsResolverBackend::new()?), receiver, pending_requests: 0, max_pending_requests, @@ -87,7 +98,7 @@ impl DnsResolver { let receiver = Receiver::new(); Ok(Self { - backend: Box::new(UnixDnsResolverBackend::new()?), + backend: Arc::new(UnixDnsResolverBackend::new()?), receiver, pending_requests: 0, max_pending_requests, @@ -122,6 +133,10 @@ impl DnsResolver { Poll::Ready(Err(_)) | Poll::Pending => Poll::Pending, } } + + pub fn backend(&self) -> &Arc { + &self.backend + } } /// Internal DNS request structure used by backend implementations. diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/windows/mod.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/windows/mod.rs index 3a60e98bf8..0f00411379 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/windows/mod.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/windows/mod.rs @@ -21,6 +21,7 @@ use std::ptr::null_mut; use std::sync::Arc; use windows_sys::Win32::Foundation::DNS_REQUEST_PENDING; use windows_sys::Win32::Foundation::NO_ERROR; +use windows_sys::Win32::NetworkManagement::Dns::DNS_PROTOCOL_TCP; use windows_sys::Win32::NetworkManagement::Dns::DNS_PROTOCOL_UDP; use windows_sys::Win32::NetworkManagement::Dns::DNS_QUERY_NO_MULTICAST; use windows_sys::Win32::NetworkManagement::Dns::DNS_QUERY_RAW_CANCEL; @@ -117,7 +118,10 @@ impl DnsBackend for WindowsDnsResolverBackend { queryRawOptions: 0, customServersSize: 0, customServers: null_mut(), - protocol: DNS_PROTOCOL_UDP, + protocol: match request.flow.transport { + super::DnsTransport::Tcp => DNS_PROTOCOL_TCP, + super::DnsTransport::Udp => DNS_PROTOCOL_UDP, + }, Anonymous: DNS_QUERY_RAW_REQUEST_0::default(), }; diff --git a/vm/devices/net/net_consomme/consomme/src/lib.rs b/vm/devices/net/net_consomme/consomme/src/lib.rs index 9736e7371a..f3234d7da0 100644 --- a/vm/devices/net/net_consomme/consomme/src/lib.rs +++ b/vm/devices/net/net_consomme/consomme/src/lib.rs @@ -29,6 +29,9 @@ mod udp; mod unix; mod windows; +/// Standard DNS port number. +const DNS_PORT: u16 = 53; + use inspect::Inspect; use inspect::InspectMut; use pal_async::driver::Driver; diff --git a/vm/devices/net/net_consomme/consomme/src/tcp.rs b/vm/devices/net/net_consomme/consomme/src/tcp.rs index 8737d84e0b..5059c88644 100644 --- a/vm/devices/net/net_consomme/consomme/src/tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/tcp.rs @@ -9,6 +9,7 @@ use super::DropReason; use crate::ChecksumState; use crate::ConsommeState; use crate::IpAddresses; +use crate::dns_resolver::dns_tcp::DnsTcpHandler; use futures::AsyncRead; use futures::AsyncWrite; use inspect::Inspect; @@ -118,10 +119,22 @@ enum LoopbackPortInfo { ProxyForGuestPort { sending_port: u16, guest_port: u16 }, } +/// The I/O backend for a TCP connection. +/// +/// A connection is either backed by a real host socket or a virtual DNS +/// handler that resolves DNS queries without a real socket. +enum TcpBackend { + /// A real host socket. The socket may be `None` while the connection is + /// being constructed, or after both ends have closed. + Socket(Option>), + /// A virtual DNS TCP handler (no real socket). + Dns(DnsTcpHandler), +} + #[derive(Inspect)] struct TcpConnection { #[inspect(skip)] - socket: Option>, + backend: TcpBackend, loopback_port: LoopbackPortInfo, state: TcpState, @@ -302,13 +315,17 @@ impl Access<'_, T> { pub(crate) fn refresh_tcp_driver(&mut self) { self.inner.tcp.connections.retain(|_, conn| { - let Some(socket) = conn.socket.take() else { + let TcpBackend::Socket(opt_socket) = &mut conn.backend else { + // DNS connections have no real socket to refresh. + return true; + }; + let Some(socket) = opt_socket.take() else { return true; }; let socket = socket.into_inner(); match PolledSocket::new(self.client.driver(), socket) { Ok(socket) => { - conn.socket = Some(socket); + *opt_socket = Some(socket); true } Err(err) => { @@ -348,6 +365,8 @@ impl Access<'_, T> { }; tracing::trace!(?tcp, "tcp packet"); + let is_dns_tcp = is_gateway_dns_tcp(&ft, &self.inner.state.params, self.inner.dns.is_some()); + let mut sender = Sender { ft: &ft, client: self.client, @@ -368,8 +387,22 @@ impl Access<'_, T> { // This is for an old connection. Send reset. sender.rst(ack, None); } else if tcp.control == TcpControl::Syn { - let conn = TcpConnection::new(&mut sender, &tcp)?; - e.insert(conn); + if is_dns_tcp { + tracing::info!( + src = %ft.src, + dst = %ft.dst, + "intercepting DNS TCP connection to gateway" + ); + let conn = TcpConnection::new_dns( + &mut sender, + &tcp, + self.inner.dns.as_ref().unwrap(), + )?; + e.insert(conn); + } else { + let conn = TcpConnection::new(&mut sender, &tcp)?; + e.insert(conn); + } } else { // Ignore the packet. } @@ -539,7 +572,7 @@ impl Default for TcpConnection { let tx_buffer_size = 16384; Self { - socket: None, + backend: TcpBackend::Socket(None), loopback_port: LoopbackPortInfo::None, state: TcpState::Connecting, rx_buffer: VecDeque::with_capacity(rx_buffer_size), @@ -617,7 +650,7 @@ impl TcpConnection { } } } - this.socket = Some(socket); + this.backend = TcpBackend::Socket(Some(socket)); Ok(this) } @@ -626,9 +659,9 @@ impl TcpConnection { socket: Socket, ) -> Result { let mut this = Self { - socket: Some( + backend: TcpBackend::Socket(Some( PolledSocket::new(sender.client.driver(), socket).map_err(DropReason::Io)?, - ), + )), state: TcpState::SynSent, ..Default::default() }; @@ -636,6 +669,36 @@ impl TcpConnection { Ok(this) } + /// Create a virtual DNS TCP connection (no real host socket). + /// The connection completes the TCP handshake with the guest and + /// routes DNS queries through the provided resolver backend. + fn new_dns( + sender: &mut Sender<'_, impl Client>, + tcp: &TcpRepr<'_>, + dns: &crate::dns_resolver::DnsResolver, + ) -> Result { + let mut this = Self::default(); + this.initialize_from_first_client_packet(tcp)?; + + let flow = crate::dns_resolver::DnsFlow { + src_addr: sender.ft.src.ip().into(), + dst_addr: sender.ft.dst.ip().into(), + src_port: sender.ft.src.port(), + dst_port: sender.ft.dst.port(), + gateway_mac: sender.state.params.gateway_mac, + client_mac: sender.state.params.client_mac, + transport: crate::dns_resolver::DnsTransport::Tcp, + }; + + this.backend = TcpBackend::Dns(DnsTcpHandler::new(dns.backend().clone(), flow)); + // Immediately transition to SynReceived so the handshake SYN-ACK is sent. + this.state = TcpState::SynReceived; + this.rx_window_cap = this.rx_buffer.capacity(); + this.send_syn(sender, Some(this.rx_seq)); + + Ok(this) + } + fn initialize_from_first_client_packet(&mut self, tcp: &TcpRepr<'_>) -> Result<(), DropReason> { // The TCPv4 default maximum segment size is 536. This can be bigger for // IPv6. @@ -671,16 +734,90 @@ impl TcpConnection { } fn poll_conn(&mut self, cx: &mut Context<'_>, sender: &mut Sender<'_, impl Client>) -> bool { + // Temporarily take the backend to split the mutable borrow between + // the backend and the rest of TcpConnection's fields. + let mut backend = std::mem::replace(&mut self.backend, TcpBackend::Socket(None)); + let result = match &mut backend { + TcpBackend::Dns(dns_handler) => self.poll_dns_backend(cx, sender, dns_handler), + TcpBackend::Socket(opt_socket) => self.poll_socket_backend(cx, sender, opt_socket), + }; + self.backend = backend; + result + } + + /// Poll the DNS TCP virtual connection backend. + /// + /// There is no real socket; data flows through the [`DnsTcpHandler`]. + fn poll_dns_backend( + &mut self, + cx: &mut Context<'_>, + sender: &mut Sender<'_, impl Client>, + dns_handler: &mut DnsTcpHandler, + ) -> bool { + // rx path: feed guest data into the DNS handler for query extraction. + let rx_data: Vec = self.rx_buffer.drain(..).collect(); + if !rx_data.is_empty() { + dns_handler.ingest(&rx_data); + } + + if self.state.rx_fin() { + dns_handler.set_guest_fin(); + } + + // Poll for resolved DNS responses. + dns_handler.poll_responses(cx); + + // tx path: copy response data from the handler into tx_buffer. + while !self.tx_buffer.is_full() { + let (a, b) = self.tx_buffer.unwritten_slices_mut(); + let n = dns_handler.drain_tx(a); + if n == 0 { + let n2 = dns_handler.drain_tx(b); + if n2 == 0 { + break; + } + self.tx_buffer.extend_by(n2); + } else { + self.tx_buffer.extend_by(n); + } + } + + let want_close = dns_handler.should_close() && !self.state.tx_fin(); + let has_pending_tx = dns_handler.has_pending_tx(); + + if want_close { + tracing::info!("DNS TCP connection closing after all responses flushed"); + self.close(); + } + + self.send_next(sender); + !(self.state == TcpState::TimeWait + || self.state == TcpState::LastAck + || (self.state.tx_fin() + && self.state.rx_fin() + && self.tx_buffer.len() == 0 + && !has_pending_tx)) + } + + /// Poll the real-socket TCP connection backend. + /// + /// Reads data from the host socket into the tx buffer (host -> guest) and + /// writes guest rx data into the host socket (guest -> host). + fn poll_socket_backend( + &mut self, + cx: &mut Context<'_>, + sender: &mut Sender<'_, impl Client>, + opt_socket: &mut Option>, + ) -> bool { + // Wait for the outbound connection to complete. if self.state == TcpState::Connecting { - match self - .socket + let socket = opt_socket .as_mut() - .unwrap() - .poll_ready(cx, PollEvents::OUT) - { + .expect("Connecting state requires a socket"); + match socket.poll_ready(cx, PollEvents::OUT) { Poll::Ready(r) => { if r.has_err() { - let err = take_socket_error(self.socket.as_mut().unwrap()); + let err = take_socket_error(socket); let reset = match err.kind() { ErrorKind::TimedOut => { // Avoid resetting so that the guest doesn't @@ -733,7 +870,7 @@ impl TcpConnection { } // Handle the tx path. - if let Some(socket) = &mut self.socket { + if let Some(socket) = opt_socket.as_mut() { if self.state.tx_fin() { if let Poll::Ready(events) = socket.poll_ready(cx, PollEvents::EMPTY) { if events.has_err() { @@ -750,7 +887,7 @@ impl TcpConnection { } // Both ends are closed. Close the actual socket. - self.socket = None; + *opt_socket = None; } } else { while !self.tx_buffer.is_full() { @@ -785,7 +922,7 @@ impl TcpConnection { } // Handle the rx path. - if let Some(socket) = &mut self.socket { + if let Some(socket) = opt_socket.as_mut() { while !self.rx_buffer.is_empty() { let (a, b) = self.rx_buffer.as_slices(); let bufs = [IoSlice::new(a), IoSlice::new(b)]; @@ -1294,3 +1431,18 @@ fn seq_min(seqs: [TcpSeqNumber; N]) -> TcpSeqNumber { } min } + +/// Check if a TCP connection targets the gateway's DNS port. +fn is_gateway_dns_tcp( + ft: &FourTuple, + params: &crate::ConsommeParams, + dns_available: bool, +) -> bool { + if !dns_available || ft.dst.port() != crate::DNS_PORT { + return false; + } + match ft.dst.ip() { + IpAddr::V4(ip) => Ipv4Addr::from(params.gateway_ip) == ip, + IpAddr::V6(ip) => Ipv6Addr::from(params.gateway_link_local_ipv6) == ip, + } +} diff --git a/vm/devices/net/net_consomme/consomme/src/udp.rs b/vm/devices/net/net_consomme/consomme/src/udp.rs index 536a3fb27e..2870446960 100644 --- a/vm/devices/net/net_consomme/consomme/src/udp.rs +++ b/vm/devices/net/net_consomme/consomme/src/udp.rs @@ -53,7 +53,7 @@ use std::task::Poll; use std::time::Duration; use std::time::Instant; -pub const DNS_PORT: u16 = 53; +use crate::DNS_PORT; pub(crate) struct Udp { connections: HashMap, @@ -442,6 +442,7 @@ impl Access<'_, T> { dst_port: udp.dst_port(), gateway_mac: self.inner.state.params.gateway_mac, client_mac: frame.src_addr, + transport: crate::dns_resolver::DnsTransport::Udp, }, dns_query: udp.payload(), }; From 5a8921a8bcbfde12513ab921c3edabe1926759cf Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Wed, 11 Feb 2026 15:52:54 -0800 Subject: [PATCH 02/25] Fixes for glibc and musl --- .../consomme/src/dns_resolver/dns_tcp.rs | 45 +++--- .../consomme/src/dns_resolver/unix/glibc.rs | 146 +++++++++++++----- .../consomme/src/dns_resolver/windows/mod.rs | 15 +- .../net/net_consomme/consomme/src/tcp.rs | 15 +- .../net/net_consomme/consomme/src/tcp/ring.rs | 1 - 5 files changed, 149 insertions(+), 73 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index e6eef628d6..206958a6e6 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -82,10 +82,11 @@ impl DnsTcpHandler { // Incomplete message; wait for more data. break; } - // Include the 2-byte TCP length prefix in the query data - // passed to the backend, as platform-specific resolvers - // expect the full TCP-framed message. - let query = self.rx_buf[0..2 + msg_len].to_vec(); + // Extract the DNS query payload WITHOUT the 2-byte TCP length prefix. + // The TCP framing is only for the wire protocol between guest and host. + // Platform resolvers (glibc res_nsend, musl res_send) expect raw DNS + // messages and handle TCP framing internally when needed. + let query = self.rx_buf[2..2 + msg_len].to_vec(); self.rx_buf.drain(..2 + msg_len); let request = DnsRequest { @@ -99,15 +100,10 @@ impl DnsTcpHandler { /// Poll for completed DNS responses and length-prefix them into the /// transmit buffer. pub fn poll_responses(&mut self, cx: &mut Context<'_>) { - loop { - match self.receiver.poll_recv(cx) { - Poll::Ready(Ok(response)) => { - let len = response.response_data.len() as u16; - self.tx_buf.extend(&len.to_be_bytes()); - self.tx_buf.extend(&response.response_data); - } - Poll::Ready(Err(_)) | Poll::Pending => break, - } + while let Poll::Ready(Ok(response)) = self.receiver.poll_recv(cx) { + let len = response.response_data.len() as u16; + self.tx_buf.extend(&len.to_be_bytes()); + self.tx_buf.extend(&response.response_data); } } @@ -200,17 +196,12 @@ mod tests { let mut buf = vec![0u8; 256]; let n = handler.drain_tx(&mut buf); - // The echo backend returns the full TCP-framed query (2-byte prefix + DNS payload). - // poll_responses then wraps that in another 2-byte length prefix for transmission. - let echoed_len = 2 + query.len(); // length prefix + DNS payload - assert_eq!(n, 2 + echoed_len); // tx framing prefix + echoed data - assert_eq!( - u16::from_be_bytes([buf[0], buf[1]]) as usize, - echoed_len - ); - // The echoed data should be the original TCP-framed message. - assert_eq!(&buf[2..2 + 2], &(query.len() as u16).to_be_bytes()); - assert_eq!(&buf[4..n], &query[..]); + // The echo backend returns the raw DNS query (without TCP length prefix). + // poll_responses then wraps that in a 2-byte length prefix for transmission. + assert_eq!(n, 2 + query.len()); // tx framing prefix + DNS payload + assert_eq!(u16::from_be_bytes([buf[0], buf[1]]) as usize, query.len()); + // The echoed data should be the raw DNS query. + assert_eq!(&buf[2..n], &query[..]); } #[test] @@ -262,9 +253,9 @@ mod tests { let mut buf = vec![0u8; 512]; let n = handler.drain_tx(&mut buf); - // Each echoed response includes the 2-byte TCP prefix + DNS payload, - // then poll_responses adds another 2-byte tx framing prefix. - let per_response = 2 + (2 + q1.len()); // tx prefix + (tcp prefix + DNS payload) + // Each echoed response is the raw DNS query (without TCP prefix), + // then poll_responses adds a 2-byte tx framing prefix. + let per_response = 2 + q1.len(); // tx prefix + DNS payload assert_eq!(n, 2 * per_response); } diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/unix/glibc.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/unix/glibc.rs index 397b410da7..3ec63e5018 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/unix/glibc.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/unix/glibc.rs @@ -10,6 +10,15 @@ use super::DnsRequestInternal; use super::DnsResponse; use super::build_servfail_response; use libc::c_int; +use libc::c_ulong; +use zerocopy::FromZeros; +use zerocopy::Immutable; +use zerocopy::IntoBytes; +use zerocopy::KnownLayout; + +/// RES_USEVC option flag - use TCP (virtual circuit) instead of UDP. +/// From glibc resolv/resolv.h: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=resolv/resolv.h;hb=HEAD +const RES_USEVC: c_ulong = 0x00000040; /// Size of the `res_state` structure for different platforms. /// These values were derived from including resolv.h and using sizeof(struct __res_state). @@ -18,16 +27,44 @@ const RES_STATE_SIZE: usize = 552; #[cfg(target_os = "linux")] const RES_STATE_SIZE: usize = 568; +/// The prefix of the glibc `struct __res_state` that we need to access. +/// This matches the layout defined in glibc resolv/bits/types/res_state.h: +/// See: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=resolv/bits/types/res_state.h;hb=HEAD +/// See: https://github.com/apple-oss-distributions/libresolv/blob/main/resolv.h +/// +/// ```c +/// struct __res_state { +/// int retrans; /* retransmission time interval */ +/// int retry; /* number of times to retransmit */ +/// unsigned long options; /* option flags */ +/// ... +/// } +/// ``` +#[repr(C)] +#[derive(IntoBytes, Immutable, KnownLayout, FromZeros)] +struct ResStatePrefix { + retrans: c_int, + retry: c_int, + options: c_ulong, +} + +/// Wrapper around the glibc/macOS resolver state structure. #[repr(C)] +#[derive(IntoBytes, Immutable, KnownLayout, FromZeros)] pub struct ResState { - _data: [u8; RES_STATE_SIZE], + prefix: ResStatePrefix, + _rest: [u8; RES_STATE_SIZE - size_of::()], } impl ResState { - pub fn zeroed() -> Self { - Self { - _data: [0u8; RES_STATE_SIZE], - } + /// Set the options field in the resolver state. + pub fn set_options(&mut self, options: c_ulong) { + self.prefix.options = options; + } + + /// Get the options field from the resolver state. + pub fn options(&self) -> c_ulong { + self.prefix.options } } @@ -59,7 +96,7 @@ unsafe extern "C" { /// Handle a DNS query using reentrant resolver functions (macOS and GNU libc). pub fn handle_dns_query(request: DnsRequestInternal) { let mut answer = vec![0u8; 4096]; - let mut state = ResState::zeroed(); + let mut state = ResState::new_zeroed(); // SAFETY: res_ninit initializes the resolver state by reading /etc/resolv.conf. // The state is properly sized and aligned. @@ -74,6 +111,10 @@ pub fn handle_dns_query(request: DnsRequestInternal) { return; } + // Set RES_USEVC to force TCP for DNS queries. + if request.flow.transport == crate::dns_resolver::DnsTransport::Tcp { + state.set_options(state.options() | RES_USEVC); + } // SAFETY: res_nsend is called with valid state, query buffer and answer buffer. // All buffers are properly sized and aligned. The state was initialized above. let answer_len = unsafe { @@ -110,17 +151,9 @@ pub fn handle_dns_query(request: DnsRequestInternal) { mod tests { use super::*; - #[test] - fn test_res_ninit_and_res_nsend_callable() { - // Test that the reentrant resolver functions are callable - let mut state = ResState::zeroed(); - - // SAFETY: res_ninit initializes the resolver state - let init_result = unsafe { res_ninit(&mut state) }; - assert_eq!(init_result, 0, "res_ninit() should succeed"); - - // Example DNS query buffer for google.com A record - let dns_query: Vec = vec![ + /// Example DNS query buffer for google.com A record. + fn sample_dns_query() -> Vec { + vec![ 0x12, 0x34, // Transaction ID 0x01, 0x00, // Flags: standard query 0x00, 0x01, // Questions: 1 @@ -131,23 +164,66 @@ mod tests { 0x00, // null terminator 0x00, 0x01, // Type: A 0x00, 0x01, // Class: IN - ]; - - let mut answer = vec![0u8; 4096]; - - // SAFETY: res_nsend is called with valid state, query buffer and answer buffer. - let _answer_len = unsafe { - res_nsend( - &mut state, - dns_query.as_ptr(), - dns_query.len() as c_int, - answer.as_mut_ptr(), - answer.len() as c_int, - ) - }; - - // Clean up - // SAFETY: res_nclose frees resources associated with the resolver state. - unsafe { res_nclose(&mut state) }; + ] + } + + /// RAII wrapper for ResState that ensures proper cleanup. + struct InitializedResState { + state: ResState, + } + + impl InitializedResState { + fn new() -> Self { + let mut state = ResState::new_zeroed(); + // SAFETY: res_ninit initializes the resolver state + let result = unsafe { res_ninit(&mut state) }; + assert_eq!(result, 0, "res_ninit() should succeed"); + Self { state } + } + + /// Send a DNS query and return the response length. + fn send_query(&mut self, query: &[u8]) -> c_int { + let mut answer = vec![0u8; 4096]; + // SAFETY: res_nsend is called with valid state, query buffer and answer buffer. + unsafe { + res_nsend( + &mut self.state, + query.as_ptr(), + query.len() as c_int, + answer.as_mut_ptr(), + answer.len() as c_int, + ) + } + } + } + + impl Drop for InitializedResState { + fn drop(&mut self) { + // SAFETY: res_nclose frees resources associated with the resolver state. + unsafe { res_nclose(&mut self.state) }; + } + } + + #[test] + fn test_res_ninit_and_res_nsend_callable() { + let mut state = InitializedResState::new(); + let _answer_len = state.send_query(&sample_dns_query()); + } + + #[test] + fn test_res_usevc_flag_for_tcp() { + let mut state = InitializedResState::new(); + + // Verify we can read and modify the options field + let original_options = state.state.options(); + state.state.set_options(original_options | RES_USEVC); + assert_ne!( + state.state.options() & RES_USEVC, + 0, + "RES_USEVC flag should be set" + ); + + // With RES_USEVC set, this should use TCP instead of UDP. + let _answer_len = state.send_query(&sample_dns_query()); } } diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/windows/mod.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/windows/mod.rs index 0f00411379..d29a5783f2 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/windows/mod.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/windows/mod.rs @@ -75,10 +75,23 @@ impl DnsBackend for WindowsDnsResolverBackend { // Clone the sender for error handling let response_sender_clone = response_sender.clone(); + // For TCP transport, prepend the 2-byte length prefix as required by the wire format. + // The DnsQueryRaw API with DNS_PROTOCOL_TCP expects TCP-framed messages. + let query_data = match request.flow.transport { + super::DnsTransport::Tcp => { + let len = request.dns_query.len() as u16; + let mut framed = Vec::with_capacity(2 + request.dns_query.len()); + framed.extend_from_slice(&len.to_be_bytes()); + framed.extend_from_slice(request.dns_query); + framed + } + super::DnsTransport::Udp => request.dns_query.to_vec(), + }; + // Create internal request let internal_request = DnsRequestInternal { flow: request.flow.clone(), - query: request.dns_query.to_vec(), + query: query_data, response_sender, }; diff --git a/vm/devices/net/net_consomme/consomme/src/tcp.rs b/vm/devices/net/net_consomme/consomme/src/tcp.rs index 5059c88644..5830f6cc52 100644 --- a/vm/devices/net/net_consomme/consomme/src/tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/tcp.rs @@ -365,7 +365,8 @@ impl Access<'_, T> { }; tracing::trace!(?tcp, "tcp packet"); - let is_dns_tcp = is_gateway_dns_tcp(&ft, &self.inner.state.params, self.inner.dns.is_some()); + let is_dns_tcp = + is_gateway_dns_tcp(&ft, &self.inner.state.params, self.inner.dns.is_some()); let mut sender = Sender { ft: &ft, @@ -795,7 +796,7 @@ impl TcpConnection { || self.state == TcpState::LastAck || (self.state.tx_fin() && self.state.rx_fin() - && self.tx_buffer.len() == 0 + && self.tx_buffer.is_empty() && !has_pending_tx)) } @@ -1433,16 +1434,12 @@ fn seq_min(seqs: [TcpSeqNumber; N]) -> TcpSeqNumber { } /// Check if a TCP connection targets the gateway's DNS port. -fn is_gateway_dns_tcp( - ft: &FourTuple, - params: &crate::ConsommeParams, - dns_available: bool, -) -> bool { +fn is_gateway_dns_tcp(ft: &FourTuple, params: &crate::ConsommeParams, dns_available: bool) -> bool { if !dns_available || ft.dst.port() != crate::DNS_PORT { return false; } match ft.dst.ip() { - IpAddr::V4(ip) => Ipv4Addr::from(params.gateway_ip) == ip, - IpAddr::V6(ip) => Ipv6Addr::from(params.gateway_link_local_ipv6) == ip, + IpAddr::V4(ip) => params.gateway_ip == ip, + IpAddr::V6(ip) => params.gateway_link_local_ipv6 == ip, } } diff --git a/vm/devices/net/net_consomme/consomme/src/tcp/ring.rs b/vm/devices/net/net_consomme/consomme/src/tcp/ring.rs index 7402535700..f1fa24e3dd 100644 --- a/vm/devices/net/net_consomme/consomme/src/tcp/ring.rs +++ b/vm/devices/net/net_consomme/consomme/src/tcp/ring.rs @@ -38,7 +38,6 @@ impl Ring { self.view(0..self.len()).as_slices() } - #[cfg(test)] pub fn is_empty(&self) -> bool { self.len() == 0 } From 12bb1f437d7a32a149a2f3ccd68108c7a4295a5f Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Wed, 11 Feb 2026 16:15:59 -0800 Subject: [PATCH 03/25] fix --- .../net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index 206958a6e6..901fa35a2c 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -101,8 +101,6 @@ impl DnsTcpHandler { /// transmit buffer. pub fn poll_responses(&mut self, cx: &mut Context<'_>) { while let Poll::Ready(Ok(response)) = self.receiver.poll_recv(cx) { - let len = response.response_data.len() as u16; - self.tx_buf.extend(&len.to_be_bytes()); self.tx_buf.extend(&response.response_data); } } From 2b9402b4ac70aa0619e7edf3e401a09f99f5231f Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Fri, 13 Feb 2026 10:17:48 -0800 Subject: [PATCH 04/25] fixes --- .../consomme/src/dns_resolver/dns_tcp.rs | 113 ++++++++++-------- .../net/net_consomme/consomme/src/tcp.rs | 27 ++--- 2 files changed, 69 insertions(+), 71 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index 901fa35a2c..54972f740a 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -14,6 +14,7 @@ use super::DnsRequest; use super::DnsResponse; use mesh_channel_core::Receiver; use std::collections::VecDeque; +use std::io::IoSliceMut; use std::sync::Arc; use std::task::Context; use std::task::Poll; @@ -26,7 +27,7 @@ pub struct DnsTcpHandler { receiver: Receiver, flow: DnsFlow, /// Data received from the guest, accumulating DNS TCP framed messages. - rx_buf: Vec, + rx_buf: VecDeque, /// Length-prefixed DNS responses waiting to be sent to the guest. tx_buf: VecDeque, /// The guest has sent FIN; no more data will arrive. @@ -40,7 +41,7 @@ impl DnsTcpHandler { backend, receiver, flow, - rx_buf: Vec::new(), + rx_buf: VecDeque::new(), tx_buf: VecDeque::new(), guest_fin: false, } @@ -48,19 +49,21 @@ impl DnsTcpHandler { /// Feed data received from the guest into the handler. /// Extracts complete DNS messages and submits them for resolution. - pub fn ingest(&mut self, data: &[u8]) { - // Limit rx_buf growth to prevent unbounded memory use from a - // guest that sends a large length prefix but trickles data slowly. - let remaining_capacity = MAX_DNS_TCP_MESSAGE_SIZE.saturating_sub(self.rx_buf.len()); - let accepted = data.len().min(remaining_capacity); - if accepted > 0 { - self.rx_buf.extend_from_slice(&data[..accepted]); - } - if accepted < data.len() { - tracelimit::warn_ratelimited!( - dropped = data.len() - accepted, - "DNS TCP rx_buf full, dropping excess data" - ); + pub fn ingest(&mut self, data: &[&[u8]]) { + for chunk in data { + // Limit rx_buf growth to prevent unbounded memory use from a + // guest that sends a large length prefix but trickles data slowly. + let remaining_capacity = MAX_DNS_TCP_MESSAGE_SIZE.saturating_sub(self.rx_buf.len()); + let accepted = chunk.len().min(remaining_capacity); + if accepted > 0 { + self.rx_buf.extend(&chunk[..accepted]); + } + if accepted < chunk.len() { + tracelimit::warn_ratelimited!( + dropped = chunk.len() - accepted, + "DNS TCP rx_buf full, dropping excess data" + ); + } } self.extract_and_submit_queries(); } @@ -82,43 +85,51 @@ impl DnsTcpHandler { // Incomplete message; wait for more data. break; } - // Extract the DNS query payload WITHOUT the 2-byte TCP length prefix. - // The TCP framing is only for the wire protocol between guest and host. - // Platform resolvers (glibc res_nsend, musl res_send) expect raw DNS - // messages and handle TCP framing internally when needed. - let query = self.rx_buf[2..2 + msg_len].to_vec(); - self.rx_buf.drain(..2 + msg_len); + // Drain the 2-byte length prefix, then drain the payload. + self.rx_buf.drain(..2); + let bytes: Vec = self.rx_buf.drain(..msg_len).collect(); let request = DnsRequest { flow: self.flow.clone(), - dns_query: &query, + dns_query: &bytes, }; self.backend.query(&request, self.receiver.sender()); } } - /// Poll for completed DNS responses and length-prefix them into the - /// transmit buffer. - pub fn poll_responses(&mut self, cx: &mut Context<'_>) { + /// Poll for completed DNS responses and write length-prefixed data + /// Returns the total number of bytes written. + pub fn poll_read(&mut self, cx: &mut Context<'_>, bufs: &mut [IoSliceMut<'_>]) -> usize { while let Poll::Ready(Ok(response)) = self.receiver.poll_recv(cx) { self.tx_buf.extend(&response.response_data); } + self.drain_buffered(bufs) } - /// Drain available response data into the provided buffer. - /// Returns the number of bytes written. - pub fn drain_tx(&mut self, buf: &mut [u8]) -> usize { - let n = buf.len().min(self.tx_buf.len()); - for (dst, src) in buf[..n].iter_mut().zip(self.tx_buf.drain(..n)) { - *dst = src; + /// Drain buffered tx data into the provided buffers. + fn drain_buffered(&mut self, bufs: &mut [IoSliceMut<'_>]) -> usize { + let mut total = 0; + for buf in bufs.iter_mut() { + if self.tx_buf.is_empty() { + break; + } + let n = buf.len().min(self.tx_buf.len()); + for (dst, src) in buf[..n].iter_mut().zip(self.tx_buf.drain(..n)) { + *dst = src; + } + total += n; } - n + total } pub fn has_pending_tx(&self) -> bool { !self.tx_buf.is_empty() } + pub fn guest_fin(&self) -> bool { + self.guest_fin + } + pub fn set_guest_fin(&mut self) { self.guest_fin = true; } @@ -184,19 +195,16 @@ mod tests { ]; let msg = make_tcp_dns_message(&query); - handler.ingest(&msg); + handler.ingest(&[&msg]); let waker = std::task::Waker::from(Arc::new(NoopWaker)); let mut cx = Context::from_waker(&waker); - handler.poll_responses(&mut cx); - - assert!(handler.has_pending_tx()); let mut buf = vec![0u8; 256]; - let n = handler.drain_tx(&mut buf); + let n = handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]); // The echo backend returns the raw DNS query (without TCP length prefix). // poll_responses then wraps that in a 2-byte length prefix for transmission. - assert_eq!(n, 2 + query.len()); // tx framing prefix + DNS payload + assert_eq!(n, query.len()); // tx framing prefix + DNS payload assert_eq!(u16::from_be_bytes([buf[0], buf[1]]) as usize, query.len()); // The echoed data should be the raw DNS query. assert_eq!(&buf[2..n], &query[..]); @@ -214,17 +222,16 @@ mod tests { let msg = make_tcp_dns_message(&query); // Feed just the length prefix - handler.ingest(&msg[..2]); + handler.ingest(&[&msg[..2]]); let waker = std::task::Waker::from(Arc::new(NoopWaker)); let mut cx = Context::from_waker(&waker); - handler.poll_responses(&mut cx); - assert!(!handler.has_pending_tx()); + let mut buf = vec![0u8; 256]; + assert_eq!(handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]), 0); // Feed the rest - handler.ingest(&msg[2..]); - handler.poll_responses(&mut cx); - assert!(handler.has_pending_tx()); + handler.ingest(&[&msg[2..]]); + assert!(handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]) > 0); } #[test] @@ -243,17 +250,16 @@ mod tests { let mut combined = make_tcp_dns_message(&q1); combined.extend(make_tcp_dns_message(&q2)); - handler.ingest(&combined); + handler.ingest(&[&combined]); let waker = std::task::Waker::from(Arc::new(NoopWaker)); let mut cx = Context::from_waker(&waker); - handler.poll_responses(&mut cx); let mut buf = vec![0u8; 512]; - let n = handler.drain_tx(&mut buf); + let n = handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]); // Each echoed response is the raw DNS query (without TCP prefix), // then poll_responses adds a 2-byte tx framing prefix. - let per_response = 2 + q1.len(); // tx prefix + DNS payload + let per_response = q1.len(); // tx prefix + DNS payload assert_eq!(n, 2 * per_response); } @@ -266,17 +272,18 @@ mod tests { 0xAB, 0xCD, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x78, 0x78, 0x78, ]; - handler.ingest(&make_tcp_dns_message(&query)); + handler.ingest(&[&make_tcp_dns_message(&query)]); handler.set_guest_fin(); let waker = std::task::Waker::from(Arc::new(NoopWaker)); let mut cx = Context::from_waker(&waker); - handler.poll_responses(&mut cx); - - assert!(!handler.should_close()); let mut buf = vec![0u8; 256]; - handler.drain_tx(&mut buf); + let _ = handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]); + + // tx_buf is now drained, but we need to verify should_close + // only returns true after all data is consumed. + assert!(!handler.has_pending_tx()); assert!(handler.should_close()); } diff --git a/vm/devices/net/net_consomme/consomme/src/tcp.rs b/vm/devices/net/net_consomme/consomme/src/tcp.rs index 5830f6cc52..2d4438c7e9 100644 --- a/vm/devices/net/net_consomme/consomme/src/tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/tcp.rs @@ -756,38 +756,29 @@ impl TcpConnection { dns_handler: &mut DnsTcpHandler, ) -> bool { // rx path: feed guest data into the DNS handler for query extraction. - let rx_data: Vec = self.rx_buffer.drain(..).collect(); - if !rx_data.is_empty() { - dns_handler.ingest(&rx_data); - } + let (a, b) = self.rx_buffer.as_slices(); + dns_handler.ingest(&[a, b]); + self.rx_buffer.clear(); - if self.state.rx_fin() { + if self.state.rx_fin() && !dns_handler.guest_fin() { dns_handler.set_guest_fin(); } - // Poll for resolved DNS responses. - dns_handler.poll_responses(cx); - - // tx path: copy response data from the handler into tx_buffer. + // tx path: poll DNS responses directly into tx_buffer. while !self.tx_buffer.is_full() { let (a, b) = self.tx_buffer.unwritten_slices_mut(); - let n = dns_handler.drain_tx(a); + let mut bufs = [IoSliceMut::new(a), IoSliceMut::new(b)]; + let n = dns_handler.poll_read(cx, &mut bufs); if n == 0 { - let n2 = dns_handler.drain_tx(b); - if n2 == 0 { - break; - } - self.tx_buffer.extend_by(n2); - } else { - self.tx_buffer.extend_by(n); + break; } + self.tx_buffer.extend_by(n); } let want_close = dns_handler.should_close() && !self.state.tx_fin(); let has_pending_tx = dns_handler.has_pending_tx(); if want_close { - tracing::info!("DNS TCP connection closing after all responses flushed"); self.close(); } From 3b4b8b5c6443bc2bf995e8a72a487e167c068052 Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Fri, 13 Feb 2026 10:34:14 -0800 Subject: [PATCH 05/25] test fixes --- .../net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index 54972f740a..ac9694c934 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -190,7 +190,7 @@ mod tests { // 20-byte fake DNS query (> 12-byte header minimum) let query = vec![ - 0xAB, 0xCD, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x77, + 0x00, 0x14, 0xAB, 0xCD, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x77, 0x77, 0x77, 0x03, 0x63, 0x6F, 0x6D, ]; let msg = make_tcp_dns_message(&query); @@ -205,9 +205,7 @@ mod tests { // The echo backend returns the raw DNS query (without TCP length prefix). // poll_responses then wraps that in a 2-byte length prefix for transmission. assert_eq!(n, query.len()); // tx framing prefix + DNS payload - assert_eq!(u16::from_be_bytes([buf[0], buf[1]]) as usize, query.len()); - // The echoed data should be the raw DNS query. - assert_eq!(&buf[2..n], &query[..]); + assert_eq!(u16::from_be_bytes([buf[0], buf[1]]) as usize, query.len() - 2); } #[test] From 15aedfd02487282d5f674513d8e8d4aba6b7eb6b Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Fri, 13 Feb 2026 11:39:02 -0800 Subject: [PATCH 06/25] fix dns over tcp for linux host --- .../net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index ac9694c934..3bd49b9b57 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -51,8 +51,6 @@ impl DnsTcpHandler { /// Extracts complete DNS messages and submits them for resolution. pub fn ingest(&mut self, data: &[&[u8]]) { for chunk in data { - // Limit rx_buf growth to prevent unbounded memory use from a - // guest that sends a large length prefix but trickles data slowly. let remaining_capacity = MAX_DNS_TCP_MESSAGE_SIZE.saturating_sub(self.rx_buf.len()); let accepted = chunk.len().min(remaining_capacity); if accepted > 0 { @@ -101,6 +99,11 @@ impl DnsTcpHandler { /// Returns the total number of bytes written. pub fn poll_read(&mut self, cx: &mut Context<'_>, bufs: &mut [IoSliceMut<'_>]) -> usize { while let Poll::Ready(Ok(response)) = self.receiver.poll_recv(cx) { + #[cfg(unix)] + { + let len = response.response_data.len() as u16; + self.tx_buf.extend(&len.to_be_bytes()); + } self.tx_buf.extend(&response.response_data); } self.drain_buffered(bufs) From d298d105b507b480f33b2f05ec702bca2c72efca Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Fri, 13 Feb 2026 11:48:42 -0800 Subject: [PATCH 07/25] minor cleanup --- .../net_consomme/consomme/src/dns_resolver/windows/mod.rs | 3 ++- vm/devices/net/net_consomme/consomme/src/tcp.rs | 5 ----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/windows/mod.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/windows/mod.rs index d29a5783f2..7de7f9bf4c 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/windows/mod.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/windows/mod.rs @@ -76,7 +76,8 @@ impl DnsBackend for WindowsDnsResolverBackend { let response_sender_clone = response_sender.clone(); // For TCP transport, prepend the 2-byte length prefix as required by the wire format. - // The DnsQueryRaw API with DNS_PROTOCOL_TCP expects TCP-framed messages. + // The DnsQueryRaw API with DNS_PROTOCOL_TCP expects TCP-framed + // messages. Unix resolvers handle this themselves. let query_data = match request.flow.transport { super::DnsTransport::Tcp => { let len = request.dns_query.len() as u16; diff --git a/vm/devices/net/net_consomme/consomme/src/tcp.rs b/vm/devices/net/net_consomme/consomme/src/tcp.rs index 2d4438c7e9..30a142c076 100644 --- a/vm/devices/net/net_consomme/consomme/src/tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/tcp.rs @@ -389,11 +389,6 @@ impl Access<'_, T> { sender.rst(ack, None); } else if tcp.control == TcpControl::Syn { if is_dns_tcp { - tracing::info!( - src = %ft.src, - dst = %ft.dst, - "intercepting DNS TCP connection to gateway" - ); let conn = TcpConnection::new_dns( &mut sender, &tcp, From fa6904d93f5a6c14ef7209e959c5e5e6a0c610fd Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Fri, 13 Feb 2026 12:13:01 -0800 Subject: [PATCH 08/25] Clippy + fmt --- .../consomme/src/dns_resolver/dns_tcp.rs | 35 +++++++++++++------ .../consomme/src/dns_resolver/mod.rs | 2 ++ .../consomme/src/dns_resolver/windows/mod.rs | 16 +-------- 3 files changed, 28 insertions(+), 25 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index 3bd49b9b57..08adffb9da 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -83,13 +83,22 @@ impl DnsTcpHandler { // Incomplete message; wait for more data. break; } - // Drain the 2-byte length prefix, then drain the payload. - self.rx_buf.drain(..2); - let bytes: Vec = self.rx_buf.drain(..msg_len).collect(); + // On Windows, the two byte prefix must be included in the buffer + // sent to the backend, as DnsQueryRaw expects the full TCP-framed + // message. + // On Unix, the backend expects just the raw DNS query without the TCP prefix, + // so we strip it before sending. + #[cfg(unix)] + let query_data = { + self.rx_buf.drain(..2); + self.rx_buf.drain(..msg_len).collect::>() + }; + #[cfg(windows)] + let query_data = self.rx_buf.drain(..2 + msg_len).collect::>(); let request = DnsRequest { flow: self.flow.clone(), - dns_query: &bytes, + dns_query: &query_data, }; self.backend.query(&request, self.receiver.sender()); } @@ -101,8 +110,8 @@ impl DnsTcpHandler { while let Poll::Ready(Ok(response)) = self.receiver.poll_recv(cx) { #[cfg(unix)] { - let len = response.response_data.len() as u16; - self.tx_buf.extend(&len.to_be_bytes()); + let len = response.response_data.len() as u16; + self.tx_buf.extend(&len.to_be_bytes()); } self.tx_buf.extend(&response.response_data); } @@ -193,8 +202,8 @@ mod tests { // 20-byte fake DNS query (> 12-byte header minimum) let query = vec![ - 0x00, 0x14, 0xAB, 0xCD, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x77, - 0x77, 0x77, 0x03, 0x63, 0x6F, 0x6D, + 0x00, 0x14, 0xAB, 0xCD, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x03, 0x77, 0x77, 0x77, 0x03, 0x63, 0x6F, 0x6D, ]; let msg = make_tcp_dns_message(&query); @@ -208,7 +217,10 @@ mod tests { // The echo backend returns the raw DNS query (without TCP length prefix). // poll_responses then wraps that in a 2-byte length prefix for transmission. assert_eq!(n, query.len()); // tx framing prefix + DNS payload - assert_eq!(u16::from_be_bytes([buf[0], buf[1]]) as usize, query.len() - 2); + assert_eq!( + u16::from_be_bytes([buf[0], buf[1]]) as usize, + query.len() - 2 + ); } #[test] @@ -228,7 +240,10 @@ mod tests { let waker = std::task::Waker::from(Arc::new(NoopWaker)); let mut cx = Context::from_waker(&waker); let mut buf = vec![0u8; 256]; - assert_eq!(handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]), 0); + assert_eq!( + handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]), + 0 + ); // Feed the rest handler.ingest(&[&msg[2..]]); diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/mod.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/mod.rs index c04965a783..89adc8f0ee 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/mod.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/mod.rs @@ -37,6 +37,8 @@ pub struct DnsFlow { pub dst_port: u16, pub gateway_mac: EthernetAddress, pub client_mac: EthernetAddress, + // Used by the glibc and Windows DNS backends, but not the musl backend. + #[allow(dead_code)] pub transport: DnsTransport, } diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/windows/mod.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/windows/mod.rs index 7de7f9bf4c..0f00411379 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/windows/mod.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/windows/mod.rs @@ -75,24 +75,10 @@ impl DnsBackend for WindowsDnsResolverBackend { // Clone the sender for error handling let response_sender_clone = response_sender.clone(); - // For TCP transport, prepend the 2-byte length prefix as required by the wire format. - // The DnsQueryRaw API with DNS_PROTOCOL_TCP expects TCP-framed - // messages. Unix resolvers handle this themselves. - let query_data = match request.flow.transport { - super::DnsTransport::Tcp => { - let len = request.dns_query.len() as u16; - let mut framed = Vec::with_capacity(2 + request.dns_query.len()); - framed.extend_from_slice(&len.to_be_bytes()); - framed.extend_from_slice(request.dns_query); - framed - } - super::DnsTransport::Udp => request.dns_query.to_vec(), - }; - // Create internal request let internal_request = DnsRequestInternal { flow: request.flow.clone(), - query: query_data, + query: request.dns_query.to_vec(), response_sender, }; From b9a59ec264bc50e5834bc29d24d6739853efd8dd Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Fri, 13 Feb 2026 14:20:51 -0800 Subject: [PATCH 09/25] fixes --- .../consomme/src/dns_resolver/dns_tcp.rs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index 08adffb9da..107bdad5ca 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -200,7 +200,7 @@ mod tests { let backend = Arc::new(EchoBackend); let mut handler = DnsTcpHandler::new(backend, test_flow()); - // 20-byte fake DNS query (> 12-byte header minimum) + // 22-byte fake DNS query (> 12-byte header minimum) let query = vec![ 0x00, 0x14, 0xAB, 0xCD, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x77, 0x77, 0x77, 0x03, 0x63, 0x6F, 0x6D, @@ -214,12 +214,9 @@ mod tests { let mut buf = vec![0u8; 256]; let n = handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]); - // The echo backend returns the raw DNS query (without TCP length prefix). - // poll_responses then wraps that in a 2-byte length prefix for transmission. - assert_eq!(n, query.len()); // tx framing prefix + DNS payload assert_eq!( u16::from_be_bytes([buf[0], buf[1]]) as usize, - query.len() - 2 + query.len() ); } @@ -273,9 +270,8 @@ mod tests { let mut buf = vec![0u8; 512]; let n = handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]); - // Each echoed response is the raw DNS query (without TCP prefix), - // then poll_responses adds a 2-byte tx framing prefix. - let per_response = q1.len(); // tx prefix + DNS payload + // Each response is a 2-byte TCP length prefix + the DNS payload. + let per_response = q1.len() + 2; // 2-byte TCP prefix + DNS payload assert_eq!(n, 2 * per_response); } From 9b87165ab0d94888ad4865a0e18798a6106717ab Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Fri, 13 Feb 2026 15:35:09 -0800 Subject: [PATCH 10/25] missed a change --- .../net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index 107bdad5ca..cc1b439be7 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -213,7 +213,7 @@ mod tests { let mut cx = Context::from_waker(&waker); let mut buf = vec![0u8; 256]; - let n = handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]); + handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]); assert_eq!( u16::from_be_bytes([buf[0], buf[1]]) as usize, query.len() From a420207326c683ddd0fc84f06d4f154d56f68d9e Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Tue, 17 Feb 2026 14:02:25 -0800 Subject: [PATCH 11/25] . --- .../net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index cc1b439be7..7f046a47b2 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -214,10 +214,7 @@ mod tests { let mut buf = vec![0u8; 256]; handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]); - assert_eq!( - u16::from_be_bytes([buf[0], buf[1]]) as usize, - query.len() - ); + assert_eq!(u16::from_be_bytes([buf[0], buf[1]]) as usize, query.len()); } #[test] From bacea856bbf14b2d2507673e4f62130eaa261e84 Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Mon, 23 Feb 2026 14:58:28 -0800 Subject: [PATCH 12/25] feedback --- .../consomme/src/dns_resolver/dns_tcp.rs | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index 7f046a47b2..e52e9c4e67 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -19,7 +19,10 @@ use std::sync::Arc; use std::task::Context; use std::task::Poll; -/// Maximum DNS message size over TCP (2-byte length field can represent up to 65535). +/// There is no official maximum size for DNS messages over TCP, but we can set +/// a reasonable upper bound to u16::MAX (65535 bytes) to prevent unbounded memory +/// usage. This is larger than the typical 512-byte limit for UDP, as TCP can +/// handle larger messages. const MAX_DNS_TCP_MESSAGE_SIZE: usize = 65535; pub struct DnsTcpHandler { @@ -32,6 +35,8 @@ pub struct DnsTcpHandler { tx_buf: VecDeque, /// The guest has sent FIN; no more data will arrive. guest_fin: bool, + /// Number of DNS queries submitted but not yet resolved. + in_flight: usize, } impl DnsTcpHandler { @@ -44,6 +49,7 @@ impl DnsTcpHandler { rx_buf: VecDeque::new(), tx_buf: VecDeque::new(), guest_fin: false, + in_flight: 0, } } @@ -101,6 +107,7 @@ impl DnsTcpHandler { dns_query: &query_data, }; self.backend.query(&request, self.receiver.sender()); + self.in_flight += 1; } } @@ -108,6 +115,14 @@ impl DnsTcpHandler { /// Returns the total number of bytes written. pub fn poll_read(&mut self, cx: &mut Context<'_>, bufs: &mut [IoSliceMut<'_>]) -> usize { while let Poll::Ready(Ok(response)) = self.receiver.poll_recv(cx) { + self.in_flight = self.in_flight.saturating_sub(1); + if response.response_data.len() > MAX_DNS_TCP_MESSAGE_SIZE { + tracelimit::warn_ratelimited!( + size = response.response_data.len(), + "DNS TCP response exceeds maximum message size, dropping" + ); + continue; + } #[cfg(unix)] { let len = response.response_data.len() as u16; @@ -146,10 +161,10 @@ impl DnsTcpHandler { self.guest_fin = true; } - /// Returns true when the guest has sent FIN and all responses have - /// been flushed, so the server side can send FIN too. + /// Returns true when the guest has sent FIN, all in-flight queries + /// have been resolved, and all responses have been flushed. pub fn should_close(&self) -> bool { - self.guest_fin && self.tx_buf.is_empty() + self.guest_fin && self.in_flight == 0 && self.tx_buf.is_empty() } } From e5617e2409a77e34910d91e1ba4301897a401a5b Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Mon, 23 Feb 2026 15:15:38 -0800 Subject: [PATCH 13/25] . --- .../net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index e52e9c4e67..4e6c773ee8 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -20,8 +20,8 @@ use std::task::Context; use std::task::Poll; /// There is no official maximum size for DNS messages over TCP, but we can set -/// a reasonable upper bound to u16::MAX (65535 bytes) to prevent unbounded memory -/// usage. This is larger than the typical 512-byte limit for UDP, as TCP can +/// a reasonable upper bound to u16::MAX (65535 bytes) to prevent unbounded memory +/// usage. This is larger than the typical 512-byte limit for UDP, as TCP can /// handle larger messages. const MAX_DNS_TCP_MESSAGE_SIZE: usize = 65535; From 239d7d8ba9a7cae68788e56d81afb91843954cc9 Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Mon, 23 Feb 2026 15:46:38 -0800 Subject: [PATCH 14/25] . --- .../consomme/src/dns_resolver/dns_tcp.rs | 37 ++++++------ .../net/net_consomme/consomme/src/tcp.rs | 56 ++++++++++--------- 2 files changed, 47 insertions(+), 46 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index 4e6c773ee8..a0eecec828 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -15,7 +15,6 @@ use super::DnsResponse; use mesh_channel_core::Receiver; use std::collections::VecDeque; use std::io::IoSliceMut; -use std::sync::Arc; use std::task::Context; use std::task::Poll; @@ -26,7 +25,6 @@ use std::task::Poll; const MAX_DNS_TCP_MESSAGE_SIZE: usize = 65535; pub struct DnsTcpHandler { - backend: Arc, receiver: Receiver, flow: DnsFlow, /// Data received from the guest, accumulating DNS TCP framed messages. @@ -40,10 +38,9 @@ pub struct DnsTcpHandler { } impl DnsTcpHandler { - pub fn new(backend: Arc, flow: DnsFlow) -> Self { + pub fn new(flow: DnsFlow) -> Self { let receiver = Receiver::new(); Self { - backend, receiver, flow, rx_buf: VecDeque::new(), @@ -55,7 +52,7 @@ impl DnsTcpHandler { /// Feed data received from the guest into the handler. /// Extracts complete DNS messages and submits them for resolution. - pub fn ingest(&mut self, data: &[&[u8]]) { + pub fn ingest(&mut self, data: &[&[u8]], backend: &dyn DnsBackend) { for chunk in data { let remaining_capacity = MAX_DNS_TCP_MESSAGE_SIZE.saturating_sub(self.rx_buf.len()); let accepted = chunk.len().min(remaining_capacity); @@ -69,19 +66,20 @@ impl DnsTcpHandler { ); } } - self.extract_and_submit_queries(); + self.extract_and_submit_queries(backend); } /// Parse the rx buffer for complete DNS TCP-framed messages /// (2-byte big-endian length prefix + payload) and submit each query. - fn extract_and_submit_queries(&mut self) { + fn extract_and_submit_queries(&mut self, backend: &dyn DnsBackend) { loop { if self.rx_buf.len() < 2 { break; } let msg_len = u16::from_be_bytes([self.rx_buf[0], self.rx_buf[1]]) as usize; - if msg_len == 0 || msg_len > MAX_DNS_TCP_MESSAGE_SIZE { - // Malformed: discard the length prefix and try to resync. + if msg_len < super::DNS_HEADER_SIZE { + // Too small to be a valid DNS message; discard the length + // prefix and try to resync. self.rx_buf.drain(..2); continue; } @@ -106,7 +104,7 @@ impl DnsTcpHandler { flow: self.flow.clone(), dns_query: &query_data, }; - self.backend.query(&request, self.receiver.sender()); + backend.query(&request, self.receiver.sender()); self.in_flight += 1; } } @@ -171,6 +169,7 @@ impl DnsTcpHandler { #[cfg(test)] mod tests { use super::*; + use std::sync::Arc; /// A test DNS backend that echoes the query back as the response. struct EchoBackend; @@ -213,7 +212,7 @@ mod tests { #[test] fn single_query_response() { let backend = Arc::new(EchoBackend); - let mut handler = DnsTcpHandler::new(backend, test_flow()); + let mut handler = DnsTcpHandler::new(test_flow()); // 22-byte fake DNS query (> 12-byte header minimum) let query = vec![ @@ -222,7 +221,7 @@ mod tests { ]; let msg = make_tcp_dns_message(&query); - handler.ingest(&[&msg]); + handler.ingest(&[&msg], backend.as_ref()); let waker = std::task::Waker::from(Arc::new(NoopWaker)); let mut cx = Context::from_waker(&waker); @@ -235,7 +234,7 @@ mod tests { #[test] fn partial_message_buffering() { let backend = Arc::new(EchoBackend); - let mut handler = DnsTcpHandler::new(backend, test_flow()); + let mut handler = DnsTcpHandler::new(test_flow()); let query = vec![ 0xAB, 0xCD, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x66, @@ -244,7 +243,7 @@ mod tests { let msg = make_tcp_dns_message(&query); // Feed just the length prefix - handler.ingest(&[&msg[..2]]); + handler.ingest(&[&msg[..2]], backend.as_ref()); let waker = std::task::Waker::from(Arc::new(NoopWaker)); let mut cx = Context::from_waker(&waker); @@ -255,14 +254,14 @@ mod tests { ); // Feed the rest - handler.ingest(&[&msg[2..]]); + handler.ingest(&[&msg[2..]], backend.as_ref()); assert!(handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]) > 0); } #[test] fn multiple_queries_in_one_write() { let backend = Arc::new(EchoBackend); - let mut handler = DnsTcpHandler::new(backend, test_flow()); + let mut handler = DnsTcpHandler::new(test_flow()); let q1 = vec![ 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x61, @@ -275,7 +274,7 @@ mod tests { let mut combined = make_tcp_dns_message(&q1); combined.extend(make_tcp_dns_message(&q2)); - handler.ingest(&[&combined]); + handler.ingest(&[&combined], backend.as_ref()); let waker = std::task::Waker::from(Arc::new(NoopWaker)); let mut cx = Context::from_waker(&waker); @@ -290,13 +289,13 @@ mod tests { #[test] fn should_close_after_fin_and_drain() { let backend = Arc::new(EchoBackend); - let mut handler = DnsTcpHandler::new(backend, test_flow()); + let mut handler = DnsTcpHandler::new(test_flow()); let query = vec![ 0xAB, 0xCD, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x78, 0x78, 0x78, ]; - handler.ingest(&[&make_tcp_dns_message(&query)]); + handler.ingest(&[&make_tcp_dns_message(&query)], backend.as_ref()); handler.set_guest_fin(); let waker = std::task::Waker::from(Arc::new(NoopWaker)); diff --git a/vm/devices/net/net_consomme/consomme/src/tcp.rs b/vm/devices/net/net_consomme/consomme/src/tcp.rs index 30a142c076..2ca41eb95c 100644 --- a/vm/devices/net/net_consomme/consomme/src/tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/tcp.rs @@ -9,6 +9,7 @@ use super::DropReason; use crate::ChecksumState; use crate::ConsommeState; use crate::IpAddresses; +use crate::dns_resolver::DnsBackend; use crate::dns_resolver::dns_tcp::DnsTcpHandler; use futures::AsyncRead; use futures::AsyncWrite; @@ -301,15 +302,32 @@ impl Access<'_, T> { Err(_) => false, }); // Check for any new incoming data + let dns_backend = self.inner.dns.as_ref().map(|d| d.backend().as_ref()); self.inner.tcp.connections.retain(|ft, conn| { - conn.poll_conn( - cx, - &mut Sender { - ft, - state: &mut self.inner.state, - client: self.client, + let mut sender = Sender { + ft, + state: &mut self.inner.state, + client: self.client, + }; + // Temporarily take the backend to split the mutable borrow between + // the backend and the rest of TcpConnection's fields. + let mut backend = std::mem::replace(&mut conn.backend, TcpBackend::Socket(None)); + let result = match &mut backend { + TcpBackend::Dns(dns_handler) => match dns_backend { + Some(dns_backend) => { + conn.poll_dns_backend(cx, &mut sender, dns_handler, dns_backend) + } + None => { + tracing::warn!("DNS TCP connection without DNS resolver, dropping"); + false + } }, - ) + TcpBackend::Socket(opt_socket) => { + conn.poll_socket_backend(cx, &mut sender, opt_socket) + } + }; + conn.backend = backend; + result }) } @@ -389,11 +407,7 @@ impl Access<'_, T> { sender.rst(ack, None); } else if tcp.control == TcpControl::Syn { if is_dns_tcp { - let conn = TcpConnection::new_dns( - &mut sender, - &tcp, - self.inner.dns.as_ref().unwrap(), - )?; + let conn = TcpConnection::new_dns(&mut sender, &tcp)?; e.insert(conn); } else { let conn = TcpConnection::new(&mut sender, &tcp)?; @@ -671,7 +685,6 @@ impl TcpConnection { fn new_dns( sender: &mut Sender<'_, impl Client>, tcp: &TcpRepr<'_>, - dns: &crate::dns_resolver::DnsResolver, ) -> Result { let mut this = Self::default(); this.initialize_from_first_client_packet(tcp)?; @@ -686,7 +699,7 @@ impl TcpConnection { transport: crate::dns_resolver::DnsTransport::Tcp, }; - this.backend = TcpBackend::Dns(DnsTcpHandler::new(dns.backend().clone(), flow)); + this.backend = TcpBackend::Dns(DnsTcpHandler::new(flow)); // Immediately transition to SynReceived so the handshake SYN-ACK is sent. this.state = TcpState::SynReceived; this.rx_window_cap = this.rx_buffer.capacity(); @@ -729,18 +742,6 @@ impl TcpConnection { Ok(()) } - fn poll_conn(&mut self, cx: &mut Context<'_>, sender: &mut Sender<'_, impl Client>) -> bool { - // Temporarily take the backend to split the mutable borrow between - // the backend and the rest of TcpConnection's fields. - let mut backend = std::mem::replace(&mut self.backend, TcpBackend::Socket(None)); - let result = match &mut backend { - TcpBackend::Dns(dns_handler) => self.poll_dns_backend(cx, sender, dns_handler), - TcpBackend::Socket(opt_socket) => self.poll_socket_backend(cx, sender, opt_socket), - }; - self.backend = backend; - result - } - /// Poll the DNS TCP virtual connection backend. /// /// There is no real socket; data flows through the [`DnsTcpHandler`]. @@ -749,10 +750,11 @@ impl TcpConnection { cx: &mut Context<'_>, sender: &mut Sender<'_, impl Client>, dns_handler: &mut DnsTcpHandler, + dns_backend: &dyn DnsBackend, ) -> bool { // rx path: feed guest data into the DNS handler for query extraction. let (a, b) = self.rx_buffer.as_slices(); - dns_handler.ingest(&[a, b]); + dns_handler.ingest(&[a, b], dns_backend); self.rx_buffer.clear(); if self.state.rx_fin() && !dns_handler.guest_fin() { From f3ade00b557ce40e8a705cb9cf41ea67316b6cf3 Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Wed, 25 Feb 2026 16:16:29 -0800 Subject: [PATCH 15/25] Feedback --- .../consomme/src/dns_resolver/dns_tcp.rs | 426 ++++++++++++------ .../consomme/src/dns_resolver/mod.rs | 20 +- .../consomme/src/dns_resolver/windows/mod.rs | 39 +- .../net/net_consomme/consomme/src/tcp.rs | 125 ++--- 4 files changed, 408 insertions(+), 202 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index a0eecec828..e121be656f 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -7,13 +7,11 @@ //! preceded by a 2-byte big-endian length prefix. This module intercepts //! TCP connections to the gateway on port 53 and resolves queries using //! the shared `DnsBackend`. - use super::DnsBackend; use super::DnsFlow; use super::DnsRequest; use super::DnsResponse; use mesh_channel_core::Receiver; -use std::collections::VecDeque; use std::io::IoSliceMut; use std::task::Context; use std::task::Poll; @@ -24,17 +22,32 @@ use std::task::Poll; /// handle larger messages. const MAX_DNS_TCP_MESSAGE_SIZE: usize = 65535; +/// Current phase of the DNS TCP handler state machine. +enum Phase { + /// Accumulating an incoming TCP-framed DNS request. + Receiving, + /// Query submitted to the backend; awaiting response. + InFlight, + /// Writing a TCP-framed response back to the caller. + Responding, +} + pub struct DnsTcpHandler { receiver: Receiver, flow: DnsFlow, - /// Data received from the guest, accumulating DNS TCP framed messages. - rx_buf: VecDeque, - /// Length-prefixed DNS responses waiting to be sent to the guest. - tx_buf: VecDeque, + /// Shared buffer used for both the incoming request and the outgoing + /// response. During [`Phase::Receiving`] it accumulates one TCP-framed + /// DNS message from the guest. During [`Phase::Responding`] it holds + /// the TCP-framed response being drained to the caller. + buf: Vec, + /// Write offset into `buf` while draining a response to the caller. + /// Only meaningful during [`Phase::Responding`]. + tx_offset: usize, + phase: Phase, /// The guest has sent FIN; no more data will arrive. guest_fin: bool, - /// Number of DNS queries submitted but not yet resolved. - in_flight: usize, + /// True if the TCP framing is invalid and the connection should be dropped. + protocol_error: bool, } impl DnsTcpHandler { @@ -43,112 +56,211 @@ impl DnsTcpHandler { Self { receiver, flow, - rx_buf: VecDeque::new(), - tx_buf: VecDeque::new(), + buf: Vec::new(), + tx_offset: 0, + phase: Phase::Receiving, guest_fin: false, - in_flight: 0, + protocol_error: false, } } /// Feed data received from the guest into the handler. - /// Extracts complete DNS messages and submits them for resolution. - pub fn ingest(&mut self, data: &[&[u8]], backend: &dyn DnsBackend) { + /// + /// Consumes bytes from `data` to assemble one complete TCP-framed DNS + /// message. When a complete message is assembled, it is submitted to the + /// backend for resolution and no further data is accepted until the + /// response has been fully written out by [`poll_read`]. + /// + /// Returns the number of bytes consumed from `data`. The caller should + /// only drain this many bytes from its receive buffer. + pub fn ingest(&mut self, data: &[&[u8]], backend: &impl DnsBackend) -> usize { + // Don't accept data while a query is in-flight, a response is pending, + // or we've hit a protocol error. + if !matches!(self.phase, Phase::Receiving) || self.protocol_error { + return 0; + } + + let total_offered: usize = data.iter().map(|c| c.len()).sum(); + tracing::info!(total_offered, buf_len = self.buf.len(), "dns_tcp ingest: start"); + + let mut total_consumed = 0; for chunk in data { - let remaining_capacity = MAX_DNS_TCP_MESSAGE_SIZE.saturating_sub(self.rx_buf.len()); - let accepted = chunk.len().min(remaining_capacity); - if accepted > 0 { - self.rx_buf.extend(&chunk[..accepted]); - } - if accepted < chunk.len() { - tracelimit::warn_ratelimited!( - dropped = chunk.len() - accepted, - "DNS TCP rx_buf full, dropping excess data" - ); + let mut pos = 0; + while pos < chunk.len() { + let need = self.bytes_needed(); + if need == 0 { + // Complete message already in rx_buf but not yet submitted + // (should not happen in practice). + break; + } + let accept = (chunk.len() - pos).min(need); + self.buf.extend_from_slice(&chunk[pos..pos + accept]); + pos += accept; + total_consumed += accept; + + if self.try_submit(backend) { + // Query submitted; stop accepting data. + tracing::info!(total_consumed, "dns_tcp ingest: query submitted"); + return total_consumed; + } + if self.protocol_error { + tracing::info!(total_consumed, "dns_tcp ingest: protocol error"); + return total_consumed; + } } } - self.extract_and_submit_queries(backend); + + tracing::info!( + total_consumed, + buf_len = self.buf.len(), + "dns_tcp ingest: done (message incomplete)" + ); + total_consumed } - /// Parse the rx buffer for complete DNS TCP-framed messages - /// (2-byte big-endian length prefix + payload) and submit each query. - fn extract_and_submit_queries(&mut self, backend: &dyn DnsBackend) { - loop { - if self.rx_buf.len() < 2 { - break; - } - let msg_len = u16::from_be_bytes([self.rx_buf[0], self.rx_buf[1]]) as usize; - if msg_len < super::DNS_HEADER_SIZE { - // Too small to be a valid DNS message; discard the length - // prefix and try to resync. - self.rx_buf.drain(..2); - continue; - } - if self.rx_buf.len() < 2 + msg_len { - // Incomplete message; wait for more data. - break; - } - // On Windows, the two byte prefix must be included in the buffer - // sent to the backend, as DnsQueryRaw expects the full TCP-framed - // message. - // On Unix, the backend expects just the raw DNS query without the TCP prefix, - // so we strip it before sending. - #[cfg(unix)] - let query_data = { - self.rx_buf.drain(..2); - self.rx_buf.drain(..msg_len).collect::>() - }; - #[cfg(windows)] - let query_data = self.rx_buf.drain(..2 + msg_len).collect::>(); - - let request = DnsRequest { - flow: self.flow.clone(), - dns_query: &query_data, - }; - backend.query(&request, self.receiver.sender()); - self.in_flight += 1; + /// How many more bytes are needed to complete the current message. + fn bytes_needed(&self) -> usize { + if self.buf.len() < 2 { + return 2 - self.buf.len(); } + let msg_len = u16::from_be_bytes([self.buf[0], self.buf[1]]) as usize; + (2 + msg_len).saturating_sub(self.buf.len()) } - /// Poll for completed DNS responses and write length-prefixed data - /// Returns the total number of bytes written. - pub fn poll_read(&mut self, cx: &mut Context<'_>, bufs: &mut [IoSliceMut<'_>]) -> usize { - while let Poll::Ready(Ok(response)) = self.receiver.poll_recv(cx) { - self.in_flight = self.in_flight.saturating_sub(1); - if response.response_data.len() > MAX_DNS_TCP_MESSAGE_SIZE { - tracelimit::warn_ratelimited!( - size = response.response_data.len(), - "DNS TCP response exceeds maximum message size, dropping" - ); - continue; - } - #[cfg(unix)] - { - let len = response.response_data.len() as u16; - self.tx_buf.extend(&len.to_be_bytes()); + /// If a complete TCP-framed DNS message is in `rx_buf`, submit it to the + /// backend. Returns true if a query was submitted. + fn try_submit(&mut self, backend: &impl DnsBackend) -> bool { + if self.buf.len() < 2 { + return false; + } + let msg_len = u16::from_be_bytes([self.buf[0], self.buf[1]]) as usize; + if msg_len < super::DNS_HEADER_SIZE { + // Invalid DNS message length; flag a protocol error so the caller + // can reset the connection. + tracing::info!(msg_len, "dns_tcp: message length below DNS header minimum"); + self.protocol_error = true; + return false; + } + if self.buf.len() < 2 + msg_len { + return false; + } + + // Submit the raw DNS query (without the TCP length prefix). + tracing::info!( + msg_len, + src_port = self.flow.src_port, + "dns_tcp: submitting query to backend" + ); + let request = DnsRequest { + flow: self.flow.clone(), + dns_query: &self.buf[2..2 + msg_len], + }; + backend.query(&request, self.receiver.sender()); + self.buf.clear(); + self.phase = Phase::InFlight; + true + } + + /// Poll for the next chunk of response data. + /// + /// Models the socket `poll_read_vectored` contract: + /// - `Poll::Ready(n)` where `n > 0`: wrote `n` bytes of response data. + /// - `Poll::Ready(0)`: EOF — the guest sent FIN and all responses have + /// been drained. The caller should close the connection. + /// - `Poll::Pending`: waiting for a DNS response or for [`ingest`] to + /// submit a new query. + pub fn poll_read(&mut self, cx: &mut Context<'_>, bufs: &mut [IoSliceMut<'_>]) -> Poll { + // Continue writing a partially-sent response. + if matches!(self.phase, Phase::Responding) { + let n = self.drain_tx(bufs); + tracing::info!(n, "dns_tcp poll_read: continuing drain"); + return Poll::Ready(n); + } + + // Wait for the in-flight response. + if matches!(self.phase, Phase::InFlight) { + match self.receiver.poll_recv(cx) { + Poll::Ready(Ok(response)) => { + let payload_len = response.response_data.len(); + tracing::info!( + payload_len, + "dns_tcp poll_read: received response from backend" + ); + if payload_len > MAX_DNS_TCP_MESSAGE_SIZE { + tracelimit::warn_ratelimited!( + size = payload_len, + "DNS TCP response exceeds maximum message size, dropping" + ); + // Discard the oversized response and return to + // receiving so that ingest can accept the next query. + self.phase = Phase::Receiving; + cx.waker().wake_by_ref(); + return Poll::Pending; + } + + // Build TCP-framed response: 2-byte length prefix + payload. + self.buf.clear(); + self.buf.reserve( + (2 + payload_len).saturating_sub(self.buf.capacity()), + ); + self.buf + .extend_from_slice(&(payload_len as u16).to_be_bytes()); + self.buf.extend(response.response_data); + self.tx_offset = 0; + self.phase = Phase::Responding; + + let n = self.drain_tx(bufs); + return Poll::Ready(n); + } + Poll::Ready(Err(_)) => { + // Channel closed unexpectedly; return to receiving. + tracing::info!("dns_tcp poll_read: response channel closed unexpectedly"); + self.phase = Phase::Receiving; + } + Poll::Pending => { + tracing::info!("dns_tcp poll_read: awaiting backend response"); + return Poll::Pending; + } } - self.tx_buf.extend(&response.response_data); } - self.drain_buffered(bufs) + + // No in-flight query and no pending response. + if self.guest_fin { + tracing::info!("dns_tcp poll_read: EOF (guest FIN, no pending work)"); + Poll::Ready(0) + } else { + tracing::info!("dns_tcp poll_read: idle, waiting for ingest"); + Poll::Pending + } } - /// Drain buffered tx data into the provided buffers. - fn drain_buffered(&mut self, bufs: &mut [IoSliceMut<'_>]) -> usize { - let mut total = 0; + /// Write as much of `buf[tx_offset..]` into `bufs` as possible. + /// Clears `buf` when fully drained so it can be reused for the next + /// incoming request. + fn drain_tx(&mut self, bufs: &mut [IoSliceMut<'_>]) -> usize { + let remaining = &self.buf[self.tx_offset..]; + let mut written = 0; for buf in bufs.iter_mut() { - if self.tx_buf.is_empty() { + let left = remaining.len() - written; + if left == 0 { break; } - let n = buf.len().min(self.tx_buf.len()); - for (dst, src) in buf[..n].iter_mut().zip(self.tx_buf.drain(..n)) { - *dst = src; - } - total += n; + let n = buf.len().min(left); + buf[..n].copy_from_slice(&remaining[written..written + n]); + written += n; } - total + self.tx_offset += written; + if self.tx_offset >= self.buf.len() { + self.buf.clear(); + self.tx_offset = 0; + self.phase = Phase::Receiving; + } + written } - pub fn has_pending_tx(&self) -> bool { - !self.tx_buf.is_empty() + /// Returns true if the connection should be dropped due to invalid framing. + pub fn protocol_error(&self) -> bool { + self.protocol_error } pub fn guest_fin(&self) -> bool { @@ -158,12 +270,6 @@ impl DnsTcpHandler { pub fn set_guest_fin(&mut self) { self.guest_fin = true; } - - /// Returns true when the guest has sent FIN, all in-flight queries - /// have been resolved, and all responses have been flushed. - pub fn should_close(&self) -> bool { - self.guest_fin && self.in_flight == 0 && self.tx_buf.is_empty() - } } #[cfg(test)] @@ -209,26 +315,45 @@ mod tests { msg } + /// A 16-byte fake DNS query payload (>= 12-byte header minimum). + fn sample_query() -> Vec { + vec![ + 0xAB, 0xCD, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x66, + 0x6F, 0x6F, + ] + } + + struct NoopWaker; + impl std::task::Wake for NoopWaker { + fn wake(self: Arc) {} + } + #[test] fn single_query_response() { let backend = Arc::new(EchoBackend); let mut handler = DnsTcpHandler::new(test_flow()); - // 22-byte fake DNS query (> 12-byte header minimum) - let query = vec![ - 0x00, 0x14, 0xAB, 0xCD, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x03, 0x77, 0x77, 0x77, 0x03, 0x63, 0x6F, 0x6D, - ]; + let query = sample_query(); let msg = make_tcp_dns_message(&query); - handler.ingest(&[&msg], backend.as_ref()); + let consumed = handler.ingest(&[&msg], backend.as_ref()); + assert_eq!(consumed, msg.len()); let waker = std::task::Waker::from(Arc::new(NoopWaker)); let mut cx = Context::from_waker(&waker); let mut buf = vec![0u8; 256]; - handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]); - assert_eq!(u16::from_be_bytes([buf[0], buf[1]]) as usize, query.len()); + match handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]) { + Poll::Ready(n) => { + assert!(n > 0); + // First 2 bytes are the TCP length prefix. + let resp_len = u16::from_be_bytes([buf[0], buf[1]]) as usize; + assert_eq!(resp_len, query.len()); + // Response payload should match the query (echo backend). + assert_eq!(&buf[2..2 + resp_len], &query); + } + Poll::Pending => panic!("expected Ready"), + } } #[test] @@ -236,37 +361,37 @@ mod tests { let backend = Arc::new(EchoBackend); let mut handler = DnsTcpHandler::new(test_flow()); - let query = vec![ - 0xAB, 0xCD, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x66, - 0x6F, 0x6F, - ]; + let query = sample_query(); let msg = make_tcp_dns_message(&query); - // Feed just the length prefix - handler.ingest(&[&msg[..2]], backend.as_ref()); + // Feed just the length prefix. + let consumed = handler.ingest(&[&msg[..2]], backend.as_ref()); + assert_eq!(consumed, 2); let waker = std::task::Waker::from(Arc::new(NoopWaker)); let mut cx = Context::from_waker(&waker); let mut buf = vec![0u8; 256]; - assert_eq!( + assert!(matches!( handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]), - 0 - ); + Poll::Pending + )); + + // Feed the rest. + let consumed = handler.ingest(&[&msg[2..]], backend.as_ref()); + assert_eq!(consumed, msg.len() - 2); - // Feed the rest - handler.ingest(&[&msg[2..]], backend.as_ref()); - assert!(handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]) > 0); + match handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]) { + Poll::Ready(n) => assert!(n > 0), + Poll::Pending => panic!("expected Ready after completing message"), + } } #[test] - fn multiple_queries_in_one_write() { + fn backpressure_one_at_a_time() { let backend = Arc::new(EchoBackend); let mut handler = DnsTcpHandler::new(test_flow()); - let q1 = vec![ - 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x61, - 0x61, 0x61, - ]; + let q1 = sample_query(); let q2 = vec![ 0x00, 0x02, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x62, 0x62, 0x62, @@ -274,45 +399,64 @@ mod tests { let mut combined = make_tcp_dns_message(&q1); combined.extend(make_tcp_dns_message(&q2)); - handler.ingest(&[&combined], backend.as_ref()); + // Only the first message should be consumed. + let consumed = handler.ingest(&[&combined], backend.as_ref()); + assert_eq!(consumed, make_tcp_dns_message(&q1).len()); let waker = std::task::Waker::from(Arc::new(NoopWaker)); let mut cx = Context::from_waker(&waker); - let mut buf = vec![0u8; 512]; - let n = handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]); - // Each response is a 2-byte TCP length prefix + the DNS payload. - let per_response = q1.len() + 2; // 2-byte TCP prefix + DNS payload - assert_eq!(n, 2 * per_response); + // Drain the first response. + let mut buf = vec![0u8; 256]; + match handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]) { + Poll::Ready(n) => assert!(n > 0), + Poll::Pending => panic!("expected Ready for first response"), + } + + // Now the second message can be ingested. + let remaining = &combined[consumed..]; + let consumed2 = handler.ingest(&[remaining], backend.as_ref()); + assert_eq!(consumed2, make_tcp_dns_message(&q2).len()); + + match handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]) { + Poll::Ready(n) => assert!(n > 0), + Poll::Pending => panic!("expected Ready for second response"), + } } #[test] - fn should_close_after_fin_and_drain() { + fn eof_after_fin_and_drain() { let backend = Arc::new(EchoBackend); let mut handler = DnsTcpHandler::new(test_flow()); - let query = vec![ - 0xAB, 0xCD, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x78, - 0x78, 0x78, - ]; + let query = sample_query(); handler.ingest(&[&make_tcp_dns_message(&query)], backend.as_ref()); - handler.set_guest_fin(); let waker = std::task::Waker::from(Arc::new(NoopWaker)); let mut cx = Context::from_waker(&waker); + // Drain the response. let mut buf = vec![0u8; 256]; let _ = handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]); - // tx_buf is now drained, but we need to verify should_close - // only returns true after all data is consumed. - assert!(!handler.has_pending_tx()); + handler.set_guest_fin(); - assert!(handler.should_close()); + // Should now report EOF. + assert!(matches!( + handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]), + Poll::Ready(0) + )); } - struct NoopWaker; - impl std::task::Wake for NoopWaker { - fn wake(self: Arc) {} + #[test] + fn protocol_error_on_invalid_length() { + let backend = Arc::new(EchoBackend); + let mut handler = DnsTcpHandler::new(test_flow()); + + // Craft a message with msg_len < DNS_HEADER_SIZE (12). + // Length prefix says 4 bytes, which is too small for a DNS header. + let bad_msg = [0x00, 0x04, 0x01, 0x02, 0x03, 0x04]; + handler.ingest(&[&bad_msg], backend.as_ref()); + assert!(handler.protocol_error()); } } diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/mod.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/mod.rs index 89adc8f0ee..0503548fcd 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/mod.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/mod.rs @@ -20,6 +20,12 @@ mod unix; #[cfg(windows)] mod windows; +#[cfg(unix)] +type PlatformDnsBackend = unix::UnixDnsResolverBackend; + +#[cfg(windows)] +type PlatformDnsBackend = windows::WindowsDnsResolverBackend; + static DNS_HEADER_SIZE: usize = 12; /// Transport protocol for a DNS query. @@ -37,7 +43,9 @@ pub struct DnsFlow { pub dst_port: u16, pub gateway_mac: EthernetAddress, pub client_mac: EthernetAddress, - // Used by the glibc and Windows DNS backends, but not the musl backend. + // Used by the glibc and Windows DNS backends. The musl resolver + // implementation handles TCP internally, so this field is not + // used in the musl backend. #[allow(dead_code)] pub transport: DnsTransport, } @@ -55,6 +63,12 @@ pub struct DnsResponse { pub response_data: Vec, } +/// Backend trait for resolving DNS queries. +/// +/// Both `dns_query` in [`DnsRequest`] and `response_data` in [`DnsResponse`] +/// carry **raw DNS message bytes** with no transport-layer framing (e.g. no +/// TCP 2-byte length prefix). Transport framing is the responsibility of the +/// caller (see [`dns_tcp::DnsTcpHandler`]). pub(crate) trait DnsBackend: Send + Sync { fn query(&self, request: &DnsRequest<'_>, response_sender: Sender); } @@ -62,7 +76,7 @@ pub(crate) trait DnsBackend: Send + Sync { #[derive(Inspect)] pub struct DnsResolver { #[inspect(skip)] - backend: Arc, + backend: Arc, #[inspect(skip)] receiver: Receiver, pending_requests: usize, @@ -136,7 +150,7 @@ impl DnsResolver { } } - pub fn backend(&self) -> &Arc { + pub fn backend(&self) -> &Arc { &self.backend } } diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/windows/mod.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/windows/mod.rs index 0f00411379..58e3822d5f 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/windows/mod.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/windows/mod.rs @@ -75,15 +75,30 @@ impl DnsBackend for WindowsDnsResolverBackend { // Clone the sender for error handling let response_sender_clone = response_sender.clone(); - // Create internal request + // For TCP, DnsQueryRaw expects the 2-byte TCP length prefix in the + // query buffer. Prepend it here so that the DnsTcpHandler can remain + // platform-agnostic and always pass raw DNS bytes. + let wire_query = match request.flow.transport { + super::DnsTransport::Tcp => { + let len = request.dns_query.len() as u16; + let mut buf = Vec::with_capacity(2 + request.dns_query.len()); + buf.extend_from_slice(&len.to_be_bytes()); + buf.extend_from_slice(request.dns_query); + buf + } + super::DnsTransport::Udp => request.dns_query.to_vec(), + }; + + // Create internal request with raw DNS bytes (no TCP prefix) so that + // SERVFAIL generation works correctly. let internal_request = DnsRequestInternal { flow: request.flow.clone(), query: request.dns_query.to_vec(), response_sender, }; - let dns_query_size = internal_request.query.len() as u32; - let dns_query = internal_request.query.as_ptr().cast_mut(); + let dns_query_size = wire_query.len() as u32; + let dns_query = wire_query.as_ptr().cast_mut(); // Pre-insert placeholder before calling DnsQueryRaw to avoid race condition // where callback fires before we can insert the cancel handle. @@ -233,10 +248,20 @@ unsafe extern "system" fn dns_query_raw_callback( // SAFETY: query_results is provided by Windows and will be freed after processing let response = match unsafe { process_dns_results(query_results) } { - Ok(response_data) => Some(DnsResponse { - flow: context.request.flow.clone(), - response_data, - }), + Ok(mut response_data) => { + // For TCP, DnsQueryRaw returns the response with a 2-byte TCP + // length prefix. Strip it so the DnsTcpHandler can add its own + // framing. + if context.request.flow.transport == super::DnsTransport::Tcp + && response_data.len() >= 2 + { + response_data.drain(..2); + } + Some(DnsResponse { + flow: context.request.flow.clone(), + response_data, + }) + } Err(DnsResultError::QueryFailed(status)) => { tracelimit::warn_ratelimited!(status, "DNS query failed, returning SERVFAIL"); None diff --git a/vm/devices/net/net_consomme/consomme/src/tcp.rs b/vm/devices/net/net_consomme/consomme/src/tcp.rs index 2ca41eb95c..f8f9e78e65 100644 --- a/vm/devices/net/net_consomme/consomme/src/tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/tcp.rs @@ -132,10 +132,19 @@ enum TcpBackend { Dns(DnsTcpHandler), } -#[derive(Inspect)] struct TcpConnection { - #[inspect(skip)] backend: TcpBackend, + inner: TcpConnectionInner, +} + +impl Inspect for TcpConnection { + fn inspect(&self, req: inspect::Request<'_>) { + self.inner.inspect(req) + } +} + +#[derive(Inspect)] +struct TcpConnectionInner { loopback_port: LoopbackPortInfo, state: TcpState, @@ -238,8 +247,8 @@ impl Access<'_, T> { // This supports a guest owning both the sending and receiving ports. if other_addr.ip().is_loopback() { for (other_ft, connection) in self.inner.tcp.connections.iter() { - if connection.state == TcpState::Connecting && other_ft.dst.port() == *port { - if let LoopbackPortInfo::ProxyForGuestPort{sending_port, guest_port} = connection.loopback_port { + if connection.inner.state == TcpState::Connecting && other_ft.dst.port() == *port { + if let LoopbackPortInfo::ProxyForGuestPort{sending_port, guest_port} = connection.inner.loopback_port { if sending_port == other_addr.port() { other_addr.set_port(guest_port); break; @@ -309,13 +318,11 @@ impl Access<'_, T> { state: &mut self.inner.state, client: self.client, }; - // Temporarily take the backend to split the mutable borrow between - // the backend and the rest of TcpConnection's fields. - let mut backend = std::mem::replace(&mut conn.backend, TcpBackend::Socket(None)); - let result = match &mut backend { + match &mut conn.backend { TcpBackend::Dns(dns_handler) => match dns_backend { Some(dns_backend) => { - conn.poll_dns_backend(cx, &mut sender, dns_handler, dns_backend) + conn.inner + .poll_dns_backend(cx, &mut sender, dns_handler, dns_backend) } None => { tracing::warn!("DNS TCP connection without DNS resolver, dropping"); @@ -323,11 +330,9 @@ impl Access<'_, T> { } }, TcpBackend::Socket(opt_socket) => { - conn.poll_socket_backend(cx, &mut sender, opt_socket) + conn.inner.poll_socket_backend(cx, &mut sender, opt_socket) } - }; - conn.backend = backend; - result + } }) } @@ -395,7 +400,7 @@ impl Access<'_, T> { match self.inner.tcp.connections.entry(ft) { hash_map::Entry::Occupied(mut e) => { let conn = e.get_mut(); - if !conn.handle_packet(&mut sender, &tcp)? { + if !conn.inner.handle_packet(&mut sender, &tcp)? { e.remove(); } } @@ -564,7 +569,7 @@ impl Sender<'_, T> { } } -impl Default for TcpConnection { +impl Default for TcpConnectionInner { fn default() -> Self { let mut rx_tx_seq = [0; 8]; getrandom::fill(&mut rx_tx_seq[..]).expect("prng failure"); @@ -582,7 +587,6 @@ impl Default for TcpConnection { let tx_buffer_size = 16384; Self { - backend: TcpBackend::Socket(None), loopback_port: LoopbackPortInfo::None, state: TcpState::Connecting, rx_buffer: VecDeque::with_capacity(rx_buffer_size), @@ -609,8 +613,8 @@ impl Default for TcpConnection { impl TcpConnection { fn new(sender: &mut Sender<'_, impl Client>, tcp: &TcpRepr<'_>) -> Result { - let mut this = Self::default(); - this.initialize_from_first_client_packet(tcp)?; + let mut inner = TcpConnectionInner::default(); + inner.initialize_from_first_client_packet(tcp)?; let socket = Socket::new( match sender.ft.dst { @@ -652,7 +656,7 @@ impl TcpConnection { } Some(addr) => { if addr.ip().is_loopback() { - this.loopback_port = LoopbackPortInfo::ProxyForGuestPort { + inner.loopback_port = LoopbackPortInfo::ProxyForGuestPort { sending_port: addr.port(), guest_port: sender.ft.src.port(), }; @@ -660,23 +664,27 @@ impl TcpConnection { } } } - this.backend = TcpBackend::Socket(Some(socket)); - Ok(this) + Ok(Self { + backend: TcpBackend::Socket(Some(socket)), + inner, + }) } fn new_from_accept( sender: &mut Sender<'_, impl Client>, socket: Socket, ) -> Result { - let mut this = Self { - backend: TcpBackend::Socket(Some( - PolledSocket::new(sender.client.driver(), socket).map_err(DropReason::Io)?, - )), + let mut inner = TcpConnectionInner { state: TcpState::SynSent, ..Default::default() }; - this.send_syn(sender, None); - Ok(this) + inner.send_syn(sender, None); + Ok(Self { + backend: TcpBackend::Socket(Some( + PolledSocket::new(sender.client.driver(), socket).map_err(DropReason::Io)?, + )), + inner, + }) } /// Create a virtual DNS TCP connection (no real host socket). @@ -686,8 +694,8 @@ impl TcpConnection { sender: &mut Sender<'_, impl Client>, tcp: &TcpRepr<'_>, ) -> Result { - let mut this = Self::default(); - this.initialize_from_first_client_packet(tcp)?; + let mut inner = TcpConnectionInner::default(); + inner.initialize_from_first_client_packet(tcp)?; let flow = crate::dns_resolver::DnsFlow { src_addr: sender.ft.src.ip().into(), @@ -699,15 +707,19 @@ impl TcpConnection { transport: crate::dns_resolver::DnsTransport::Tcp, }; - this.backend = TcpBackend::Dns(DnsTcpHandler::new(flow)); // Immediately transition to SynReceived so the handshake SYN-ACK is sent. - this.state = TcpState::SynReceived; - this.rx_window_cap = this.rx_buffer.capacity(); - this.send_syn(sender, Some(this.rx_seq)); + inner.state = TcpState::SynReceived; + inner.rx_window_cap = inner.rx_buffer.capacity(); + inner.send_syn(sender, Some(inner.rx_seq)); - Ok(this) + Ok(Self { + backend: TcpBackend::Dns(DnsTcpHandler::new(flow)), + inner, + }) } +} +impl TcpConnectionInner { fn initialize_from_first_client_packet(&mut self, tcp: &TcpRepr<'_>) -> Result<(), DropReason> { // The TCPv4 default maximum segment size is 536. This can be bigger for // IPv6. @@ -750,33 +762,45 @@ impl TcpConnection { cx: &mut Context<'_>, sender: &mut Sender<'_, impl Client>, dns_handler: &mut DnsTcpHandler, - dns_backend: &dyn DnsBackend, + dns_backend: &impl DnsBackend, ) -> bool { - // rx path: feed guest data into the DNS handler for query extraction. - let (a, b) = self.rx_buffer.as_slices(); - dns_handler.ingest(&[a, b], dns_backend); - self.rx_buffer.clear(); - + // Propagate guest FIN before the tx path so that poll_read can + // detect EOF on the same iteration. if self.state.rx_fin() && !dns_handler.guest_fin() { dns_handler.set_guest_fin(); } - // tx path: poll DNS responses directly into tx_buffer. + // tx path first: drain DNS responses into tx_buffer. + // This frees up backpressure so that ingest can make progress. while !self.tx_buffer.is_full() { let (a, b) = self.tx_buffer.unwritten_slices_mut(); let mut bufs = [IoSliceMut::new(a), IoSliceMut::new(b)]; - let n = dns_handler.poll_read(cx, &mut bufs); - if n == 0 { - break; + match dns_handler.poll_read(cx, &mut bufs) { + Poll::Ready(n) => { + if n == 0 { + // EOF — close the connection. + if !self.state.tx_fin() { + self.close(); + } + break; + } + self.tx_buffer.extend_by(n); + } + Poll::Pending => break, } - self.tx_buffer.extend_by(n); } - let want_close = dns_handler.should_close() && !self.state.tx_fin(); - let has_pending_tx = dns_handler.has_pending_tx(); + // rx path: feed guest data into the DNS handler for query extraction. + let (a, b) = self.rx_buffer.as_slices(); + let consumed = dns_handler.ingest(&[a, b], dns_backend); + if consumed > 0 { + self.rx_buffer.drain(..consumed); + } - if want_close { - self.close(); + if dns_handler.protocol_error() { + // Invalid DNS TCP framing; reset the connection. + sender.rst(self.tx_send, Some(self.rx_seq)); + return false; } self.send_next(sender); @@ -784,8 +808,7 @@ impl TcpConnection { || self.state == TcpState::LastAck || (self.state.tx_fin() && self.state.rx_fin() - && self.tx_buffer.is_empty() - && !has_pending_tx)) + && self.tx_buffer.is_empty())) } /// Poll the real-socket TCP connection backend. From 464930c1814b35352147ac139f9bca738d75ea02 Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Wed, 25 Feb 2026 16:34:01 -0800 Subject: [PATCH 16/25] . --- .../net_consomme/consomme/src/dns_resolver/dns_tcp.rs | 11 +++++++---- vm/devices/net/net_consomme/consomme/src/tcp.rs | 4 +--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index e121be656f..b4de01de4b 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -81,7 +81,11 @@ impl DnsTcpHandler { } let total_offered: usize = data.iter().map(|c| c.len()).sum(); - tracing::info!(total_offered, buf_len = self.buf.len(), "dns_tcp ingest: start"); + tracing::info!( + total_offered, + buf_len = self.buf.len(), + "dns_tcp ingest: start" + ); let mut total_consumed = 0; for chunk in data { @@ -200,9 +204,8 @@ impl DnsTcpHandler { // Build TCP-framed response: 2-byte length prefix + payload. self.buf.clear(); - self.buf.reserve( - (2 + payload_len).saturating_sub(self.buf.capacity()), - ); + self.buf + .reserve((2 + payload_len).saturating_sub(self.buf.capacity())); self.buf .extend_from_slice(&(payload_len as u16).to_be_bytes()); self.buf.extend(response.response_data); diff --git a/vm/devices/net/net_consomme/consomme/src/tcp.rs b/vm/devices/net/net_consomme/consomme/src/tcp.rs index f8f9e78e65..1cc8047c1f 100644 --- a/vm/devices/net/net_consomme/consomme/src/tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/tcp.rs @@ -806,9 +806,7 @@ impl TcpConnectionInner { self.send_next(sender); !(self.state == TcpState::TimeWait || self.state == TcpState::LastAck - || (self.state.tx_fin() - && self.state.rx_fin() - && self.tx_buffer.is_empty())) + || (self.state.tx_fin() && self.state.rx_fin() && self.tx_buffer.is_empty())) } /// Poll the real-socket TCP connection backend. From 3b09d2d230316b533443696e761d2892f10522a7 Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Wed, 25 Feb 2026 17:12:23 -0800 Subject: [PATCH 17/25] Remove extraneous tracing, have all dns requests/responses go through the same codepaths --- .../consomme/src/dns_resolver/dns_tcp.rs | 99 ++++++++++--------- .../consomme/src/dns_resolver/mod.rs | 91 +++++++++++++---- .../net/net_consomme/consomme/src/tcp.rs | 18 ++-- .../net/net_consomme/consomme/src/udp.rs | 4 +- 4 files changed, 137 insertions(+), 75 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index b4de01de4b..da418d2e7b 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -10,6 +10,7 @@ use super::DnsBackend; use super::DnsFlow; use super::DnsRequest; +use super::DnsResolver; use super::DnsResponse; use mesh_channel_core::Receiver; use std::io::IoSliceMut; @@ -73,7 +74,7 @@ impl DnsTcpHandler { /// /// Returns the number of bytes consumed from `data`. The caller should /// only drain this many bytes from its receive buffer. - pub fn ingest(&mut self, data: &[&[u8]], backend: &impl DnsBackend) -> usize { + pub fn ingest(&mut self, data: &[&[u8]], dns: &mut DnsResolver) -> usize { // Don't accept data while a query is in-flight, a response is pending, // or we've hit a protocol error. if !matches!(self.phase, Phase::Receiving) || self.protocol_error { @@ -102,13 +103,10 @@ impl DnsTcpHandler { pos += accept; total_consumed += accept; - if self.try_submit(backend) { - // Query submitted; stop accepting data. - tracing::info!(total_consumed, "dns_tcp ingest: query submitted"); + if self.try_submit(dns) { return total_consumed; } if self.protocol_error { - tracing::info!(total_consumed, "dns_tcp ingest: protocol error"); return total_consumed; } } @@ -131,17 +129,17 @@ impl DnsTcpHandler { (2 + msg_len).saturating_sub(self.buf.len()) } - /// If a complete TCP-framed DNS message is in `rx_buf`, submit it to the - /// backend. Returns true if a query was submitted. - fn try_submit(&mut self, backend: &impl DnsBackend) -> bool { + /// If a complete TCP-framed DNS message is in `buf`, submit it to the + /// resolver via [`DnsResolver::submit_tcp_query`]. Returns true if the query + /// was accepted. + fn try_submit(&mut self, dns: &mut DnsResolver) -> bool { if self.buf.len() < 2 { return false; } let msg_len = u16::from_be_bytes([self.buf[0], self.buf[1]]) as usize; - if msg_len < super::DNS_HEADER_SIZE { + if msg_len <= super::DNS_HEADER_SIZE { // Invalid DNS message length; flag a protocol error so the caller // can reset the connection. - tracing::info!(msg_len, "dns_tcp: message length below DNS header minimum"); self.protocol_error = true; return false; } @@ -150,16 +148,21 @@ impl DnsTcpHandler { } // Submit the raw DNS query (without the TCP length prefix). - tracing::info!( - msg_len, - src_port = self.flow.src_port, - "dns_tcp: submitting query to backend" - ); let request = DnsRequest { flow: self.flow.clone(), dns_query: &self.buf[2..2 + msg_len], }; - backend.query(&request, self.receiver.sender()); + if !dns.submit_tcp_query(&request, self.receiver.sender()) { + // Request limit hit; flag an error so the caller + // resets the connection. + tracelimit::warn_ratelimited!( + msg_len, + src_port = self.flow.src_port, + "dns_tcp: query rate-limited, closing connection" + ); + self.protocol_error = true; + return false; + } self.buf.clear(); self.phase = Phase::InFlight; true @@ -173,11 +176,15 @@ impl DnsTcpHandler { /// been drained. The caller should close the connection. /// - `Poll::Pending`: waiting for a DNS response or for [`ingest`] to /// submit a new query. - pub fn poll_read(&mut self, cx: &mut Context<'_>, bufs: &mut [IoSliceMut<'_>]) -> Poll { + pub fn poll_read( + &mut self, + cx: &mut Context<'_>, + bufs: &mut [IoSliceMut<'_>], + dns: &mut DnsResolver, + ) -> Poll { // Continue writing a partially-sent response. if matches!(self.phase, Phase::Responding) { let n = self.drain_tx(bufs); - tracing::info!(n, "dns_tcp poll_read: continuing drain"); return Poll::Ready(n); } @@ -185,11 +192,8 @@ impl DnsTcpHandler { if matches!(self.phase, Phase::InFlight) { match self.receiver.poll_recv(cx) { Poll::Ready(Ok(response)) => { + dns.complete_tcp_query(); let payload_len = response.response_data.len(); - tracing::info!( - payload_len, - "dns_tcp poll_read: received response from backend" - ); if payload_len > MAX_DNS_TCP_MESSAGE_SIZE { tracelimit::warn_ratelimited!( size = payload_len, @@ -217,11 +221,13 @@ impl DnsTcpHandler { } Poll::Ready(Err(_)) => { // Channel closed unexpectedly; return to receiving. - tracing::info!("dns_tcp poll_read: response channel closed unexpectedly"); + tracelimit::warn_ratelimited!( + "dns_tcp poll_read: response channel closed unexpectedly" + ); self.phase = Phase::Receiving; } Poll::Pending => { - tracing::info!("dns_tcp poll_read: awaiting backend response"); + tracelimit::warn_ratelimited!("dns_tcp poll_read: awaiting backend response"); return Poll::Pending; } } @@ -229,10 +235,8 @@ impl DnsTcpHandler { // No in-flight query and no pending response. if self.guest_fin { - tracing::info!("dns_tcp poll_read: EOF (guest FIN, no pending work)"); Poll::Ready(0) } else { - tracing::info!("dns_tcp poll_read: idle, waiting for ingest"); Poll::Pending } } @@ -278,6 +282,9 @@ impl DnsTcpHandler { #[cfg(test)] mod tests { use super::*; + use crate::dns_resolver::DnsBackend; + use crate::dns_resolver::DnsRequest; + use crate::dns_resolver::DnsResponse; use std::sync::Arc; /// A test DNS backend that echoes the query back as the response. @@ -333,20 +340,20 @@ mod tests { #[test] fn single_query_response() { - let backend = Arc::new(EchoBackend); + let mut dns = DnsResolver::new_for_test(Arc::new(EchoBackend)); let mut handler = DnsTcpHandler::new(test_flow()); let query = sample_query(); let msg = make_tcp_dns_message(&query); - let consumed = handler.ingest(&[&msg], backend.as_ref()); + let consumed = handler.ingest(&[&msg], &mut dns); assert_eq!(consumed, msg.len()); let waker = std::task::Waker::from(Arc::new(NoopWaker)); let mut cx = Context::from_waker(&waker); let mut buf = vec![0u8; 256]; - match handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]) { + match handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)], &mut dns) { Poll::Ready(n) => { assert!(n > 0); // First 2 bytes are the TCP length prefix. @@ -361,29 +368,29 @@ mod tests { #[test] fn partial_message_buffering() { - let backend = Arc::new(EchoBackend); + let mut dns = DnsResolver::new_for_test(Arc::new(EchoBackend)); let mut handler = DnsTcpHandler::new(test_flow()); let query = sample_query(); let msg = make_tcp_dns_message(&query); // Feed just the length prefix. - let consumed = handler.ingest(&[&msg[..2]], backend.as_ref()); + let consumed = handler.ingest(&[&msg[..2]], &mut dns); assert_eq!(consumed, 2); let waker = std::task::Waker::from(Arc::new(NoopWaker)); let mut cx = Context::from_waker(&waker); let mut buf = vec![0u8; 256]; assert!(matches!( - handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]), + handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)], &mut dns), Poll::Pending )); // Feed the rest. - let consumed = handler.ingest(&[&msg[2..]], backend.as_ref()); + let consumed = handler.ingest(&[&msg[2..]], &mut dns); assert_eq!(consumed, msg.len() - 2); - match handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]) { + match handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)], &mut dns) { Poll::Ready(n) => assert!(n > 0), Poll::Pending => panic!("expected Ready after completing message"), } @@ -391,7 +398,7 @@ mod tests { #[test] fn backpressure_one_at_a_time() { - let backend = Arc::new(EchoBackend); + let mut dns = DnsResolver::new_for_test(Arc::new(EchoBackend)); let mut handler = DnsTcpHandler::new(test_flow()); let q1 = sample_query(); @@ -403,7 +410,7 @@ mod tests { combined.extend(make_tcp_dns_message(&q2)); // Only the first message should be consumed. - let consumed = handler.ingest(&[&combined], backend.as_ref()); + let consumed = handler.ingest(&[&combined], &mut dns); assert_eq!(consumed, make_tcp_dns_message(&q1).len()); let waker = std::task::Waker::from(Arc::new(NoopWaker)); @@ -411,17 +418,17 @@ mod tests { // Drain the first response. let mut buf = vec![0u8; 256]; - match handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]) { + match handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)], &mut dns) { Poll::Ready(n) => assert!(n > 0), Poll::Pending => panic!("expected Ready for first response"), } // Now the second message can be ingested. let remaining = &combined[consumed..]; - let consumed2 = handler.ingest(&[remaining], backend.as_ref()); + let consumed2 = handler.ingest(&[remaining], &mut dns); assert_eq!(consumed2, make_tcp_dns_message(&q2).len()); - match handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]) { + match handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)], &mut dns) { Poll::Ready(n) => assert!(n > 0), Poll::Pending => panic!("expected Ready for second response"), } @@ -429,37 +436,37 @@ mod tests { #[test] fn eof_after_fin_and_drain() { - let backend = Arc::new(EchoBackend); + let mut dns = DnsResolver::new_for_test(Arc::new(EchoBackend)); let mut handler = DnsTcpHandler::new(test_flow()); let query = sample_query(); - handler.ingest(&[&make_tcp_dns_message(&query)], backend.as_ref()); + handler.ingest(&[&make_tcp_dns_message(&query)], &mut dns); let waker = std::task::Waker::from(Arc::new(NoopWaker)); let mut cx = Context::from_waker(&waker); // Drain the response. let mut buf = vec![0u8; 256]; - let _ = handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]); + let _ = handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)], &mut dns); handler.set_guest_fin(); // Should now report EOF. assert!(matches!( - handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)]), + handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)], &mut dns), Poll::Ready(0) )); } #[test] fn protocol_error_on_invalid_length() { - let backend = Arc::new(EchoBackend); + let mut dns = DnsResolver::new_for_test(Arc::new(EchoBackend)); let mut handler = DnsTcpHandler::new(test_flow()); - // Craft a message with msg_len < DNS_HEADER_SIZE (12). + // Craft a message with msg_len <= DNS_HEADER_SIZE (12). // Length prefix says 4 bytes, which is too small for a DNS header. let bad_msg = [0x00, 0x04, 0x01, 0x02, 0x03, 0x04]; - handler.ingest(&[&bad_msg], backend.as_ref()); + handler.ingest(&[&bad_msg], &mut dns); assert!(handler.protocol_error()); } } diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/mod.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/mod.rs index 0503548fcd..acecb02c13 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/mod.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/mod.rs @@ -74,11 +74,15 @@ pub(crate) trait DnsBackend: Send + Sync { } #[derive(Inspect)] -pub struct DnsResolver { +pub struct DnsResolver { #[inspect(skip)] - backend: Arc, + backend: Arc, + /// Channel receiver for UDP DNS responses. Each call to + /// [`Self::submit_udp_query`] sends the response back through this + /// channel so that [`Self::poll_udp_response`] can retrieve it. + /// The TCP path uses its own per-connection channel instead. #[inspect(skip)] - receiver: Receiver, + udp_receiver: Receiver, pending_requests: usize, max_pending_requests: usize, } @@ -95,10 +99,10 @@ impl DnsResolver { pub fn new(max_pending_requests: usize) -> Result { use crate::dns_resolver::windows::WindowsDnsResolverBackend; - let receiver = Receiver::new(); + let udp_receiver = Receiver::new(); Ok(Self { backend: Arc::new(WindowsDnsResolverBackend::new()?), - receiver, + udp_receiver, pending_requests: 0, max_pending_requests, }) @@ -112,36 +116,60 @@ impl DnsResolver { pub fn new(max_pending_requests: usize) -> Result { use crate::dns_resolver::unix::UnixDnsResolverBackend; - let receiver = Receiver::new(); + let udp_receiver = Receiver::new(); Ok(Self { backend: Arc::new(UnixDnsResolverBackend::new()?), - receiver, + udp_receiver, pending_requests: 0, max_pending_requests, }) } +} - pub fn handle_dns(&mut self, request: &DnsRequest<'_>) -> Result<(), DropReason> { - if request.dns_query.len() <= DNS_HEADER_SIZE { - return Err(DropReason::Packet(smoltcp::wire::Error)); - } - +impl DnsResolver { + // ── Shared ─────────────────────────────────────────────────────── + + /// Submit a DNS query to the backend with a caller-supplied response + /// sender. Returns `true` if accepted, `false` if the pending-request + /// limit has been reached. + fn submit_query( + &mut self, + request: &DnsRequest<'_>, + response_sender: Sender, + ) -> bool { if self.pending_requests < self.max_pending_requests { self.pending_requests += 1; - self.backend.query(request, self.receiver.sender()); + self.backend.query(request, response_sender); + true } else { tracelimit::warn_ratelimited!( current = self.pending_requests, max = self.max_pending_requests, "DNS request limit reached" ); + false + } + } + + /// Validate and submit a DNS query received over UDP. + /// + /// The response will be delivered through [`Self::poll_udp_response`]. + pub fn submit_udp_query(&mut self, request: &DnsRequest<'_>) -> Result<(), DropReason> { + if request.dns_query.len() <= DNS_HEADER_SIZE { + return Err(DropReason::Packet(smoltcp::wire::Error)); } + let sender = self.udp_receiver.sender(); + self.submit_query(request, sender); Ok(()) } - pub fn poll_response(&mut self, cx: &mut Context<'_>) -> Poll> { - match self.receiver.poll_recv(cx) { + /// Poll for the next completed UDP DNS response. + /// + /// This drains `self.udp_receiver`; it must **not** be used for TCP + /// responses (the TCP path has its own per-connection channel). + pub fn poll_udp_response(&mut self, cx: &mut Context<'_>) -> Poll> { + match self.udp_receiver.poll_recv(cx) { Poll::Ready(Ok(response)) => { self.pending_requests -= 1; Poll::Ready(Some(response)) @@ -150,8 +178,37 @@ impl DnsResolver { } } - pub fn backend(&self) -> &Arc { - &self.backend + /// Submit a DNS query with a caller-supplied response sender. + /// + /// Returns `true` if the query was accepted, or `false` if the + /// pending-request limit has been reached. + /// + /// The TCP handler calls this with its own [`Sender`] so responses + /// arrive on the per-connection channel rather than `udp_receiver`. + pub fn submit_tcp_query( + &mut self, + request: &DnsRequest<'_>, + response_sender: Sender, + ) -> bool { + self.submit_query(request, response_sender) + } + + /// Decrement the pending-request counter after a TCP response has + /// been consumed by [`dns_tcp::DnsTcpHandler`]. + pub fn complete_tcp_query(&mut self) { + self.pending_requests = self.pending_requests.saturating_sub(1); + } + + /// Create a resolver with a test backend (for unit tests only). + #[cfg(test)] + pub(crate) fn new_for_test(backend: Arc) -> Self { + let udp_receiver = Receiver::new(); + Self { + backend, + udp_receiver, + pending_requests: 0, + max_pending_requests: DEFAULT_MAX_PENDING_DNS_REQUESTS, + } } } diff --git a/vm/devices/net/net_consomme/consomme/src/tcp.rs b/vm/devices/net/net_consomme/consomme/src/tcp.rs index 1cc8047c1f..a753b9fac7 100644 --- a/vm/devices/net/net_consomme/consomme/src/tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/tcp.rs @@ -9,7 +9,7 @@ use super::DropReason; use crate::ChecksumState; use crate::ConsommeState; use crate::IpAddresses; -use crate::dns_resolver::DnsBackend; +use crate::dns_resolver::DnsResolver; use crate::dns_resolver::dns_tcp::DnsTcpHandler; use futures::AsyncRead; use futures::AsyncWrite; @@ -311,7 +311,6 @@ impl Access<'_, T> { Err(_) => false, }); // Check for any new incoming data - let dns_backend = self.inner.dns.as_ref().map(|d| d.backend().as_ref()); self.inner.tcp.connections.retain(|ft, conn| { let mut sender = Sender { ft, @@ -319,11 +318,10 @@ impl Access<'_, T> { client: self.client, }; match &mut conn.backend { - TcpBackend::Dns(dns_handler) => match dns_backend { - Some(dns_backend) => { - conn.inner - .poll_dns_backend(cx, &mut sender, dns_handler, dns_backend) - } + TcpBackend::Dns(dns_handler) => match &mut self.inner.dns { + Some(dns) => conn + .inner + .poll_dns_backend(cx, &mut sender, dns_handler, dns), None => { tracing::warn!("DNS TCP connection without DNS resolver, dropping"); false @@ -762,7 +760,7 @@ impl TcpConnectionInner { cx: &mut Context<'_>, sender: &mut Sender<'_, impl Client>, dns_handler: &mut DnsTcpHandler, - dns_backend: &impl DnsBackend, + dns: &mut DnsResolver, ) -> bool { // Propagate guest FIN before the tx path so that poll_read can // detect EOF on the same iteration. @@ -775,7 +773,7 @@ impl TcpConnectionInner { while !self.tx_buffer.is_full() { let (a, b) = self.tx_buffer.unwritten_slices_mut(); let mut bufs = [IoSliceMut::new(a), IoSliceMut::new(b)]; - match dns_handler.poll_read(cx, &mut bufs) { + match dns_handler.poll_read(cx, &mut bufs, dns) { Poll::Ready(n) => { if n == 0 { // EOF — close the connection. @@ -792,7 +790,7 @@ impl TcpConnectionInner { // rx path: feed guest data into the DNS handler for query extraction. let (a, b) = self.rx_buffer.as_slices(); - let consumed = dns_handler.ingest(&[a, b], dns_backend); + let consumed = dns_handler.ingest(&[a, b], dns); if consumed > 0 { self.rx_buffer.drain(..consumed); } diff --git a/vm/devices/net/net_consomme/consomme/src/udp.rs b/vm/devices/net/net_consomme/consomme/src/udp.rs index 2870446960..645ab7f216 100644 --- a/vm/devices/net/net_consomme/consomme/src/udp.rs +++ b/vm/devices/net/net_consomme/consomme/src/udp.rs @@ -204,7 +204,7 @@ impl Access<'_, T> { self.inner .dns .as_mut() - .and_then(|dns| match dns.poll_response(cx) { + .and_then(|dns| match dns.poll_udp_response(cx) { Poll::Ready(resp) => resp, Poll::Pending => None, }) @@ -449,7 +449,7 @@ impl Access<'_, T> { // Submit the DNS query with addressing information // The response will be queued and sent later in poll_udp - dns.handle_dns(&request).map_err(|e| { + dns.submit_udp_query(&request).map_err(|e| { tracelimit::error_ratelimited!(error = ?e, "Failed to start DNS query"); DropReason::Packet(smoltcp::wire::Error) })?; From d8295fd74ff4a3ce26a3cb685fc079de8b6cd125 Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Wed, 25 Feb 2026 17:36:42 -0800 Subject: [PATCH 18/25] . --- .../consomme/src/dns_resolver/dns_tcp.rs | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index da418d2e7b..d4eaf801b2 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -17,11 +17,10 @@ use std::io::IoSliceMut; use std::task::Context; use std::task::Poll; -/// There is no official maximum size for DNS messages over TCP, but we can set -/// a reasonable upper bound to u16::MAX (65535 bytes) to prevent unbounded memory -/// usage. This is larger than the typical 512-byte limit for UDP, as TCP can -/// handle larger messages. -const MAX_DNS_TCP_MESSAGE_SIZE: usize = 65535; +// Maximum allowed DNS message size over TCP: 65535 bytes for the message +// plus 2 bytes for the TCP length prefix. This is a sanity check to prevent +// unbounded memory growth. +const MAX_DNS_TCP_PAYLOAD_SIZE: usize = (u16::MAX as usize) + 2; /// Current phase of the DNS TCP handler state machine. enum Phase { @@ -81,13 +80,6 @@ impl DnsTcpHandler { return 0; } - let total_offered: usize = data.iter().map(|c| c.len()).sum(); - tracing::info!( - total_offered, - buf_len = self.buf.len(), - "dns_tcp ingest: start" - ); - let mut total_consumed = 0; for chunk in data { let mut pos = 0; @@ -194,7 +186,7 @@ impl DnsTcpHandler { Poll::Ready(Ok(response)) => { dns.complete_tcp_query(); let payload_len = response.response_data.len(); - if payload_len > MAX_DNS_TCP_MESSAGE_SIZE { + if payload_len > MAX_DNS_TCP_PAYLOAD_SIZE { tracelimit::warn_ratelimited!( size = payload_len, "DNS TCP response exceeds maximum message size, dropping" From d8b241167841e3d141d7a83739d724ea56c9e24e Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Thu, 26 Feb 2026 08:34:54 -0800 Subject: [PATCH 19/25] . --- .../net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index d4eaf801b2..0909d1fd07 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -18,7 +18,7 @@ use std::task::Context; use std::task::Poll; // Maximum allowed DNS message size over TCP: 65535 bytes for the message -// plus 2 bytes for the TCP length prefix. This is a sanity check to prevent +// plus 2 bytes for the TCP length prefix. This is a sanity check to prevent // unbounded memory growth. const MAX_DNS_TCP_PAYLOAD_SIZE: usize = (u16::MAX as usize) + 2; From bb42fe41f12202b08918f0574252b2498c160475 Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Fri, 27 Feb 2026 09:44:27 -0800 Subject: [PATCH 20/25] Feedback --- .../consomme/src/dns_resolver/dns_tcp.rs | 203 ++++++++++-------- .../net/net_consomme/consomme/src/tcp.rs | 33 +-- 2 files changed, 126 insertions(+), 110 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index 0909d1fd07..2b4b9efd41 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -16,12 +16,26 @@ use mesh_channel_core::Receiver; use std::io::IoSliceMut; use std::task::Context; use std::task::Poll; +use std::task::ready; +use thiserror::Error; // Maximum allowed DNS message size over TCP: 65535 bytes for the message // plus 2 bytes for the TCP length prefix. This is a sanity check to prevent // unbounded memory growth. const MAX_DNS_TCP_PAYLOAD_SIZE: usize = (u16::MAX as usize) + 2; +/// Errors returned by [`DnsTcpHandler::ingest`] and [`DnsTcpHandler::poll_read`] +/// when the DNS TCP framing is invalid or the query cannot be processed. +#[derive(Debug, Error)] +pub enum DnsTcpError { + /// The TCP length prefix specified a message size too small for a valid DNS header. + #[error("invalid DNS TCP message length")] + InvalidMessageLength, + /// The query was rate-limited by the resolver backend. + #[error("DNS TCP query rate-limited")] + RateLimited, +} + /// Current phase of the DNS TCP handler state machine. enum Phase { /// Accumulating an incoming TCP-framed DNS request. @@ -46,8 +60,6 @@ pub struct DnsTcpHandler { phase: Phase, /// The guest has sent FIN; no more data will arrive. guest_fin: bool, - /// True if the TCP framing is invalid and the connection should be dropped. - protocol_error: bool, } impl DnsTcpHandler { @@ -60,7 +72,6 @@ impl DnsTcpHandler { tx_offset: 0, phase: Phase::Receiving, guest_fin: false, - protocol_error: false, } } @@ -73,11 +84,17 @@ impl DnsTcpHandler { /// /// Returns the number of bytes consumed from `data`. The caller should /// only drain this many bytes from its receive buffer. - pub fn ingest(&mut self, data: &[&[u8]], dns: &mut DnsResolver) -> usize { - // Don't accept data while a query is in-flight, a response is pending, - // or we've hit a protocol error. - if !matches!(self.phase, Phase::Receiving) || self.protocol_error { - return 0; + /// + /// Returns an error if the TCP framing is invalid or the query cannot be + /// submitted, in which case the caller should reset the connection. + pub fn ingest( + &mut self, + data: &[&[u8]], + dns: &mut DnsResolver, + ) -> Result { + // Don't accept data while a query is in-flight or a response is pending. + if !matches!(self.phase, Phase::Receiving) { + return Ok(0); } let mut total_consumed = 0; @@ -95,11 +112,10 @@ impl DnsTcpHandler { pos += accept; total_consumed += accept; - if self.try_submit(dns) { - return total_consumed; - } - if self.protocol_error { - return total_consumed; + match self.try_submit(dns) { + Ok(true) => return Ok(total_consumed), + Ok(false) => {} + Err(e) => return Err(e), } } } @@ -109,7 +125,7 @@ impl DnsTcpHandler { buf_len = self.buf.len(), "dns_tcp ingest: done (message incomplete)" ); - total_consumed + Ok(total_consumed) } /// How many more bytes are needed to complete the current message. @@ -122,21 +138,24 @@ impl DnsTcpHandler { } /// If a complete TCP-framed DNS message is in `buf`, submit it to the - /// resolver via [`DnsResolver::submit_tcp_query`]. Returns true if the query - /// was accepted. - fn try_submit(&mut self, dns: &mut DnsResolver) -> bool { + /// resolver via [`DnsResolver::submit_tcp_query`]. + /// + /// Returns `Ok(true)` if the query was submitted, `Ok(false)` if the + /// message is still incomplete, or `Err` if the framing is invalid or + /// the query was rejected. + fn try_submit( + &mut self, + dns: &mut DnsResolver, + ) -> Result { if self.buf.len() < 2 { - return false; + return Ok(false); } let msg_len = u16::from_be_bytes([self.buf[0], self.buf[1]]) as usize; if msg_len <= super::DNS_HEADER_SIZE { - // Invalid DNS message length; flag a protocol error so the caller - // can reset the connection. - self.protocol_error = true; - return false; + return Err(DnsTcpError::InvalidMessageLength); } if self.buf.len() < 2 + msg_len { - return false; + return Ok(false); } // Submit the raw DNS query (without the TCP length prefix). @@ -145,27 +164,26 @@ impl DnsTcpHandler { dns_query: &self.buf[2..2 + msg_len], }; if !dns.submit_tcp_query(&request, self.receiver.sender()) { - // Request limit hit; flag an error so the caller - // resets the connection. tracelimit::warn_ratelimited!( msg_len, src_port = self.flow.src_port, "dns_tcp: query rate-limited, closing connection" ); - self.protocol_error = true; - return false; + return Err(DnsTcpError::RateLimited); } self.buf.clear(); self.phase = Phase::InFlight; - true + Ok(true) } /// Poll for the next chunk of response data. /// /// Models the socket `poll_read_vectored` contract: - /// - `Poll::Ready(n)` where `n > 0`: wrote `n` bytes of response data. - /// - `Poll::Ready(0)`: EOF — the guest sent FIN and all responses have + /// - `Poll::Ready(Ok(n))` where `n > 0`: wrote `n` bytes of response data. + /// - `Poll::Ready(Ok(0))`: EOF — the guest sent FIN and all responses have /// been drained. The caller should close the connection. + /// - `Poll::Ready(Err(_))`: a protocol error occurred; the caller should + /// reset the connection. /// - `Poll::Pending`: waiting for a DNS response or for [`ingest`] to /// submit a new query. pub fn poll_read( @@ -173,61 +191,53 @@ impl DnsTcpHandler { cx: &mut Context<'_>, bufs: &mut [IoSliceMut<'_>], dns: &mut DnsResolver, - ) -> Poll { - // Continue writing a partially-sent response. - if matches!(self.phase, Phase::Responding) { - let n = self.drain_tx(bufs); - return Poll::Ready(n); - } - - // Wait for the in-flight response. - if matches!(self.phase, Phase::InFlight) { - match self.receiver.poll_recv(cx) { - Poll::Ready(Ok(response)) => { - dns.complete_tcp_query(); - let payload_len = response.response_data.len(); - if payload_len > MAX_DNS_TCP_PAYLOAD_SIZE { - tracelimit::warn_ratelimited!( - size = payload_len, - "DNS TCP response exceeds maximum message size, dropping" - ); - // Discard the oversized response and return to - // receiving so that ingest can accept the next query. + ) -> Poll> { + match self.phase { + Phase::InFlight => { + match ready!(self.receiver.poll_recv(cx)) { + Ok(response) => { + dns.complete_tcp_query(); + let payload_len = response.response_data.len(); + if payload_len > MAX_DNS_TCP_PAYLOAD_SIZE { + tracelimit::warn_ratelimited!( + size = payload_len, + "DNS TCP response exceeds maximum message size, dropping" + ); + // Discard the oversized response and return to + // receiving so that ingest can accept the next query. + self.phase = Phase::Receiving; + cx.waker().wake_by_ref(); + return Poll::Pending; + } + + // Build TCP-framed response: 2-byte length prefix + payload. + self.buf.clear(); + self.buf + .reserve((2 + payload_len).saturating_sub(self.buf.capacity())); + self.buf + .extend_from_slice(&(payload_len as u16).to_be_bytes()); + self.buf.extend(response.response_data); + self.tx_offset = 0; + self.phase = Phase::Responding; + + let n = self.drain_tx(bufs); + return Poll::Ready(Ok(n)); + } + Err(_) => { self.phase = Phase::Receiving; - cx.waker().wake_by_ref(); - return Poll::Pending; } - - // Build TCP-framed response: 2-byte length prefix + payload. - self.buf.clear(); - self.buf - .reserve((2 + payload_len).saturating_sub(self.buf.capacity())); - self.buf - .extend_from_slice(&(payload_len as u16).to_be_bytes()); - self.buf.extend(response.response_data); - self.tx_offset = 0; - self.phase = Phase::Responding; - - let n = self.drain_tx(bufs); - return Poll::Ready(n); - } - Poll::Ready(Err(_)) => { - // Channel closed unexpectedly; return to receiving. - tracelimit::warn_ratelimited!( - "dns_tcp poll_read: response channel closed unexpectedly" - ); - self.phase = Phase::Receiving; - } - Poll::Pending => { - tracelimit::warn_ratelimited!("dns_tcp poll_read: awaiting backend response"); - return Poll::Pending; } } + Phase::Responding => { + let n = self.drain_tx(bufs); + return Poll::Ready(Ok(n)); + } + Phase::Receiving => {} } // No in-flight query and no pending response. if self.guest_fin { - Poll::Ready(0) + Poll::Ready(Ok(0)) } else { Poll::Pending } @@ -257,11 +267,6 @@ impl DnsTcpHandler { written } - /// Returns true if the connection should be dropped due to invalid framing. - pub fn protocol_error(&self) -> bool { - self.protocol_error - } - pub fn guest_fin(&self) -> bool { self.guest_fin } @@ -338,7 +343,7 @@ mod tests { let query = sample_query(); let msg = make_tcp_dns_message(&query); - let consumed = handler.ingest(&[&msg], &mut dns); + let consumed = handler.ingest(&[&msg], &mut dns).unwrap(); assert_eq!(consumed, msg.len()); let waker = std::task::Waker::from(Arc::new(NoopWaker)); @@ -346,7 +351,7 @@ mod tests { let mut buf = vec![0u8; 256]; match handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)], &mut dns) { - Poll::Ready(n) => { + Poll::Ready(Ok(n)) => { assert!(n > 0); // First 2 bytes are the TCP length prefix. let resp_len = u16::from_be_bytes([buf[0], buf[1]]) as usize; @@ -354,6 +359,7 @@ mod tests { // Response payload should match the query (echo backend). assert_eq!(&buf[2..2 + resp_len], &query); } + Poll::Ready(Err(e)) => panic!("unexpected error: {e}"), Poll::Pending => panic!("expected Ready"), } } @@ -367,7 +373,7 @@ mod tests { let msg = make_tcp_dns_message(&query); // Feed just the length prefix. - let consumed = handler.ingest(&[&msg[..2]], &mut dns); + let consumed = handler.ingest(&[&msg[..2]], &mut dns).unwrap(); assert_eq!(consumed, 2); let waker = std::task::Waker::from(Arc::new(NoopWaker)); @@ -379,11 +385,12 @@ mod tests { )); // Feed the rest. - let consumed = handler.ingest(&[&msg[2..]], &mut dns); + let consumed = handler.ingest(&[&msg[2..]], &mut dns).unwrap(); assert_eq!(consumed, msg.len() - 2); match handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)], &mut dns) { - Poll::Ready(n) => assert!(n > 0), + Poll::Ready(Ok(n)) => assert!(n > 0), + Poll::Ready(Err(e)) => panic!("unexpected error: {e}"), Poll::Pending => panic!("expected Ready after completing message"), } } @@ -402,7 +409,7 @@ mod tests { combined.extend(make_tcp_dns_message(&q2)); // Only the first message should be consumed. - let consumed = handler.ingest(&[&combined], &mut dns); + let consumed = handler.ingest(&[&combined], &mut dns).unwrap(); assert_eq!(consumed, make_tcp_dns_message(&q1).len()); let waker = std::task::Waker::from(Arc::new(NoopWaker)); @@ -411,17 +418,19 @@ mod tests { // Drain the first response. let mut buf = vec![0u8; 256]; match handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)], &mut dns) { - Poll::Ready(n) => assert!(n > 0), + Poll::Ready(Ok(n)) => assert!(n > 0), + Poll::Ready(Err(e)) => panic!("unexpected error: {e}"), Poll::Pending => panic!("expected Ready for first response"), } // Now the second message can be ingested. let remaining = &combined[consumed..]; - let consumed2 = handler.ingest(&[remaining], &mut dns); + let consumed2 = handler.ingest(&[remaining], &mut dns).unwrap(); assert_eq!(consumed2, make_tcp_dns_message(&q2).len()); match handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)], &mut dns) { - Poll::Ready(n) => assert!(n > 0), + Poll::Ready(Ok(n)) => assert!(n > 0), + Poll::Ready(Err(e)) => panic!("unexpected error: {e}"), Poll::Pending => panic!("expected Ready for second response"), } } @@ -432,7 +441,9 @@ mod tests { let mut handler = DnsTcpHandler::new(test_flow()); let query = sample_query(); - handler.ingest(&[&make_tcp_dns_message(&query)], &mut dns); + handler + .ingest(&[&make_tcp_dns_message(&query)], &mut dns) + .unwrap(); let waker = std::task::Waker::from(Arc::new(NoopWaker)); let mut cx = Context::from_waker(&waker); @@ -446,7 +457,7 @@ mod tests { // Should now report EOF. assert!(matches!( handler.poll_read(&mut cx, &mut [IoSliceMut::new(&mut buf)], &mut dns), - Poll::Ready(0) + Poll::Ready(Ok(0)) )); } @@ -458,7 +469,9 @@ mod tests { // Craft a message with msg_len <= DNS_HEADER_SIZE (12). // Length prefix says 4 bytes, which is too small for a DNS header. let bad_msg = [0x00, 0x04, 0x01, 0x02, 0x03, 0x04]; - handler.ingest(&[&bad_msg], &mut dns); - assert!(handler.protocol_error()); + assert!(matches!( + handler.ingest(&[&bad_msg], &mut dns), + Err(DnsTcpError::InvalidMessageLength) + )); } } diff --git a/vm/devices/net/net_consomme/consomme/src/tcp.rs b/vm/devices/net/net_consomme/consomme/src/tcp.rs index a753b9fac7..440d4205e4 100644 --- a/vm/devices/net/net_consomme/consomme/src/tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/tcp.rs @@ -132,16 +132,14 @@ enum TcpBackend { Dns(DnsTcpHandler), } +#[derive(Inspect)] struct TcpConnection { + #[inspect(skip)] backend: TcpBackend, + #[inspect(flatten)] inner: TcpConnectionInner, } -impl Inspect for TcpConnection { - fn inspect(&self, req: inspect::Request<'_>) { - self.inner.inspect(req) - } -} #[derive(Inspect)] struct TcpConnectionInner { @@ -774,7 +772,7 @@ impl TcpConnectionInner { let (a, b) = self.tx_buffer.unwritten_slices_mut(); let mut bufs = [IoSliceMut::new(a), IoSliceMut::new(b)]; match dns_handler.poll_read(cx, &mut bufs, dns) { - Poll::Ready(n) => { + Poll::Ready(Ok(n)) => { if n == 0 { // EOF — close the connection. if !self.state.tx_fin() { @@ -784,21 +782,26 @@ impl TcpConnectionInner { } self.tx_buffer.extend_by(n); } + Poll::Ready(Err(_)) => { + sender.rst(self.tx_send, Some(self.rx_seq)); + return false; + } Poll::Pending => break, } } // rx path: feed guest data into the DNS handler for query extraction. let (a, b) = self.rx_buffer.as_slices(); - let consumed = dns_handler.ingest(&[a, b], dns); - if consumed > 0 { - self.rx_buffer.drain(..consumed); - } - - if dns_handler.protocol_error() { - // Invalid DNS TCP framing; reset the connection. - sender.rst(self.tx_send, Some(self.rx_seq)); - return false; + match dns_handler.ingest(&[a, b], dns) { + Ok(consumed) if consumed > 0 => { + self.rx_buffer.drain(..consumed); + } + Ok(_) => {} + Err(_) => { + // Invalid DNS TCP framing; reset the connection. + sender.rst(self.tx_send, Some(self.rx_seq)); + return false; + } } self.send_next(sender); From f7186fda78ea998d88472f45868f9bffc9dbf54c Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Fri, 27 Feb 2026 12:08:44 -0800 Subject: [PATCH 21/25] . --- .../net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs | 5 +---- vm/devices/net/net_consomme/consomme/src/tcp.rs | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index 2b4b9efd41..968f5cb167 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -143,10 +143,7 @@ impl DnsTcpHandler { /// Returns `Ok(true)` if the query was submitted, `Ok(false)` if the /// message is still incomplete, or `Err` if the framing is invalid or /// the query was rejected. - fn try_submit( - &mut self, - dns: &mut DnsResolver, - ) -> Result { + fn try_submit(&mut self, dns: &mut DnsResolver) -> Result { if self.buf.len() < 2 { return Ok(false); } diff --git a/vm/devices/net/net_consomme/consomme/src/tcp.rs b/vm/devices/net/net_consomme/consomme/src/tcp.rs index 440d4205e4..7f6e75da7d 100644 --- a/vm/devices/net/net_consomme/consomme/src/tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/tcp.rs @@ -140,7 +140,6 @@ struct TcpConnection { inner: TcpConnectionInner, } - #[derive(Inspect)] struct TcpConnectionInner { loopback_port: LoopbackPortInfo, From 69db4e396584cf850edb1b8244903e393bc0c643 Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Fri, 27 Feb 2026 15:17:56 -0800 Subject: [PATCH 22/25] . --- .../consomme/src/dns_resolver/dns_tcp.rs | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index 968f5cb167..45ddcdf06a 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -34,6 +34,12 @@ pub enum DnsTcpError { /// The query was rate-limited by the resolver backend. #[error("DNS TCP query rate-limited")] RateLimited, + /// The DNS response exceeded the maximum allowed TCP message size. + #[error("DNS TCP response too large")] + ResponseTooLarge, + /// The resolver backend dropped the query without sending a response. + #[error("DNS TCP query cancelled")] + QueryCancelled, } /// Current phase of the DNS TCP handler state machine. @@ -120,11 +126,6 @@ impl DnsTcpHandler { } } - tracing::info!( - total_consumed, - buf_len = self.buf.len(), - "dns_tcp ingest: done (message incomplete)" - ); Ok(total_consumed) } @@ -198,16 +199,11 @@ impl DnsTcpHandler { if payload_len > MAX_DNS_TCP_PAYLOAD_SIZE { tracelimit::warn_ratelimited!( size = payload_len, - "DNS TCP response exceeds maximum message size, dropping" + "DNS TCP response exceeds maximum message size" ); - // Discard the oversized response and return to - // receiving so that ingest can accept the next query. - self.phase = Phase::Receiving; - cx.waker().wake_by_ref(); - return Poll::Pending; + return Poll::Ready(Err(DnsTcpError::ResponseTooLarge)); } - // Build TCP-framed response: 2-byte length prefix + payload. self.buf.clear(); self.buf .reserve((2 + payload_len).saturating_sub(self.buf.capacity())); @@ -221,7 +217,8 @@ impl DnsTcpHandler { return Poll::Ready(Ok(n)); } Err(_) => { - self.phase = Phase::Receiving; + dns.complete_tcp_query(); + return Poll::Ready(Err(DnsTcpError::QueryCancelled)); } } } From 8475d9f284972853ada50f771b9a1b19099e38a1 Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Fri, 27 Feb 2026 15:18:32 -0800 Subject: [PATCH 23/25] xtask --- .../consomme/src/dns_resolver/dns_tcp.rs | 56 +++++++++---------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index 45ddcdf06a..1330157a76 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -191,37 +191,35 @@ impl DnsTcpHandler { dns: &mut DnsResolver, ) -> Poll> { match self.phase { - Phase::InFlight => { - match ready!(self.receiver.poll_recv(cx)) { - Ok(response) => { - dns.complete_tcp_query(); - let payload_len = response.response_data.len(); - if payload_len > MAX_DNS_TCP_PAYLOAD_SIZE { - tracelimit::warn_ratelimited!( - size = payload_len, - "DNS TCP response exceeds maximum message size" - ); - return Poll::Ready(Err(DnsTcpError::ResponseTooLarge)); - } - - self.buf.clear(); - self.buf - .reserve((2 + payload_len).saturating_sub(self.buf.capacity())); - self.buf - .extend_from_slice(&(payload_len as u16).to_be_bytes()); - self.buf.extend(response.response_data); - self.tx_offset = 0; - self.phase = Phase::Responding; - - let n = self.drain_tx(bufs); - return Poll::Ready(Ok(n)); - } - Err(_) => { - dns.complete_tcp_query(); - return Poll::Ready(Err(DnsTcpError::QueryCancelled)); + Phase::InFlight => match ready!(self.receiver.poll_recv(cx)) { + Ok(response) => { + dns.complete_tcp_query(); + let payload_len = response.response_data.len(); + if payload_len > MAX_DNS_TCP_PAYLOAD_SIZE { + tracelimit::warn_ratelimited!( + size = payload_len, + "DNS TCP response exceeds maximum message size" + ); + return Poll::Ready(Err(DnsTcpError::ResponseTooLarge)); } + + self.buf.clear(); + self.buf + .reserve((2 + payload_len).saturating_sub(self.buf.capacity())); + self.buf + .extend_from_slice(&(payload_len as u16).to_be_bytes()); + self.buf.extend(response.response_data); + self.tx_offset = 0; + self.phase = Phase::Responding; + + let n = self.drain_tx(bufs); + return Poll::Ready(Ok(n)); } - } + Err(_) => { + dns.complete_tcp_query(); + return Poll::Ready(Err(DnsTcpError::QueryCancelled)); + } + }, Phase::Responding => { let n = self.drain_tx(bufs); return Poll::Ready(Ok(n)); From 1acc2372fa4582eeee546efe641b332e7f1b0ccf Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Fri, 6 Mar 2026 19:37:23 -0800 Subject: [PATCH 24/25] copilot feedback --- .../consomme/src/dns_resolver/dns_tcp.rs | 4 ++++ .../net/net_consomme/consomme/src/tcp.rs | 19 ++++++++++++++----- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs index 1330157a76..a2c3237e29 100644 --- a/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/dns_resolver/dns_tcp.rs @@ -266,6 +266,10 @@ impl DnsTcpHandler { pub fn set_guest_fin(&mut self) { self.guest_fin = true; } + + pub fn is_in_flight(&self) -> bool { + matches!(self.phase, Phase::InFlight) + } } #[cfg(test)] diff --git a/vm/devices/net/net_consomme/consomme/src/tcp.rs b/vm/devices/net/net_consomme/consomme/src/tcp.rs index 7f6e75da7d..4585cd64b9 100644 --- a/vm/devices/net/net_consomme/consomme/src/tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/tcp.rs @@ -394,9 +394,18 @@ impl Access<'_, T> { match self.inner.tcp.connections.entry(ft) { hash_map::Entry::Occupied(mut e) => { - let conn = e.get_mut(); - if !conn.inner.handle_packet(&mut sender, &tcp)? { + let keep = e.get_mut().inner.handle_packet(&mut sender, &tcp)?; + if !keep { + let dns_in_flight = matches!( + e.get().backend, + TcpBackend::Dns(ref h) if h.is_in_flight() + ); e.remove(); + if dns_in_flight { + if let Some(dns) = &mut self.inner.dns { + dns.complete_tcp_query(); + } + } } } hash_map::Entry::Vacant(e) => { @@ -821,9 +830,9 @@ impl TcpConnectionInner { ) -> bool { // Wait for the outbound connection to complete. if self.state == TcpState::Connecting { - let socket = opt_socket - .as_mut() - .expect("Connecting state requires a socket"); + let Some(socket) = opt_socket.as_mut() else { + return false; + }; match socket.poll_ready(cx, PollEvents::OUT) { Poll::Ready(r) => { if r.has_err() { From 274268669c2c02249e5f499c2177bb390fee46ee Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Mon, 9 Mar 2026 17:03:30 -0700 Subject: [PATCH 25/25] Fix rx_window_cap --- vm/devices/net/net_consomme/consomme/src/tcp.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/vm/devices/net/net_consomme/consomme/src/tcp.rs b/vm/devices/net/net_consomme/consomme/src/tcp.rs index 9c8f55ecec..03eb39fbd4 100644 --- a/vm/devices/net/net_consomme/consomme/src/tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/tcp.rs @@ -727,7 +727,6 @@ impl TcpConnection { // Immediately transition to SynReceived so the handshake SYN-ACK is sent. inner.state = TcpState::SynReceived; - inner.rx_window_cap = inner.rx_buffer.capacity(); inner.send_syn(sender, Some(inner.rx_seq)); Ok(Self {