Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/libkrun.h
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ int32_t krun_add_virtiofs2(uint32_t ctx_id,
/* Network backend flags. Each macro body is parenthesized so that the shift
   cannot re-associate with neighbouring operators when the macro is used
   inside a larger expression (e.g. `FLAG + 1` or `FLAG * n`). */

/* Send the VFKIT magic after establishing the connection,
   as required by gvproxy in vfkit mode. */
#define NET_FLAG_VFKIT (1 << 0)
/* NOTE(review): presumably signals that frames exchanged with the backend
   carry the virtio-net header -- confirm against the net backends. */
#define NET_FLAG_INCLUDE_VNET_HEADER (1 << 1)

/* Taken from uapi/linux/virtio_net.h */
#define NET_FEATURE_CSUM 1 << 0
Expand Down
4 changes: 4 additions & 0 deletions src/devices/src/virtio/net/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ struct VirtioNetConfig {
mac: [u8; 6],
status: u16,
max_virtqueue_pairs: u16,
include_vnet_header: bool,
}

// Safe because it only has data and has no implicit padding.
Expand Down Expand Up @@ -91,6 +92,7 @@ impl Net {
cfg_backend: VirtioNetBackend,
mac: [u8; 6],
features: u32,
include_vnet_header: bool,
) -> Result<Self> {
let avail_features = features as u64
| (1 << VIRTIO_NET_F_MAC)
Expand All @@ -108,6 +110,7 @@ impl Net {
mac,
status: 0,
max_virtqueue_pairs: 0,
include_vnet_header,
};

Ok(Net {
Expand Down Expand Up @@ -207,6 +210,7 @@ impl VirtioDevice for Net {
interrupt.clone(),
mem.clone(),
self.acked_features,
self.config.include_vnet_header,
self.cfg_backend.clone(),
) {
Ok(worker) => {
Expand Down
7 changes: 6 additions & 1 deletion src/devices/src/virtio/net/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@
use std::{io, mem, result};
use virtio_bindings::virtio_net::virtio_net_hdr_v1;

/// Each frame forwarded to a unixstream backend is prefixed with a 4 byte "header".
/// It is interpreted as a big-endian u32 integer and is the length of the following ethernet frame.
/// In order to avoid unnecessary allocations and copies, the TX buffer is allocated with extra
/// space to accommodate this header.
const FRAME_HEADER_LEN: usize = 4;
/// Size of a frame buffer: the pre-existing 65562-byte maximum plus `FRAME_HEADER_LEN` bytes
/// reserved for the frame length prefix.
/// NOTE(review): the derivation of 65562 is not shown here — confirm before changing it.
pub const MAX_BUFFER_SIZE: usize = 65562 + FRAME_HEADER_LEN;
/// Size used for every virtqueue of the device.
pub const QUEUE_SIZE: u16 = 1024;
/// Number of virtqueues (presumably one RX and one TX — confirm against the device code).
pub const NUM_QUEUES: usize = 2;
/// Per-queue sizes: `NUM_QUEUES` entries, each equal to `QUEUE_SIZE`.
pub const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE; NUM_QUEUES];
Expand Down
37 changes: 30 additions & 7 deletions src/devices/src/virtio/net/tap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,24 @@ use virtio_bindings::virtio_net::{
};

use super::backend::{ConnectError, NetBackend, ReadError, WriteError};
use super::{write_virtio_net_hdr, FRAME_HEADER_LEN};

// ioctl wrappers in the 'T' ioctl group, generated by the `ioctl_write_*!` macros.
// NOTE(review): request numbers 202 / 208 / 216 presumably correspond to TUNSETIFF,
// TUNSETOFFLOAD and TUNSETVNETHDRSZ from linux/if_tun.h — confirm.
ioctl_write_ptr!(tunsetiff, b'T', 202, c_int);
ioctl_write_int!(tunsetoffload, b'T', 208);
ioctl_write_ptr!(tunsetvnethdrsz, b'T', 216, c_int);

/// Network backend that exchanges frames with a tap device.
pub struct Tap {
/// Descriptor obtained in `new` by opening `/dev/net/tun`; frames are read from and
/// written to it.
fd: OwnedFd,
/// When true, `new` requests `IFF_VNET_HDR` for the interface. When false, `read_frame`
/// writes a virtio-net header into the buffer itself, ahead of the data read from the tap.
include_vnet_header: bool,
}

impl Tap {
/// Create an endpoint using the file descriptor of a tap device
pub fn new(tap_name: String, vnet_features: u64) -> Result<Self, ConnectError> {
pub fn new(
tap_name: String,
vnet_features: u64,
include_vnet_header: bool,
) -> Result<Self, ConnectError> {
let fd = match open("/dev/net/tun", OFlag::O_RDWR, Mode::empty()) {
Ok(fd) => fd,
Err(err) => return Err(ConnectError::OpenNetTun(err)),
Expand All @@ -41,7 +47,10 @@ impl Tap {
);
}

req.ifr_ifru.ifru_flags = IFF_TAP as i16 | IFF_NO_PI as i16 | IFF_VNET_HDR as i16;
req.ifr_ifru.ifru_flags = IFF_TAP as i16 | IFF_NO_PI as i16;
if include_vnet_header {
req.ifr_ifru.ifru_flags |= IFF_VNET_HDR as i16;
}

let mut offload_flags: u64 = 0;
if (vnet_features & (1 << VIRTIO_NET_F_GUEST_CSUM)) != 0 {
Expand Down Expand Up @@ -84,15 +93,24 @@ impl Tap {
Err(e) => error!("couldn't obtain fd flags id={fd:?}, err={e}"),
};

Ok(Self { fd })
Ok(Self {
fd,
include_vnet_header,
})
}
}

impl NetBackend for Tap {
/// Try to read a frame from the tap device. If no bytes are available, reports
/// ReadError::NothingRead.
fn read_frame(&mut self, buf: &mut [u8]) -> Result<usize, ReadError> {
let frame_length = match read(&self.fd, buf) {
let buf_offset = if !self.include_vnet_header {
write_virtio_net_hdr(buf)
} else {
0
};

let frame_length = match read(&self.fd, &mut buf[buf_offset..]) {
Ok(f) => f,
#[allow(unreachable_patterns)]
Err(nix::Error::EAGAIN | nix::Error::EWOULDBLOCK) => {
Expand All @@ -103,12 +121,17 @@ impl NetBackend for Tap {
}
};
debug!("Read eth frame from tap: {frame_length} bytes");
Ok(frame_length)
Ok(buf_offset + frame_length)
}

/// Try to write a frame to the tap device.
fn write_frame(&mut self, _hdr_len: usize, buf: &mut [u8]) -> Result<(), WriteError> {
let ret = write(&self.fd, buf).map_err(WriteError::Internal)?;
fn write_frame(&mut self, hdr_len: usize, buf: &mut [u8]) -> Result<(), WriteError> {
let buf_offset = if !self.include_vnet_header {
hdr_len
} else {
FRAME_HEADER_LEN
};
let ret = write(&self.fd, buf[buf_offset..]).map_err(WriteError::Internal)?;
debug!("Written frame size={}, written={}", buf.len(), ret);
Ok(())
}
Expand Down
42 changes: 33 additions & 9 deletions src/devices/src/virtio/net/unixgram.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,18 @@ use std::os::fd::{AsRawFd, OwnedFd, RawFd};
use std::path::PathBuf;

use super::backend::{ConnectError, NetBackend, ReadError, WriteError};
use super::write_virtio_net_hdr;
use super::{write_virtio_net_hdr, FRAME_HEADER_LEN};

/// The 4-byte ASCII magic `VFKT`.
/// NOTE(review): presumably what `Unixgram::open` sends when `send_vfkit_magic` is set; the
/// sending code is outside this excerpt — confirm.
const VFKIT_MAGIC: [u8; 4] = *b"VFKT";

/// Network backend that exchanges frames with a userspace network proxy through a socket
/// descriptor, one `recv`/`send` call per frame.
pub struct Unixgram {
/// Socket connected to the userspace network proxy.
fd: OwnedFd,
/// When false, `read_frame` writes a virtio-net header into the buffer ahead of the received
/// data and `write_frame` skips the first `hdr_len` bytes. When true, data is received at
/// offset 0 and `write_frame` skips only `FRAME_HEADER_LEN` bytes.
include_vnet_header: bool,
}

impl Unixgram {
/// Create the backend with a pre-established connection to the userspace network proxy.
pub fn new(fd: OwnedFd) -> Self {
pub fn new(fd: OwnedFd, include_vnet_header: bool) -> Self {
// Ensure the socket is in non-blocking mode.
match fcntl(&fd, FcntlArg::F_GETFL) {
Ok(flags) => match OFlag::from_bits(flags) {
Expand Down Expand Up @@ -47,11 +48,18 @@ impl Unixgram {
};
}

Self { fd }
Self {
fd,
include_vnet_header,
}
}

/// Create the backend opening a connection to the userspace network proxy.
pub fn open(path: PathBuf, send_vfkit_magic: bool) -> Result<Self, ConnectError> {
pub fn open(
path: PathBuf,
send_vfkit_magic: bool,
include_vnet_header: bool,
) -> Result<Self, ConnectError> {
// We cannot create a non-blocking socket on macOS here. This is done later in new().
let fd = socket(
AddressFamily::Unix,
Expand Down Expand Up @@ -90,15 +98,24 @@ impl Unixgram {
getsockopt(&fd, sockopt::RcvBuf)
);

Ok(Self::new(fd))
Ok(Self::new(fd, include_vnet_header))
}
}

impl NetBackend for Unixgram {
/// Try to read a frame from the proxy. If no bytes are available, reports ReadError::NothingRead.
fn read_frame(&mut self, buf: &mut [u8]) -> Result<usize, ReadError> {
let hdr_len = write_virtio_net_hdr(buf);
let frame_length = match recv(self.fd.as_raw_fd(), &mut buf[hdr_len..], MsgFlags::empty()) {
let buf_offset = if !self.include_vnet_header {
write_virtio_net_hdr(buf)
} else {
0
};

let frame_length = match recv(
self.fd.as_raw_fd(),
&mut buf[buf_offset..],
MsgFlags::empty(),
) {
Ok(f) => f,
#[allow(unreachable_patterns)]
Err(nix::Error::EAGAIN | nix::Error::EWOULDBLOCK) => {
Expand All @@ -109,12 +126,19 @@ impl NetBackend for Unixgram {
}
};
debug!("Read eth frame from proxy: {frame_length} bytes");
Ok(hdr_len + frame_length)
Ok(buf_offset + frame_length)
}

/// Try to write a frame to the proxy.
fn write_frame(&mut self, hdr_len: usize, buf: &mut [u8]) -> Result<(), WriteError> {
let ret = send(self.fd.as_raw_fd(), &buf[hdr_len..], MsgFlags::empty())
let buf_offset = if !self.include_vnet_header {
hdr_len
} else {
// Unixgram backends don't include the frame length header.
FRAME_HEADER_LEN
};

let ret = send(self.fd.as_raw_fd(), &buf[buf_offset..], MsgFlags::empty())
.map_err(WriteError::Internal)?;
debug!(
"Written frame size={}, written={}",
Expand Down
35 changes: 23 additions & 12 deletions src/devices/src/virtio/net/unixstream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,20 @@ use std::{
use crate::virtio::net::backend::ConnectError;

use super::backend::{NetBackend, ReadError, WriteError};
use super::write_virtio_net_hdr;

/// Each frame exchanged with the network proxy is prefixed with a 4 byte "header".
/// It is interpreted as a big-endian u32 integer and is the length of the following ethernet frame.
const FRAME_HEADER_LEN: usize = 4;
use super::{write_virtio_net_hdr, FRAME_HEADER_LEN};

/// Network backend that exchanges length-prefixed frames with a userspace network proxy over
/// a stream socket.
pub struct Unixstream {
/// Socket connected to the userspace network proxy.
fd: OwnedFd,
// 0 when a frame length has not been read
expecting_frame_length: u32,
// 0 if last write is fully complete, otherwise the length that was written
last_partial_write_length: usize,
/// When false, `read_frame` writes a virtio-net header into the buffer ahead of the frame, and
/// `write_frame` places the frame length in the last `FRAME_HEADER_LEN` bytes before
/// `hdr_len`. When true, the frame is read at offset 0 and the frame length is written at
/// the very start of the buffer.
include_vnet_header: bool,
}

impl Unixstream {
/// Create the backend with a pre-established connection to the userspace network proxy.
pub fn new(fd: OwnedFd) -> Self {
pub fn new(fd: OwnedFd, include_vnet_header: bool) -> Self {
if let Err(e) = setsockopt(&fd, sockopt::SndBuf, &(16 * 1024 * 1024)) {
log::warn!("Failed to increase SO_SNDBUF (performance may be decreased): {e}");
}
Expand All @@ -41,11 +38,12 @@ impl Unixstream {
fd,
expecting_frame_length: 0,
last_partial_write_length: 0,
include_vnet_header,
}
}

/// Create the backend opening a connection to the userspace network proxy.
pub fn open(path: PathBuf) -> Result<Self, ConnectError> {
pub fn open(path: PathBuf, include_vnet_header: bool) -> Result<Self, ConnectError> {
let fd = socket(
AddressFamily::Unix,
SockType::Stream,
Expand All @@ -70,6 +68,7 @@ impl Unixstream {
fd,
expecting_frame_length: 0,
last_partial_write_length: 0,
include_vnet_header,
})
}

Expand Down Expand Up @@ -159,13 +158,17 @@ impl NetBackend for Unixstream {
};
}

let hdr_len = write_virtio_net_hdr(buf);
let buf = &mut buf[hdr_len..];
let buf_offset = if !self.include_vnet_header {
write_virtio_net_hdr(buf)
} else {
0
};
let buf = &mut buf[buf_offset..];
let frame_length = self.expecting_frame_length as usize;
self.read_loop(&mut buf[..frame_length], false)?;
self.expecting_frame_length = 0;
log::trace!("Read eth frame from network proxy: {frame_length} bytes");
Ok(hdr_len + frame_length)
Ok(buf_offset + frame_length)
}

/// Try to write a frame to the proxy.
Expand All @@ -188,10 +191,18 @@ impl NetBackend for Unixstream {
assert!(buf.len() > hdr_len);
let frame_length = buf.len() - hdr_len;

buf[hdr_len - FRAME_HEADER_LEN..hdr_len]
// If the vnet header is not included, overwrite it with the frame length, otherwise
// write the frame length before the vnet header.
let buf_offset = if !self.include_vnet_header {
hdr_len - FRAME_HEADER_LEN
} else {
0
};

buf[buf_offset..buf_offset + FRAME_HEADER_LEN]
.copy_from_slice(&(frame_length as u32).to_be_bytes());

self.write_loop(&buf[hdr_len - FRAME_HEADER_LEN..])?;
self.write_loop(&buf[buf_offset..buf_offset + frame_length])?;
Ok(())
}

Expand Down
Loading