Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(modern): try to address a page fault caused by bpf_probe_read_kernel #1858

Merged
merged 4 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions driver/modern_bpf/helpers/base/maps_getters.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ static __always_inline uint16_t maps__get_statsd_port()
return g_settings.statsd_port;
}

static __always_inline int32_t maps__get_scap_pid()
{
return g_settings.scap_pid;
}

/*=============================== SETTINGS ===========================*/

/*=============================== KERNEL CONFIGS ===========================*/
Expand All @@ -76,6 +81,16 @@ static __always_inline void maps__set_is_dropping(bool value)
is_dropping = value;
}

static __always_inline void* maps__get_socket_file_ops()
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

now this info is only in kernel space

{
return socket_file_ops;
}

static __always_inline void maps__set_socket_file_ops(void* value)
{
socket_file_ops = value;
}

/*=============================== KERNEL CONFIGS ===========================*/

/*=============================== SAMPLING TABLES ===========================*/
Expand Down
38 changes: 37 additions & 1 deletion driver/modern_bpf/helpers/extract/extract_from_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -244,8 +244,24 @@ static __always_inline struct file *extract__file_struct_from_fd(int32_t file_de
if(file_descriptor >= 0)
{
struct file **fds = NULL;
struct fdtable *fdt = NULL;
int max_fds = 0;

struct task_struct *task = get_current_task();
BPF_CORE_READ_INTO(&fds, task, files, fdt, fd);
BPF_CORE_READ_INTO(&fdt, task, files, fdt);
if(unlikely(fdt == NULL))
{
return NULL;
}

// Try a bound check to avoid reading out of bounds.
BPF_CORE_READ_INTO(&max_fds, fdt, max_fds);
if(unlikely(file_descriptor >= max_fds))
{
return NULL;
}

BPF_CORE_READ_INTO(&fds, fdt, fd);
if(fds != NULL)
{
bpf_probe_read_kernel(&f, sizeof(struct file *), &fds[file_descriptor]);
Expand Down Expand Up @@ -1127,3 +1143,23 @@ static __always_inline bool extract__exe_writable(struct task_struct *task, stru

return false;
}

/**
* @brief Return a socket pointer from a file pointer.
* @param file pointer to the file struct.
*/
static __always_inline struct socket* get_sock_from_file(struct file *file)
{
if(file == NULL)
{
return NULL;
}

struct file_operations *fop = (struct file_operations *)BPF_CORE_READ(file, f_op);
if(fop != maps__get_socket_file_ops())
{
// We are not a socket.
return NULL;
}
return (struct socket*)BPF_CORE_READ(file, private_data);
}
13 changes: 9 additions & 4 deletions driver/modern_bpf/helpers/store/auxmap_store_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -664,7 +664,12 @@ static __always_inline void auxmap__store_socktuple_param(struct auxiliary_map *
/* Get the socket family directly from the socket */
uint16_t socket_family = 0;
struct file *file = extract__file_struct_from_fd(socket_fd);
struct socket *socket = BPF_CORE_READ(file, private_data);
struct socket *socket = get_sock_from_file(file);
if(socket == NULL)
{
auxmap__store_empty_param(auxmap);
return;
}
struct sock *sk = BPF_CORE_READ(socket, sk);
BPF_CORE_READ_INTO(&socket_family, sk, __sk_common.skc_family);

Expand Down Expand Up @@ -1437,12 +1442,12 @@ static __always_inline void apply_dynamic_snaplen(struct pt_regs *regs, uint16_t
}

struct file *file = extract__file_struct_from_fd(socket_fd);
struct socket *socket = BPF_CORE_READ(file, private_data);
struct sock *sk = BPF_CORE_READ(socket, sk);
if(!sk)
struct socket *socket = get_sock_from_file(file);
if(socket == NULL)
{
return;
}
struct sock *sk = BPF_CORE_READ(socket, sk);

uint16_t port_local = 0;
uint16_t port_remote = 0;
Expand Down
11 changes: 11 additions & 0 deletions driver/modern_bpf/helpers/store/ringbuf_store_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,17 @@ static __always_inline void ringbuf__store_event_header(struct ringbuf_struct *r
ringbuf->lengths_pos = sizeof(struct ppm_evt_hdr);
}

static __always_inline void ringbuf__rewrite_header_for_calibration(struct ringbuf_struct *ringbuf, pid_t vtid)
{
struct ppm_evt_hdr *hdr = (struct ppm_evt_hdr *)ringbuf->data;
/* we set this to 0 to recognize this calibration event */
hdr->nparams = 0;
/* we cannot send the tid seen by the init namespace we need to send the pid seen by the current pid namespace
* to be compliant with what scap expects.
*/
hdr->tid = vtid;
}

/////////////////////////////////
// SUBMIT EVENT IN THE RINGBUF
////////////////////////////////
Expand Down
5 changes: 5 additions & 0 deletions driver/modern_bpf/maps/maps.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ __weak struct capture_settings g_settings;
*/
__weak bool is_dropping;

/**
* @brief Pointer we use to understand if we are operating on a socket.
*/
__weak void *socket_file_ops = NULL;

/*=============================== BPF GLOBAL VARIABLES ===============================*/

/*=============================== BPF_MAP_TYPE_PROG_ARRAY ===============================*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,17 @@ int BPF_PROG(accept_x,
* new one.
*/
int32_t sockfd = (int32_t)args[0];
struct file *file = NULL;
file = extract__file_struct_from_fd(sockfd);
struct socket *socket = BPF_CORE_READ(file, private_data);
struct sock *sk = BPF_CORE_READ(socket, sk);
BPF_CORE_READ_INTO(&queuelen, sk, sk_ack_backlog);
BPF_CORE_READ_INTO(&queuemax, sk, sk_max_ack_backlog);
if(queuelen && queuemax)
struct file *file = extract__file_struct_from_fd(sockfd);
struct socket *socket = get_sock_from_file(file);
if(socket != NULL)
{
queuepct = (uint8_t)((uint64_t)queuelen * 100 / queuemax);
struct sock *sk = BPF_CORE_READ(socket, sk);
BPF_CORE_READ_INTO(&queuelen, sk, sk_ack_backlog);
BPF_CORE_READ_INTO(&queuemax, sk, sk_max_ack_backlog);
if(queuelen && queuemax)
{
queuepct = (uint8_t)((uint64_t)queuelen * 100 / queuemax);
}
}
}
else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,17 @@ int BPF_PROG(accept4_x,
* new one.
*/
int32_t sockfd = (int32_t)args[0];
struct file *file = NULL;
file = extract__file_struct_from_fd(sockfd);
struct socket *socket = BPF_CORE_READ(file, private_data);
struct sock *sk = BPF_CORE_READ(socket, sk);
BPF_CORE_READ_INTO(&queuelen, sk, sk_ack_backlog);
BPF_CORE_READ_INTO(&queuemax, sk, sk_max_ack_backlog);
if(queuelen && queuemax)
struct file *file = extract__file_struct_from_fd(sockfd);
struct socket *socket = get_sock_from_file(file);
if(socket != NULL)
{
queuepct = (uint8_t)((uint64_t)queuelen * 100 / queuemax);
struct sock *sk = BPF_CORE_READ(socket, sk);
BPF_CORE_READ_INTO(&queuelen, sk, sk_ack_backlog);
BPF_CORE_READ_INTO(&queuemax, sk, sk_max_ack_backlog);
if(queuelen && queuemax)
{
queuepct = (uint8_t)((uint64_t)queuelen * 100 / queuemax);
}
}
}
else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,29 @@ int BPF_PROG(socket_x,
/* Parameter 1: res (type: PT_ERRNO)*/
ringbuf__store_s64(&ringbuf, ret);

/* Just called once by our scap process */
if(ret >= 0 && maps__get_socket_file_ops() == NULL)
{
struct task_struct *task = get_current_task();
/* Please note that in `g_settings.scap_pid` scap will put its virtual pid
* if it is running inside a container. If we want to extract the same information
* in the kernel we need to extract the virtual pid of the task.
*/
pid_t vpid = extract__task_xid_vnr(task, PIDTYPE_TGID);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since we can run inside a container we need to extract the vpid to match the pid we extract in userspace with getpid()

/* it means that scap is performing the calibration */
if(vpid == maps__get_scap_pid())
{
struct file *f = extract__file_struct_from_fd(ret);
if(f)
{
struct file_operations *f_op = (struct file_operations *)BPF_CORE_READ(f, f_op);
maps__set_socket_file_ops((void*)f_op);
/* we need to rewrite the event header */
ringbuf__rewrite_header_for_calibration(&ringbuf, vpid);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead of using additional variables we can simply use our event to communicate something to userspace

}
}
}

/*=============================== COLLECT PARAMETERS ===========================*/

ringbuf__submit_event(&ringbuf);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,13 @@ int BPF_PROG(socketpair_x,

/* Get source and peer. */
struct file *file = extract__file_struct_from_fd((int32_t)fds[0]);
struct socket *socket = BPF_CORE_READ(file, private_data);
BPF_CORE_READ_INTO(&source, socket, sk);
struct unix_sock *us = (struct unix_sock *)source;
BPF_CORE_READ_INTO(&peer, us, peer);
struct socket *socket = get_sock_from_file(file);
if(socket != NULL)
{
BPF_CORE_READ_INTO(&source, socket, sk);
struct unix_sock *us = (struct unix_sock *)source;
BPF_CORE_READ_INTO(&peer, us, peer);
}
}

/* Parameter 2: fd1 (type: PT_FD) */
Expand Down
1 change: 1 addition & 0 deletions driver/modern_bpf/shared_definitions/struct_definitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ struct capture_settings
uint16_t fullcapture_port_range_start; /* first interesting port */
uint16_t fullcapture_port_range_end; /* last interesting port */
uint16_t statsd_port; /* port for statsd metrics */
int32_t scap_pid; /* pid of the scap process */
};

/**
Expand Down
7 changes: 7 additions & 0 deletions userspace/libpman/include/libpman.h
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,13 @@ extern "C"
*/
void pman_set_statsd_port(uint16_t statsd_port);

/**
* @brief Set scap pid for socket calibration logic.
*
* @param scap_pid port number.
*/
void pman_set_scap_pid(int32_t scap_pid);

/**
* @brief Get API version to check it a runtime.
*
Expand Down
5 changes: 5 additions & 0 deletions userspace/libpman/src/maps.c
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ void pman_set_statsd_port(uint16_t statsd_port)
g_state.skel->bss->g_settings.statsd_port = statsd_port;
}

void pman_set_scap_pid(int32_t scap_pid)
{
g_state.skel->bss->g_settings.scap_pid = scap_pid;
}

void pman_mark_single_64bit_syscall(int intersting_syscall_id, bool interesting)
{
g_state.skel->bss->g_64bit_interesting_syscalls_table[intersting_syscall_id] = interesting;
Expand Down
82 changes: 79 additions & 3 deletions userspace/libscap/engine/modern_bpf/scap_modern_bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ limitations under the License.
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/socket.h>

#define SCAP_HANDLE_T struct modern_bpf_engine
#include <libscap/engine/modern_bpf/scap_modern_bpf.h>
Expand All @@ -30,6 +31,7 @@ limitations under the License.
#include <sys/utsname.h>
#include <libscap/ringbuffer/ringbuffer.h>
#include <libscap/scap_engine_util.h>
#include <libscap/strerror.h>

static struct modern_bpf_engine* scap_modern_bpf__alloc_engine(scap_t* main_handle, char* lasterr_ptr)
{
Expand Down Expand Up @@ -164,6 +166,74 @@ int32_t scap_modern_bpf__stop_capture(struct scap_engine_handle engine)
return pman_enforce_sc_set(NULL);
}

static int32_t calibrate_socket_file_ops(struct scap_engine_handle engine)
{
/* Set the scap_pid for the socket calibration.
* If we are in a container this is the virtual pid.
*/
pid_t scap_pid = getpid();
pman_set_scap_pid(scap_pid);

/* We just need to enable the socket syscall for the socket calibration */
engine.m_handle->curr_sc_set.ppm_sc[PPM_SC_SOCKET] = 1;
if(scap_modern_bpf__start_capture(engine) != SCAP_SUCCESS)
{
return scap_errprintf(engine.m_handle->m_lasterr, errno, "unable to start the capture for the socket calibration");
}

int fd = socket(AF_INET, SOCK_DGRAM, 0);
if(fd == -1)
{
return scap_errprintf(engine.m_handle->m_lasterr, errno, "unable to create a socket for the calibration");
}
close(fd);

/* We need to stop the capture */
if(scap_modern_bpf__stop_capture(engine) != SCAP_SUCCESS)
{
return scap_errprintf(engine.m_handle->m_lasterr, errno, "unable to stop the capture after the calibration");
}

/* We need to read the socket event from the buffer */
scap_evt* pevent = NULL;
uint16_t attempts = 0;
uint16_t buffer_id = 0;
uint32_t flags = 0;
int32_t res = 0;
bool found = false;

while(attempts <= 1)
{
res = scap_modern_bpf__next(engine, &pevent, &buffer_id, &flags);
if(res == SCAP_SUCCESS && pevent != NULL)
{
/* This is not a socket event or this is not our socket event */
if(pevent->type != PPME_SOCKET_SOCKET_X || pevent->tid != scap_pid)
{
continue;
}

/* BPF side we send this special event with nparams = 0 */
if(pevent->nparams == 0)
{
/* We don't want to stop here because we want to clean all the buffers. */
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we clean all the buffers so we will restart the capture in a clean state

found = true;
}
}
else if(res == SCAP_TIMEOUT)
{
/* We need more than one attempt because the first time we just need to read the producers' positions. */
attempts++;
}
}

if(!found)
{
return scap_errprintf(engine.m_handle->m_lasterr, 0, "unable to find the socket event for the calibration in the ringbuffers");
}
return SCAP_SUCCESS;
}

int32_t scap_modern_bpf__init(scap_t* handle, scap_open_args* oargs)
{
int ret = 0;
Expand Down Expand Up @@ -211,9 +281,6 @@ int32_t scap_modern_bpf__init(scap_t* handle, scap_open_args* oargs)
return ret;
}

/* Store interesting sc codes */
memcpy(&engine.m_handle->curr_sc_set, &oargs->ppm_sc_of_interest, sizeof(interesting_ppm_sc_set));

/* Set the boot time */
uint64_t boot_time = 0;
if(scap_get_precise_boot_time(handle->m_lasterr, &boot_time) != SCAP_SUCCESS)
Expand All @@ -222,6 +289,15 @@ int32_t scap_modern_bpf__init(scap_t* handle, scap_open_args* oargs)
}
pman_set_boot_time(boot_time);

/* Calibrate the socket at init time */
if(calibrate_socket_file_ops(engine) != SCAP_SUCCESS)
{
return SCAP_FAILURE;
}

/* Store interesting sc codes */
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moved after the calibration so we will clear the curr_sc_set

memcpy(&engine.m_handle->curr_sc_set, &oargs->ppm_sc_of_interest, sizeof(interesting_ppm_sc_set));

engine.m_handle->m_api_version = pman_get_probe_api_ver();
engine.m_handle->m_schema_version = pman_get_probe_schema_ver();

Expand Down
Loading