Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(modern): try to address a page fault caused by bpf_probe_read_kernel #1858

Merged
merged 4 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions driver/modern_bpf/definitions/missing_definitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@

/* This header should include different definitions according to different architectures.*/

#ifndef likely
# define likely(X) __builtin_expect(!!(X), 1)
#endif

#ifndef unlikely
# define unlikely(X) __builtin_expect(!!(X), 0)
#endif

/*
* Per process flags
*/
Expand Down
25 changes: 0 additions & 25 deletions driver/modern_bpf/helpers/base/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,31 +16,6 @@
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>

/*=============================== LIBBPF MISSING TRACING DEFINITION ===========================*/
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

now we use libbpf 1.3 so this macro shouldn't be necessary


/* Look at bpf/bpf_tracing.h, this definition is similar
* to the others `PT_REGS_PARAM...`
*/

#ifndef PT_REGS_PARM6_CORE_SYSCALL

#if defined(bpf_target_x86)
#define __PT_PARM6_REG r9
#elif defined(bpf_target_arm64)
#define __PT_PARM6_REG regs[5]
#elif defined(bpf_target_s390)
#define __PT_PARM6_REG gprs[7]
#elif defined(bpf_target_powerpc)
#define __PT_PARM6_REG gpr[8]
#endif

#define PT_REGS_PARM6_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM6_REG)
#define PT_REGS_PARM6_CORE_SYSCALL(x) PT_REGS_PARM6_CORE(x)

#endif

/*=============================== LIBBPF MISSING TRACING DEFINITION ===========================*/

/*=============================== DEBUG MACRO ===========================*/

/* For more info about this macro look at https://nakryiko.com/posts/bpf-tips-printk/
Expand Down
15 changes: 15 additions & 0 deletions driver/modern_bpf/helpers/base/maps_getters.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ static __always_inline uint16_t maps__get_statsd_port()
return g_settings.statsd_port;
}

static __always_inline int32_t maps__get_scap_tid()
{
return g_settings.scap_tid;
}

/*=============================== SETTINGS ===========================*/

/*=============================== KERNEL CONFIGS ===========================*/
Expand All @@ -76,6 +81,16 @@ static __always_inline void maps__set_is_dropping(bool value)
is_dropping = value;
}

static __always_inline void* maps__get_socket_file_ops()
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

now this info is only in kernel space

{
return socket_file_ops;
}

static __always_inline void maps__set_socket_file_ops(void* value)
{
socket_file_ops = value;
}

/*=============================== KERNEL CONFIGS ===========================*/

/*=============================== SAMPLING TABLES ===========================*/
Expand Down
38 changes: 37 additions & 1 deletion driver/modern_bpf/helpers/extract/extract_from_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -244,8 +244,24 @@ static __always_inline struct file *extract__file_struct_from_fd(int32_t file_de
if(file_descriptor >= 0)
{
struct file **fds = NULL;
struct fdtable *fdt = NULL;
int max_fds = 0;

struct task_struct *task = get_current_task();
BPF_CORE_READ_INTO(&fds, task, files, fdt, fd);
BPF_CORE_READ_INTO(&fdt, task, files, fdt);
if(unlikely(fdt == NULL))
{
return NULL;
}

// Try a bound check to avoid reading out of bounds.
BPF_CORE_READ_INTO(&max_fds, fdt, max_fds);
if(unlikely(file_descriptor >= max_fds))
{
return NULL;
}

BPF_CORE_READ_INTO(&fds, fdt, fd);
if(fds != NULL)
{
bpf_probe_read_kernel(&f, sizeof(struct file *), &fds[file_descriptor]);
Expand Down Expand Up @@ -1127,3 +1143,23 @@ static __always_inline bool extract__exe_writable(struct task_struct *task, stru

return false;
}

/**
* @brief Return a socket pointer from a file pointer.
* @param file pointer to the file struct.
*/
static __always_inline struct socket* get_sock_from_file(struct file *file)
{
if(file == NULL)
{
return NULL;
}

struct file_operations *fop = (struct file_operations *)BPF_CORE_READ(file, f_op);
if(fop != maps__get_socket_file_ops())
{
// We are not a socket.
return NULL;
}
return (struct socket*)BPF_CORE_READ(file, private_data);
}
13 changes: 9 additions & 4 deletions driver/modern_bpf/helpers/store/auxmap_store_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -664,7 +664,12 @@ static __always_inline void auxmap__store_socktuple_param(struct auxiliary_map *
/* Get the socket family directly from the socket */
uint16_t socket_family = 0;
struct file *file = extract__file_struct_from_fd(socket_fd);
struct socket *socket = BPF_CORE_READ(file, private_data);
struct socket *socket = get_sock_from_file(file);
if(socket == NULL)
{
auxmap__store_empty_param(auxmap);
return;
}
struct sock *sk = BPF_CORE_READ(socket, sk);
BPF_CORE_READ_INTO(&socket_family, sk, __sk_common.skc_family);

Expand Down Expand Up @@ -1437,12 +1442,12 @@ static __always_inline void apply_dynamic_snaplen(struct pt_regs *regs, uint16_t
}

struct file *file = extract__file_struct_from_fd(socket_fd);
struct socket *socket = BPF_CORE_READ(file, private_data);
struct sock *sk = BPF_CORE_READ(socket, sk);
if(!sk)
struct socket *socket = get_sock_from_file(file);
if(socket == NULL)
{
return;
}
struct sock *sk = BPF_CORE_READ(socket, sk);

uint16_t port_local = 0;
uint16_t port_remote = 0;
Expand Down
11 changes: 11 additions & 0 deletions driver/modern_bpf/helpers/store/ringbuf_store_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,17 @@ static __always_inline void ringbuf__store_event_header(struct ringbuf_struct *r
ringbuf->lengths_pos = sizeof(struct ppm_evt_hdr);
}

static __always_inline void ringbuf__rewrite_header_for_calibration(struct ringbuf_struct *ringbuf, pid_t vtid)
{
struct ppm_evt_hdr *hdr = (struct ppm_evt_hdr *)ringbuf->data;
/* we set this to 0 to recognize this calibration event */
hdr->nparams = 0;
/* we cannot send the tid seen by the init namespace we need to send the tid seen by the current pid namespace
* to be compliant with what scap expects.
*/
hdr->tid = vtid;
}

/////////////////////////////////
// SUBMIT EVENT IN THE RINGBUF
////////////////////////////////
Expand Down
5 changes: 5 additions & 0 deletions driver/modern_bpf/maps/maps.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ __weak struct capture_settings g_settings;
*/
__weak bool is_dropping;

/**
* @brief Pointer we use to understand if we are operating on a socket.
*/
__weak void *socket_file_ops = NULL;

/*=============================== BPF GLOBAL VARIABLES ===============================*/

/*=============================== BPF_MAP_TYPE_PROG_ARRAY ===============================*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,17 @@ int BPF_PROG(accept_x,
* new one.
*/
int32_t sockfd = (int32_t)args[0];
struct file *file = NULL;
file = extract__file_struct_from_fd(sockfd);
struct socket *socket = BPF_CORE_READ(file, private_data);
struct sock *sk = BPF_CORE_READ(socket, sk);
BPF_CORE_READ_INTO(&queuelen, sk, sk_ack_backlog);
BPF_CORE_READ_INTO(&queuemax, sk, sk_max_ack_backlog);
if(queuelen && queuemax)
struct file *file = extract__file_struct_from_fd(sockfd);
struct socket *socket = get_sock_from_file(file);
if(socket != NULL)
{
queuepct = (uint8_t)((uint64_t)queuelen * 100 / queuemax);
struct sock *sk = BPF_CORE_READ(socket, sk);
BPF_CORE_READ_INTO(&queuelen, sk, sk_ack_backlog);
BPF_CORE_READ_INTO(&queuemax, sk, sk_max_ack_backlog);
if(queuelen && queuemax)
{
queuepct = (uint8_t)((uint64_t)queuelen * 100 / queuemax);
}
}
}
else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,17 @@ int BPF_PROG(accept4_x,
* new one.
*/
int32_t sockfd = (int32_t)args[0];
struct file *file = NULL;
file = extract__file_struct_from_fd(sockfd);
struct socket *socket = BPF_CORE_READ(file, private_data);
struct sock *sk = BPF_CORE_READ(socket, sk);
BPF_CORE_READ_INTO(&queuelen, sk, sk_ack_backlog);
BPF_CORE_READ_INTO(&queuemax, sk, sk_max_ack_backlog);
if(queuelen && queuemax)
struct file *file = extract__file_struct_from_fd(sockfd);
struct socket *socket = get_sock_from_file(file);
if(socket != NULL)
{
queuepct = (uint8_t)((uint64_t)queuelen * 100 / queuemax);
struct sock *sk = BPF_CORE_READ(socket, sk);
BPF_CORE_READ_INTO(&queuelen, sk, sk_ack_backlog);
BPF_CORE_READ_INTO(&queuemax, sk, sk_max_ack_backlog);
if(queuelen && queuemax)
{
queuepct = (uint8_t)((uint64_t)queuelen * 100 / queuemax);
}
}
}
else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,29 @@ int BPF_PROG(socket_x,
/* Parameter 1: res (type: PT_ERRNO)*/
ringbuf__store_s64(&ringbuf, ret);

/* Just called once by our scap process */
if(ret >= 0 && maps__get_socket_file_ops() == NULL)
{
struct task_struct *task = get_current_task();
/* Please note that in `g_settings.scap_tid` scap will put its virtual tid
* if it is running inside a container. If we want to extract the same information
* in the kernel we need to extract the virtual tid of the task.
*/
pid_t vtid = extract__task_xid_vnr(task, PIDTYPE_PID);
/* it means that scap is performing the calibration */
if(vtid == maps__get_scap_tid())
{
struct file *f = extract__file_struct_from_fd(ret);
if(f)
{
struct file_operations *f_op = (struct file_operations *)BPF_CORE_READ(f, f_op);
maps__set_socket_file_ops((void*)f_op);
/* we need to rewrite the event header */
ringbuf__rewrite_header_for_calibration(&ringbuf, vtid);
}
}
}

/*=============================== COLLECT PARAMETERS ===========================*/

ringbuf__submit_event(&ringbuf);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,13 @@ int BPF_PROG(socketpair_x,

/* Get source and peer. */
struct file *file = extract__file_struct_from_fd((int32_t)fds[0]);
struct socket *socket = BPF_CORE_READ(file, private_data);
BPF_CORE_READ_INTO(&source, socket, sk);
struct unix_sock *us = (struct unix_sock *)source;
BPF_CORE_READ_INTO(&peer, us, peer);
struct socket *socket = get_sock_from_file(file);
if(socket != NULL)
{
BPF_CORE_READ_INTO(&source, socket, sk);
struct unix_sock *us = (struct unix_sock *)source;
BPF_CORE_READ_INTO(&peer, us, peer);
}
}

/* Parameter 2: fd1 (type: PT_FD) */
Expand Down
1 change: 1 addition & 0 deletions driver/modern_bpf/shared_definitions/struct_definitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ struct capture_settings
uint16_t fullcapture_port_range_start; /* first interesting port */
uint16_t fullcapture_port_range_end; /* last interesting port */
uint16_t statsd_port; /* port for statsd metrics */
int32_t scap_tid; /* tid of the scap process */
};

/**
Expand Down
15 changes: 15 additions & 0 deletions driver/syscall_compat.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#pragma once

#if defined(__x86_64__) || defined(__EMSCRIPTEN__)
#include "syscall_compat_x86_64.h"
#elif defined(__aarch64__)
#include "syscall_compat_aarch64.h"
#elif defined(__s390x__)
#include "syscall_compat_s390x.h"
#elif defined(__powerpc__)
#include "syscall_compat_ppc64le.h"
#elif defined(__riscv)
#include "syscall_compat_riscv64.h"
#elif defined(__loongarch__)
#include "syscall_compat_loongarch64.h"
#endif /* __x86_64__ */
14 changes: 1 addition & 13 deletions driver/syscall_table64.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,7 @@ or GPL2.txt for full copies of the license.
* even if the driver won't be able to send all syscalls.
*/
#if defined(__GNUC__)
#if defined(__x86_64__) || defined(__EMSCRIPTEN__)
#include "syscall_compat_x86_64.h"
#elif defined(__aarch64__)
#include "syscall_compat_aarch64.h"
#elif defined(__s390x__)
#include "syscall_compat_s390x.h"
#elif defined(__powerpc__)
#include "syscall_compat_ppc64le.h"
#elif defined(__riscv)
#include "syscall_compat_riscv64.h"
#elif defined(__loongarch__)
#include "syscall_compat_loongarch64.h"
#endif /* __x86_64__ */
#include "syscall_compat.h"
#elif defined(_MSC_VER) || defined(__EMSCRIPTEN__)
// these are Linux syscall numbers and obviously meaningless for Windows/macOS
// but we need *some* definition so that we have a mapping for scap_ppm_sc.c
Expand Down
7 changes: 7 additions & 0 deletions userspace/libpman/include/libpman.h
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,13 @@ extern "C"
*/
void pman_set_statsd_port(uint16_t statsd_port);

/**
* @brief Set scap tid for socket calibration logic.
*
* @param scap_tid
*/
void pman_set_scap_tid(int32_t scap_tid);

/**
* @brief Get API version to check it a runtime.
*
Expand Down
5 changes: 5 additions & 0 deletions userspace/libpman/src/maps.c
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ void pman_set_statsd_port(uint16_t statsd_port)
g_state.skel->bss->g_settings.statsd_port = statsd_port;
}

void pman_set_scap_tid(int32_t scap_tid)
{
g_state.skel->bss->g_settings.scap_tid = scap_tid;
}

void pman_mark_single_64bit_syscall(int intersting_syscall_id, bool interesting)
{
g_state.skel->bss->g_64bit_interesting_syscalls_table[intersting_syscall_id] = interesting;
Expand Down
Loading
Loading