"Fossies" - the Fresh Open Source Software Archive

Member "sysdig-0.26.1/driver/bpf/plumbing_helpers.h" (24 May 2019, 10201 Bytes) of package /linux/misc/sysdig-0.26.1.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "plumbing_helpers.h" see the Fossies "Dox" file reference documentation.

/*

Copyright (c) 2013-2018 Draios Inc. dba Sysdig.

This file is dual licensed under either the MIT or GPL 2. See MIT.txt
or GPL2.txt for full copies of the license.

*/
#ifndef __PLUMBING_HELPERS_H
#define __PLUMBING_HELPERS_H

#include <linux/ptrace.h>
#include <linux/version.h>
#include <linux/fdtable.h>

#include "types.h"

#define _READ(P) ({ typeof(P) _val;             \
            memset(&_val, 0, sizeof(_val));     \
            bpf_probe_read(&_val, sizeof(_val), &P);    \
            _val;                   \
         })
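
/* Illustrative usage sketch (not part of the original file): _READ()
 * copies a kernel value into a zeroed stack variable through
 * bpf_probe_read(), so a faulting read yields 0 instead of a direct
 * pointer dereference the verifier would reject. For example, to read
 * the current task's pid:
 *
 *   struct task_struct *task = (struct task_struct *)bpf_get_current_task();
 *   pid_t pid = _READ(task->pid);
 */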

#ifdef BPF_DEBUG
#define bpf_printk(fmt, ...)                    \
    do {                            \
        char s[] = fmt;                 \
        bpf_trace_printk(s, sizeof(s), ##__VA_ARGS__);  \
    } while (0)
#else
#define bpf_printk(fmt, ...)
#endif
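
/* Note (not in the original file): when BPF_DEBUG is defined, the
 * bpf_trace_printk() output can be read from
 * /sys/kernel/debug/tracing/trace_pipe; without it, bpf_printk()
 * compiles to nothing and has zero runtime cost.
 */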

#ifndef BPF_SUPPORTS_RAW_TRACEPOINTS
/*
 * Without raw tracepoint support the enter and exit probes see different
 * contexts, so the enter-time arguments are stashed in a map keyed by the
 * current thread id (the low 32 bits of bpf_get_current_pid_tgid()) and
 * looked up again at exit time.
 */
static __always_inline int __stash_args(unsigned long long id,
                    unsigned long *args)
{
    int ret = bpf_map_update_elem(&stash_map, &id, args, BPF_ANY);

    if (ret)
        bpf_printk("error stashing arguments for %d:%d\n", id, ret);

    return ret;
}

static __always_inline int stash_args(unsigned long *args)
{
    unsigned long long id = bpf_get_current_pid_tgid() & 0xffffffff;

    return __stash_args(id, args);
}

static __always_inline unsigned long *__unstash_args(unsigned long long id)
{
    struct sys_stash_args *args;

    args = bpf_map_lookup_elem(&stash_map, &id);
    if (!args)
        return NULL;

    return args->args;
}

static __always_inline unsigned long *unstash_args(void)
{
    unsigned long long id = bpf_get_current_pid_tgid() & 0xffffffff;

    return __unstash_args(id);
}

static __always_inline void delete_args(void)
{
    unsigned long long id = bpf_get_current_pid_tgid() & 0xffffffff;

    bpf_map_delete_elem(&stash_map, &id);
}
#endif
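
/* Illustrative round-trip sketch (assumed usage, based on how the
 * sys_enter/sys_exit probes elsewhere in this driver pair these helpers;
 * enter_args here stands for the struct sys_enter_args context and the
 * name is illustrative):
 *
 *   // in the sys_enter probe:
 *   stash_args(enter_args->args);
 *
 *   // in the sys_exit probe:
 *   unsigned long *args = unstash_args();
 *   if (args) {
 *       // read the saved enter-time arguments
 *   }
 *   delete_args();
 */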

/* Can be called only from an exit event.
 */
static __always_inline long bpf_syscall_get_retval(void *ctx)
{
    struct sys_exit_args *args = (struct sys_exit_args *)ctx;

    return args->ret;
}

/* Can be called from both enter and exit events; id is at the same
 * offset in struct sys_enter_args and struct sys_exit_args.
 */
static __always_inline long bpf_syscall_get_nr(void *ctx)
{
    struct sys_enter_args *args = (struct sys_enter_args *)ctx;
    long id;

#ifdef BPF_SUPPORTS_RAW_TRACEPOINTS
    struct pt_regs *regs = (struct pt_regs *)args->regs;

    /* orig_ax holds the syscall number saved on kernel entry */
    id = _READ(regs->orig_ax);
#else
    id = args->id;
#endif

    return id;
}

#ifndef BPF_SUPPORTS_RAW_TRACEPOINTS
static __always_inline unsigned long bpf_syscall_get_argument_from_args(unsigned long *args,
                                    int idx)
{
    unsigned long arg;

    if (idx <= 5)
        arg = args[idx];
    else
        arg = 0;

    return arg;
}
#endif

static __always_inline unsigned long bpf_syscall_get_argument_from_ctx(void *ctx,
                                       int idx)
{
    unsigned long arg;

#ifdef BPF_SUPPORTS_RAW_TRACEPOINTS
    struct sys_enter_args *args = (struct sys_enter_args *)ctx;
    struct pt_regs *regs = (struct pt_regs *)args->regs;

    switch (idx) {
    case 0:
        arg = _READ(regs->di);
        break;
    case 1:
        arg = _READ(regs->si);
        break;
    case 2:
        arg = _READ(regs->dx);
        break;
    case 3:
        arg = _READ(regs->r10);
        break;
    case 4:
        arg = _READ(regs->r8);
        break;
    case 5:
        arg = _READ(regs->r9);
        break;
    default:
        arg = 0;
    }
#else
    unsigned long *args = unstash_args();

    if (args)
        arg = bpf_syscall_get_argument_from_args(args, idx);
    else
        arg = 0;
#endif

    return arg;
}
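
/* Note (not in the original file): the register map above is the x86-64
 * syscall calling convention: arguments 0..5 arrive in rdi, rsi, rdx,
 * r10, r8 and r9, with r10 standing in for the usual rcx because the
 * SYSCALL instruction clobbers rcx.
 */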

static __always_inline unsigned long bpf_syscall_get_argument(struct filler_data *data,
                                  int idx)
{
#ifdef BPF_SUPPORTS_RAW_TRACEPOINTS
    return bpf_syscall_get_argument_from_ctx(data->ctx, idx);
#else
    return bpf_syscall_get_argument_from_args(data->args, idx);
#endif
}

static __always_inline char *get_frame_scratch_area(unsigned int cpu)
{
    char *scratchp;

    scratchp = bpf_map_lookup_elem(&frame_scratch_map, &cpu);
    if (!scratchp)
        bpf_printk("frame scratch NULL\n");

    return scratchp;
}

static __always_inline char *get_tmp_scratch_area(unsigned int cpu)
{
    char *scratchp;

    scratchp = bpf_map_lookup_elem(&tmp_scratch_map, &cpu);
    if (!scratchp)
        bpf_printk("tmp scratch NULL\n");

    return scratchp;
}

static __always_inline const struct syscall_evt_pair *get_syscall_info(int id)
{
    const struct syscall_evt_pair *p =
            bpf_map_lookup_elem(&syscall_table, &id);

    if (!p)
        bpf_printk("no syscall_info for %d\n", id);

    return p;
}

static __always_inline const struct ppm_event_info *get_event_info(enum ppm_event_type event_type)
{
    const struct ppm_event_info *e =
        bpf_map_lookup_elem(&event_info_table, &event_type);

    if (!e)
        bpf_printk("no event info for %d\n", event_type);

    return e;
}

static __always_inline const struct ppm_event_entry *get_event_filler_info(enum ppm_event_type event_type)
{
    const struct ppm_event_entry *e;

    e = bpf_map_lookup_elem(&fillers_table, &event_type);
    if (!e)
        bpf_printk("no filler info for %d\n", event_type);

    return e;
}

static __always_inline struct sysdig_bpf_settings *get_bpf_settings(void)
{
    struct sysdig_bpf_settings *settings;
    int id = 0;

    settings = bpf_map_lookup_elem(&settings_map, &id);
    if (!settings)
        bpf_printk("settings NULL\n");

    return settings;
}

static __always_inline struct sysdig_bpf_per_cpu_state *get_local_state(unsigned int cpu)
{
    struct sysdig_bpf_per_cpu_state *state;

    state = bpf_map_lookup_elem(&local_state_map, &cpu);
    if (!state)
        bpf_printk("state NULL\n");

    return state;
}

static __always_inline bool acquire_local_state(struct sysdig_bpf_per_cpu_state *state)
{
    if (state->in_use) {
        bpf_printk("acquire_local_state: already in use\n");
        return false;
    }

    state->in_use = true;
    return true;
}

static __always_inline bool release_local_state(struct sysdig_bpf_per_cpu_state *state)
{
    if (!state->in_use) {
        bpf_printk("release_local_state: already not in use\n");
        return false;
    }

    state->in_use = false;
    return true;
}
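
/* Note (not in the original file): in_use acts as a per-cpu reentrancy
 * guard. If an event fires on a CPU that is already processing one, the
 * new event is abandoned instead of corrupting the shared per-cpu
 * scratch buffers.
 */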

static __always_inline int init_filler_data(void *ctx,
                        struct filler_data *data,
                        bool is_syscall)
{
    unsigned int cpu;

    data->ctx = ctx;

    data->settings = get_bpf_settings();
    if (!data->settings)
        return PPM_FAILURE_BUG;

    cpu = bpf_get_smp_processor_id();

    data->buf = get_frame_scratch_area(cpu);
    if (!data->buf)
        return PPM_FAILURE_BUG;

    data->state = get_local_state(cpu);
    if (!data->state)
        return PPM_FAILURE_BUG;

    data->tmp_scratch = get_tmp_scratch_area(cpu);
    if (!data->tmp_scratch)
        return PPM_FAILURE_BUG;

    data->evt = get_event_info(data->state->tail_ctx.evt_type);
    if (!data->evt)
        return PPM_FAILURE_BUG;

    data->filler_info = get_event_filler_info(data->state->tail_ctx.evt_type);
    if (!data->filler_info)
        return PPM_FAILURE_BUG;

#ifndef BPF_SUPPORTS_RAW_TRACEPOINTS
    if (is_syscall) {
        data->args = unstash_args();
        if (!data->args)
            return PPM_SKIP_EVENT;
    }
#endif

    data->curarg_already_on_frame = false;
    data->fd = -1;

    return PPM_SUCCESS;
}

static __always_inline int bpf_test_bit(int nr, unsigned long *addr)
{
    return 1UL & (_READ(addr[BIT_WORD(nr)]) >> (nr & (BITS_PER_LONG - 1)));
}
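
/* Worked example (not in the original file): BIT_WORD(nr) selects the
 * longword holding bit nr, so on a 64-bit kernel bpf_test_bit(70, fds)
 * reads fds[1] and tests bit 6 (70 & 63). _READ() is required because
 * addr points to plain kernel memory that BPF cannot dereference
 * directly.
 */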

static __always_inline bool drop_event(void *ctx,
                       struct sysdig_bpf_per_cpu_state *state,
                       enum ppm_event_type evt_type,
                       struct sysdig_bpf_settings *settings,
                       enum syscall_flags drop_flags)
{
    if (!settings->dropping_mode)
        return false;

    switch (evt_type) {
    case PPME_SYSCALL_CLOSE_X:
    case PPME_SOCKET_BIND_X: {
        long ret = bpf_syscall_get_retval(ctx);

        if (ret < 0)
            return true;

        break;
    }
    case PPME_SYSCALL_CLOSE_E: {
        struct sys_enter_args *args;
        struct files_struct *files;
        struct task_struct *task;
        unsigned long *open_fds;
        struct fdtable *fdt;
        int close_fd;
        int max_fds;

        close_fd = bpf_syscall_get_argument_from_ctx(ctx, 0);
        if (close_fd < 0)
            return true;

        task = (struct task_struct *)bpf_get_current_task();
        if (!task)
            break;

        files = _READ(task->files);
        if (!files)
            break;

        fdt = _READ(files->fdt);
        if (!fdt)
            break;

        max_fds = _READ(fdt->max_fds);
        if (close_fd >= max_fds)
            return true;

        open_fds = _READ(fdt->open_fds);
        if (!open_fds)
            break;

        if (!bpf_test_bit(close_fd, open_fds))
            return true;

        break;
    }
    case PPME_SYSCALL_FCNTL_E:
    case PPME_SYSCALL_FCNTL_X: {
        long cmd = bpf_syscall_get_argument_from_ctx(ctx, 1);

        if (cmd != F_DUPFD && cmd != F_DUPFD_CLOEXEC)
            return true;

        break;
    }
    default:
        break;
    }

    if (drop_flags & UF_NEVER_DROP)
        return false;

    if (drop_flags & UF_ALWAYS_DROP)
        return true;

    if (state->tail_ctx.ts % 1000000000 >= 1000000000 /
        settings->sampling_ratio) {
        if (!settings->is_dropping) {
            settings->is_dropping = true;
            state->tail_ctx.evt_type = PPME_DROP_E;
            return false;
        }

        return true;
    }

    if (settings->is_dropping) {
        settings->is_dropping = false;
        state->tail_ctx.evt_type = PPME_DROP_X;
        return false;
    }

    return false;
}
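
/* Worked example (not in the original file): with sampling_ratio = 4,
 * only events whose timestamp falls in the first 250000000 ns of each
 * second (ts % 1000000000 < 1000000000 / 4) are kept; the synthetic
 * PPME_DROP_E and PPME_DROP_X events mark the transitions into and out
 * of the dropping phase.
 */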

static __always_inline void reset_tail_ctx(struct sysdig_bpf_per_cpu_state *state,
                       enum ppm_event_type evt_type,
                       unsigned long long ts)
{
    state->tail_ctx.evt_type = evt_type;
    state->tail_ctx.ts = ts;
    state->tail_ctx.curarg = 0;
    state->tail_ctx.curoff = 0;
    state->tail_ctx.len = 0;
    state->tail_ctx.prev_res = 0;
}

static __always_inline void call_filler(void *ctx,
                    void *stack_ctx,
                    enum ppm_event_type evt_type,
                    struct sysdig_bpf_settings *settings,
                    enum syscall_flags drop_flags)
{
    const struct ppm_event_entry *filler_info;
    struct sysdig_bpf_per_cpu_state *state;
    unsigned long long pid;
    unsigned long long ts;
    unsigned int cpu;

    cpu = bpf_get_smp_processor_id();

    state = get_local_state(cpu);
    if (!state)
        return;

    if (!acquire_local_state(state))
        return;

    if (cpu == 0 && state->hotplug_cpu != 0) {
        evt_type = PPME_CPU_HOTPLUG_E;
        drop_flags = UF_NEVER_DROP;
    }

    ts = settings->boot_time + bpf_ktime_get_ns();
    reset_tail_ctx(state, evt_type, ts);

    /* drop_event can change state->tail_ctx.evt_type */
    if (drop_event(stack_ctx, state, evt_type, settings, drop_flags))
        goto cleanup;

    ++state->n_evts;

    filler_info = get_event_filler_info(state->tail_ctx.evt_type);
    if (!filler_info)
        goto cleanup;

    bpf_tail_call(ctx, &tail_map, filler_info->filler_id);
    bpf_printk("Can't tail call filler evt=%d, filler=%d\n",
           state->tail_ctx.evt_type,
           filler_info->filler_id);

cleanup:
    release_local_state(state);
}
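
/* Note (not in the original file): bpf_tail_call() does not return on
 * success; control transfers to the filler program, which is then
 * expected to release the per-cpu state. Reaching the bpf_printk()
 * above therefore means the tail call itself failed, and the cleanup
 * path releases the state here instead.
 */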

#endif