"Fossies" - the Fresh Open Source Software Archive

Member "stress-ng-0.09.56/stress-userfaultfd.c" (15 Mar 2019, 11802 Bytes) of package /linux/privat/stress-ng-0.09.56.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "stress-userfaultfd.c", see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 0.09.52_vs_0.09.54.

    1 /*
    2  * Copyright (C) 2013-2019 Canonical, Ltd.
    3  *
    4  * This program is free software; you can redistribute it and/or
    5  * modify it under the terms of the GNU General Public License
    6  * as published by the Free Software Foundation; either version 2
    7  * of the License, or (at your option) any later version.
    8  *
    9  * This program is distributed in the hope that it will be useful,
   10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   12  * GNU General Public License for more details.
   13  *
   14  * You should have received a copy of the GNU General Public License
   15  * along with this program; if not, write to the Free Software
   16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
   17  *
   18  * This code is a complete clean re-write of the stress tool by
   19  * Colin Ian King <colin.king@canonical.com> and attempts to be
   20  * backwardly compatible with the stress tool by Amos Waterland
   21  * <apw@rossby.metr.ou.edu> but has more stress tests and more
   22  * functionality.
   23  *
   24  */
   25 #include "stress-ng.h"
   26 
   27 #if defined(HAVE_USERFAULTFD) &&     \
   28     defined(HAVE_LINUX_USERFAULTFD_H) && \
   29     defined(HAVE_POLL_H) &&      \
   30     defined(HAVE_CLONE)
   31 
   32 #define STACK_SIZE  (64 * 1024)
   33 #define COUNT_MAX   (256)
   34 
   35 /* Context for clone */
   36 typedef struct {
   37     const args_t *args;
   38     uint8_t *data;
   39     size_t page_size;
   40     size_t sz;
   41     pid_t parent;
   42 } context_t;
   43 
   44 #endif
   45 
   46 int stress_set_userfaultfd_bytes(const char *opt)
   47 {
   48     size_t userfaultfd_bytes;
   49 
   50     userfaultfd_bytes = (size_t)get_uint64_byte_memory(opt, 1);
   51     check_range_bytes("userfaultfd-bytes", userfaultfd_bytes,
   52         MIN_MMAP_BYTES, MAX_MEM_LIMIT);
   53     return set_setting("userfaultfd-bytes", TYPE_ID_SIZE_T, &userfaultfd_bytes);
   54 }
   55 
   56 #if defined(HAVE_USERFAULTFD) && \
   57     defined(HAVE_LINUX_USERFAULTFD_H) && \
   58     defined(HAVE_CLONE)
   59 
   60 /*
   61  *  stress_child_alarm_handler()
   62  *  SIGALRM handler to terminate child immediately
   63  */
   64 static void MLOCKED_TEXT stress_child_alarm_handler(int signum)
   65 {
   66     (void)signum;
   67 
   68     _exit(0);
   69 }
   70 
   71 /*
   72  *  stress_userfaultfd_child()
   73  *  generate page faults for parent to handle
   74  */
   75 static int stress_userfaultfd_child(void *arg)
   76 {
   77     context_t *c = (context_t *)arg;
   78     const args_t *args = c->args;
   79 
   80     (void)setpgid(0, g_pgrp);
   81     stress_parent_died_alarm();
   82     if (stress_sighandler(args->name, SIGALRM, stress_child_alarm_handler, NULL) < 0)
   83         return EXIT_NO_RESOURCE;
   84 
   85     do {
   86         uint8_t *ptr, *end = c->data + c->sz;
   87 
   88         /* hint we don't need these pages */
   89         if (shim_madvise(c->data, c->sz, MADV_DONTNEED) < 0) {
   90             pr_fail_err("userfaultfd madvise failed");
   91             (void)kill(c->parent, SIGALRM);
   92             return -1;
   93         }
   94         /* and trigger some page faults */
   95         for (ptr = c->data; ptr < end; ptr += c->page_size)
   96             *ptr = 0xff;
   97     } while (keep_stressing());
   98 
   99     return 0;
  100 }
  101 
  102 /*
  103  *  handle_page_fault()
  104  *  handle a write page fault caused by child
  105  */
  106 static inline int handle_page_fault(
  107     const args_t *args,
  108     const int fd,
  109     uint8_t *addr,
  110     void *zero_page,
  111     uint8_t *data_start,
  112     uint8_t *data_end,
  113     const size_t page_size)
  114 {
  115     if ((addr < data_start) || (addr >= data_end)) {
  116         pr_fail_err("userfaultfd page fault address out of range");
  117         return -1;
  118     }
  119 
  120     if (mwc32() & 1) {
  121         struct uffdio_copy copy;
  122 
  123         copy.copy = 0;
  124         copy.mode = 0;
  125         copy.dst = (unsigned long)addr;
  126         copy.src = (unsigned long)zero_page;
  127         copy.len = page_size;
  128 
  129         if (ioctl(fd, UFFDIO_COPY, &copy) < 0) {
  130             pr_fail_err("userfaultfd page fault copy ioctl failed");
  131             return -1;
  132         }
  133     } else {
  134         struct uffdio_zeropage zeropage;
  135 
  136         zeropage.range.start = (unsigned long)addr;
  137         zeropage.range.len = page_size;
  138         zeropage.mode = 0;
  139         if (ioctl(fd, UFFDIO_ZEROPAGE, &zeropage) < 0) {
  140             pr_fail_err("userfaultfd page fault zeropage ioctl failed");
  141             return -1;
  142         }
  143     }
  144     return 0;
  145 }
  146 
  147 /*
  148  *  stress_userfaultfd_oomable()
  149  *  stress userfaultfd system call, this
  150  *  is an OOM-able child process that the
  151  *  parent can restart
  152  */
  153 static int stress_userfaultfd_oomable(
  154     const args_t *args,
  155     const size_t userfaultfd_bytes)
  156 {
  157     const size_t page_size = args->page_size;
  158     size_t sz;
  159     uint8_t *data;
  160     void *zero_page = NULL;
  161     int fd = -1, fdinfo = -1, status, rc = EXIT_SUCCESS, count = 0;
  162     const unsigned int uffdio_copy = 1 << _UFFDIO_COPY;
  163     const unsigned int uffdio_zeropage = 1 << _UFFDIO_ZEROPAGE;
  164     pid_t pid;
  165     struct uffdio_api api;
  166     struct uffdio_register reg;
  167     context_t c;
  168     bool do_poll = true;
  169     char filename[PATH_MAX];
  170 
  171     /* Child clone stack */
  172     static uint8_t stack[STACK_SIZE];
  173     const ssize_t stack_offset =
  174         stress_get_stack_direction() * (STACK_SIZE - 64);
  175     uint8_t *stack_top = stack + stack_offset;
  176 
  177     sz = userfaultfd_bytes & ~(page_size - 1);
  178 
  179     if (posix_memalign(&zero_page, page_size, page_size)) {
  180         pr_err("%s: zero page allocation failed\n", args->name);
  181         return EXIT_NO_RESOURCE;
  182     }
  183 
  184     data = mmap(NULL, sz, PROT_READ | PROT_WRITE,
  185         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  186     if (data == MAP_FAILED) {
  187         rc = EXIT_NO_RESOURCE;
  188         pr_err("%s: mmap failed\n", args->name);
  189         goto free_zeropage;
  190     }
  191 
  192     /* Get userfault fd */
  193     if ((fd = shim_userfaultfd(0)) < 0) {
  194         if (errno == ENOSYS) {
  195             pr_inf("%s: stressor will be skipped, "
  196                 "userfaultfd not supported\n",
  197                 args->name);
  198             rc = EXIT_NOT_IMPLEMENTED;
  199             goto unmap_data;
  200         }
  201         rc = exit_status(errno);
  202         pr_err("%s: userfaultfd failed, errno = %d (%s)\n",
  203             args->name, errno, strerror(errno));
  204         goto unmap_data;
  205     }
  206 
  207     (void)snprintf(filename, sizeof(filename), "/proc/%d/fdinfo/%d",
  208         getpid(), fd);
  209     fdinfo = open(filename, O_RDONLY);
  210 
  211     if (stress_set_nonblock(fd) < 0)
  212         do_poll = false;
  213 
  214     /* API sanity check */
  215     (void)memset(&api, 0, sizeof(api));
  216     api.api = UFFD_API;
  217     api.features = 0;
  218     if (ioctl(fd, UFFDIO_API, &api) < 0) {
  219         pr_err("%s: ioctl UFFDIO_API failed, errno = %d (%s)\n",
  220             args->name, errno, strerror(errno));
  221         rc = EXIT_FAILURE;
  222         goto unmap_data;
  223     }
  224     if (api.api != UFFD_API) {
  225         pr_err("%s: ioctl UFFDIO_API API check failed\n",
  226             args->name);
  227         rc = EXIT_FAILURE;
  228         goto unmap_data;
  229     }
  230 
  231     /* Register fault handling mode */
  232     (void)memset(&reg, 0, sizeof(reg));
  233     reg.range.start = (unsigned long)data;
  234     reg.range.len = sz;
  235     reg.mode = UFFDIO_REGISTER_MODE_MISSING;
  236     if (ioctl(fd, UFFDIO_REGISTER, &reg) < 0) {
  237         pr_err("%s: ioctl UFFDIO_REGISTER failed, errno = %d (%s)\n",
  238             args->name, errno, strerror(errno));
  239         rc = EXIT_FAILURE;
  240         goto unmap_data;
  241     }
  242 
  243     /* OK, so do we have copy supported? */
  244     if ((reg.ioctls & uffdio_copy) != uffdio_copy) {
  245         pr_err("%s: ioctl UFFDIO_REGISTER did not support _UFFDIO_COPY\n",
  246             args->name);
  247         rc = EXIT_FAILURE;
  248         goto unmap_data;
  249     }
  250     /* OK, so do we have zeropage supported? */
  251     if ((reg.ioctls & uffdio_zeropage) != uffdio_zeropage) {
  252         pr_err("%s: ioctl UFFDIO_REGISTER did not support _UFFDIO_ZEROPAGE\n",
  253             args->name);
  254         rc = EXIT_FAILURE;
  255         goto unmap_data;
  256     }
  257 
  258     /* Set up context for child */
  259     c.args = args;
  260     c.data = data;
  261     c.sz = sz;
  262     c.page_size = page_size;
  263     c.parent = getpid();
  264 
  265     /*
  266      *  We need to clone the child and share the same VM address space
  267      *  as parent so we can perform the page fault handling
  268      */
  269     pid = clone(stress_userfaultfd_child, align_stack(stack_top),
  270         SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_VM, &c);
  271     if (pid < 0) {
  272         pr_err("%s: fork failed, errno = %d (%s)\n",
  273             args->name, errno, strerror(errno));
  274         goto unreg;
  275     }
  276 
  277     /* Parent */
  278     do {
  279         struct uffd_msg msg;
  280         ssize_t ret;
  281 
  282         /* check we should break out before we block on the read */
  283         if (!g_keep_stressing_flag)
  284             break;
  285 
  286         /*
  287          * polled wait exercises userfaultfd_poll
  288          * in the kernel, but only works if fd is NONBLOCKing
  289          */
  290         if (do_poll) {
  291             struct pollfd fds[1];
  292 
  293             (void)memset(fds, 0, sizeof fds);
  294             fds[0].fd = fd;
  295             fds[0].events = POLLIN;
  296             /* wait for 1 second max */
  297 
  298             ret = poll(fds, 1, 1000);
  299             if (ret == 0)
  300                 continue;   /* timed out, redo the poll */
  301             if (ret < 0) {
  302                 if (errno == EINTR)
  303                     continue;
  304                 if (errno != ENOMEM) {
  305                     pr_fail_err("poll userfaultfd");
  306                     if (!g_keep_stressing_flag)
  307                         break;
  308                 }
  309                 /*
  310                  *  poll ran out of free space for internal
  311                  *  fd tables, so give up and block on the
  312                  *  read anyway
  313                  */
  314                 goto do_read;
  315             }
  316             /* No data, re-poll */
  317             if (!(fds[0].revents & POLLIN))
  318                 continue;
  319 
  320             if (LIKELY(fdinfo > -1) &&
  321                 UNLIKELY(count++ >= COUNT_MAX)) {
  322                 ret = lseek(fdinfo, 0, SEEK_SET);
  323                 if (ret == 0) {
  324                     char buffer[4096];
  325 
  326                     ret = read(fdinfo, buffer, sizeof(buffer));
  327                     (void)ret;
  328                 }
  329                 count = 0;
  330             }
  331         }
  332 
  333 do_read:
  334         if ((ret = read(fd, &msg, sizeof(msg))) < 0) {
  335             if (errno == EINTR)
  336                 continue;
  337             pr_fail_err("read userfaultfd");
  338             if (!g_keep_stressing_flag)
  339                 break;
  340             continue;
  341         }
  342         /* We only expect a page fault event */
  343         if (msg.event != UFFD_EVENT_PAGEFAULT) {
  344             pr_fail_err("userfaultfd msg not pagefault event");
  345             continue;
  346         }
  347         /* We only expect a write fault */
  348         if (!(msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)) {
  349             pr_fail_err("userfaultfd msg not write page fault event");
  350             continue;
  351         }
  352         /* Go handle the page fault */
  353         if (handle_page_fault(args, fd, (uint8_t *)(ptrdiff_t)msg.arg.pagefault.address,
  354                 zero_page, data, data + sz, page_size) < 0)
  355             break;
  356         inc_counter(args);
  357     } while (keep_stressing());
  358 
  359     /* Run it over, zap child */
  360     (void)kill(pid, SIGKILL);
  361     if (waitpid(pid, &status, 0) < 0) {
  362         pr_dbg("%s: waitpid failed, errno = %d (%s)\n",
  363             args->name, errno, strerror(errno));
  364     }
  365 unreg:
  366     if (ioctl(fd, UFFDIO_UNREGISTER, &reg) < 0) {
  367         pr_err("%s: ioctl UFFDIO_UNREGISTER failed, errno = %d (%s)\n",
  368             args->name, errno, strerror(errno));
  369         rc = EXIT_FAILURE;
  370         goto unmap_data;
  371     }
  372 unmap_data:
  373     (void)munmap(data, sz);
  374 free_zeropage:
  375     free(zero_page);
  376     if (fdinfo > -1)
  377         (void)close(fdinfo);
  378     if (fd > -1)
  379         (void)close(fd);
  380 
  381     return rc;
  382 }
  383 
  384 /*
  385  *  stress_userfaultfd()
  386  *  stress userfaultfd
  387  */
  388 static int stress_userfaultfd(const args_t *args)
  389 {
  390     pid_t pid;
  391     int rc = EXIT_FAILURE;
  392     size_t userfaultfd_bytes = DEFAULT_MMAP_BYTES;
  393 
  394     if (!get_setting("userfaultfd-bytes", &userfaultfd_bytes)) {
  395         if (g_opt_flags & OPT_FLAGS_MAXIMIZE)
  396             userfaultfd_bytes = MAX_MMAP_BYTES;
  397         if (g_opt_flags & OPT_FLAGS_MINIMIZE)
  398             userfaultfd_bytes = MIN_MMAP_BYTES;
  399     }
  400     userfaultfd_bytes /= args->num_instances;
  401     if (userfaultfd_bytes < MIN_MMAP_BYTES)
  402         userfaultfd_bytes = MIN_MMAP_BYTES;
  403     if (userfaultfd_bytes < args->page_size)
  404         userfaultfd_bytes = args->page_size;
  405 
  406     pid = fork();
  407     if (pid < 0) {
  408         if (errno == EAGAIN)
  409             return EXIT_NO_RESOURCE;
  410         pr_err("%s: fork failed: errno=%d: (%s)\n",
  411             args->name, errno, strerror(errno));
  412     } else if (pid > 0) {
  413         /* Parent */
  414         int status, ret;
  415 
  416         (void)setpgid(pid, g_pgrp);
  417         ret = waitpid(pid, &status, 0);
  418         if (ret < 0) {
  419             if (errno != EINTR)
  420                 pr_dbg("%s: waitpid(): errno=%d (%s)\n",
  421                     args->name, errno, strerror(errno));
  422             (void)kill(pid, SIGTERM);
  423             (void)kill(pid, SIGKILL);
  424             (void)waitpid(pid, &status, 0);
  425         } else if (WIFSIGNALED(status)) {
  426             pr_dbg("%s: child died: %s (instance %d)\n",
  427                 args->name, stress_strsignal(WTERMSIG(status)),
  428                 args->instance);
  429             /* If we got killed by OOM killer, report this */
  430             if (WTERMSIG(status) == SIGKILL) {
  431                 log_system_mem_info();
  432                 pr_dbg("%s: assuming killed by OOM "
  433                     "killer, aborting "
  434                     "(instance %d)\n",
  435                     args->name, args->instance);
  436                 return EXIT_NO_RESOURCE;
  437             }
  438             return EXIT_FAILURE;
  439         }
  440         rc = WEXITSTATUS(status);
  441     } else if (pid == 0) {
  442         /* Child */
  443         (void)setpgid(0, g_pgrp);
  444         stress_parent_died_alarm();
  445 
  446         _exit(stress_userfaultfd_oomable(args, userfaultfd_bytes));
  447     }
  448     return rc;
  449 }
  450 
  451 stressor_info_t stress_userfaultfd_info = {
  452     .stressor = stress_userfaultfd,
  453     .class = CLASS_VM | CLASS_OS
  454 };
  455 #else
  456 stressor_info_t stress_userfaultfd_info = {
  457     .stressor = stress_not_implemented,
  458     .class = CLASS_VM | CLASS_OS
  459 };
  460 #endif