"Fossies" - the Fresh Open Source Software Archive

Member "libev-4.33/ev_epoll.c" (31 Oct 2019, 10365 Bytes) of package /linux/misc/libev-4.33.tar.gz:


/*
 * libev epoll fd activity backend
 *
 * Copyright (c) 2007,2008,2009,2010,2011,2016,2017,2019 Marc Alexander Lehmann <libev@schmorp.de>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modifica-
 * tion, are permitted provided that the following conditions are met:
 *
 *   1.  Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *
 *   2.  Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
 * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
 * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
 * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * the GNU General Public License ("GPL") version 2 or any later version,
 * in which case the provisions of the GPL are applicable instead of
 * the above. If you wish to allow the use of your version of this file
 * only under the terms of the GPL and not to allow others to use your
 * version of this file under the BSD license, indicate your decision
 * by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL. If you do not delete the
 * provisions above, a recipient may use your version of this file under
 * either the BSD or the GPL.
 */

/*
 * general notes about epoll:
 *
 * a) epoll silently removes fds from the fd set. as nothing tells us
 *    that an fd has been removed otherwise, we have to continually
 *    "rearm" fds that we suspect *might* have changed (same
 *    problem with kqueue, but much less costly there).
 * b) the fact that ADD != MOD creates a lot of extra syscalls due to a)
 *    and seems not to have any advantage.
 * c) the inability to handle fork or file descriptors (think dup)
 *    limits the applicability over poll, so this is not a generic
 *    poll replacement.
 * d) epoll doesn't work the same as select with many file descriptors
 *    (such as files). while not critical, no other advanced interface
 *    seems to share this (rather non-unixy) limitation.
 * e) epoll claims to be embeddable, but in practice you never get
 *    a ready event for the epoll fd (broken: <=2.6.26, working: >=2.6.32).
 * f) epoll_ctl returning EPERM means the fd is always ready.
 *
 * lots of "weird code" and complication handling in this file is due
 * to these design problems with epoll, as we try very hard to avoid
 * epoll_ctl syscalls for common usage patterns and handle the breakage
 * ensuing from receiving events for closed and otherwise long gone
 * file descriptors.
 */
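
/*
 * for illustration only (a minimal sketch, not libev code): point f) above
 * can be reproduced with any fd that epoll cannot watch, e.g. a regular
 * file, which poll and select would simply report as always ready:
 *
 *   int fd = open ("some-regular-file", O_RDONLY);  // hypothetical path
 *   struct epoll_event ev = { .events = EPOLLIN };
 *   epoll_ctl (epfd, EPOLL_CTL_ADD, fd, &ev);       // fails with errno == EPERM
 *
 * the EV_EMASK_EPERM flag below is how this backend remembers such fds so
 * it can synthesise "always ready" events for them in epoll_poll.
 */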

#include <sys/epoll.h>

#define EV_EMASK_EPERM 0x80

static void
epoll_modify (EV_P_ int fd, int oev, int nev)
{
  struct epoll_event ev;
  unsigned char oldmask;

  /*
   * we handle EPOLL_CTL_DEL by ignoring it here
   * on the assumption that the fd is gone anyways
   * if that is wrong, we have to handle the spurious
   * event in epoll_poll.
   * if the fd is added again, we try to ADD it, and, if that
   * fails, we assume it still has the same eventmask.
   */
  if (!nev)
    return;

  oldmask = anfds [fd].emask;
  anfds [fd].emask = nev;

  /* store the generation counter in the upper 32 bits, the fd in the lower 32 bits */
  ev.data.u64 = (uint64_t)(uint32_t)fd
              | ((uint64_t)(uint32_t)++anfds [fd].egen << 32);
  ev.events   = (nev & EV_READ  ? EPOLLIN  : 0)
              | (nev & EV_WRITE ? EPOLLOUT : 0);

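  /* prefer MOD if the fd was active before and the mask appears to have
   * changed, otherwise try ADD first; a wrong guess is cheap, because the
   * ENOENT/EEXIST paths below retry with the other operation */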
  if (ecb_expect_true (!epoll_ctl (backend_fd, oev && oldmask != nev ? EPOLL_CTL_MOD : EPOLL_CTL_ADD, fd, &ev)))
    return;

  if (ecb_expect_true (errno == ENOENT))
    {
      /* if ENOENT then the fd went away, so try to do the right thing */
      if (!nev)
        goto dec_egen;

      if (!epoll_ctl (backend_fd, EPOLL_CTL_ADD, fd, &ev))
        return;
    }
  else if (ecb_expect_true (errno == EEXIST))
    {
      /* EEXIST means we ignored a previous DEL, but the fd is still active */
      /* if the kernel mask is the same as the new mask, we assume it hasn't changed */
      if (oldmask == nev)
        goto dec_egen;

      if (!epoll_ctl (backend_fd, EPOLL_CTL_MOD, fd, &ev))
        return;
    }
  else if (ecb_expect_true (errno == EPERM))
    {
      /* EPERM means the fd is always ready, but epoll is too snobbish */
      /* to handle it, unlike select or poll. */
      anfds [fd].emask = EV_EMASK_EPERM;

      /* add fd to epoll_eperms, if not already inside */
      if (!(oldmask & EV_EMASK_EPERM))
        {
          array_needsize (int, epoll_eperms, epoll_epermmax, epoll_epermcnt + 1, array_needsize_noinit);
          epoll_eperms [epoll_epermcnt++] = fd;
        }

      return;
    }
  else
    assert (("libev: I/O watcher with invalid fd found in epoll_ctl", errno != EBADF && errno != ELOOP && errno != EINVAL));

  fd_kill (EV_A_ fd);

dec_egen:
  /* we didn't successfully call epoll_ctl, so decrement the generation counter again */
  --anfds [fd].egen;
}

static void
epoll_poll (EV_P_ ev_tstamp timeout)
{
  int i;
  int eventcnt;

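  /* if any fds could only be handled via the EPERM workaround, they are
   * considered ready at all times, so we must not block in epoll_wait;
   * their events are synthesised after the normal event processing below */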
  if (ecb_expect_false (epoll_epermcnt))
    timeout = EV_TS_CONST (0.);

  /* epoll wait times cannot be larger than (LONG_MAX - 999UL) / HZ msecs, which is below */
  /* the default libev max wait time, however. */
  EV_RELEASE_CB;
  eventcnt = epoll_wait (backend_fd, epoll_events, epoll_eventmax, EV_TS_TO_MSEC (timeout));
  EV_ACQUIRE_CB;

  if (ecb_expect_false (eventcnt < 0))
    {
      if (errno != EINTR)
        ev_syserr ("(libev) epoll_wait");

      return;
    }

  for (i = 0; i < eventcnt; ++i)
    {
      struct epoll_event *ev = epoll_events + i;

      int fd = (uint32_t)ev->data.u64; /* mask out the upper 32 bits (the generation counter) */
      int want = anfds [fd].events;
      int got  = (ev->events & (EPOLLOUT | EPOLLERR | EPOLLHUP) ? EV_WRITE : 0)
               | (ev->events & (EPOLLIN  | EPOLLERR | EPOLLHUP) ? EV_READ  : 0);
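      /* EPOLLERR and EPOLLHUP are delivered by the kernel regardless of the
       * requested mask (like POLLERR/POLLHUP in poll), so they are mapped to
       * both EV_READ and EV_WRITE and the watcher detects the condition on
       * its next I/O attempt */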

      /*
       * check for spurious notification.
       * this only finds spurious notifications on egen updates
       * other spurious notifications will be found by epoll_ctl, below
       * we assume that fd is always in range, as we never shrink the anfds array
       */
      if (ecb_expect_false ((uint32_t)anfds [fd].egen != (uint32_t)(ev->data.u64 >> 32)))
        {
          /* recreate kernel state */
          postfork |= 2;
          continue;
        }

      if (ecb_expect_false (got & ~want))
        {
          anfds [fd].emask = want;

          /*
           * we received an event but are not interested in it, try mod or del
           * this often happens because we optimistically do not unregister fds
           * when we are no longer interested in them, but also when we get spurious
           * notifications for fds from another process. this is partially handled
           * above with the gencounter check (== our fd is not the event fd), and
           * partially here, when epoll_ctl returns an error (== a child has the fd
           * but we closed it).
           * note: for events such as POLLHUP, where we can't know whether it refers
           * to EV_READ or EV_WRITE, we might issue redundant EPOLL_CTL_MOD calls.
           */
          ev->events = (want & EV_READ  ? EPOLLIN  : 0)
                     | (want & EV_WRITE ? EPOLLOUT : 0);

          /* pre-2.6.9 kernels require a non-null pointer with EPOLL_CTL_DEL, */
          /* which is fortunately easy to do for us. */
          if (epoll_ctl (backend_fd, want ? EPOLL_CTL_MOD : EPOLL_CTL_DEL, fd, ev))
            {
              postfork |= 2; /* an error occurred, recreate kernel state */
              continue;
            }
        }

      fd_event (EV_A_ fd, got);
    }

  /* if the receive array was full, increase its size */
  if (ecb_expect_false (eventcnt == epoll_eventmax))
    {
      ev_free (epoll_events);
      epoll_eventmax = array_nextsize (sizeof (struct epoll_event), epoll_eventmax, epoll_eventmax + 1);
      epoll_events = (struct epoll_event *)ev_malloc (sizeof (struct epoll_event) * epoll_eventmax);
    }

  /* now synthesize events for all fds where epoll fails, while select works... */
  for (i = epoll_epermcnt; i--; )
    {
      int fd = epoll_eperms [i];
      unsigned char events = anfds [fd].events & (EV_READ | EV_WRITE);

      if (anfds [fd].emask & EV_EMASK_EPERM && events)
        fd_event (EV_A_ fd, events);
      else
        {
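          /* this fd no longer needs the EPERM workaround (or no events are
           * wanted); remove it by moving the last entry into its slot */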
          epoll_eperms [i] = epoll_eperms [--epoll_epermcnt];
          anfds [fd].emask = 0;
        }
    }
}

static int
epoll_epoll_create (void)
{
  int fd;

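  /* prefer epoll_create1 with EPOLL_CLOEXEC where available; fall back to
   * the older epoll_create (whose size hint is ignored by modern kernels)
   * plus FD_CLOEXEC when the flag or the syscall itself is not supported
   * (EINVAL/ENOSYS, e.g. on old kernels) */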
#if defined EPOLL_CLOEXEC && !defined __ANDROID__
  fd = epoll_create1 (EPOLL_CLOEXEC);

  if (fd < 0 && (errno == EINVAL || errno == ENOSYS))
#endif
    {
      fd = epoll_create (256);

      if (fd >= 0)
        fcntl (fd, F_SETFD, FD_CLOEXEC);
    }

  return fd;
}

inline_size
int
epoll_init (EV_P_ int flags)
{
  if ((backend_fd = epoll_epoll_create ()) < 0)
    return 0;

  backend_mintime = EV_TS_CONST (1e-3); /* epoll does sometimes return early, this is just to avoid the worst */
  backend_modify  = epoll_modify;
  backend_poll    = epoll_poll;

  epoll_eventmax = 64; /* initial number of events receivable per poll */
  epoll_events = (struct epoll_event *)ev_malloc (sizeof (struct epoll_event) * epoll_eventmax);

  return EVBACKEND_EPOLL;
}

inline_size
void
epoll_destroy (EV_P)
{
  ev_free (epoll_events);
  array_free (epoll_eperm, EMPTY);
}

ecb_cold
static void
epoll_fork (EV_P)
{
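  /* the epoll fd is inherited across fork and refers to the same kernel
   * interest list as the parent's, so using it in the child would disturb
   * the parent; create a fresh one and let fd_rearm_all re-register every
   * watched fd */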
  close (backend_fd);

  while ((backend_fd = epoll_epoll_create ()) < 0)
    ev_syserr ("(libev) epoll_create");

  fd_rearm_all (EV_A);
}