libpcap  1.10.1
About: libpcap is a packet filter library used by tools like tcpdump.
  Fossies Dox: libpcap-1.10.1.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

pcap-rdmasniff.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 Pure Storage, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote
15  * products derived from this software without specific prior written
16  * permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
34 
35 #include "pcap-int.h"
36 #include "pcap-rdmasniff.h"
37 
#include <infiniband/verbs.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h> /* for INT_MAX */
#include <sys/time.h>
42 
43 #if !defined(IBV_FLOW_ATTR_SNIFFER)
44 #define IBV_FLOW_ATTR_SNIFFER 3
45 #endif
46 
/*
 * Receive ring geometry: the number of receive work requests kept posted
 * on the sniffer QP, and the size in bytes of the buffer slot each of
 * them points at.  The capture buffer is NUM_RECEIVES slots of
 * RECEIVE_SIZE bytes, indexed by work-request ID.
 */
enum {
	RDMASNIFF_NUM_RECEIVES = 128,
	RDMASNIFF_RECEIVE_SIZE = 10000
};
49 
51  struct ibv_device * rdma_device;
52  struct ibv_context * context;
53  struct ibv_comp_channel * channel;
54  struct ibv_pd * pd;
55  struct ibv_cq * cq;
56  struct ibv_qp * qp;
57  struct ibv_flow * flow;
58  struct ibv_mr * mr;
59  u_char * oneshot_buffer;
60  unsigned long port_num;
61  int cq_event;
62  u_int packets_recv;
63 };
64 
65 static int
67 {
68  struct pcap_rdmasniff *priv = handle->priv;
69 
70  stat->ps_recv = priv->packets_recv;
71  stat->ps_drop = 0;
72  stat->ps_ifdrop = 0;
73 
74  return 0;
75 }
76 
77 static void
79 {
80  struct pcap_rdmasniff *priv = handle->priv;
81 
82  ibv_dereg_mr(priv->mr);
83  ibv_destroy_flow(priv->flow);
84  ibv_destroy_qp(priv->qp);
85  ibv_destroy_cq(priv->cq);
86  ibv_dealloc_pd(priv->pd);
87  ibv_destroy_comp_channel(priv->channel);
88  ibv_close_device(priv->context);
89  free(priv->oneshot_buffer);
90 
92 }
93 
94 static void
95 rdmasniff_post_recv(pcap_t *handle, uint64_t wr_id)
96 {
97  struct pcap_rdmasniff *priv = handle->priv;
98  struct ibv_sge sg_entry;
99  struct ibv_recv_wr wr, *bad_wr;
100 
101  sg_entry.length = RDMASNIFF_RECEIVE_SIZE;
102  sg_entry.addr = (uintptr_t) handle->buffer + RDMASNIFF_RECEIVE_SIZE * wr_id;
103  sg_entry.lkey = priv->mr->lkey;
104 
105  wr.wr_id = wr_id;
106  wr.num_sge = 1;
107  wr.sg_list = &sg_entry;
108  wr.next = NULL;
109 
110  ibv_post_recv(priv->qp, &wr, &bad_wr);
111 }
112 
113 static int
114 rdmasniff_read(pcap_t *handle, int max_packets, pcap_handler callback, u_char *user)
115 {
116  struct pcap_rdmasniff *priv = handle->priv;
117  struct ibv_cq *ev_cq;
118  void *ev_ctx;
119  struct ibv_wc wc;
120  struct pcap_pkthdr pkth;
121  u_char *pktd;
122  int count = 0;
123 
124  if (!priv->cq_event) {
125  while (ibv_get_cq_event(priv->channel, &ev_cq, &ev_ctx) < 0) {
126  if (errno != EINTR) {
127  return PCAP_ERROR;
128  }
129  if (handle->break_loop) {
130  handle->break_loop = 0;
131  return PCAP_ERROR_BREAK;
132  }
133  }
134  ibv_ack_cq_events(priv->cq, 1);
135  ibv_req_notify_cq(priv->cq, 0);
136  priv->cq_event = 1;
137  }
138 
139  while (count < max_packets || PACKET_COUNT_IS_UNLIMITED(max_packets)) {
140  if (ibv_poll_cq(priv->cq, 1, &wc) != 1) {
141  priv->cq_event = 0;
142  break;
143  }
144 
145  if (wc.status != IBV_WC_SUCCESS) {
146  fprintf(stderr, "failed WC wr_id %lld status %d/%s\n",
147  (unsigned long long) wc.wr_id,
148  wc.status, ibv_wc_status_str(wc.status));
149  continue;
150  }
151 
152  pkth.len = wc.byte_len;
153  pkth.caplen = min(pkth.len, (u_int)handle->snapshot);
154  gettimeofday(&pkth.ts, NULL);
155 
156  pktd = (u_char *) handle->buffer + wc.wr_id * RDMASNIFF_RECEIVE_SIZE;
157 
158  if (handle->fcode.bf_insns == NULL ||
159  pcap_filter(handle->fcode.bf_insns, pktd, pkth.len, pkth.caplen)) {
160  callback(user, &pkth, pktd);
161  ++priv->packets_recv;
162  ++count;
163  }
164 
165  rdmasniff_post_recv(handle, wc.wr_id);
166 
167  if (handle->break_loop) {
168  handle->break_loop = 0;
169  return PCAP_ERROR_BREAK;
170  }
171  }
172 
173  return count;
174 }
175 
176 static void
177 rdmasniff_oneshot(u_char *user, const struct pcap_pkthdr *h, const u_char *bytes)
178 {
179  struct oneshot_userdata *sp = (struct oneshot_userdata *) user;
180  pcap_t *handle = sp->pd;
181  struct pcap_rdmasniff *priv = handle->priv;
182 
183  *sp->hdr = *h;
184  memcpy(priv->oneshot_buffer, bytes, h->caplen);
185  *sp->pkt = priv->oneshot_buffer;
186 }
187 
188 static int
190 {
191  struct pcap_rdmasniff *priv = handle->priv;
192  struct ibv_qp_init_attr qp_init_attr;
193  struct ibv_qp_attr qp_attr;
194  struct ibv_flow_attr flow_attr;
195  struct ibv_port_attr port_attr;
196  int i;
197 
198  priv->context = ibv_open_device(priv->rdma_device);
199  if (!priv->context) {
201  "Failed to open device %s", handle->opt.device);
202  goto error;
203  }
204 
205  priv->pd = ibv_alloc_pd(priv->context);
206  if (!priv->pd) {
208  "Failed to alloc PD for device %s", handle->opt.device);
209  goto error;
210  }
211 
212  priv->channel = ibv_create_comp_channel(priv->context);
213  if (!priv->channel) {
215  "Failed to create comp channel for device %s", handle->opt.device);
216  goto error;
217  }
218 
219  priv->cq = ibv_create_cq(priv->context, RDMASNIFF_NUM_RECEIVES,
220  NULL, priv->channel, 0);
221  if (!priv->cq) {
223  "Failed to create CQ for device %s", handle->opt.device);
224  goto error;
225  }
226 
227  ibv_req_notify_cq(priv->cq, 0);
228 
229  memset(&qp_init_attr, 0, sizeof qp_init_attr);
230  qp_init_attr.send_cq = qp_init_attr.recv_cq = priv->cq;
231  qp_init_attr.cap.max_recv_wr = RDMASNIFF_NUM_RECEIVES;
232  qp_init_attr.cap.max_recv_sge = 1;
233  qp_init_attr.qp_type = IBV_QPT_RAW_PACKET;
234  priv->qp = ibv_create_qp(priv->pd, &qp_init_attr);
235  if (!priv->qp) {
237  "Failed to create QP for device %s", handle->opt.device);
238  goto error;
239  }
240 
241  memset(&qp_attr, 0, sizeof qp_attr);
242  qp_attr.qp_state = IBV_QPS_INIT;
243  qp_attr.port_num = priv->port_num;
244  if (ibv_modify_qp(priv->qp, &qp_attr, IBV_QP_STATE | IBV_QP_PORT)) {
246  "Failed to modify QP to INIT for device %s", handle->opt.device);
247  goto error;
248  }
249 
250  memset(&qp_attr, 0, sizeof qp_attr);
251  qp_attr.qp_state = IBV_QPS_RTR;
252  if (ibv_modify_qp(priv->qp, &qp_attr, IBV_QP_STATE)) {
254  "Failed to modify QP to RTR for device %s", handle->opt.device);
255  goto error;
256  }
257 
258  memset(&flow_attr, 0, sizeof flow_attr);
259  flow_attr.type = IBV_FLOW_ATTR_SNIFFER;
260  flow_attr.size = sizeof flow_attr;
261  flow_attr.port = priv->port_num;
262  priv->flow = ibv_create_flow(priv->qp, &flow_attr);
263  if (!priv->flow) {
265  "Failed to create flow for device %s", handle->opt.device);
266  goto error;
267  }
268 
270  handle->buffer = malloc(handle->bufsize);
271  if (!handle->buffer) {
273  "Failed to allocate receive buffer for device %s", handle->opt.device);
274  goto error;
275  }
276 
277  priv->oneshot_buffer = malloc(RDMASNIFF_RECEIVE_SIZE);
278  if (!priv->oneshot_buffer) {
280  "Failed to allocate oneshot buffer for device %s", handle->opt.device);
281  goto error;
282  }
283 
284  priv->mr = ibv_reg_mr(priv->pd, handle->buffer, handle->bufsize, IBV_ACCESS_LOCAL_WRITE);
285  if (!priv->mr) {
287  "Failed to register MR for device %s", handle->opt.device);
288  goto error;
289  }
290 
291 
292  for (i = 0; i < RDMASNIFF_NUM_RECEIVES; ++i) {
293  rdmasniff_post_recv(handle, i);
294  }
295 
296  if (!ibv_query_port(priv->context, priv->port_num, &port_attr) &&
297  port_attr.link_layer == IBV_LINK_LAYER_INFINIBAND) {
298  handle->linktype = DLT_INFINIBAND;
299  } else {
300  handle->linktype = DLT_EN10MB;
301  }
302 
303  if (handle->snapshot <= 0 || handle->snapshot > RDMASNIFF_RECEIVE_SIZE)
305 
306  handle->offset = 0;
307  handle->read_op = rdmasniff_read;
308  handle->stats_op = rdmasniff_stats;
309  handle->cleanup_op = rdmasniff_cleanup;
311  handle->setdirection_op = NULL;
312  handle->set_datalink_op = NULL;
316  handle->selectable_fd = priv->channel->fd;
317 
318  return 0;
319 
320 error:
321  if (priv->mr) {
322  ibv_dereg_mr(priv->mr);
323  }
324 
325  if (priv->flow) {
326  ibv_destroy_flow(priv->flow);
327  }
328 
329  if (priv->qp) {
330  ibv_destroy_qp(priv->qp);
331  }
332 
333  if (priv->cq) {
334  ibv_destroy_cq(priv->cq);
335  }
336 
337  if (priv->channel) {
338  ibv_destroy_comp_channel(priv->channel);
339  }
340 
341  if (priv->pd) {
342  ibv_dealloc_pd(priv->pd);
343  }
344 
345  if (priv->context) {
346  ibv_close_device(priv->context);
347  }
348 
349  if (priv->oneshot_buffer) {
350  free(priv->oneshot_buffer);
351  }
352 
353  return PCAP_ERROR;
354 }
355 
356 pcap_t *
357 rdmasniff_create(const char *device, char *ebuf, int *is_ours)
358 {
359  struct pcap_rdmasniff *priv;
360  struct ibv_device **dev_list;
361  int numdev;
362  size_t namelen;
363  const char *port;
364  unsigned long port_num;
365  int i;
366  pcap_t *p = NULL;
367 
368  *is_ours = 0;
369 
370  dev_list = ibv_get_device_list(&numdev);
371  if (!dev_list) {
372  return NULL;
373  }
374  if (!numdev) {
375  ibv_free_device_list(dev_list);
376  return NULL;
377  }
378 
379  namelen = strlen(device);
380 
381  port = strchr(device, ':');
382  if (port) {
383  port_num = strtoul(port + 1, NULL, 10);
384  if (port_num > 0) {
385  namelen = port - device;
386  } else {
387  port_num = 1;
388  }
389  } else {
390  port_num = 1;
391  }
392 
393  for (i = 0; i < numdev; ++i) {
394  if (strlen(dev_list[i]->name) == namelen &&
395  !strncmp(device, dev_list[i]->name, namelen)) {
396  *is_ours = 1;
397 
398  p = PCAP_CREATE_COMMON(ebuf, struct pcap_rdmasniff);
399  if (p) {
401  priv = p->priv;
402  priv->rdma_device = dev_list[i];
403  priv->port_num = port_num;
404  }
405  break;
406  }
407  }
408 
409  ibv_free_device_list(dev_list);
410  return p;
411 }
412 
413 int
414 rdmasniff_findalldevs(pcap_if_list_t *devlistp, char *err_str)
415 {
416  struct ibv_device **dev_list;
417  int numdev;
418  int i;
419  int ret = 0;
420 
421  dev_list = ibv_get_device_list(&numdev);
422  if (!dev_list) {
423  return 0;
424  }
425 
426  for (i = 0; i < numdev; ++i) {
427  /*
428  * XXX - do the notions of "up", "running", or
429  * "connected" apply here?
430  */
431  if (!add_dev(devlistp, dev_list[i]->name, 0, "RDMA sniffer", err_str)) {
432  ret = -1;
433  break;
434  }
435  }
436 
437  ibv_free_device_list(dev_list);
438  return ret;
439 }
u_int pcap_filter(const struct bpf_insn *pc, const u_char *p, u_int wirelen, u_int buflen)
Definition: bpf_filter.c:391
static void error(const char *,...)
#define DLT_EN10MB
Definition: dlt.h:63
#define DLT_INFINIBAND
Definition: dlt.h:1185
int install_bpf_program(pcap_t *p, struct bpf_program *fp)
Definition: optimize.c:2939
int snprintf(char *, size_t, const char *,...)
int gettimeofday(struct timeval *, struct timezone *)
#define min(a, b)
Definition: pcap-dos.h:81
int errno
#define PACKET_COUNT_IS_UNLIMITED(count)
Definition: pcap-int.h:444
pcap_if_t * add_dev(pcap_if_list_t *, const char *, bpf_u_int32, const char *, char *)
Definition: pcap.c:1308
void pcap_cleanup_live_common(pcap_t *)
Definition: pcap.c:3987
int pcap_getnonblock_fd(pcap_t *)
Definition: pcap.c:3537
int pcap_setnonblock_fd(pcap_t *p, int)
Definition: pcap.c:3583
#define PCAP_CREATE_COMMON(ebuf, type)
Definition: pcap-int.h:474
static const int RDMASNIFF_RECEIVE_SIZE
int rdmasniff_findalldevs(pcap_if_list_t *devlistp, char *err_str)
#define IBV_FLOW_ATTR_SNIFFER
pcap_t * rdmasniff_create(const char *device, char *ebuf, int *is_ours)
static int rdmasniff_activate(pcap_t *handle)
static void rdmasniff_post_recv(pcap_t *handle, uint64_t wr_id)
static void rdmasniff_oneshot(u_char *user, const struct pcap_pkthdr *h, const u_char *bytes)
static void rdmasniff_cleanup(pcap_t *handle)
static int rdmasniff_stats(pcap_t *handle, struct pcap_stat *stat)
static int rdmasniff_read(pcap_t *handle, int max_packets, pcap_handler callback, u_char *user)
static const int RDMASNIFF_NUM_RECEIVES
struct pcap_stat stat
Definition: pcap-septel.c:51
void(* pcap_handler)(u_char *, const struct pcap_pkthdr *, const u_char *)
Definition: pcap.h:330
#define PCAP_ERROR_BREAK
Definition: pcap.h:340
#define PCAP_ERRBUF_SIZE
Definition: pcap.h:152
#define PCAP_ERROR
Definition: pcap.h:339
static char port[2048+1]
keeps the network port to bind to
Definition: rpcapd.c:88
struct bpf_insn * bf_insns
Definition: bpf.h:119
pcap_t * pd
Definition: pcap-int.h:431
const u_char ** pkt
Definition: pcap-int.h:430
struct pcap_pkthdr * hdr
Definition: pcap-int.h:429
char * device
Definition: pcap-int.h:146
bpf_u_int32 caplen
Definition: pcap.h:247
struct timeval ts
Definition: pcap.h:246
bpf_u_int32 len
Definition: pcap.h:248
struct ibv_qp * qp
struct ibv_device * rdma_device
struct ibv_comp_channel * channel
struct ibv_cq * cq
struct ibv_context * context
struct ibv_flow * flow
u_char * oneshot_buffer
unsigned long port_num
struct ibv_mr * mr
struct ibv_pd * pd
u_int ps_drop
Definition: pcap.h:256
u_int ps_recv
Definition: pcap.h:255
u_int ps_ifdrop
Definition: pcap.h:257
Definition: pcap-int.h:200
stats_op_t stats_op
Definition: pcap-int.h:320
activate_op_t activate_op
Definition: pcap-int.h:311
setnonblock_op_t setnonblock_op
Definition: pcap-int.h:319
setfilter_op_t setfilter_op
Definition: pcap-int.h:315
sig_atomic_t break_loop
Definition: pcap-int.h:225
pcap_handler oneshot_callback
Definition: pcap-int.h:326
u_int bufsize
Definition: pcap-int.h:220
void * priv
Definition: pcap-int.h:227
getnonblock_op_t getnonblock_op
Definition: pcap-int.h:318
int offset
Definition: pcap-int.h:250
read_op_t read_op
Definition: pcap-int.h:204
void * buffer
Definition: pcap-int.h:221
int snapshot
Definition: pcap-int.h:247
cleanup_op_t cleanup_op
Definition: pcap-int.h:346
setdirection_op_t setdirection_op
Definition: pcap-int.h:316
struct bpf_program fcode
Definition: pcap-int.h:293
set_datalink_op_t set_datalink_op
Definition: pcap-int.h:317
char errbuf[256+1]
Definition: pcap-int.h:295
int linktype
Definition: pcap-int.h:248
int selectable_fd
Definition: pcap-int.h:274
struct pcap_opt opt
Definition: pcap-int.h:254