"Fossies" - the Fresh Open Source Software Archive 
Member "memcached-1.6.15/memcached.h" (30 Mar 2022, 38733 Bytes) of package /linux/www/memcached-1.6.15.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can view or download the uninterpreted source code file here.
For more information about "memcached.h" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
1.6.14_vs_1.6.15.
1 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2
3 /** \file
4 * The main memcached header holding commonly used data
5 * structures and function prototypes.
6 */
7
8 #ifdef HAVE_CONFIG_H
9 #include "config.h"
10 #endif
11
12 #include <sys/types.h>
13 #include <sys/socket.h>
14 #include <sys/time.h>
15 #include <netinet/in.h>
16 #include <event.h>
17 #include <netdb.h>
18 #include <pthread.h>
19 #include <unistd.h>
20 #include <assert.h>
21 #include <grp.h>
22 #include <signal.h>
/* need this to get IOV_MAX on some platforms. */
#ifndef __need_IOV_MAX
#define __need_IOV_MAX
#endif
#include <limits.h>
/* FreeBSD 4.x doesn't have IOV_MAX exposed. */
#ifndef IOV_MAX
#if defined(__FreeBSD__) || defined(__APPLE__) || defined(__GNU__)
# define IOV_MAX 1024
/* GNU/Hurd don't set MAXPATHLEN
 * http://www.gnu.org/software/hurd/hurd/porting/guidelines.html#PATH_MAX_tt_MAX_PATH_tt_MAXPATHL */
/* NOTE(review): this fallback only triggers when IOV_MAX is also undefined —
 * presumably intentional (matches upstream nesting), but verify on Hurd. */
#ifndef MAXPATHLEN
#define MAXPATHLEN 4096
#endif
#endif
#endif
39
40 #include "itoa_ljust.h"
41 #include "protocol_binary.h"
42 #include "cache.h"
43 #include "logger.h"
44
45 #ifdef EXTSTORE
46 #include "crc32c.h"
47 #endif
48
49 #include "sasl_defs.h"
50 #ifdef TLS
51 #include <openssl/ssl.h>
52 #endif
53
/* for NAPI pinning feature */
#ifndef SO_INCOMING_NAPI_ID
#define SO_INCOMING_NAPI_ID 56
#endif

/** Maximum length of a key. */
#define KEY_MAX_LENGTH 250

/** Maximum length of a uri encoded key (each byte may expand to "%XX"). */
#define KEY_MAX_URI_ENCODED_LENGTH (KEY_MAX_LENGTH * 3 + 1)

/** Size of an incr buf. */
#define INCR_MAX_STORAGE_LEN 24

#define WRITE_BUFFER_SIZE 1024
#define READ_BUFFER_SIZE 16384
#define READ_BUFFER_CACHED 0
#define UDP_READ_BUFFER_SIZE 65536
#define UDP_MAX_PAYLOAD_SIZE 1400
#define UDP_HEADER_SIZE 8
#define UDP_DATA_SIZE 1392 // UDP_MAX_PAYLOAD_SIZE - UDP_HEADER_SIZE
#define MAX_SENDBUF_SIZE (256 * 1024 * 1024)
76
/* Binary protocol stuff */
#define BIN_MAX_EXTLEN 20 // length of the _incr command is currently the longest.

/* Initial power multiplier for the hash table */
#define HASHPOWER_DEFAULT 16
#define HASHPOWER_MAX 32

/*
 * We only reposition items in the LRU queue if they haven't been repositioned
 * in this many seconds. That saves us from churning on frequently-accessed
 * items.
 */
#define ITEM_UPDATE_INTERVAL 60

/*
 * Valid range of the maximum size of an item, in bytes.
 */
#define ITEM_SIZE_MAX_LOWER_LIMIT 1024
/* Parenthesized so the macro expands safely inside larger expressions
 * (the unparenthesized form broke uses like `x % ITEM_SIZE_MAX_UPPER_LIMIT`). */
#define ITEM_SIZE_MAX_UPPER_LIMIT (1024 * 1024 * 1024)
96
97
/* unistd.h is here */
#if HAVE_UNISTD_H
# include <unistd.h>
#endif

/* Slab sizing definitions. */
#define POWER_SMALLEST 1
#define POWER_LARGEST 256 /* actual cap is 255 */
#define SLAB_GLOBAL_PAGE_POOL 0 /* magic slab class for storing pages for reassignment */
#define CHUNK_ALIGN_BYTES 8
/* slab class max is a 6-bit number, -1. */
#define MAX_NUMBER_OF_SLAB_CLASSES (63 + 1)

/** How long an object can reasonably be assumed to be locked before
    harvesting it on a low memory condition. Default: disabled. */
#define TAIL_REPAIR_TIME_DEFAULT 0
114
/* warning: don't use these macros with a function, as it evals its arg twice */
#define ITEM_get_cas(i) (((i)->it_flags & ITEM_CAS) ? \
        (i)->data->cas : (uint64_t)0)

#define ITEM_set_cas(i,v) { \
    if ((i)->it_flags & ITEM_CAS) { \
        (i)->data->cas = v; \
    } \
}

/* The item layout after the fixed header is:
 * [optional 8-byte CAS][key + NUL][optional 4-byte client flags][data].
 * These accessors compute offsets into that variable-length tail. */
#define ITEM_key(item) (((char*)&((item)->data)) \
         + (((item)->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0))

#define ITEM_suffix(item) ((char*) &((item)->data) + (item)->nkey + 1 \
         + (((item)->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0))

#define ITEM_data(item) ((char*) &((item)->data) + (item)->nkey + 1 \
         + (((item)->it_flags & ITEM_CFLAGS) ? sizeof(uint32_t) : 0) \
         + (((item)->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0))

#define ITEM_ntotal(item) (sizeof(struct _stritem) + (item)->nkey + 1 \
         + (item)->nbytes \
         + (((item)->it_flags & ITEM_CFLAGS) ? sizeof(uint32_t) : 0) \
         + (((item)->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0))

/* Low 6 bits of slabs_clsid hold the slab class; top 2 bits select the LRU. */
#define ITEM_clsid(item) ((item)->slabs_clsid & ~(3<<6))
#define ITEM_lruid(item) ((item)->slabs_clsid & (3<<6))

#define STAT_KEY_LEN 128
#define STAT_VAL_LEN 128
145
/** Append a simple stat with a stat name, value format and value */
#define APPEND_STAT(name, fmt, val) \
    append_stat(name, add_stats, c, fmt, val);

/** Append an indexed stat with a stat name (with format), value format
    and value.
    NOTE(review): multi-statement macro without do{}while(0) — callers rely
    on the trailing semicolons inside the expansion, so it is left as-is;
    do not use inside an unbraced if/else. */
#define APPEND_NUM_FMT_STAT(name_fmt, num, name, fmt, val) \
    klen = snprintf(key_str, STAT_KEY_LEN, name_fmt, num, name); \
    vlen = snprintf(val_str, STAT_VAL_LEN, fmt, val); \
    add_stats(key_str, klen, val_str, vlen, c);

/** Common APPEND_NUM_FMT_STAT format. */
#define APPEND_NUM_STAT(num, name, fmt, val) \
    APPEND_NUM_FMT_STAT("%d:%s", num, name, fmt, val)

/** Item client flag conversion: read the 4-byte client flags if present,
 *  otherwise yield 0. */
#define FLAGS_CONV(it, flag) { \
    if ((it)->it_flags & ITEM_CFLAGS) { \
        flag = *((uint32_t *)ITEM_suffix((it))); \
    } else { \
        flag = 0; \
    } \
}

#define FLAGS_SIZE(item) (((item)->it_flags & ITEM_CFLAGS) ? sizeof(uint32_t) : 0)
171
/**
 * Callback for any function producing stats.
 *
 * @param key the stat's key
 * @param klen length of the key
 * @param val the stat's value in an ascii form (e.g. text form of a number)
 * @param vlen length of the value
 * @param cookie magic callback cookie
 */
typedef void (*ADD_STAT)(const char *key, const uint16_t klen,
                         const char *val, const uint32_t vlen,
                         const void *cookie);
184
/*
 * NOTE: If you modify this table you _MUST_ update the function state_text
 */
/**
 * Possible states of a connection.
 */
enum conn_states {
    conn_listening,  /**< the socket which listens for connections */
    conn_new_cmd,    /**< Prepare connection for next command */
    conn_waiting,    /**< waiting for a readable socket */
    conn_read,       /**< reading in a command line */
    conn_parse_cmd,  /**< try to parse a command from the input buffer */
    conn_write,      /**< writing out a simple response */
    conn_nread,      /**< reading in a fixed number of bytes */
    conn_swallow,    /**< swallowing unnecessary bytes w/o storing */
    conn_closing,    /**< closing this connection */
    conn_mwrite,     /**< writing out many items sequentially */
    conn_closed,     /**< connection is closed */
    conn_watch,      /**< held by the logger thread as a watcher */
    conn_io_queue,   /**< wait on async. process to get response object */
    conn_max_state   /**< Max state value (used for assertion) */
};
207
/** Sub-states used while parsing binary-protocol requests. */
enum bin_substates {
    bin_no_state,
    bin_reading_set_header,
    bin_reading_cas_header,
    bin_read_set_value,
    bin_reading_get_key,
    bin_reading_stat,
    bin_reading_del_header,
    bin_reading_incr_header,
    bin_read_flush_exptime,
    bin_reading_sasl_auth,
    bin_reading_sasl_auth_data,
    bin_reading_touch_key,
};
222
enum protocol {
    ascii_prot = 3, /* arbitrary value. */
    binary_prot,
    negotiating_prot, /* Discovering the protocol */
#ifdef PROXY
    proxy_prot,
#endif
};

enum network_transport {
    local_transport, /* Unix sockets*/
    tcp_transport,
    udp_transport
};
237
enum pause_thread_types {
    PAUSE_WORKER_THREADS = 0,
    PAUSE_ALL_THREADS,
    RESUME_ALL_THREADS,
    RESUME_WORKER_THREADS
};

enum stop_reasons {
    NOT_STOP,
    GRACE_STOP,
    EXIT_NORMALLY
};

enum close_reasons {
    ERROR_CLOSE,
    NORMAL_CLOSE,
    IDLE_TIMEOUT_CLOSE,
    SHUTDOWN_CLOSE,
};
257
/* Arguments parenthesized so the macros expand safely in any expression. */
#define IS_TCP(x) ((x) == tcp_transport)
#define IS_UDP(x) ((x) == udp_transport)

#define NREAD_ADD 1
#define NREAD_SET 2
#define NREAD_REPLACE 3
#define NREAD_APPEND 4
#define NREAD_PREPEND 5
#define NREAD_CAS 6

enum store_item_type {
    NOT_STORED=0, STORED, EXISTS, NOT_FOUND, TOO_LARGE, NO_MEMORY
};

enum delta_result_type {
    OK, NON_NUMERIC, EOM, DELTA_ITEM_NOT_FOUND, DELTA_ITEM_CAS_MISMATCH
};
275
/** Time relative to server start. Smaller than time_t on 64-bit systems. */
// TODO: Move to sub-header. needed in logger.h
//typedef unsigned int rel_time_t;

/** Use X macros to avoid iterating over the stats fields during reset and
 * aggregation. No longer have to add new stats in 3+ places.
 */

#define SLAB_STATS_FIELDS \
    X(set_cmds) \
    X(get_hits) \
    X(touch_hits) \
    X(delete_hits) \
    X(cas_hits) \
    X(cas_badval) \
    X(incr_hits) \
    X(decr_hits)

/** Stats stored per slab (and per thread). */
struct slab_stats {
#define X(name) uint64_t    name;
    SLAB_STATS_FIELDS
#undef X
};
300
#define THREAD_STATS_FIELDS \
    X(get_cmds) \
    X(get_misses) \
    X(get_expired) \
    X(get_flushed) \
    X(touch_cmds) \
    X(touch_misses) \
    X(delete_misses) \
    X(incr_misses) \
    X(decr_misses) \
    X(cas_misses) \
    X(meta_cmds) \
    X(bytes_read) \
    X(bytes_written) \
    X(flush_cmds) \
    X(conn_yields) /* # of yields for connections (-R option)*/ \
    X(auth_cmds) \
    X(auth_errors) \
    X(idle_kicks) /* idle connections killed */ \
    X(response_obj_oom) \
    X(response_obj_count) \
    X(response_obj_bytes) \
    X(read_buf_oom) \
    X(store_too_large) \
    X(store_no_memory)

#ifdef EXTSTORE
#define EXTSTORE_THREAD_STATS_FIELDS \
    X(get_extstore) \
    X(get_aborted_extstore) \
    X(get_oom_extstore) \
    X(recache_from_extstore) \
    X(miss_from_extstore) \
    X(badcrc_from_extstore)
#endif

#ifdef PROXY
#define PROXY_THREAD_STATS_FIELDS \
    X(proxy_conn_requests) \
    X(proxy_conn_errors) \
    X(proxy_conn_oom) \
    X(proxy_req_active)
#endif
344
345 /**
346 * Stats stored per-thread.
347 */
348 struct thread_stats {
349 pthread_mutex_t mutex;
350 #define X(name) uint64_t name;
351 THREAD_STATS_FIELDS
352 #ifdef EXTSTORE
353 EXTSTORE_THREAD_STATS_FIELDS
354 #endif
355 #ifdef PROXY
356 PROXY_THREAD_STATS_FIELDS
357 #endif
358 #undef X
359 struct slab_stats slab_stats[MAX_NUMBER_OF_SLAB_CLASSES];
360 uint64_t lru_hits[POWER_LARGEST];
361 uint64_t read_buf_count;
362 uint64_t read_buf_bytes;
363 uint64_t read_buf_bytes_free;
364 };
365
/**
 * Global stats. Only resettable stats should go into this structure.
 */
struct stats {
    uint64_t      total_items;
    uint64_t      total_conns;
    uint64_t      rejected_conns;
    uint64_t      malloc_fails;
    uint64_t      listen_disabled_num;
    uint64_t      slabs_moved;       /* times slabs were moved around */
    uint64_t      slab_reassign_rescues; /* items rescued during slab move */
    uint64_t      slab_reassign_evictions_nomem; /* valid items lost during slab move */
    uint64_t      slab_reassign_inline_reclaim; /* valid items lost during slab move */
    uint64_t      slab_reassign_chunk_rescues; /* chunked-item chunks recovered */
    uint64_t      slab_reassign_busy_items; /* valid temporarily unmovable */
    uint64_t      slab_reassign_busy_deletes; /* refcounted items killed */
    uint64_t      lru_crawler_starts; /* Number of item crawlers kicked off */
    uint64_t      lru_maintainer_juggles; /* number of LRU bg pokes */
    uint64_t      time_in_listen_disabled_us;  /* elapsed time in microseconds while server unable to process new connections */
    uint64_t      log_worker_dropped; /* logs dropped by worker threads */
    uint64_t      log_worker_written; /* logs written by worker threads */
    uint64_t      log_watcher_skipped; /* logs watchers missed */
    uint64_t      log_watcher_sent; /* logs sent to watcher buffers */
#ifdef EXTSTORE
    uint64_t      extstore_compact_lost; /* items lost because they were locked */
    uint64_t      extstore_compact_rescues; /* items re-written during compaction */
    uint64_t      extstore_compact_skipped; /* unhit items skipped during compaction */
#endif
#ifdef TLS
    uint64_t      ssl_handshake_errors; /* TLS failures at accept/handshake time */
    uint64_t      ssl_new_sessions; /* successfully negotiated new (non-reused) TLS sessions */
#endif
    struct timeval maxconns_entered;  /* last time maxconns entered */
    uint64_t      unexpected_napi_ids;  /* see doc/napi_ids.txt */
    uint64_t      round_robin_fallback; /* see doc/napi_ids.txt */
};
402
/**
 * Global "state" stats. Reflects state that shouldn't be wiped ever.
 * Ordered for some cache line locality for commonly updated counters.
 */
struct stats_state {
    uint64_t      curr_items;
    uint64_t      curr_bytes;
    uint64_t      curr_conns;
    uint64_t      hash_bytes;       /* size used for hash tables */
    unsigned int  conn_structs;
    unsigned int  reserved_fds;
    unsigned int  hash_power_level; /* Better hope it's not over 9000 */
    unsigned int  log_watchers; /* number of currently active watchers */
    bool          hash_is_expanding; /* If the hash table is being expanded */
    bool          accepting_conns;  /* whether we are currently accepting */
    bool          slab_reassign_running; /* slab reassign in progress */
    bool          lru_crawler_running; /* crawl in progress */
};
421
422 #define MAX_VERBOSITY_LEVEL 2
423
424 /* When adding a setting, be sure to update process_stat_settings */
425 /**
426 * Globally accessible settings as derived from the commandline.
427 */
428 struct settings {
429 size_t maxbytes;
430 int maxconns;
431 int port;
432 int udpport;
433 char *inter;
434 int verbose;
435 rel_time_t oldest_live; /* ignore existing items older than this */
436 uint64_t oldest_cas; /* ignore existing items with CAS values lower than this */
437 int evict_to_free;
438 char *socketpath; /* path to unix socket if using local socket */
439 char *auth_file; /* path to user authentication file */
440 int access; /* access mask (a la chmod) for unix domain socket */
441 double factor; /* chunk size growth factor */
442 int chunk_size;
443 int num_threads; /* number of worker (without dispatcher) libevent threads to run */
444 int num_threads_per_udp; /* number of worker threads serving each udp socket */
445 char prefix_delimiter; /* character that marks a key prefix (for stats) */
446 int detail_enabled; /* nonzero if we're collecting detailed stats */
447 int reqs_per_event; /* Maximum number of io to process on each
448 io-event. */
449 bool use_cas;
450 enum protocol binding_protocol;
451 int backlog;
452 int item_size_max; /* Maximum item size */
453 int slab_chunk_size_max; /* Upper end for chunks within slab pages. */
454 int slab_page_size; /* Slab's page units. */
455 volatile sig_atomic_t sig_hup; /* a HUP signal was received but not yet handled */
456 bool sasl; /* SASL on/off */
457 bool maxconns_fast; /* Whether or not to early close connections */
458 bool lru_crawler; /* Whether or not to enable the autocrawler thread */
459 bool lru_maintainer_thread; /* LRU maintainer background thread */
460 bool lru_segmented; /* Use split or flat LRU's */
461 bool slab_reassign; /* Whether or not slab reassignment is allowed */
462 int slab_automove; /* Whether or not to automatically move slabs */
463 double slab_automove_ratio; /* youngest must be within pct of oldest */
464 unsigned int slab_automove_window; /* window mover for algorithm */
465 int hashpower_init; /* Starting hash power level */
466 bool shutdown_command; /* allow shutdown command */
467 int tail_repair_time; /* LRU tail refcount leak repair time */
468 bool flush_enabled; /* flush_all enabled */
469 bool dump_enabled; /* whether cachedump/metadump commands work */
470 char *hash_algorithm; /* Hash algorithm in use */
471 int lru_crawler_sleep; /* Microsecond sleep between items */
472 uint32_t lru_crawler_tocrawl; /* Number of items to crawl per run */
473 int hot_lru_pct; /* percentage of slab space for HOT_LRU */
474 int warm_lru_pct; /* percentage of slab space for WARM_LRU */
475 double hot_max_factor; /* HOT tail age relative to COLD tail */
476 double warm_max_factor; /* WARM tail age relative to COLD tail */
477 int crawls_persleep; /* Number of LRU crawls to run before sleeping */
478 bool temp_lru; /* TTL < temporary_ttl uses TEMP_LRU */
479 uint32_t temporary_ttl; /* temporary LRU threshold */
480 int idle_timeout; /* Number of seconds to let connections idle */
481 unsigned int logger_watcher_buf_size; /* size of logger's per-watcher buffer */
482 unsigned int logger_buf_size; /* size of per-thread logger buffer */
483 unsigned int read_buf_mem_limit; /* total megabytes allowable for net buffers */
484 bool drop_privileges; /* Whether or not to drop unnecessary process privileges */
485 bool watch_enabled; /* allows watch commands to be dropped */
486 bool relaxed_privileges; /* Relax process restrictions when running testapp */
487 bool meta_response_old; /* use "OK" instead of "HD". for response code TEMPORARY! */
488 #ifdef EXTSTORE
489 unsigned int ext_io_threadcount; /* number of IO threads to run. */
490 unsigned int ext_page_size; /* size in megabytes of storage pages. */
491 unsigned int ext_item_size; /* minimum size of items to store externally */
492 unsigned int ext_item_age; /* max age of tail item before storing ext. */
493 unsigned int ext_low_ttl; /* remaining TTL below this uses own pages */
494 unsigned int ext_recache_rate; /* counter++ % recache_rate == 0 > recache */
495 unsigned int ext_wbuf_size; /* read only note for the engine */
496 unsigned int ext_compact_under; /* when fewer than this many pages, compact */
497 unsigned int ext_drop_under; /* when fewer than this many pages, drop COLD items */
498 unsigned int ext_max_sleep; /* maximum sleep time for extstore bg threads, in us */
499 double ext_max_frag; /* ideal maximum page fragmentation */
500 double slab_automove_freeratio; /* % of memory to hold free as buffer */
501 bool ext_drop_unread; /* skip unread items during compaction */
502 /* per-slab-class free chunk limit */
503 unsigned int ext_free_memchunks[MAX_NUMBER_OF_SLAB_CLASSES];
504 #endif
505 #ifdef TLS
506 bool ssl_enabled; /* indicates whether SSL is enabled */
507 SSL_CTX *ssl_ctx; /* holds the SSL server context which has the server certificate */
508 char *ssl_chain_cert; /* path to the server SSL chain certificate */
509 char *ssl_key; /* path to the server key */
510 int ssl_verify_mode; /* client certificate verify mode */
511 int ssl_keyformat; /* key format , default is PEM */
512 char *ssl_ciphers; /* list of SSL ciphers */
513 char *ssl_ca_cert; /* certificate with CAs. */
514 rel_time_t ssl_last_cert_refresh_time; /* time of the last server certificate refresh */
515 unsigned int ssl_wbuf_size; /* size of the write buffer used by ssl_sendmsg method */
516 bool ssl_session_cache; /* enable SSL server session caching */
517 int ssl_min_version; /* minimum SSL protocol version to accept */
518 #endif
519 int num_napi_ids; /* maximum number of NAPI IDs */
520 char *memory_file; /* warm restart memory file path */
521 #ifdef PROXY
522 bool proxy_enabled;
523 bool proxy_uring; /* if the proxy should use io_uring */
524 char *proxy_startfile; /* lua file to run when workers start */
525 void *proxy_ctx; /* proxy's state context */
526 #endif
527 };
528
extern struct stats stats;
extern struct stats_state stats_state;
extern time_t process_started;
extern struct settings settings;

/* it_flags bit values (stored in item.it_flags). */
#define ITEM_LINKED 1
#define ITEM_CAS 2

/* temp */
#define ITEM_SLABBED 4

/* Item was fetched at least once in its lifetime */
#define ITEM_FETCHED 8
/* Appended on fetch, removed on LRU shuffling */
#define ITEM_ACTIVE 16
/* If an item's storage are chained chunks. */
#define ITEM_CHUNKED 32
#define ITEM_CHUNK 64
/* ITEM_data bulk is external to item */
#define ITEM_HDR 128
/* additional 4 bytes for item client flags */
#define ITEM_CFLAGS 256
/* item has sent out a token already */
#define ITEM_TOKEN_SENT 512
/* reserved, in case tokens should be a 2-bit count in future */
#define ITEM_TOKEN_RESERVED 1024
/* if item has been marked as a stale value */
#define ITEM_STALE 2048
/* if item key was sent in binary */
#define ITEM_KEY_BINARY 4096
559
560 /**
561 * Structure for storing items within memcached.
562 */
563 typedef struct _stritem {
564 /* Protected by LRU locks */
565 struct _stritem *next;
566 struct _stritem *prev;
567 /* Rest are protected by an item lock */
568 struct _stritem *h_next; /* hash chain next */
569 rel_time_t time; /* least recent access */
570 rel_time_t exptime; /* expire time */
571 int nbytes; /* size of data */
572 unsigned short refcount;
573 uint16_t it_flags; /* ITEM_* above */
574 uint8_t slabs_clsid;/* which slab class we're in */
575 uint8_t nkey; /* key length, w/terminating null and padding */
576 /* this odd type prevents type-punning issues when we do
577 * the little shuffle to save space when not using CAS. */
578 union {
579 uint64_t cas;
580 char end;
581 } data[];
582 /* if it_flags & ITEM_CAS we have 8 bytes CAS */
583 /* then null-terminated key */
584 /* then " flags length\r\n" (no terminating null) */
585 /* then data with terminating \r\n (no terminating null; it's binary!) */
586 } item;
587
588 // TODO: If we eventually want user loaded modules, we can't use an enum :(
589 enum crawler_run_type {
590 CRAWLER_AUTOEXPIRE=0, CRAWLER_EXPIRED, CRAWLER_METADUMP
591 };
592
593 typedef struct {
594 struct _stritem *next;
595 struct _stritem *prev;
596 struct _stritem *h_next; /* hash chain next */
597 rel_time_t time; /* least recent access */
598 rel_time_t exptime; /* expire time */
599 int nbytes; /* size of data */
600 unsigned short refcount;
601 uint16_t it_flags; /* ITEM_* above */
602 uint8_t slabs_clsid;/* which slab class we're in */
603 uint8_t nkey; /* key length, w/terminating null and padding */
604 uint32_t remaining; /* Max keys to crawl per slab per invocation */
605 uint64_t reclaimed; /* items reclaimed during this crawl. */
606 uint64_t unfetched; /* items reclaimed unfetched during this crawl. */
607 uint64_t checked; /* items examined during this crawl. */
608 } crawler;
609
/* Header when an item is actually a chunk of another item. */
typedef struct _strchunk {
    struct _strchunk *next;     /* points within its own chain. */
    struct _strchunk *prev;     /* can potentially point to the head. */
    struct _stritem  *head;     /* always points to the owner chunk */
    int              size;      /* available chunk space in bytes */
    int              used;      /* chunk space used */
    int              nbytes;    /* used. */
    unsigned short   refcount;  /* used? */
    uint16_t         it_flags;  /* ITEM_* above. */
    uint8_t          slabs_clsid; /* Same as above. */
    uint8_t          orig_clsid; /* For obj hdr chunks slabs_clsid is fake. */
    char data[];
} item_chunk;
624
#ifdef NEED_ALIGN
/* Return a pointer to the item's first chunk, rounded up past the
 * key/flags/CAS prefix to the next 8-byte boundary. */
static inline char *ITEM_schunk(item *it) {
    int offset = it->nkey + 1
        + ((it->it_flags & ITEM_CFLAGS) ? sizeof(uint32_t) : 0)
        + ((it->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0);
    int remain = offset % 8;
    if (remain != 0) {
        offset += 8 - remain;
    }
    return ((char *) &(it->data)) + offset;
}
#else
/* No alignment requirement: chunk data starts right after the prefix. */
#define ITEM_schunk(item) ((char*) &((item)->data) + (item)->nkey + 1 \
         + (((item)->it_flags & ITEM_CFLAGS) ? sizeof(uint32_t) : 0) \
         + (((item)->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0))
#endif
641
#ifdef EXTSTORE
typedef struct {
    unsigned int page_version; /* from IO header */
    unsigned int offset; /* from IO header */
    unsigned short page_id; /* from IO header */
} item_hdr;
#endif

#define IO_QUEUE_COUNT 3

#define IO_QUEUE_NONE 0
#define IO_QUEUE_EXTSTORE 1
#define IO_QUEUE_PROXY 2
655
typedef struct _io_pending_t io_pending_t;
typedef struct io_queue_s io_queue_t;
typedef void (*io_queue_stack_cb)(io_queue_t *q);
typedef void (*io_queue_cb)(io_pending_t *pending);
// this structure's ownership gets passed between threads:
// - owned normally by the worker thread.
// - multiple queues can be submitted at the same time.
// - each queue can be sent to different background threads.
// - each submitted queue needs to know when to return to the worker.
// - the worker needs to know when all queues have returned so it can process.
//
// io_queue_t's count field is owned by worker until submitted. Then owned by
// side thread until returned.
// conn->io_queues_submitted is always owned by the worker thread. it is
// incremented as the worker submits queues, and decremented as it gets pinged
// for returned threads.
//
// All of this is to avoid having to hit a mutex owned by the connection
// thread that gets pinged for each thread (or an equivalent atomic).
struct io_queue_s {
    void *ctx; // duplicated from io_queue_cb_t
    void *stack_ctx; // module-specific context to be batch-submitted
    int count; // ios to process before returning. only accessed by queue processor once submitted
    int type; // duplicated from io_queue_cb_t
};

typedef struct io_queue_cb_s {
    void *ctx; // untouched ptr for specific context
    io_queue_stack_cb submit_cb; // callback given a full stack of pending IO's at once.
    io_queue_stack_cb complete_cb;
    io_queue_cb return_cb; // called on worker thread.
    io_queue_cb finalize_cb; // called back on the worker thread.
    int type;
} io_queue_cb_t;
690
691 typedef struct _mc_resp_bundle mc_resp_bundle;
692 typedef struct {
693 pthread_t thread_id; /* unique ID of this thread */
694 struct event_base *base; /* libevent handle this thread uses */
695 struct event notify_event; /* listen event for notify pipe */
696 #ifdef HAVE_EVENTFD
697 int notify_event_fd; /* notify counter */
698 #else
699 int notify_receive_fd; /* receiving end of notify pipe */
700 int notify_send_fd; /* sending end of notify pipe */
701 #endif
702 struct thread_stats stats; /* Stats generated by this thread */
703 io_queue_cb_t io_queues[IO_QUEUE_COUNT];
704 struct conn_queue *ev_queue; /* Worker/conn event queue */
705 cache_t *rbuf_cache; /* static-sized read buffers */
706 mc_resp_bundle *open_bundle;
707 cache_t *io_cache; /* IO objects */
708 #ifdef EXTSTORE
709 void *storage; /* data object for storage system */
710 #endif
711 logger *l; /* logger buffer */
712 void *lru_bump_buf; /* async LRU bump buffer */
713 #ifdef TLS
714 char *ssl_wbuf;
715 #endif
716 int napi_id; /* napi id associated with this thread */
717 #ifdef PROXY
718 void *L;
719 void *proxy_hooks;
720 void *proxy_user_stats;
721 void *proxy_int_stats;
722 // TODO: add ctx object so we can attach to queue.
723 #endif
724 } LIBEVENT_THREAD;
725
726 /**
727 * Response objects
728 */
729 #define MC_RESP_IOVCOUNT 4
730 typedef struct _mc_resp {
731 mc_resp_bundle *bundle; // ptr back to bundle
732 struct _mc_resp *next; // choo choo.
733 int wbytes; // bytes to write out of wbuf: might be able to nuke this.
734 int tosend; // total bytes to send for this response
735 void *write_and_free; /** free this memory after finishing writing */
736 io_pending_t *io_pending; /* pending IO descriptor for this response */
737
738 item *item; /* item associated with this response object, with reference held */
739 struct iovec iov[MC_RESP_IOVCOUNT]; /* built-in iovecs to simplify network code */
740 int chunked_total; /* total amount of chunked item data to send. */
741 uint8_t iovcnt;
742 uint8_t chunked_data_iov; /* this iov is a pointer to chunked data header */
743
744 /* instruct transmit to skip this response object. used by storage engines
745 * to asynchronously kill an object that was queued to write
746 */
747 bool skip;
748 bool free; // double free detection.
749 // UDP bits. Copied in from the client.
750 uint16_t request_id; /* Incoming UDP request ID, if this is a UDP "connection" */
751 uint16_t udp_sequence; /* packet counter when transmitting result */
752 uint16_t udp_total; /* total number of packets in sequence */
753 struct sockaddr_in6 request_addr; /* udp: Who sent this request */
754 socklen_t request_addr_size;
755
756 char wbuf[WRITE_BUFFER_SIZE];
757 } mc_resp;
758
759 #define MAX_RESP_PER_BUNDLE ((READ_BUFFER_SIZE - sizeof(mc_resp_bundle)) / sizeof(mc_resp))
760 struct _mc_resp_bundle {
761 uint8_t refcount;
762 uint8_t next_check; // next object to check on assignment.
763 struct _mc_resp_bundle *next;
764 struct _mc_resp_bundle *prev;
765 mc_resp r[];
766 };
767
768 typedef struct conn conn;
769
770 struct _io_pending_t {
771 int io_queue_type; // matches one of IO_QUEUE_*
772 LIBEVENT_THREAD *thread;
773 conn *c;
774 mc_resp *resp; // associated response object
775 char data[120];
776 };
777
778 /**
779 * The structure representing a connection into memcached.
780 */
781 struct conn {
782 sasl_conn_t *sasl_conn;
783 int sfd;
784 bool sasl_started;
785 bool authenticated;
786 bool set_stale;
787 bool mset_res; /** uses mset format for return code */
788 bool close_after_write; /** flush write then move to close connection */
789 bool rbuf_malloced; /** read buffer was malloc'ed for ascii mget, needs free() */
790 bool item_malloced; /** item for conn_nread state is a temporary malloc */
791 #ifdef TLS
792 SSL *ssl;
793 char *ssl_wbuf;
794 bool ssl_enabled;
795 #endif
796 enum conn_states state;
797 enum bin_substates substate;
798 rel_time_t last_cmd_time;
799 struct event event;
800 short ev_flags;
801 short which; /** which events were just triggered */
802
803 char *rbuf; /** buffer to read commands into */
804 char *rcurr; /** but if we parsed some already, this is where we stopped */
805 int rsize; /** total allocated size of rbuf */
806 int rbytes; /** how much data, starting from rcur, do we have unparsed */
807
808 mc_resp *resp; // tail response.
809 mc_resp *resp_head; // first response in current stack.
810 char *ritem; /** when we read in an item's value, it goes here */
811 int rlbytes;
812
813 /**
814 * item is used to hold an item structure created after reading the command
815 * line of set/add/replace commands, but before we finished reading the actual
816 * data. The data is read into ITEM_data(item) to avoid extra copying.
817 */
818
819 void *item; /* for commands set/add/replace */
820
821 /* data for the swallow state */
822 int sbytes; /* how many bytes to swallow */
823
824 int io_queues_submitted; /* see notes on io_queue_t */
825 io_queue_t io_queues[IO_QUEUE_COUNT]; /* set of deferred IO queues. */
826 #ifdef PROXY
827 unsigned int proxy_coro_ref; /* lua reference for active coroutine */
828 #endif
829 #ifdef EXTSTORE
830 unsigned int recache_counter;
831 #endif
832 enum protocol protocol; /* which protocol this connection speaks */
833 enum network_transport transport; /* what transport is used by this connection */
834 enum close_reasons close_reason; /* reason for transition into conn_closing */
835
836 /* data for UDP clients */
837 int request_id; /* Incoming UDP request ID, if this is a UDP "connection" */
838 struct sockaddr_in6 request_addr; /* udp: Who sent the most recent request */
839 socklen_t request_addr_size;
840
841 bool noreply; /* True if the reply should not be sent. */
842 /* current stats command */
843 struct {
844 char *buffer;
845 size_t size;
846 size_t offset;
847 } stats;
848
849 /* Binary protocol stuff */
850 /* This is where the binary header goes */
851 protocol_binary_request_header binary_header;
852 uint64_t cas; /* the cas to return */
853 short cmd; /* current command being processed */
854 int opaque;
855 int keylen;
856 conn *next; /* Used for generating a list of conn structures */
857 LIBEVENT_THREAD *thread; /* Pointer to the thread object serving this connection */
858 int (*try_read_command)(conn *c); /* pointer for top level input parser */
859 ssize_t (*read)(conn *c, void *buf, size_t count);
860 ssize_t (*sendmsg)(conn *c, struct msghdr *msg, int flags);
861 ssize_t (*write)(conn *c, void *buf, size_t count);
862 };
863
/* array of conn structures, indexed by file descriptor */
extern conn **conns;

/* current time of day (updated periodically) */
extern volatile rel_time_t current_time;

#ifdef MEMCACHED_DEBUG
/* Debug-build hooks: pause the internal clock / skew reported time.
 * NOTE(review): semantics inferred from the names -- confirm against the
 * clock handler before relying on them. */
extern volatile bool is_paused;
extern volatile int64_t delta;
#endif

/* Nonzero while a slab page rebalance has been requested or is in progress.
 * TODO: Move to slabs.h? */
extern volatile int slab_rebalance_signal;

/* State shared with the slab rebalancer while one slab page is moved from
 * one slab class to another. The uint32_t fields are event counters.
 * NOTE(review): per-field meanings inferred from names -- verify against
 * slabs.c before depending on them. */
struct slab_rebalance {
    void *slab_start;      /* start of the page being vacated */
    void *slab_end;        /* end boundary of that page */
    void *slab_pos;        /* current sweep position within the page */
    int s_clsid;           /* source slab class id */
    int d_clsid;           /* destination slab class id */
    uint32_t busy_items;
    uint32_t rescues;
    uint32_t evictions_nomem;
    uint32_t inline_reclaim;
    uint32_t chunk_rescues;
    uint32_t busy_deletes;
    uint32_t busy_loops;
    uint8_t done;          /* completion flag for the current move */
    uint8_t *completed;
};

/* The global rebalance state object. */
extern struct slab_rebalance slab_rebal;
#ifdef EXTSTORE
/* Opaque handle to the external (flash) storage engine. */
extern void *ext_storage;
#endif
/*
 * Functions
 */

/* Enable or disable accepting new connections. */
void do_accept_new_conns(const bool do_accept);
/* Apply an incr/decr of `delta` to the item under `key`; hv is the key's
 * precomputed hash. NOTE(review): the "do_" prefix in this file pairs with
 * lock-taking wrappers below, suggesting the caller holds the item lock --
 * confirm in thread.c. */
enum delta_result_type do_add_delta(conn *c, const char *key,
                                    const size_t nkey, const bool incr,
                                    const int64_t delta, char *buf,
                                    uint64_t *cas, const uint32_t hv,
                                    item **it_ret);
/* Store an item; `comm` selects the storage command variant. */
enum store_item_type do_store_item(item *item, int comm, conn* c, const uint32_t hv);
/* Register an IO queue (type, context, callbacks) on a worker thread. */
void thread_io_queue_add(LIBEVENT_THREAD *t, int type, void *ctx, io_queue_stack_cb cb, io_queue_stack_cb com_cb, io_queue_cb ret_cb, io_queue_cb fin_cb);
/* Initialize a connection's deferred IO queues. */
void conn_io_queue_setup(conn *c);
/* Look up a connection's IO queue entry by type. */
io_queue_t *conn_io_queue_get(conn *c, int type);
/* Look up a thread's IO queue callback set by type. */
io_queue_cb_t *thread_io_queue_get(LIBEVENT_THREAD *t, int type);
/* Hand a completed pending IO back to its connection. */
void conn_io_queue_return(io_pending_t *io);
/* Allocate and initialize a conn object for an accepted socket. */
conn *conn_new(const int sfd, const enum conn_states init_state, const int event_flags, const int read_buffer_size,
               enum network_transport transport, struct event_base *base, void *ssl);

/* Re-register a connection with its worker thread's event loop. */
void conn_worker_readd(conn *c);
/* Detach from the terminal and run in the background. */
extern int daemonize(int nochdir, int noclose);

/* Thin wrappers over the pthread mutex calls. */
#define mutex_lock(x) pthread_mutex_lock(x)
#define mutex_unlock(x) pthread_mutex_unlock(x)
922
923 #include "stats_prefix.h"
924 #include "slabs.h"
925 #include "assoc.h"
926 #include "items.h"
927 #include "crawler.h"
928 #include "trace.h"
929 #include "hash.h"
930 #include "util.h"
931
/*
 * Functions such as the libevent-related calls that need to do cross-thread
 * communication in multithreaded mode (rather than actually doing the work
 * in the current thread) are called via "dispatch_" frontends, which are
 * also #define-d to directly call the underlying code in singlethreaded mode.
 */

/* Spawn and initialize the worker threads. */
void memcached_thread_init(int nthreads, void *arg);
/* Hand a connection back to its worker thread's event loop. */
void redispatch_conn(conn *c);
/* Signal a connection whose idle/command deadline has expired.
 * NOTE(review): exact timeout semantics live in thread.c -- confirm. */
void timeout_conn(conn *c);
#ifdef PROXY
/* Notify a worker thread that the proxy configuration was reloaded. */
void proxy_reload_notify(LIBEVENT_THREAD *t);
#endif
/* Route a completed IO back to the thread that owns it. */
void return_io_pending(io_pending_t *io);
/* Dispatch a newly accepted socket to a worker thread. */
void dispatch_conn_new(int sfd, enum conn_states init_state, int event_flags, int read_buffer_size,
                       enum network_transport transport, void *ssl);
/* Close a connection from a thread other than its owner. */
void sidethread_conn_close(conn *c);
948
/* Lock wrappers for cache functions that are called from main loop. */

/* Locking counterpart of do_add_delta(). */
enum delta_result_type add_delta(conn *c, const char *key,
                                 const size_t nkey, bool incr,
                                 const int64_t delta, char *buf,
                                 uint64_t *cas);
void accept_new_conns(const bool do_accept);
/* Close a connection that has exceeded the idle timeout. */
void conn_close_idle(conn *c);
void conn_close_all(void);
/* Allocate a new item large enough for nbytes of value data. */
item *item_alloc(char *key, size_t nkey, int flags, rel_time_t exptime, int nbytes);
/* Values for the do_update argument of the item fetchers below: whether the
 * fetch should count as an access. NOTE(review): presumed to control LRU
 * bumping -- confirm in items.c. */
#define DO_UPDATE true
#define DONT_UPDATE false
/* Fetch an item by key; returns NULL if absent or expired. */
item *item_get(const char *key, const size_t nkey, conn *c, const bool do_update);
/* As item_get, but also returns the key's hash via *hv. */
item *item_get_locked(const char *key, const size_t nkey, conn *c, const bool do_update, uint32_t *hv);
/* Update an item's expiration time. */
item *item_touch(const char *key, const size_t nkey, uint32_t exptime, conn *c);
int item_link(item *it);
void item_remove(item *it);
int item_replace(item *it, item *new_it, const uint32_t hv);
void item_unlink(item *it);

/* Item hash-bucket locks, keyed by the item's hash value. */
void item_lock(uint32_t hv);
void *item_trylock(uint32_t hv);
void item_trylock_unlock(void *arg);
void item_unlock(uint32_t hv);
/* Pause or resume background threads; `type` selects which set. */
void pause_threads(enum pause_thread_types type);
void stop_threads(void);
int stop_conn_timeout_thread(void);
/* Bump/drop an item's reference count and yield the new value.
 * Plain (non-atomic) increment; callers are expected to serialize access
 * (NOTE(review): confirm the locking discipline in items.c/thread.c).
 * The argument is parenthesized so pointer expressions such as
 * refcount_incr(p + 1) expand correctly (CERT PRE01-C). */
#define refcount_incr(it) ++((it)->refcount)
#define refcount_decr(it) --((it)->refcount)
/* Global stats mutex (guards the process-wide `stats` structure). */
void STATS_LOCK(void);
void STATS_UNLOCK(void);
/* Lock the per-worker-thread stats mutex for the thread serving conn `c`.
 * The argument is parenthesized so any conn* expression works as the
 * argument (CERT PRE01-C); the original expanded `c->thread` unguarded. */
#define THR_STATS_LOCK(c) pthread_mutex_lock(&(c)->thread->stats.mutex)
#define THR_STATS_UNLOCK(c) pthread_mutex_unlock(&(c)->thread->stats.mutex)
/* Reset every worker thread's local statistics. */
void threadlocal_stats_reset(void);
/* Sum all per-thread statistics into *stats. */
void threadlocal_stats_aggregate(struct thread_stats *stats);
/* Fold per-slab-class stats from *stats into *out. */
void slab_stats_aggregate(struct thread_stats *stats, struct slab_stats *out);
/* Return the worker thread object with the given index. */
LIBEVENT_THREAD *get_worker_thread(int id);
985
/* Stat processing functions */

/* printf-style helper: format one stat value and emit it via add_stats. */
void append_stat(const char *name, ADD_STAT add_stats, conn *c,
                 const char *fmt, ...);

/* Locking counterpart of do_store_item() (see the wrapper pattern above). */
enum store_item_type store_item(item *item, int comm, conn *c);

/* Protocol related code */
/* Queue a NUL-terminated response line to be sent to the client. */
void out_string(conn *c, const char *str);
/* Largest relative expiration time: 30 days, in seconds. Larger values are
 * treated by realtime() as absolute unix timestamps. Parenthesized so the
 * product cannot be split by surrounding operators. */
#define REALTIME_MAXDELTA (60*60*24*30)
/* Negative exptimes can underflow and end up immortal. realtime() will
   immediately expire values that are greater than REALTIME_MAXDELTA, but less
   than process_started, so lets aim for that. */
/* Fully parenthesized single expression (CERT PRE02-C): the original
 * expanded into a bare ?: whose arms could be captured by neighboring
 * operators, e.g. `2 * EXPTIME_TO_POSITIVE_TIME(x)` mis-parsed. */
#define EXPTIME_TO_POSITIVE_TIME(exptime) ((exptime) < 0 ? \
        (REALTIME_MAXDELTA + 1) : (exptime))
/* Convert a client-supplied exptime into server-relative time. */
rel_time_t realtime(const time_t exptime);
/* item_get() variant used by the command paths; sets *overflow on failure.
 * NOTE(review): exact overflow/touch semantics live in memcached.c --
 * confirm before relying on them. */
item* limited_get(char *key, size_t nkey, conn *c, uint32_t exptime, bool should_touch, bool do_update, bool *overflow);
item* limited_get_locked(char *key, size_t nkey, conn *c, bool do_update, uint32_t *hv, bool *overflow);
// Read/Response object handlers.
/* Reset a response object for reuse. */
void resp_reset(mc_resp *resp);
/* Append a buffer to the response's iovec list. */
void resp_add_iov(mc_resp *resp, const void *buf, int len);
/* As resp_add_iov, for chunked item data. */
void resp_add_chunked_iov(mc_resp *resp, const void *buf, int len);
/* Begin a new response on the connection; NOTE(review): presumably returns
 * false on allocation failure -- confirm at call sites. */
bool resp_start(conn *c);
/* Complete a response; returns the next response in the stack. */
mc_resp* resp_finish(conn *c, mc_resp *resp);
/* True if the connection has responses outstanding. */
bool resp_has_stack(conn *c);
bool rbuf_switch_to_malloc(conn *c);
/* Release items and buffers referenced by a connection. */
void conn_release_items(conn *c);
/* Transition the connection's state machine to `state`. */
void conn_set_state(conn *c, enum conn_states state);
/* Report an out-of-memory condition to the client. */
void out_of_memory(conn *c, char *ascii_error);
/* Send an error string response. */
void out_errstring(conn *c, const char *str);
/* Queue `bytes` of buf for writing; ownership of buf transfers here. */
void write_and_free(conn *c, char *buf, int bytes);
/* Emit general server statistics via add_stats. */
void server_stats(ADD_STAT add_stats, conn *c);
/* ADD_STAT-compatible callback appending one key/value pair. */
void append_stats(const char *key, const uint16_t klen,
                  const char *val, const uint32_t vlen,
                  const void *cookie);
/** Return a datum for stats in binary protocol */
bool get_stats(const char *stat_type, int nkey, ADD_STAT add_stats, void *c);
void stats_reset(void);
void process_stat_settings(ADD_STAT add_stats, void *c);
void process_stats_conns(ADD_STAT add_stats, void *c);
1025
/* Privilege-dropping hooks. On platforms without support they compile away
 * to nothing, so call sites need no #ifdefs. */
#if HAVE_DROP_PRIVILEGES
extern void setup_privilege_violations_handler(void);
extern void drop_privileges(void);
#else
#define setup_privilege_violations_handler()
#define drop_privileges()
#endif

#if HAVE_DROP_WORKER_PRIVILEGES
extern void drop_worker_privileges(void);
#else
#define drop_worker_privileges()
#endif
1039
/* If supported, give compiler hints for branch prediction. */
/* On compilers lacking __builtin_expect (non-GCC, or GCC older than 2.96),
 * define it away so likely()/unlikely() reduce to the bare expression. */
#if !defined(__GNUC__) || (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
#define __builtin_expect(x, expected_value) (x)
#endif

#define likely(x) __builtin_expect((x),1)
#define unlikely(x) __builtin_expect((x),0)