"Fossies" - the Fresh Open Source Software Archive 
Member "eucalyptus-4.4.2/storage/blobstore.c" (4 Aug 2017, 217286 Bytes) of package /linux/misc/eucalyptus-4.4.2.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "blobstore.c" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
4.4.1_vs_4.4.2.
1 // -*- mode: C; c-basic-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
2 // vim: set softtabstop=4 shiftwidth=4 tabstop=4 expandtab:
3
4 /*************************************************************************
5 * Copyright 2009-2012 Eucalyptus Systems, Inc.
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; version 3 of the License.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see http://www.gnu.org/licenses/.
18 *
19 * Please contact Eucalyptus Systems, Inc., 6755 Hollister Ave., Goleta
20 * CA 93117, USA or visit http://www.eucalyptus.com/licenses/ if you need
21 * additional information or have any questions.
22 *
23 * This file may incorporate work covered under the following copyright
24 * and permission notice:
25 *
26 * Software License Agreement (BSD License)
27 *
28 * Copyright (c) 2008, Regents of the University of California
29 * All rights reserved.
30 *
31 * Redistribution and use of this software in source and binary forms,
32 * with or without modification, are permitted provided that the
33 * following conditions are met:
34 *
35 * Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 *
38 * Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer
40 * in the documentation and/or other materials provided with the
41 * distribution.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
44 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
45 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
46 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
47 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
48 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
49 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
50 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
51 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
53 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
54 * POSSIBILITY OF SUCH DAMAGE. USERS OF THIS SOFTWARE ACKNOWLEDGE
55 * THE POSSIBLE PRESENCE OF OTHER OPEN SOURCE LICENSED MATERIAL,
56 * COPYRIGHTED MATERIAL OR PATENTED MATERIAL IN THIS SOFTWARE,
57 * AND IF ANY SUCH MATERIAL IS DISCOVERED THE PARTY DISCOVERING
58 * IT MAY INFORM DR. RICH WOLSKI AT THE UNIVERSITY OF CALIFORNIA,
59 * SANTA BARBARA WHO WILL THEN ASCERTAIN THE MOST APPROPRIATE REMEDY,
60 * WHICH IN THE REGENTS' DISCRETION MAY INCLUDE, WITHOUT LIMITATION,
61 * REPLACEMENT OF THE CODE SO IDENTIFIED, LICENSING OF THE CODE SO
62 * IDENTIFIED, OR WITHDRAWAL OF THE CODE CAPABILITY TO THE EXTENT
63 * NEEDED TO COMPLY WITH ANY SUCH LICENSES OR RIGHTS.
64 ************************************************************************/
65
66 //!
67 //! @file storage/blobstore.c
68 //! Implements blobstore storage
69 //!
70
71 /*----------------------------------------------------------------------------*\
72 | |
73 | INCLUDES |
74 | |
75 \*----------------------------------------------------------------------------*/
76
77 #define _GNU_SOURCE
78 #include <stdio.h>
79 #include <stdlib.h>
80 #include <string.h>
81 #include <assert.h>
82 #include <unistd.h> // close
83 #include <time.h> // time
84 #include <sys/time.h> // gettimeofday
85 #include <sys/stat.h> // mkdir
86 #include <errno.h> // errno
87 #include <sys/types.h> // *dir, etc, wait
88 #include <sys/file.h> // flock
89 #include <dirent.h>
90 #include <sys/wait.h> // wait
91 #include <pthread.h>
92 #include <sys/types.h> // gettid
93 #include <regex.h>
94 #include <libgen.h> // basename
95
96 #include <eucalyptus.h> // euca user
97 #include <misc.h> // ensure_...
98 #include <ipc.h>
99 #include <euca_string.h>
100
101 #include "blobstore.h"
102 #include "diskutil.h"
103
104 #ifdef _EUCA_BLOBS
105 #include "map.h"
106 #endif /* _EUCA_BLOBS */
107
108 /*----------------------------------------------------------------------------*\
109 | |
110 | DEFINES |
111 | |
112 \*----------------------------------------------------------------------------*/
113
114 #define BLOBSTORE_METADATA_FILE ".blobstore"
115 #define BLOBSTORE_METADATA_TIMEOUT_USEC (1000000LL * 60 * 2) //!< it may take dozens of seconds to open blobstore when others are LRU-purging it
116 #define BLOBSTORE_LOCK_TIMEOUT_USEC 500000LL
117 #define BLOBSTORE_FIND_TIMEOUT_USEC 50000LL
118 #define BLOBSTORE_DELETE_TIMEOUT_USEC 50000LL
119 #define BLOBSTORE_SLEEP_INTERVAL_USEC 99999LL
120 #define BLOBSTORE_DMSETUP_TIMEOUT_SEC 60
121 #define BLOBSTORE_MAX_CONCURRENT 99
122 #define BLOBSTORE_NO_TIMEOUT -1L
123 #define BLOBSTORE_SIG_MAX 262144
124 #define DM_PATH "/dev/mapper/"
125 #define DM_FORMAT DM_PATH "%s" //!< @TODO do not hardcode?
126 #define MIN_BLOCKS_SNAPSHOT 32 //!< otherwise dmsetup fails with device-mapper: reload ioctl failed: Cannot allocate memory OR device-mapper: reload ioctl failed: Input/output error
127 #define EUCA_ZERO "euca-zero"
128 #define EUCA_ZERO_SIZE "2199023255552" //!< is one petabyte enough?
129
130 #define __INLINE__ __inline__
131
132 #ifdef _UNIT_TEST
133 #define F1 "/tmp/blobstore_test_1"
134 #define F2 "/tmp/blobstore_test_2"
135 #define F3 "/tmp/blobstore_test_3"
136
137 #define _R BLOBSTORE_FLAG_RDONLY
138 #define _W BLOBSTORE_FLAG_RDWR
139 #define _C (BLOBSTORE_FLAG_CREAT | BLOBSTORE_FLAG_EXCL | BLOBSTORE_FLAG_RDWR)
140 #define _CBB (BLOBSTORE_FLAG_CREAT | BLOBSTORE_FLAG_EXCL)
141
142 #define B1 "BLOCKBLOB-01"
143 #define B2 "BLOCKBLOB-02"
144 #define B3 "BLOCKBLOB-03"
145 #define B4 "BLOCKBLOB-04"
146 #define B5 "BLOCKBLOB-05"
147 #define B6 "BLOCKBLOB-06"
148
149 #define BS_SIZE 30
150 #define BB_SIZE 10
151 #define CBB_SIZE 32
152 #define STRESS_BS_SIZE 100000
153 #define STRESS_MIN_BB 64
154 #define STRESS_BLOBS 10
155
156 #define LOCK_CYCLES 3
157 #define COMPETITIVE_PARTICIPANTS 3
158 #define COMPETITIVE_ITERATIONS 30
159 #define COMPETITIVE_PAUSE_USEC 5
160 #define COMPETITIVE_TIMEOUT_USEC 3000000L
161 #endif /* _UNIT_TEST */
162
163 #ifdef _EUCA_BLOBS
164 #define USAGE "Usage: euca-blobs [cache=... work=...] command [param1] [param2]...\n"
// Help text printed by euca-blobs: one line per supported command.
// The 'delete' description used to end mid-sentence ("delete blob with").
#define HELP "\n" \
    "\thelp\t\t- print this help message\n" \
    "\tlist\t\t- list blobs in work and cache\n" \
    "\tdelete [id]\t- delete blob with the specified id\n"
169 #define MAX_ARGS 5
170 #endif /* _EUCA_BLOBS */
171
172 /*----------------------------------------------------------------------------*\
173 | |
174 | TYPEDEFS |
175 | |
176 \*----------------------------------------------------------------------------*/
177
178 /*----------------------------------------------------------------------------*\
179 | |
180 | ENUMERATIONS |
181 | |
182 \*----------------------------------------------------------------------------*/
183
184 // if changing, change the array below and set_blockblob_metadata_path()
185 typedef enum { //!< paths to files containing...
186 BLOCKBLOB_PATH_NONE = 0, //!< sentinel for identifying files that are not blockblob related
187 BLOCKBLOB_PATH_BLOCKS, //!< ...blocks, either in flat format or as a snapshot backing
188 BLOCKBLOB_PATH_LOCK, //!< ...nothing, but needed for safe locking of access to the blob
189 BLOCKBLOB_PATH_DM, //!< ...device mapper devices created for this clone, if any
190 BLOCKBLOB_PATH_DEPS, //!< ...names of blockblobs that this blockblob depends on, if any
191 BLOCKBLOB_PATH_LOOPBACK, //!< ...name of the loopback device for this blob, when attached
192 BLOCKBLOB_PATH_SIG, //!< ...signature of the blob, if provided from outside
193 BLOCKBLOB_PATH_REFS, //!< ...names of blockblobs that depend on this blockblob, if any
194 BLOCKBLOB_PATH_HOLLOW, //!< ...nothing, but the file acts as a marker of 'hollow' blobs
195 BLOCKBLOB_PATH_TOTAL,
196 } blockblob_path_t;
197
198 enum {
199 DMSETUP,
200 ROOTWRAP,
201 LASTHELPER,
202 };
203
204 /*----------------------------------------------------------------------------*\
205 | |
206 | STRUCTURES |
207 | |
208 \*----------------------------------------------------------------------------*/
209
210 typedef struct _blobstore_filelock {
211 char path[PATH_MAX]; //!< path that the file was open with @TODO canonicalize?
212 int refs; //!< number of open file descriptors (some holding the lock, some waiting) for this path in this process
213 int next_fd; //!< next available file descriptor in the table below:
214 int fd[BLOBSTORE_MAX_CONCURRENT];
215 int fd_status[BLOBSTORE_MAX_CONCURRENT]; //!< 0 = unused, 1 = open
216 #ifdef _TEST_FILELOCK
217 unsigned int thread_id[BLOBSTORE_MAX_CONCURRENT];
218 #endif /* _TEST_FILELOCK */
219 pthread_rwlock_t lock; //!< reader/writer lock for controlling intra-process access
220 pthread_mutex_t mutex; //!< for locking this specific struct during manipulations
221 sem *sem; //!< semaphore for debugging
222 struct _blobstore_filelock *next; //!< pointer for constructing a LL
223 } blobstore_filelock;
224
225 /*----------------------------------------------------------------------------*\
226 | |
227 | EXTERNAL VARIABLES |
228 | |
229 \*----------------------------------------------------------------------------*/
230
231 /* Should preferably be handled in header file */
232
233 /*----------------------------------------------------------------------------*\
234 | |
235 | GLOBAL VARIABLES |
236 | |
237 \*----------------------------------------------------------------------------*/
238
239 //! Blobstore errors matching strings. Make sure these match up with blobstore_error_t enums above
240 const char *_blobstore_error_strings[] = {
241 "success",
242 "general error",
243
244 // system errno equivalents
245 "no such entity",
246 "bad file descriptor",
247 "out of memory",
248 "permission denied",
249 "already exists",
250 "invalid parameters",
251 "no space left",
252 "timeout",
253 "too many files open",
254
255 // blobstore-specific errors
256 "wrong signature",
257 "unknown error",
258 };
259
260 const char *blobstore_relation_type_name[] = {
261 "copy",
262 "map",
263 "snapshot",
264 };
265
266 __thread blobstore_error_t _blobstore_errno = BLOBSTORE_ERROR_OK; //!< thread-local errno
267
268 /*----------------------------------------------------------------------------*\
269 | |
270 | STATIC VARIABLES |
271 | |
272 \*----------------------------------------------------------------------------*/
273
274 // entries must match, one-to-one and in the same order, the values of the blockblob_path_t enum above (array index == enum value)
275 static const char *blobstore_metadata_suffixes[] = {
276 "none", // sentinel entry so that all actual entries have indices > 0 (pairs with BLOCKBLOB_PATH_NONE)
277 "blocks", // MUST be second so loop in check_metadata_name() works (pairs with BLOCKBLOB_PATH_BLOCKS)
278 "lock", // pairs with BLOCKBLOB_PATH_LOCK
279 "dm", // pairs with BLOCKBLOB_PATH_DM
280 "deps", // pairs with BLOCKBLOB_PATH_DEPS
281 "loopback", // pairs with BLOCKBLOB_PATH_LOOPBACK
282 "sig", // pairs with BLOCKBLOB_PATH_SIG
283 "refs", // pairs with BLOCKBLOB_PATH_REFS
284 "hollow", // pairs with BLOCKBLOB_PATH_HOLLOW
285 };
286
287 static void (*err_fn) (const char *msg) = NULL;
288 static unsigned char _do_print_errors = 1;
289 static unsigned char _do_print_trace = 1;
290 static pthread_mutex_t _blobstore_mutex = PTHREAD_MUTEX_INITIALIZER; //!< process-global mutex
291 static blobstore_filelock *locks_list = NULL; //!< process-global LL head @TODO replace this with a hash table
292
293 //! @{
294 //! @name debugging counters
295 //! @TODO remove these
296 static long _locks_list_add_ctr = 0L;
297 static long _locks_list_rem_ctr = 0L;
298 static long _open_success_ctr = 0L;
299 static long _close_success_ctr = 0L;
300 static long _open_error_ctr = 0L;
301 static long _open_timeout_ctr = 0L;
302 static long _close_error_ctr = 0L;
303 static char zero_buf[1] = "\0";
304 //! @}
305
306 static __thread char _blobstore_last_msg[512] = "";
307 static __thread char _blobstore_last_trace[8172] = "";
308
309 static char *helpers[LASTHELPER] = {
310 "dmsetup",
311 "euca_rootwrap",
312 };
313
314 static char *helpers_path[LASTHELPER];
315 static int initialized = 0;
316
317 #ifdef _UNIT_TEST
318 static char *_farray[] = { F1, F2, F3 };
319 #endif /* _UNIT_TEST */
320
321 #ifdef _EUCA_BLOBS
322 static char show_debug = FALSE;
323 static char show_extras = FALSE;
324 static char show_children = FALSE;
325 static char show_parents = FALSE;
326 static char *euca_home = NULL;
327 static char *work_path = NULL;
328 static char *cache_path = NULL;
329 static blobstore *work_bs = NULL;
330 static blobstore *cache_bs = NULL;
331 static map *blob_map;
332 #endif /* _EUCA_BLOBS */
333
334 /*----------------------------------------------------------------------------*\
335 | |
336 | STATIC PROTOTYPES |
337 | |
338 \*----------------------------------------------------------------------------*/
339
340 static void myprintf(int loglevel, const char *format, ...);
341 static __INLINE__ void _err_on(void);
342 static __INLINE__ void _err_off(void);
343 static void err(blobstore_error_t error, const char *custom_msg, const int src_line_no, const char *src_file_name);
344 static __INLINE__ void propagate_system_errno(blobstore_error_t default_errno, const int src_line_no, const char *src_file_name);
345 static void gen_id(char *str, unsigned int size);
346 static void close_filelock(blobstore_filelock * l);
347 static void free_filelock(blobstore_filelock * l);
348 static int close_and_unlock(int fd);
349 #ifdef _TEST_LOCKS
350 static char *path_to_sem_name(const char *path, char *name, int name_size);
351 #endif /* _TEST_LOCKS */
352 static int open_and_lock(const char *path, int flags, long long timeout_usec, mode_t mode);
353 static char *get_val(const char *buf, const char *key);
354 static int fd_to_buf(int fd, char *buf, int size_buf);
355 static int buf_to_fd(int fd, const char *buf, int size_buf);
356 static int read_store_metadata(blobstore * bs);
357 static int write_store_metadata(blobstore * bs);
358 static int set_blockblob_metadata_path(blockblob_path_t path_t, const blobstore * bs, const char *bb_id, char *path, size_t path_size);
359 static int write_blockblob_metadata_path(blockblob_path_t path_t, const blobstore * bs, const char *bb_id, const char *str);
360 static int read_blockblob_metadata_path(blockblob_path_t path_t, const blobstore * bs, const char *bb_id, char *str, int str_size);
361 static int write_array_blockblob_metadata_path(blockblob_path_t path_t, const blobstore * bs, const char *bb_id, char **array, int array_size);
362 static int read_array_blockblob_metadata_path(blockblob_path_t path_t, const blobstore * bs, const char *bb_id, char ***array, int *array_size);
363 static int update_entry_blockblob_metadata_path(blockblob_path_t path_t, const blobstore * bs, const char *bb_id, const char *entry, int removing);
364 static int typeof_blockblob_metadata_path(const blobstore * bs, const char *path, char *bb_id, unsigned int bb_id_size);
365 static int delete_blockblob_files(const blobstore * bs, const char *bb_id);
366 static int ensure_blockblob_metadata_path(const blobstore * bs, const char *bb_id);
367 static void free_bbs(blockblob * bbs);
368 static unsigned int check_in_use(blobstore * bs, const char *bb_id, long long timeout_usec);
369 static void set_device_path(blockblob * bb);
370 static blockblob **walk_bs(blobstore * bs, const char *dir_path, blockblob ** tail_bb, const blockblob * bb_to_avoid);
371 static blockblob *scan_blobstore(blobstore * bs, const blockblob * bb_to_avoid);
372 static int compare_bbs(const void *bb1, const void *bb2);
373 static long long purge_blockblobs_lru(blobstore * bs, blockblob * bb_list, long long need_blocks);
374 static int get_stale_refs(const blockblob * bb, char ***refs);
375 static int loop_remove(blobstore * bs, const char *bb_id);
376 static int dm_suspend_resume(const char *dev_name);
377 static int dm_check_device(const char *dev_name);
378 static int dm_delete_device(const char *dev_name);
379 static int dm_delete_devices(char *dev_names[], int size);
380 static int dm_create_devices(char *dev_names[], char *dm_tables[], int size);
381 static char *dm_get_zero(void);
382 static int blockblob_check(const blockblob * bb);
383 static int delete_blob_state(blockblob * bb, long long timeout_usec, char do_force);
384 static int verify_bb(const blockblob * bb, unsigned long long min_size_bytes);
385
386 #ifdef _UNIT_TEST
387 static void _fill_blob(blockblob * bb, char c, int use_file);
388 static blobstore *create_teststore(int size_blocks, const char *base, const char *name, blobstore_format_t format, blobstore_revocation_t revocation,
389 blobstore_snapshot_t snapshot);
390 static int write_byte(blockblob * bb, int seek, char c);
391 static char read_byte(blockblob * bb, int seek);
392 static int do_clone_stresstest(const char *base, const char *name, blobstore_format_t format, blobstore_revocation_t revocation, blobstore_snapshot_t snapshot);
393 static int check_destination(blockblob * bb4, char *op);
394 static int do_copy_test(const char *base, const char *name);
395 static int do_clone_test(const char *base, const char *name, blobstore_format_t format, blobstore_revocation_t revocation, blobstore_snapshot_t snapshot, int copy_or_snapshot);
396 static int do_metadata_test(const char *base, const char *name);
397 static int do_blobstore_test(const char *base, const char *name, blobstore_format_t format, blobstore_revocation_t revocation);
398 static void *competitor_function(void *ptr);
399 static void *thread_function(void *ptr);
400 static void dummy_err_fn(const char *msg);
401 #endif /* _UNIT_TEST */
402
403 #ifdef _EUCA_BLOBS
404 static void bs_errors(const char *msg);
405 static int open_blobstore(const char *path, blobstore ** bs, const char *name);
406 static int open_blobstores();
407 static void close_blobstores();
408 static int do_list_bs(blobstore * bs, const char *regex);
409 static void print_tree(const char *prefix, blockblob_meta * bm, blockblob_path_t type);
410 static int do_list(const char *regex);
411 static int do_delete(const char *bs_path, const char *bb_id);
412 static void usage(const char *msg);
413 static void set_global_parameter(char *key, char *val);
414 #endif /* _EUCA_BLOBS */
415
416 /*----------------------------------------------------------------------------*\
417 | |
418 | MACROS |
419 | |
420 \*----------------------------------------------------------------------------*/
421
422 #define ERR(_ERRNO,_MSG) err(_ERRNO, _MSG, __LINE__, __FILE__)
423
424 #define PROPAGATE_ERR(_ERRNO) propagate_system_errno(_ERRNO, __LINE__, __FILE__)
425
426 #ifdef _UNIT_TEST
427 #define _UNEXPECTED() printf ("======================> UNEXPECTED RESULT (errors=%d)!!!\n", ++errors);
428
429 #define _CHKMETA(_ST, _RE) \
430 { \
431 snprintf(entry_path, sizeof(entry_path), "%s/%s", bs->path, _ST); \
432 if (_RE != typeof_blockblob_metadata_path(bs, entry_path, blob_id, sizeof(blob_id))) \
433 _UNEXPECTED(); \
434 }
435
436 #define _OPEN(_FD, _FI, _FL, _TI, _RE) \
437 { \
438 _blobstore_errno = 0; \
439 printf("%d: open (" _FI " flags=%d timeout=%d)", getpid(), _FL, _TI); \
440 _FD = open_and_lock(_FI, _FL, _TI, BLOBSTORE_FILE_PERM); \
441 printf("=%d errno=%d '%s'\n", _FD, _blobstore_errno, blobstore_get_error_str(_blobstore_errno)); \
442 if ((_FD == -1) && (_blobstore_errno == 0)) \
443 printf("======================> UNSET errno ON ERROR (errors=%d)!!!\n", ++errors); \
444 else if (((_RE == -1) && (_FD != -1)) || ((_RE == 0) && (_FD < 0))) \
445 _UNEXPECTED(); \
446 }
447
448 #define _CLOS(_FD, _FI) \
449 { \
450 ret = close_and_unlock(_FD); \
451 printf("%d: close (%d " _FI ")=%d\n", getpid(), _FD, ret); \
452 }
453
454 #define _PARENT_WAITS() \
455 { \
456 int status = 0; \
457 int ret = 0; \
458 printf("waiting for child pid=%d\n", pid); \
459 ret = wait(&status); \
460 printf("waited for child pid=%d ret=%d\n", ret, WEXITSTATUS(status)); \
461 errors += WEXITSTATUS(status); \
462 }
463
464 #define _OPENBB(_BB, _ID, _SI, _SG, _FL, _TI, _RE) \
465 { \
466 _blobstore_errno = 0; \
467 printf("%d: bb_open (%s size=%d flags=%d timeout=%d)", getpid(), SP(_ID), _SI, _FL, _TI); \
468 _BB = blockblob_open(bs, _ID, (_SI) * 512, _FL, _SG, _TI); \
469 printf("=%s errno=%d '%s'\n", ((_BB == NULL) ? ("NULL") : ("OK")), _blobstore_errno, blobstore_get_error_str(_blobstore_errno)); \
470 if ((_BB == NULL) && (_blobstore_errno == 0)) \
471 printf("======================> UNSET errno ON ERROR (errors=%d)!!!\n", ++errors); \
472 else if (((_RE == -1) && (_BB != NULL)) || ((_RE == 0) && (_BB == NULL))) \
473 _UNEXPECTED(); \
474 }
475
476 // same as _OPENBB but accepts bytes rather than blocks
477 #define _OPENBBb(_BB, _ID, _SI, _SG, _FL, _TI, _RE) \
478 { \
479 _blobstore_errno = 0; \
480 printf("%d: bb_open (%s size=%d flags=%d timeout=%d)", getpid(), SP(_ID), _SI, _FL, _TI); \
481 _BB = blockblob_open(bs, _ID, _SI, _FL, _SG, _TI); \
482 printf("=%s errno=%d '%s'\n", ((_BB == NULL) ? ("NULL") : ("OK")), _blobstore_errno, blobstore_get_error_str(_blobstore_errno)); \
483 if ((_BB == NULL) && (_blobstore_errno == 0)) \
484 printf("======================> UNSET errno ON ERROR (errors=%d)!!!\n", ++errors); \
485 else if (((_RE == -1) && (_BB != NULL)) || ((_RE == 0) && (_BB == NULL))) \
486 _UNEXPECTED(); \
487 }
488
489 #define _SEARCH(_PATTERN, _RE) \
490 { \
491 results = NULL; \
492 printf("%d: bs_search (pattern=%s)", getpid(), _PATTERN); \
493 nresults = blobstore_search (bs, _PATTERN, &results); \
494 printf("=%d (expected %d) errno=%d '%s'\n", nresults, _RE, _blobstore_errno, blobstore_get_error_str(_blobstore_errno)); \
495 if ((nresults < 0) && (_blobstore_errno == 0)) \
496 printf("======================> UNSET errno ON ERROR (errors=%d)!!!\n", ++errors); \
497 else if (_RE != nresults) \
498 _UNEXPECTED(); \
499 for (blockblob_meta * bm = results; bm;) { \
500 blockblob_meta * next = bm->next; \
501 EUCA_FREE(bm); \
502 bm = next; \
503 } \
504 }
505
506 #define _CLOSBB(_BB, _ID) \
507 { \
508 ret = blockblob_close(_BB); \
509 printf("%d: bb_close (%lu %s)=%d errno=%d '%s'\n", \
510 getpid(), ((unsigned long) _BB), SP(_ID), ret, _blobstore_errno, \
511 blobstore_get_error_str(_blobstore_errno)); \
512 }
513
514 #define _DELEBB(_BB, _ID, _RE) \
515 { \
516 ret = blockblob_delete(_BB, 3000, 0); \
517 printf("%d: bb_delete (%lu %s)=%d errno=%d '%s'\n", \
518 getpid(), ((unsigned long) _BB), SP(_ID), ret, _blobstore_errno, \
519 blobstore_get_error_str(_blobstore_errno)); \
520 if (ret != _RE) \
521 _UNEXPECTED(); \
522 }
523
524 #define _CLONBB(_BB, _ID, _MP, _RE) \
525 { \
526 _blobstore_errno = 0; \
527 printf("%d: bb_clone (%s map=%lu)", getpid(), SP(_ID), ((unsigned long) _MP)); \
528 ret = blockblob_clone(_BB, _MP, (sizeof(_MP) / sizeof(blockmap))); \
529 printf("=%d errno=%d '%s'\n", ret, _blobstore_errno, blobstore_get_error_str(_blobstore_errno)); \
530 if ((ret == -1) && (_blobstore_errno == 0)) \
531 printf("======================> UNSET errno ON ERROR (errors=%d)!!!\n", ++errors); \
532 else if (_RE != ret) \
533 _UNEXPECTED(); \
534 }
535
536 #define _COPYBB(_SBB, _SO, _DBB, _DO, _LEN, _RE) \
537 { \
538 _blobstore_errno = 0; \
539 printf("%d: bb_copy (%s to %s)", getpid(), (_SBB)->id, (_DBB)->id); \
540 ret = blockblob_copy(_SBB, _SO, _DBB, _DO, _LEN); \
541 printf("=%d errno=%d '%s'\n", ret, _blobstore_errno, blobstore_get_error_str(_blobstore_errno)); \
542 if ((ret == -1) && (_blobstore_errno == 0)) \
543 printf("======================> UNSET errno ON ERROR (errors=%d)!!!\n", ++errors); \
544 else if (_RE != ret) \
545 _UNEXPECTED(); \
546 }
547 #endif /* _UNIT_TEST */
548
549 /*----------------------------------------------------------------------------*\
550 | |
551 | IMPLEMENTATION |
552 | |
553 \*----------------------------------------------------------------------------*/
554
555 //!
556 //!
557 //!
558 //! @param[in] loglevel
559 //! @param[in] format
560 //!
561 //! @pre
562 //!
563 //! @note
564 //!
565 static void myprintf(int loglevel, const char *format, ...)
566 {
567 char buf[1024];
568
569 va_list ap;
570 va_start(ap, format);
571 vsnprintf(buf, sizeof(buf), format, ap);
572 va_end(ap);
573
574 if (err_fn)
575 err_fn(buf);
576 else
577 puts(buf);
578 }
579
580 //!
581 //!
582 //!
583 //! @param[in] error
584 //!
585 //! @return
586 //!
587 //! @pre
588 //!
589 //! @note
590 //!
591 const char *blobstore_get_error_str(blobstore_error_t error)
592 {
593 return _blobstore_error_strings[error];
594 }
595
596 //!
597 //!
598 //!
599 //! @return
600 //!
601 //! @pre
602 //!
603 //! @note
604 //!
605 const char *blobstore_get_last_msg(void)
606 {
607 return _blobstore_last_msg;
608 }
609
610 //!
611 //!
612 //!
613 //! @return
614 //!
615 //! @pre
616 //!
617 //! @note
618 //!
619 const char *blobstore_get_last_trace(void)
620 {
621 return _blobstore_last_trace;
622 }
623
624 //!
625 //!
626 //!
627 //! @note
628 //!
629 static __INLINE__ void _err_on(void)
630 {
631 _do_print_errors = 1;
632 }
633
634 //!
635 //!
636 //!
637 //! @note
638 //!
639 static __INLINE__ void _err_off(void)
640 {
641 _do_print_errors = 0;
642 }
643
644 //!
645 //!
646 //!
647 //! @param[in] error
648 //! @param[in] custom_msg
649 //! @param[in] src_line_no
650 //! @param[in] src_file_name
651 //!
652 //! @pre
653 //!
654 //! @note
655 //!
656 static void err(blobstore_error_t error, const char *custom_msg, const int src_line_no, const char *src_file_name)
657 {
658 const char *msg = custom_msg;
659 if (msg == NULL) {
660 msg = blobstore_get_error_str(error);
661 }
662 snprintf(_blobstore_last_msg, sizeof(_blobstore_last_msg), "%s:%d %s", src_file_name, src_line_no, msg);
663 log_dump_trace(_blobstore_last_trace, sizeof(_blobstore_last_trace));
664
665 if (_do_print_errors) {
666 myprintf(EUCA_LOG_ERROR, "error: %s\n", _blobstore_last_msg);
667 if (_do_print_trace)
668 myprintf(EUCA_LOG_ERROR, "%s", _blobstore_last_trace);
669 }
670 _blobstore_errno = error;
671 }
672
673 //!
674 //!
675 //!
676 //! @param[in] default_errno
677 //! @param[in] src_line_no
678 //! @param[in] src_file_name
679 //!
680 //! @pre
681 //!
682 //! @note
683 //!
684 static __INLINE__ void propagate_system_errno(blobstore_error_t default_errno, const int src_line_no, const char *src_file_name)
685 {
686 switch (errno) {
687 case ENOENT:
688 _blobstore_errno = BLOBSTORE_ERROR_NOENT;
689 break;
690 case ENOMEM:
691 _blobstore_errno = BLOBSTORE_ERROR_NOMEM;
692 break;
693 case EACCES:
694 _blobstore_errno = BLOBSTORE_ERROR_ACCES;
695 break;
696 case EEXIST:
697 _blobstore_errno = BLOBSTORE_ERROR_EXIST;
698 break;
699 case EINVAL:
700 _blobstore_errno = BLOBSTORE_ERROR_INVAL;
701 break;
702 case ENOSPC:
703 _blobstore_errno = BLOBSTORE_ERROR_NOSPC;
704 break;
705 case EAGAIN:
706 _blobstore_errno = BLOBSTORE_ERROR_AGAIN;
707 break;
708 default:
709 perror("blobstore");
710 _blobstore_errno = default_errno;
711 }
712 err(_blobstore_errno, NULL, src_line_no, src_file_name);
713 }
714
715 //!
716 //!
717 //!
718 //! @param[in] fn
719 //!
720 //! @pre
721 //!
722 //! @note
723 //!
724 void blobstore_set_error_function(void (*fn) (const char *msg))
725 {
726 err_fn = fn;
727 }
728
//!
//! Fills a buffer with an identifier made of three random() values, each
//! printed as (at least) eight lowercase hexadecimal digits.
//!
//! @param[in] str buffer that receives the NUL-terminated identifier
//! @param[in] size capacity of 'str' in bytes; longer output is truncated by snprintf()
//!
//! @note This function does not seed the random number generator.
//!
static void gen_id(char *str, unsigned int size)
{
    snprintf(str, size,
             "%08lx%08lx%08lx",
             (unsigned long)random(),
             (unsigned long)random(),
             (unsigned long)random());
}
743
//!
//! Prepares a 'struct flock' so that it describes a lock of the requested type
//! spanning the entire file.
//!
//! @param[in] l the structure to fill in (must not be NULL)
//! @param[in] type value stored in l_type (e.g. F_RDLCK, F_WRLCK, F_UNLCK)
//!
//! @return the same pointer that was passed in as 'l'
//!
//! @note Only l_type, l_pid, l_start, l_whence and l_len are written.
//!
struct flock *flock_whole_file(struct flock *l, short type)
{
    // what kind of lock, and on whose behalf
    l->l_type = type;
    l->l_pid = 0;

    // region: from the very beginning of the file, length 0 (i.e. through to the end of the file)
    l->l_whence = SEEK_SET;
    l->l_start = 0;
    l->l_len = 0;

    return l;
}
768
769 //!
770 //!
771 //!
772 //! @param[in] l
773 //!
774 //! @pre \li MUST be called with _blobstore_mutex held.
775 //! \li The l parameter must not be NULL
776 //!
777 //! @note
778 //!
779 static void close_filelock(blobstore_filelock * l)
780 {
781 // close all file descriptors at once (we do this because
782 // closing any one removes the lock for all descriptors
783 // held by a process)
784 for (int i = 0; i < l->next_fd; i++) {
785 if (l->fd[i] > -1) {
786 close(l->fd[i]);
787 l->fd[i] = -1;
788 }
789 }
790 l->next_fd = 0; // knock the open fd counter back to 0
791 }
792
793 //!
794 //!
795 //!
796 //! @param[in] l
797 //!
798 //! @pre \li MUST be called with _blobstore_mutex held
799 //! \li The l parameter must not be NULL.
800 //!
801 //! @note
802 //!
803 static void free_filelock(blobstore_filelock * l)
804 {
805 pthread_rwlock_destroy(&(l->lock));
806 pthread_mutex_destroy(&(l->mutex));
807 EUCA_FREE(l);
808 }
809
810 //!
811 //! This function must be used to close files opened with open_and_lock(). (Simply doing close() will
812 //! leave the file locked via pthreads and future open_and_lock() requests from the same process may
813 //! fail.) Also, closing the file descriptor releases the OS file lock for the process, so any other
814 //! read-only descriptors held by the process are no longer guarded since other processes may open the
815 //! file for writing.
816 //!
817 //! @param[in] fd the file descriptor to release; it is looked up among the open entries of every lock struct on locks_list
818 //!
819 //! @return 0 on success; -1 if fd is negative, is not found as an open descriptor, or was already marked closed (each case is reported via ERR() with BLOBSTORE_ERROR_BADF)
820 //!
821 //! @pre This function acquires _blobstore_mutex and the matching entry's own mutex itself, so the caller presumably must not already hold them (NOTE(review): confirm against callers)
822 //!
823 //! @note The descriptor is first only marked unused; the underlying descriptors are closed (via close_filelock()) and the lock struct is unlinked and freed only once no slot is marked open and refs has dropped to 0
824 //!
825 static int close_and_unlock(int fd)
826 {
827 if (fd < 0) { // reject invalid descriptors before taking any lock
828 ERR(BLOBSTORE_ERROR_BADF, NULL);
829 return -1;
830 }
831 int ret = 0; // becomes -1 if the descriptor cannot be matched or was already closed
832 { // critical section
833 pthread_mutex_lock(&_blobstore_mutex); // grab global lock (we will not block below and we may be deallocating)
834 LOGTRACE("{%u} close_and_unlock: obtained global lock for closing of fd=%d\n", (unsigned int)pthread_self(), fd);
835
836 blobstore_filelock *path_lock = NULL; // lock struct to which this fd belongs
837 int index = -1; // index of this fd entry in the lock struct
838
839 // traverse all locks, looking for one with fd,
840 // when found, compute index and open_fds
841 blobstore_filelock **next_ptr = &locks_list; // address of the pointer that refers to the current element, kept so the element can be unlinked later
842 for (blobstore_filelock * l = locks_list; l; l = l->next) { // look for the fd
843 assert(l->next_fd >= 0 && l->next_fd <= BLOBSTORE_MAX_CONCURRENT);
844 for (int i = 0; i < l->next_fd; i++) {
845 if (l->fd_status[i] && l->fd[i] == fd) { // only slots marked open are considered
846 path_lock = l; // found it!
847 index = i;
848 break;
849 }
850 }
851 if (index != -1) // stop before advancing next_ptr so that it still refers to the match
852 break;
853 next_ptr = &(l->next); // list head or prev element
854 }
855
856 if (path_lock) {
857 assert(*next_ptr == path_lock);
858 assert(index >= 0 && index < BLOBSTORE_MAX_CONCURRENT);
859
860 boolean did_close = FALSE; // set once this call has marked the slot unused
861 boolean do_free = FALSE; // set when the lock struct has been unlinked and must be freed below
862 { // inner critical section to protect changes to 'path_lock', if any
863 pthread_mutex_lock(&(path_lock->mutex)); // grab path-specific mutex
864 if (path_lock->fd_status[index] == 1) { // has not been closed yet
865 path_lock->fd_status[index] = 0; // set status to 'unused'
866 did_close = TRUE;
867 path_lock->refs--; // one fewer descriptor outstanding for this path in this process
868
869 int open_fds = 0; // number of slots still marked open after releasing ours
870 for (int i = 0; i < path_lock->next_fd; i++) {
871 if (path_lock->fd_status[i]) {
872 assert(path_lock->fd[i] != fd); // any slot still marked open must hold a different descriptor
873 open_fds++;
874 }
875 }
876
877 if (open_fds == 0 && path_lock->refs == 0) { // no open blockblob file descriptors in this process
878 close_filelock(path_lock); // closes all descriptors recorded in the struct
879 *next_ptr = path_lock->next; // remove from LL
880 do_free = TRUE;
881 _locks_list_rem_ctr++;
882 LOGTRACE("{%u} close_and_unlock: unlocked and freed fd=%d path=%s\n", (unsigned int)pthread_self(), fd, path_lock->path);
883
884 } else {
885 LOGTRACE("{%u} close_and_unlock: kept fd=%d path=%s open/refs=%d/%d\n", (unsigned int)pthread_self(), fd, path_lock->path, open_fds, path_lock->refs);
886 }
887 pthread_rwlock_unlock(&(path_lock->lock)); // give up the Posix lock
888 /* lock testing code
889 if (path_lock->sem) {
890 sem_v (path_lock->sem);
891 sem_free (path_lock->sem);
892 path_lock->sem = NULL;
893 }
894 */
895 }
896 pthread_mutex_unlock(&(path_lock->mutex));
897 } // end of inner critical section
898
899 if (do_free) // the struct is destroyed only after its own mutex has been released above
900 free_filelock(path_lock);
901
902 if (!did_close) {
903 ERR(BLOBSTORE_ERROR_BADF, "file descriptor already closed");
904 ret = -1;
905 }
906 } else { // no match
907 ERR(BLOBSTORE_ERROR_BADF, "not an open file descriptor");
908 ret = -1;
909 }
910
911 if (ret == 0) // update the debugging counters
912 _close_success_ctr++;
913 else
914 _close_error_ctr++;
915
916 LOGTRACE("{%u} close_and_unlock: releasing global lock for closing of fd=%d ret=%d\n", (unsigned int)pthread_self(), fd, ret);
917 pthread_mutex_unlock(&_blobstore_mutex);
918 } // end of critical section
919
920 return ret;
921 }
922
923 #ifdef _TEST_LOCKS
//!
//! Builds a semaphore name out of a file path: the path is prefixed with "euca"
//! and every '/' in the result is replaced with '-'.
//!
//! @param[in]  path      the file path to derive the name from
//! @param[out] name      buffer that receives the name (truncated to fit)
//! @param[in]  name_size size of the 'name' buffer in bytes
//!
//! @return the 'name' pointer that was passed in
//!
static char *path_to_sem_name(const char *path, char *name, int name_size)
{
    snprintf(name, name_size, "euca%s", path);

    int pos = 0;
    while (pos < name_size && name[pos] != '\0') {
        if (name[pos] == '/')
            name[pos] = '-';
        pos++;
    }

    return name;
}
945 #endif /* _TEST_LOCKS */
946
947 //!
948 //! This function creates or opens a file and locks it. The lock is:
949 //!
950 //! \li exclusive if the file is being created or written to, or a
951 //! \li non-exclusive readers' lock if the file was opened RDONLY.
952 //!
953 //! The lock works both across threads and processes. File descriptors obtained from
954 //! this function should be released with close_and_unlock(). All locks held by a process
955 //! are released upon termination, whether normal or abnormal.
956 //!
957 //! @param[in] path
958 //! @param[in] flags \li BLOBSTORE_FLAG_RDONLY - open with O_RDONLY, reader lock
959 //! \li BLOBSTORE_FLAG_RDWR - open with O_RDWR, writer lock
960 //! \li BLOBSTORE_FLAG_CREAT - open with O_RDWR | O_CREAT, writer lock
961 //! \li BLOBSTORE_FLAG_EXCL - can be added to _CREAT, as with open()
962 //! @param[in] timeout_usec \li timeout in microseconds for waiting on a lock
963 //! \li BLOBSTORE_NO_TIMEOUT / -1 - wait forever
964 //! \li BLOBSTORE_NO_WAIT / 0 - do not wait at all
965 //! @param[in] mode gets passed to open() directly
966 //!
967 //! @return
968 //!
969 //! @see close_and_unlock()
970 //!
971 //! @pre
972 //!
973 //! @note
974 //!
975 static int open_and_lock(const char *path, int flags, long long timeout_usec, mode_t mode)
976 {
977 short l_type;
978 int o_flags = 0;
979 long long started = time_usec();
980 long long deadline = started + timeout_usec;
981
982 // verify the flags and, based on them,
983 // decide what type of lock to use
984 if (flags & BLOBSTORE_FLAG_RDONLY) {
985 l_type = F_RDLCK; // use shared (read) lock
986 o_flags |= O_RDONLY; // required when using F_RDLCK
987
988 } else if ((flags & BLOBSTORE_FLAG_RDWR) || (flags & BLOBSTORE_FLAG_CREAT)) {
989 l_type = F_WRLCK; // use exclusive (write) lock
990 o_flags |= O_RDWR; // required when using F_WRLCK
991 if (flags & BLOBSTORE_FLAG_CREAT) {
992 o_flags |= O_CREAT;
993 // intentionally ignore _EXCL supplied without _CREAT
994 if (flags & BLOBSTORE_FLAG_EXCL)
995 o_flags |= O_EXCL;
996 }
997
998 if (flags & BLOBSTORE_FLAG_CREAT)
999 o_flags |= O_TRUNC;
1000 } else {
1001 ERR(BLOBSTORE_ERROR_INVAL, "flags to open_and_lock must include either _RDONLY or _RDWR or _CREAT");
1002 return -1;
1003 }
1004
1005 // handle intra-process locking, with a pthreads read-write lock
1006 // either find in a global linked list 'locks_list' or
1007 // allocate and append to it a 'blobstore_filelock' struct
1008 blobstore_filelock *path_lock = NULL;
1009 { // critical section
1010 pthread_mutex_lock(&_blobstore_mutex); // grab the global mutex
1011 blobstore_filelock **next_ptr = &locks_list;
1012 for (blobstore_filelock * l = locks_list; l; l = l->next) { // look through existing locks
1013 if (strcmp(path, l->path) == 0) {
1014 path_lock = l;
1015 break;
1016 }
1017 next_ptr = &(l->next);
1018 }
1019 // next_ptr now points either to LL head or
1020 // to the last non-matching element's next pointer
1021
1022 if (path_lock == NULL) { // this path is not locked by any thread
1023 path_lock = EUCA_ZALLOC(1, sizeof(blobstore_filelock));
1024 if (path_lock == NULL) {
1025 pthread_mutex_unlock(&_blobstore_mutex);
1026 ERR(BLOBSTORE_ERROR_NOMEM, NULL);
1027 return -1;
1028 }
1029 euca_strncpy(path_lock->path, path, sizeof(path_lock->path));
1030 pthread_rwlock_init(&(path_lock->lock), NULL);
1031 pthread_mutex_init(&(path_lock->mutex), NULL);
1032 *next_ptr = path_lock; // add at the end of LL
1033 _locks_list_add_ctr++;
1034 } else {
1035 assert(*next_ptr == path_lock);
1036 if (path_lock->next_fd == BLOBSTORE_MAX_CONCURRENT) {
1037 pthread_mutex_unlock(&_blobstore_mutex);
1038 ERR(BLOBSTORE_ERROR_MFILE, "too many open file descriptors"); // to be precise, this means too many file descriptors with overlapping lifetimes
1039 return -1;
1040 }
1041 }
1042 pthread_mutex_lock(&(path_lock->mutex)); // grab path-specific mutex
1043 {
1044 path_lock->refs++; // increase the reference count while still under lock
1045 }
1046 pthread_mutex_unlock(&(path_lock->mutex)); // release path-specific mutex
1047 pthread_mutex_unlock(&_blobstore_mutex); // release global mutex
1048 } // end of critical section
1049
1050 // open/create the file, using Posix file locks for inter-process locking
1051 int fd = open(path, o_flags, mode);
1052 LOGTRACE("{%u} open_and_lock: open fd=%d flags=%0x path=%s\n", (unsigned int)pthread_self(), fd, o_flags, path);
1053 if (fd == -1) {
1054 PROPAGATE_ERR(BLOBSTORE_ERROR_UNKNOWN);
1055 goto error;
1056 }
1057
1058 { // critical section
1059 pthread_mutex_lock(&_blobstore_mutex); // grab the global mutex
1060
1061 // ensure we do not have this file descriptor already in some other list
1062 for (blobstore_filelock * l = locks_list; l; l = l->next) {
1063 { // inner critical section
1064 pthread_mutex_lock(&(l->mutex)); // grab path-specific mutex for atomic update to the table of descriptors
1065 for (int i = 0; i < l->next_fd; i++) {
1066 if (l->fd[i] == fd) {
1067 LOGWARN("WARNING: blobstore lock closed outside close_and_unlock [fd=%d, index=%d, refs=%d]\n", fd, i, l->refs);
1068 l->fd[i] = -1; // set to invalid so no one else closes our valid descriptor
1069 l->fd_status[i] = 0; // definitely unused.
1070 l->refs--;
1071 }
1072 }
1073 pthread_mutex_unlock(&(l->mutex)); // release path-specific mutex
1074 } // end of inner critical section
1075 }
1076
1077 { // inner critical section
1078 pthread_mutex_lock(&(path_lock->mutex)); // grab path-specific mutex for atomic update to the table of descriptors
1079
1080 // record the file descriptor in the array regardless of whether
1081 // we ultimately succeed in obtaining the lock or not -- we must
1082 // ensure we do not close this file descriptor until all users
1083 // of the lock are through
1084 path_lock->fd[path_lock->next_fd] = fd; // record file descriptor to enable future lookups
1085 path_lock->fd_status[path_lock->next_fd] = 1; // mark the slot as in-use
1086 #ifdef _TEST_FILELOCK
1087 path_lock->thread_id[path_lock->next_fd] = (unsigned int)pthread_self();
1088 #endif
1089 path_lock->next_fd++; // move the index up (it only goes up because we close all file descriptors together)
1090
1091 pthread_mutex_unlock(&(path_lock->mutex)); // release path-specific mutex
1092 } // end of inner critical section
1093
1094 pthread_mutex_unlock(&_blobstore_mutex); // release global mutex
1095 } // end of critical section
1096
1097 for (;;) {
1098 // first try getting the Posix rwlock
1099 int ret;
1100 if (l_type == F_WRLCK)
1101 ret = pthread_rwlock_trywrlock(&(path_lock->lock));
1102 else
1103 ret = pthread_rwlock_tryrdlock(&(path_lock->lock));
1104 if (ret == 0) {
1105 // Posix rwlock succeeded, try the file lock
1106 errno = 0;
1107 struct flock l;
1108 if (fcntl(fd, F_SETLK, flock_whole_file(&l, l_type)) != -1)
1109 break; // success!
1110 pthread_rwlock_unlock(&(path_lock->lock)); // give up the Posix lock
1111 if (errno != EAGAIN) { // any error other than inability to get the lock
1112 PROPAGATE_ERR(BLOBSTORE_ERROR_UNKNOWN);
1113 goto error;
1114 }
1115 }
1116 long long now = time_usec();
1117 if (timeout_usec != BLOBSTORE_NO_TIMEOUT && now >= deadline) { // we timed out waiting for the lock
1118 ERR(BLOBSTORE_ERROR_AGAIN, NULL);
1119 pthread_mutex_lock(&_blobstore_mutex);
1120 _open_timeout_ctr++;
1121 pthread_mutex_unlock(&_blobstore_mutex);
1122 goto error;
1123 }
1124 LOGTRACE("{%u} open_and_lock: could not acquire %s lock, sleeping on %s\n", (unsigned int)pthread_self(), (ret == 0) ? ("file") : ("posix"), path);
1125
1126 usleep(BLOBSTORE_SLEEP_INTERVAL_USEC);
1127 }
1128
1129 // successully acquired both file and Posix locks
1130
1131 #ifdef _TEST_LOCKS
1132 if (l_type == F_WRLCK) {
1133 char sem_name[512];
1134 path_lock->sem = sem_alloc(1, path_to_sem_name(path, sem_name, sizeof(sem_name)));
1135 sem_p(path_lock->sem);
1136 }
1137 #endif // _TEST_LOCKS
1138
1139 pthread_mutex_lock(&_blobstore_mutex);
1140 _open_success_ctr++;
1141 pthread_mutex_unlock(&_blobstore_mutex);
1142 { // print out information about the newly acquired lock
1143 struct stat s;
1144 fstat(fd, &s);
1145
1146 struct flock l;
1147 fcntl(fd, F_GETLK, flock_whole_file(&l, l_type));
1148
1149 LOGTRACE("{%u} open_and_lock: locked fd=%d path=%s flags=%d ino=%ld mode=%0o [lock type=%d whence=%d start=%ld length=%ld]\n",
1150 (unsigned int)pthread_self(), fd, path, o_flags, s.st_ino, s.st_mode, l.l_type, l.l_whence, l.l_start, l.l_len);
1151 }
1152 return fd;
1153
1154 error:
1155 // due to aproblem above (inability to open the file or
1156 // to acquire Posix locks within the deadline), the
1157 // 'blobstore_filelock' struct will be removed from the
1158 // global linked list 'locks_list', its files closed,
1159 // and its memory freed -- but only if this is the last
1160 // thread using it
1161
1162 { // critical section
1163 pthread_mutex_lock(&_blobstore_mutex); // grab the global lock to protect locks_list traversal
1164
1165 // we must recalculate next_ptr since the element that it points to
1166 // may have been removed from the LL and freed while we were outside
1167 // the critical section
1168 blobstore_filelock **next_ptr = &locks_list;
1169 for (blobstore_filelock * l = locks_list; l; l = l->next) { // look through existing locks
1170 if (path_lock == l)
1171 break;
1172 next_ptr = &(l->next);
1173 }
1174 // next_ptr must point at the struct we are looking for,
1175 // which must be in the list
1176 assert(*next_ptr == path_lock);
1177
1178 boolean do_free = FALSE;
1179 { // inner critical section
1180 pthread_mutex_lock(&(path_lock->mutex)); // grab path-specific mutex for atomic update to the table of descriptors
1181 path_lock->refs--;
1182
1183 int open_fds = 0;
1184 for (int i = 0; i < path_lock->next_fd; i++) {
1185 if (path_lock->fd_status[i]) {
1186 if (path_lock->fd[i] == fd) {
1187 path_lock->fd_status[i] = 0; // mark as 'unused'
1188 } else {
1189 open_fds++;
1190 }
1191 }
1192 }
1193
1194 if (open_fds == 0 && path_lock->refs == 0) { // no open blockblob file descriptors in this process
1195 close_filelock(path_lock);
1196 *next_ptr = path_lock->next; // remove from LL
1197 do_free = TRUE;
1198 _locks_list_rem_ctr++;
1199 LOGTRACE("{%u} open_and_lock: freed fd=%d path=%s\n", (unsigned int)pthread_self(), fd, path_lock->path);
1200
1201 } else {
1202 LOGTRACE("{%u} open_and_lock: kept fd=%d path=%s open/refs=%d/%d\n", (unsigned int)pthread_self(), fd, path_lock->path, open_fds, path_lock->refs);
1203 }
1204
1205 pthread_mutex_unlock(&(path_lock->mutex));
1206 } // end of inner critical section
1207
1208 if (do_free)
1209 free_filelock(path_lock);
1210
1211 _open_error_ctr++;
1212 pthread_mutex_unlock(&_blobstore_mutex);
1213 } // end of critical section
1214
1215 return -1;
1216 }
1217
1218 //!
1219 //!
1220 //!
1221 //! @param[in] buf
1222 //! @param[in] key
1223 //!
1224 //! @return
1225 //!
1226 //! @pre
1227 //!
1228 //! @note
1229 //!
1230 static char *get_val(const char *buf, const char *key)
1231 {
1232 char *val = NULL;
1233 char full_key[512];
1234 snprintf(full_key, sizeof(full_key), "%s: ", key);
1235 char *val_begin = strstr(buf, full_key);
1236 if (val_begin) {
1237 val_begin += strlen(full_key);
1238 char *val_end = val_begin;
1239 while (*val_end != '\n' && *val_end != '\0')
1240 val_end++;
1241 val = EUCA_ZALLOC(val_end - val_begin + 1, sizeof(char)); // +1 for the \0
1242 if (val == NULL) {
1243 ERR(BLOBSTORE_ERROR_NOMEM, NULL);
1244 return NULL;
1245 }
1246 strncpy(val, val_begin, val_end - val_begin);
1247 }
1248
1249 return val;
1250 }
1251
1252 //!
1253 //! Helper for reading a file into a buffer
1254 //!
1255 //! @param[in] fd
1256 //! @param[in] buf
1257 //! @param[in] size_buf
1258 //!
1259 //! @return The number of bytes read or -1 if error
1260 //!
1261 //! @pre
1262 //!
1263 //! @note
1264 //!
1265 static int fd_to_buf(int fd, char *buf, int size_buf)
1266 {
1267 if (lseek(fd, 0, SEEK_SET) == -1) {
1268 ERR(BLOBSTORE_ERROR_ACCES, "failed to seek in metadata file");
1269 return -1;
1270 }
1271
1272 struct stat sb;
1273 if (fstat(fd, &sb) == -1) {
1274 ERR(BLOBSTORE_ERROR_ACCES, "failed to stat metadata file");
1275 return -1;
1276 }
1277
1278 if (read(fd, buf, size_buf) != sb.st_size) //! @TODO do this in a loop?
1279 {
1280 ERR(BLOBSTORE_ERROR_NOENT, "failed to read metadata file");
1281 return -1;
1282 }
1283
1284 return sb.st_size;
1285 }
1286
1287 //!
1288 //! Helper for write buffer into a file at descriptor
1289 //!
1290 //! @param[in] fd
1291 //! @param[in] buf
1292 //! @param[in] size_buf
1293 //!
1294 //! @return The number of bytes written or -1 if error
1295 //!
1296 //! @pre
1297 //!
1298 //! @note
1299 //!
1300 static int buf_to_fd(int fd, const char *buf, int size_buf)
1301 {
1302 if (lseek(fd, 0, SEEK_SET) == -1) {
1303 ERR(BLOBSTORE_ERROR_ACCES, "failed to seek in metadata file");
1304 return -1;
1305 }
1306
1307 ssize_t size_wrote = write(fd, buf, size_buf); //! @TODO do this in a loop?
1308 if (size_wrote < size_buf) {
1309 ERR(BLOBSTORE_ERROR_NOENT, "failed to write metadata file");
1310 return -1;
1311 }
1312 // as a sanity check, stat the file and verify its size
1313 struct stat sb;
1314 if (fstat(fd, &sb) == -1) {
1315 ERR(BLOBSTORE_ERROR_ACCES, "failed to stat metadata file");
1316 return -1;
1317 }
1318
1319 if (sb.st_size != size_buf) {
1320 ERR(BLOBSTORE_ERROR_NOENT, "failed to read back metadata file");
1321 return -1;
1322 }
1323
1324 return sb.st_size;
1325 }
1326
1327 //!
1328 //!
1329 //!
1330 //! @param[in] bs
1331 //!
1332 //! @return
1333 //!
1334 //! @pre
1335 //!
1336 //! @note
1337 //!
1338 static int read_store_metadata(blobstore * bs)
1339 {
1340 char *val = NULL;
1341 char buf[1024] = "";
1342 int size = fd_to_buf(bs->fd, buf, (sizeof(buf) - 1));
1343
1344 if (size == -1)
1345 return -1;
1346 if (size < 30) {
1347 ERR(BLOBSTORE_ERROR_NOENT, "metadata size is too small");
1348 return -1;
1349 }
1350
1351 buf[size] = '\0';
1352 if ((val = get_val(buf, "id")) == NULL)
1353 return -1;
1354 euca_strncpy(bs->id, val, sizeof(bs->id));
1355 EUCA_FREE(val);
1356
1357 if ((val = get_val(buf, "limit")) == NULL)
1358 return -1;
1359 errno = 0;
1360 bs->limit_blocks = strtoll(val, NULL, 10);
1361 EUCA_FREE(val);
1362 if (errno != 0) {
1363 ERR(BLOBSTORE_ERROR_NOENT, "invalid metadata file (limit is missing)");
1364 return -1;
1365 }
1366
1367 if ((val = get_val(buf, "revocation")) == NULL)
1368 return -1;
1369 errno = 0;
1370 bs->revocation_policy = strtoll(val, NULL, 10);
1371 EUCA_FREE(val);
1372 if (errno != 0) {
1373 ERR(BLOBSTORE_ERROR_NOENT, "invalid metadata file (revocation is missing)");
1374 return -1;
1375 }
1376
1377 if ((val = get_val(buf, "snapshot")) == NULL)
1378 return -1;
1379 errno = 0;
1380 bs->snapshot_policy = strtoll(val, NULL, 10);
1381 EUCA_FREE(val);
1382 if (errno != 0) {
1383 ERR(BLOBSTORE_ERROR_NOENT, "invalid metadata file (snapshot is missing)");
1384 return -1;
1385 }
1386
1387 if ((val = get_val(buf, "format")) == NULL)
1388 return -1;
1389 errno = 0;
1390 bs->format = strtoll(val, NULL, 10);
1391 EUCA_FREE(val);
1392 if (errno != 0) {
1393 ERR(BLOBSTORE_ERROR_NOENT, "invalid metadata file (format is missing)");
1394 return -1;
1395 }
1396 return 0;
1397 }
1398
1399 //!
1400 //!
1401 //!
1402 //! @param[in] bs
1403 //!
1404 //! @return
1405 //!
1406 //! @pre
1407 //!
1408 //! @note
1409 //!
1410 static int write_store_metadata(blobstore * bs)
1411 {
1412 if (ftruncate(bs->fd, 0) == -1) {
1413 ERR(BLOBSTORE_ERROR_NOENT, "failed to truncate the metadata file");
1414 return -1;
1415 }
1416 if (lseek(bs->fd, 0, SEEK_SET) == -1) {
1417 ERR(BLOBSTORE_ERROR_ACCES, "failed to seek in metadata file");
1418 return -1;
1419 }
1420 char buf[1024];
1421 snprintf(buf, sizeof(buf), "id: %s\n" "limit: %lld\n" "revocation: %d\n" "snapshot: %d\n" "format: %d\n", bs->id, bs->limit_blocks,
1422 bs->revocation_policy, bs->snapshot_policy, bs->format);
1423 int slen = strlen(buf);
1424 int len = write(bs->fd, buf, slen);
1425 if (len != slen) {
1426 ERR(BLOBSTORE_ERROR_NOENT, "failed to write to the metadata file");
1427 return -1;
1428 }
1429
1430 return 0;
1431 }
1432
1433 //!
1434 //!
1435 //!
1436 //! @return
1437 //!
1438 //! @pre
1439 //!
1440 //! @note
1441 //!
1442 int blobstore_init(void)
1443 {
1444 int ret = 0;
1445
1446 if (!initialized) {
1447 ret = diskutil_init(0);
1448 if (ret) {
1449 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to initialize diskutil library");
1450 } else {
1451 ret = verify_helpers(helpers, helpers_path, LASTHELPER);
1452 if (ret) {
1453 for (int i = 0; i < LASTHELPER; i++) {
1454 if (helpers_path[i] == NULL)
1455 LOGERROR("ERROR: missing a required handler: %s\n", helpers[i]);
1456 }
1457 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to initialize blobstore library");
1458 } else {
1459 initialized = 1;
1460 }
1461 }
1462 euca_srand(); // seed the random number generator
1463 }
1464
1465 return ret;
1466 }
1467
//!
//! Releases library-wide resources by cleaning up the diskutil library.
//!
//! @return always 0
//!
int blobstore_cleanup(void)
{
    const int status = 0;              // no failure is reported from here

    diskutil_cleanup();
    return status;
}
1482
1483 //!
1484 //!
1485 //!
1486 //! @param[in] path
1487 //! @param[in] limit_blocks
1488 //! @param[in] flags
1489 //! @param[in] format
1490 //! @param[in] revocation_policy
1491 //! @param[in] snapshot_policy
1492 //!
1493 //! @return
1494 //!
1495 //! @pre
1496 //!
1497 //! @note
1498 //!
1499 blobstore *blobstore_open(const char *path, unsigned long long limit_blocks, unsigned int flags, // BLOBSTORE_FLAG_CREAT - same semantcs as for open() flags
1500 blobstore_format_t format, blobstore_revocation_t revocation_policy, blobstore_snapshot_t snapshot_policy)
1501 {
1502 int saved_errno;
1503
1504 if (blobstore_init())
1505 return NULL;
1506
1507 blobstore *bs = EUCA_ZALLOC(1, sizeof(blobstore));
1508 if (bs == NULL) {
1509 ERR(BLOBSTORE_ERROR_NOMEM, NULL);
1510 goto out;
1511 }
1512 euca_strncpy(bs->path, path, sizeof(bs->path)); //! @TODO canonicalize path
1513 char meta_path[PATH_MAX];
1514 snprintf(meta_path, sizeof(meta_path), "%s/%s", bs->path, BLOBSTORE_METADATA_FILE);
1515
1516 int write_flags = 0;
1517 if (flags & BLOBSTORE_FLAG_CREAT) {
1518 write_flags = BLOBSTORE_FLAG_CREAT | BLOBSTORE_FLAG_EXCL;
1519 };
1520
1521 write_metadata:
1522
1523 if (write_flags) {
1524 _blobstore_errno = BLOBSTORE_ERROR_OK;
1525 _err_off();
1526 bs->fd = open_and_lock(meta_path, write_flags, 0, BLOBSTORE_FILE_PERM);
1527 _err_on();
1528 if (bs->fd != -1) { // managed to create or open blobstore metadata file and got exclusive lock
1529
1530 // the intention is to create the blobstore for the first time
1531 if (write_flags & BLOBSTORE_FLAG_CREAT) {
1532 gen_id(bs->id, sizeof(bs->id));
1533 bs->limit_blocks = limit_blocks;
1534 bs->revocation_policy = (revocation_policy == BLOBSTORE_REVOCATION_ANY) ? BLOBSTORE_REVOCATION_NONE : revocation_policy;
1535 bs->snapshot_policy = (snapshot_policy == BLOBSTORE_SNAPSHOT_ANY) ? BLOBSTORE_SNAPSHOT_DM : snapshot_policy; //! @TODO verify that DM is available?
1536 bs->format = (format == BLOBSTORE_FORMAT_ANY) ? BLOBSTORE_FORMAT_FILES : format;
1537
1538 // write metadata to disk
1539 write_store_metadata(bs);
1540
1541 } else if (write_flags & BLOBSTORE_FLAG_RDWR) { // the intention is to adjust metadata
1542 if (read_store_metadata(bs))
1543 goto free;
1544 assert(bs->id);
1545 if (limit_blocks)
1546 bs->limit_blocks = limit_blocks;
1547 if (revocation_policy != BLOBSTORE_REVOCATION_ANY)
1548 bs->revocation_policy = revocation_policy;
1549 write_store_metadata(bs);
1550 }
1551 close_and_unlock(bs->fd); // try to close, thus giving up the exclusive lock
1552 }
1553 if (_blobstore_errno != BLOBSTORE_ERROR_OK && // either open or write failed
1554 _blobstore_errno != BLOBSTORE_ERROR_EXIST && // it is OK if file already exists
1555 _blobstore_errno != BLOBSTORE_ERROR_AGAIN) { // it is OK if we lost the race for the write lock
1556 ERR(_blobstore_errno, "failed to open or create blobstore");
1557 goto free;
1558 }
1559 }
1560 // now (re)open, with a shared read lock
1561 bs->fd = open_and_lock(meta_path, BLOBSTORE_FLAG_RDONLY, BLOBSTORE_METADATA_TIMEOUT_USEC, BLOBSTORE_FILE_PERM);
1562 if (bs->fd == -1) {
1563 goto free;
1564 }
1565 if (read_store_metadata(bs)) { // try reading metadata
1566 goto free;
1567 }
1568 // verify that parameters are not being changed
1569 if (limit_blocks && limit_blocks != bs->limit_blocks) {
1570 if (flags & BLOBSTORE_FLAG_STRICT) {
1571 ERR(BLOBSTORE_ERROR_INVAL, "'limit_blocks' does not match existing blobstore");
1572 goto free;
1573 } else {
1574 LOGINFO("adjusting blobstore limit from %lld to %lld\n", bs->limit_blocks, limit_blocks);
1575 write_flags = BLOBSTORE_FLAG_RDWR;
1576 close_and_unlock(bs->fd);
1577 goto write_metadata;
1578 }
1579 }
1580 if (snapshot_policy != BLOBSTORE_SNAPSHOT_ANY && snapshot_policy != bs->snapshot_policy) {
1581 ERR(BLOBSTORE_ERROR_INVAL, "'snapshot_policy' does not match existing blobstore");
1582 goto free;
1583 }
1584 if (format != BLOBSTORE_FORMAT_ANY && format != bs->format) {
1585 ERR(BLOBSTORE_ERROR_INVAL, "'format' does not match existing blobstore");
1586 goto free;
1587 }
1588 if (revocation_policy != BLOBSTORE_REVOCATION_ANY && revocation_policy != bs->revocation_policy) {
1589 if (flags & BLOBSTORE_FLAG_STRICT) {
1590 ERR(BLOBSTORE_ERROR_INVAL, "'revocation_policy' does not match existing blobstore"); //! @TODO maybe make revocation_policy changeable after creation
1591 goto free;
1592 } else {
1593 write_flags = BLOBSTORE_FLAG_RDWR;
1594 close_and_unlock(bs->fd);
1595 goto write_metadata;
1596 }
1597 }
1598 int fd = bs->fd;
1599 bs->fd = -1;
1600 close_and_unlock(fd);
1601 goto out;
1602
1603 free:
1604 saved_errno = _blobstore_errno;
1605 close_and_unlock(bs->fd);
1606 EUCA_FREE(bs);
1607 _blobstore_errno = saved_errno;
1608
1609 out:
1610 return bs;
1611 }
1612
1613 //!
1614 //! Frees the blobstore handle
1615 //!
1616 //! @param[in] bs
1617 //!
1618 //! @return
1619 //!
1620 //! @pre
1621 //!
1622 //! @note
1623 //!
1624 int blobstore_close(blobstore * bs)
1625 {
1626 EUCA_FREE(bs);
1627 return 0;
1628 }
1629
1630 //!
1631 //! Locks the blobstore
1632 //!
1633 //! @param[in] bs
1634 //! @param[in] timeout_usec
1635 //!
1636 //! @return
1637 //!
1638 //! @pre
1639 //!
1640 //! @note
1641 //!
1642 int blobstore_lock(blobstore * bs, long long timeout_usec)
1643 {
1644 char meta_path[PATH_MAX];
1645 snprintf(meta_path, sizeof(meta_path), "%s/%s", bs->path, BLOBSTORE_METADATA_FILE);
1646
1647 LOGTRACE("{%u} blobstore_lock: called for %s\n", (unsigned int)pthread_self(), bs->path);
1648 int fd = open_and_lock(meta_path, BLOBSTORE_FLAG_RDWR, timeout_usec, BLOBSTORE_FILE_PERM);
1649 if (fd != -1)
1650 bs->fd = fd;
1651 return fd;
1652 }
1653
1654 //!
1655 //! Unlocks the blobstore
1656 //!
1657 //! @param[in] bs
1658 //!
1659 //! @return
1660 //!
1661 //! @pre
1662 //!
1663 //! @note
1664 //!
1665 int blobstore_unlock(blobstore * bs)
1666 {
1667 int fd = bs->fd;
1668 bs->fd = -1;
1669 LOGTRACE("{%u} blobstore_unlock: called for %s\n", (unsigned int)pthread_self(), bs->path);
1670 return close_and_unlock(fd);
1671 }
1672
1673 //!
1674 //! If no outside references to store or blobs exist, and
1675 //! no blobs are protected, deletes the blobs, the store metadata,
1676 //! and frees the blobstore handle
1677 //!
1678 //! @param[in] bs
1679 //!
1680 //! @return
1681 //!
1682 //! @pre
1683 //!
1684 //! @note
1685 //!
1686 int blobstore_delete(blobstore * bs)
1687 {
1688 LOGINFO("creating the baloon blob\n");
1689 blockblob *bb = blockblob_open(bs, "__baloon_blob__",
1690 bs->limit_blocks * 512, // biggest possible blob
1691 (BLOBSTORE_FLAG_CREAT | BLOBSTORE_FLAG_EXCL),
1692 NULL, // do not care for signature
1693 BLOBSTORE_METADATA_TIMEOUT_USEC); // give a generous timeout
1694 if (bb == NULL) {
1695 LOGINFO("failed to purge blobstore: %s: %s\n", blobstore_get_error_str(blobstore_get_error()), blobstore_get_last_msg());
1696 ERR(BLOBSTORE_ERROR_INVAL, "failed to purge blobstore with a baloon blob");
1697 return EUCA_ERROR;
1698 }
1699 blockblob_delete(bb, BLOBSTORE_DELETE_TIMEOUT_USEC, TRUE); // get rid of the last blob
1700
1701 char meta_path[PATH_MAX];
1702 snprintf(meta_path, sizeof(meta_path), "%s/%s", bs->path, BLOBSTORE_METADATA_FILE);
1703 LOGINFO("removing blobstore metadata '%s'\n", meta_path);
1704 unlink(meta_path);
1705 EUCA_FREE(bs);
1706
1707 return EUCA_OK;
1708 }
1709
1710 //!
1711 //!
1712 //!
1713 //! @return
1714 //!
1715 //! @pre
1716 //!
1717 //! @note
1718 //!
1719 int blobstore_get_error(void)
1720 {
1721 return _blobstore_errno;
1722 }
1723
1724 //!
1725 //! Helper for setting paths, depending on blockblob_path_t given BLOCKBLOB_PATH_X: x = tolower(X)
1726 //!
1727 //! for BLOBSTORE_FORMAT_FILES: BS/BB.x
1728 //! for BLOBSTORE_FORMAT_DIRECTORY: BS/BB/x
1729 //!
1730 //! where BS is blobstore path and BB is a blockblob id.
1731 //! BB may have '/' in it, thus placing all blob-related
1732 //! files in a deeper dir hierarchy
1733 //!
1734 //! @param[in] path_t
1735 //! @param[in] bs
1736 //! @param[in] bb_id
1737 //! @param[out] path
1738 //! @param[in] path_size
1739 //!
1740 //! @return
1741 //!
1742 //! @pre
1743 //!
1744 //! @note
1745 //!
1746 static int set_blockblob_metadata_path(blockblob_path_t path_t, const blobstore * bs, const char *bb_id, char *path, size_t path_size)
1747 {
1748 char base[PATH_MAX];
1749 snprintf(base, sizeof(base), "%s/%s", bs->path, bb_id);
1750
1751 char name[32];
1752 switch (path_t) {
1753 case BLOCKBLOB_PATH_BLOCKS:
1754 euca_strncpy(name, blobstore_metadata_suffixes[BLOCKBLOB_PATH_BLOCKS], sizeof(name));
1755 break;
1756 case BLOCKBLOB_PATH_LOCK:
1757 euca_strncpy(name, blobstore_metadata_suffixes[BLOCKBLOB_PATH_LOCK], sizeof(name));
1758 break;
1759 case BLOCKBLOB_PATH_DM:
1760 euca_strncpy(name, blobstore_metadata_suffixes[BLOCKBLOB_PATH_DM], sizeof(name));
1761 break;
1762 case BLOCKBLOB_PATH_DEPS:
1763 euca_strncpy(name, blobstore_metadata_suffixes[BLOCKBLOB_PATH_DEPS], sizeof(name));
1764 break;
1765 case BLOCKBLOB_PATH_LOOPBACK:
1766 euca_strncpy(name, blobstore_metadata_suffixes[BLOCKBLOB_PATH_LOOPBACK], sizeof(name));
1767 break;
1768 case BLOCKBLOB_PATH_SIG:
1769 euca_strncpy(name, blobstore_metadata_suffixes[BLOCKBLOB_PATH_SIG], sizeof(name));
1770 break;
1771 case BLOCKBLOB_PATH_REFS:
1772 euca_strncpy(name, blobstore_metadata_suffixes[BLOCKBLOB_PATH_REFS], sizeof(name));
1773 break;
1774 case BLOCKBLOB_PATH_HOLLOW:
1775 euca_strncpy(name, blobstore_metadata_suffixes[BLOCKBLOB_PATH_HOLLOW], sizeof(name));
1776 break;
1777 default:
1778 ERR(BLOBSTORE_ERROR_INVAL, "invalid path_t");
1779 return -1;
1780 }
1781
1782 switch (bs->format) {
1783 case BLOBSTORE_FORMAT_FILES:
1784 snprintf(path, path_size, "%s.%s", base, name);
1785 break;
1786 case BLOBSTORE_FORMAT_DIRECTORY:
1787 snprintf(path, path_size, "%s/%s", base, name);
1788 break;
1789 default:
1790 ERR(BLOBSTORE_ERROR_INVAL, "invalid bs->format");
1791 return -1;
1792 }
1793
1794 return 0;
1795 }
1796
1797 //!
1798 //! Write string 'str' into a specific metadata file (based on 'path_t') of blob 'bb_id'
1799 //!
1800 //! @param[in] path_t
1801 //! @param[in] bs
1802 //! @param[in] bb_id
1803 //! @param[in] str
1804 //!
1805 //! @return 0 for success or -1 for error
1806 //!
1807 //! @pre
1808 //!
1809 //! @note
1810 //!
1811 static int write_blockblob_metadata_path(blockblob_path_t path_t, const blobstore * bs, const char *bb_id, const char *str)
1812 {
1813 int ret = 0;
1814 char path[PATH_MAX];
1815 set_blockblob_metadata_path(path_t, bs, bb_id, path, sizeof(path));
1816
1817 int fd = open_and_lock(path,
1818 BLOBSTORE_FLAG_CREAT | BLOBSTORE_FLAG_RDWR,
1819 BLOBSTORE_METADATA_TIMEOUT_USEC,
1820 BLOBSTORE_FILE_PERM);
1821 if (fd == -1)
1822 return -1;
1823 int size = buf_to_fd(fd, str, strlen(str));
1824 int ret_close = close_and_unlock(fd);
1825 if (size != strlen(str)) {
1826 // set the error code, possibly overriding one set by close_and_unlock
1827 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to write desired number of characters to metadata file");
1828 ret = -1;
1829 } else if (ret_close != 0) {
1830 ret = -1; // close_and_unlock should have set the error code
1831 }
1832
1833 return ret;
1834 }
1835
1836 //!
1837 //! Reads contents of a specific metadata file (based on 'path_t') of blob 'bb_id' into string 'str' up to 'str_size'
1838 //!
1839 //! @param[in] path_t
1840 //! @param[in] bs
1841 //! @param[in] bb_id
1842 //! @param[out] str
1843 //! @param[in] str_size
1844 //!
1845 //! @return The number of bytes read or -1 in case of error
1846 //!
1847 //! @pre
1848 //!
1849 //! @note
1850 //!
1851 static int read_blockblob_metadata_path(blockblob_path_t path_t, const blobstore * bs, const char *bb_id, char *str, int str_size)
1852 {
1853 char path[PATH_MAX];
1854 set_blockblob_metadata_path(path_t, bs, bb_id, path, sizeof(path));
1855
1856 int fd = open_and_lock(path,
1857 BLOBSTORE_FLAG_RDONLY,
1858 BLOBSTORE_METADATA_TIMEOUT_USEC,
1859 BLOBSTORE_FILE_PERM);
1860 if (fd == -1)
1861 return -1;
1862 int size = fd_to_buf(fd, str, str_size);
1863 int ret_close = close_and_unlock(fd);
1864 if (size < 1) {
1865 // set the error code, possibly overriding one set by close_and_unlock
1866 ERR(BLOBSTORE_ERROR_NOENT, "blockblob metadata size is too small");
1867 size = -1;
1868 } else if (ret_close != 0) {
1869 size = -1; // close_and_unlock should have set the error code
1870 }
1871
1872 return size;
1873 }
1874
1875 //!
1876 //! Writes strings from 'array' of size 'array_size' (which can be 0) line-by-line
1877 //! into a specific metadata file (based on 'path_t') of blob 'bb_id'
1878 //!
1879 //! @param[in] path_t
1880 //! @param[in] bs
1881 //! @param[in] bb_id
1882 //! @param[out] array
1883 //! @param[out] array_size
1884 //!
1885 //! @return 0 for success and -1 for error
1886 //!
1887 //! @pre
1888 //!
1889 //! @note
1890 //!
1891 static int write_array_blockblob_metadata_path(blockblob_path_t path_t, const blobstore * bs, const char *bb_id, char **array, int array_size)
1892 {
1893 int i = 0;
1894 int fd = 0;
1895 int ret = 0;
1896 int dataLen = 0;
1897 unsigned int openFlags = (BLOBSTORE_FLAG_CREAT | BLOBSTORE_FLAG_TRUNC | BLOBSTORE_FLAG_RDWR);
1898 char path[EUCA_MAX_PATH] = "";
1899
1900 set_blockblob_metadata_path(path_t, bs, bb_id, path, sizeof(path));
1901 if ((fd = open_and_lock(path, openFlags, BLOBSTORE_METADATA_TIMEOUT_USEC, BLOBSTORE_FILE_PERM)) == -1) {
1902 PROPAGATE_ERR(BLOBSTORE_ERROR_UNKNOWN);
1903 return (-1);
1904 }
1905
1906 for (i = 0; i < array_size; i++) {
1907 dataLen = strlen(array[i]);
1908 if (write(fd, array[i], dataLen) != dataLen) {
1909 PROPAGATE_ERR(BLOBSTORE_ERROR_UNKNOWN);
1910 ret = -1;
1911 break;
1912 }
1913
1914 if (write(fd, "\n", 1) != 1) {
1915 PROPAGATE_ERR(BLOBSTORE_ERROR_UNKNOWN);
1916 ret = -1;
1917 break;
1918 }
1919 }
1920
1921 if (close_and_unlock(fd) != 0) {
1922 PROPAGATE_ERR(BLOBSTORE_ERROR_UNKNOWN);
1923 ret = -1;
1924 }
1925
1926 return (ret);
1927 }
1928
//!
//! The equivalent of getline() for a file descriptor: reads one line, one byte at a
//! time, into a buffer that is grown in 64-byte steps as needed.
//!
//! @param[in,out] ppLine pointer to the character array to read into; (*ppLine) may be NULL,
//!                       in which case a buffer is allocated here and handed to the caller
//! @param[in,out] n      amount of memory currently allocated for (*ppLine), if any; it is
//!                       ignored (treated as 0) when (*ppLine) is NULL
//! @param[in]     fd     file descriptor to read from
//!
//! @return On success, the number of characters read excluding the '\n' character. A value
//!         of 0 means that the end of the file was reached before any character was read,
//!         or that the line was empty; the two cannot be told apart from the return value,
//!         but a buffer may have been allocated in the empty-line case. A returned value of
//!         -1 indicates a read or allocation error.
//!
//! @note On error, a buffer that was allocated by this call is released. A buffer that was
//!       supplied by the caller is never released here; (*ppLine) and (*n) are refreshed so
//!       that they keep describing it even if it was moved while being grown.
//!
ssize_t get_line_desc(char **ppLine, size_t * n, int fd)
{
    char c = '\0';
    size_t length = 0;
    ssize_t error = 0;
    char *pLine = *ppLine;
    char *pNewBlock = NULL;
    int callerBuffer = (pLine != NULL);        // remember who owns the initial block
    size_t capacity = (callerBuffer ? (*n) : 0);    // never trust a size that comes without a buffer

    do {
        // Read one character.. If 0, then EOF, if less then error!
        if ((error = read(fd, &c, 1)) <= 0)
            break;

        // Keep room for this character plus the terminating '\0'
        if ((length + 1) >= capacity) {
            if ((pNewBlock = EUCA_REALLOC(pLine, (capacity + 64), sizeof(char))) == NULL) {
                error = -1;
                break;
            }

            pLine = pNewBlock;
            capacity += 64;
        }

        pLine[length++] = c;
    } while (c != '\n');

    // Did we have an error?
    if (error < 0) {
        if (callerBuffer) {
            // the block may have been moved by a previous growth step, keep the caller's view valid
            (*ppLine) = pLine;
            (*n) = capacity;
        } else if (pLine != NULL) {
            // we allocated this memory ourselves, so we release it
            EUCA_FREE(pLine);
        }
        return (-1);
    }

    if (pLine != NULL) {
        if (length > 0) {
            pLine[length] = '\0';      // Safety

            // Now strip '\n' if present. We could have reached EOF and no '\n' was present
            if (pLine[length - 1] == '\n')
                pLine[--length] = '\0';
        } else if (capacity > 0) {
            // EOF before any data: return an empty string without looking in front of the buffer
            pLine[0] = '\0';
        }

        (*ppLine) = pLine;
        (*n) = capacity;
    }

    return ((ssize_t) length);
}
1997
1998 //!
1999 //! Reads lines from a specific metadata file (based on 'path_t') of blob 'bb_id',
2000 //! places each line into a newly allocated string, arranges pointers to these
2001 //! strings into a newly allocated array of pointers, and places the size into 'array_size'
2002 //!
2003 //! @param[in] path_t
2004 //! @param[in] bs
2005 //! @param[in] bb_id
2006 //! @param[out] array
2007 //! @param[out] array_size
2008 //!
2009 //! @return 0 for success and -1 for error
2010 //!
2011 //! @pre
2012 //!
2013 //! @note Caller must deallocate the array and the strings pointed to by the array
2014 //!
2015 static int read_array_blockblob_metadata_path(blockblob_path_t path_t, const blobstore * bs, const char *bb_id, char ***array, int *array_size)
2016 {
2017 int fd = -1;
2018 int ret = 0;
2019 int i = 0;
2020 int j = 0;
2021 size_t n = 0;
2022 ssize_t rdLen = 1;
2023 char **lines = NULL;
2024 char *line = NULL;
2025 char **bigger_lines = NULL;
2026 char path[EUCA_MAX_PATH] = "";
2027
2028 set_blockblob_metadata_path(path_t, bs, bb_id, path, sizeof(path));
2029
2030 // Acquire the metadata file descriptor
2031 if ((fd = open_and_lock(path, BLOBSTORE_FLAG_RDONLY, BLOBSTORE_METADATA_TIMEOUT_USEC, BLOBSTORE_FILE_PERM)) == -1) {
2032 PROPAGATE_ERR(BLOBSTORE_ERROR_UNKNOWN);
2033 *array = NULL;
2034 *array_size = 0;
2035 return 0;
2036 }
2037 // Read each line and fill our array
2038 for (i = 0, rdLen = 1; rdLen > 0; i++) {
2039 n = 0;
2040 line = NULL;
2041
2042 // Read the file. 0 means EOF, < 0 means error...
2043 if ((rdLen = get_line_desc(&line, &n, fd)) < 0) {
2044 EUCA_FREE(line);
2045 PROPAGATE_ERR(BLOBSTORE_ERROR_UNKNOWN);
2046 ret = -1;
2047 break;
2048 } else if (rdLen == 0) {
2049 // EOF, no more data
2050 break;
2051 }
2052
2053 LOGEXTREME("%s => [%d] READ LINE %s rdLen %lu, n %ld\n", __func__, fd, line, rdLen, n);
2054
2055 // Add one more entry to our metadata array
2056 if ((bigger_lines = EUCA_REALLOC(lines, (i + 1), sizeof(char *))) == NULL) {
2057 ERR(BLOBSTORE_ERROR_NOMEM, NULL);
2058 EUCA_FREE(line);
2059 ret = -1;
2060 break;
2061 }
2062
2063 lines = bigger_lines;
2064 lines[i] = line;
2065 }
2066
2067 // Release the metadata file descriptor
2068 if (close_and_unlock(fd) != 0) {
2069 PROPAGATE_ERR(BLOBSTORE_ERROR_UNKNOWN);
2070 ret = -1;
2071 }
2072 // if something failed, lets do some house cleanup before we bail
2073 if (ret == -1) {
2074 if (lines != NULL) {
2075 for (j = 0; j < i; j++)
2076 EUCA_FREE(lines[j]);
2077 EUCA_FREE(lines);
2078 }
2079 return (ret);
2080 }
2081
2082 *array = lines;
2083 *array_size = i;
2084 return (0);
2085 }
2086
2087 //!
2088 //!
2089 //!
2090 //! @param[in] path_t
2091 //! @param[in] bs
2092 //! @param[in] bb_id
2093 //! @param[in] entry
2094 //! @param[in] removing
2095 //!
2096 //! @return
2097 //!
2098 //! @pre
2099 //!
2100 //! @note
2101 //!
2102 static int update_entry_blockblob_metadata_path(blockblob_path_t path_t, const blobstore * bs, const char *bb_id, const char *entry, int removing)
2103 {
2104 int ret = 0;
2105
2106 // read in current entries from a metadata file
2107 char **entries;
2108 int entries_size;
2109 if (read_array_blockblob_metadata_path(path_t, bs, bb_id, &entries, &entries_size) == -1) {
2110 return -1;
2111 }
2112 // see if this entry is already in the metadata file
2113 int found = -1;
2114 for (int j = 0; j < entries_size; j++) {
2115 if (!strcmp(entry, entries[j])) {
2116 found = j;
2117 break;
2118 }
2119 }
2120
2121 if (found == -1 && !removing) { // not in the file and adding
2122 entries_size++;
2123 char **bigger_entries = EUCA_ZALLOC(entries_size, sizeof(char *));
2124 if (bigger_entries == NULL) {
2125 ret = -1;
2126 goto cleanup;
2127 }
2128 for (int i = 0; i < entries_size - 1; i++) { // we do not trust realloc
2129 bigger_entries[i] = entries[i];
2130 }
2131 EUCA_FREE(entries);
2132 entries = bigger_entries;
2133 entries[entries_size - 1] = strdup(entry);
2134
2135 } else if (found != -1 && removing) { // in the file and deleting
2136 EUCA_FREE(entries[found]);
2137 entries_size--;
2138 if (entries_size && found != entries_size) { // still entries left and not deleting last one
2139 entries[found] = entries[entries_size]; // move the last one over the one we're deleting
2140 }
2141
2142 } else { // nothing to do
2143 goto cleanup;
2144 }
2145
2146 // save new entries into the metadata file
2147 if (write_array_blockblob_metadata_path(path_t, bs, bb_id, entries, entries_size) == -1) {
2148 ret = -1;
2149 }
2150
2151 cleanup:
2152 if (entries != NULL) {
2153 for (int j = 0; j < entries_size; j++) {
2154 EUCA_FREE(entries[j]);
2155 }
2156 EUCA_FREE(entries);
2157 }
2158 return ret;
2159 }
2160
2161 //!
2162 //! Retrieves the type of the blockblob metadata path we have.
2163 //!
2164 //! @param[in] bs
2165 //! @param[in] path
2166 //! @param[in] bb_id
2167 //! @param[in] bb_id_size
2168 //!
2169 //! @return If 'path' looks like a blockblob metadata file (based on the suffix), return the type of the file and
2170 //! set bb_id appropriately, else return 0 if it is an unrecognized file, else return -1 for error
2171 //!
2172 //! @pre
2173 //!
2174 //! @note
2175 //!
2176 static int typeof_blockblob_metadata_path(const blobstore * bs, const char *path, char *bb_id, unsigned int bb_id_size)
2177 {
2178 assert(path);
2179 assert(bs->path);
2180 assert(strstr(path, bs->path) == path);
2181
2182 const char *rel_path = path + strlen(bs->path) + 1; // +1 for '/'
2183 int p_len = strlen(rel_path);
2184
2185 for (int i = 1; i < BLOCKBLOB_PATH_TOTAL; i++) { // start at 1 to avoid BLOCKBLOB_PATH_NONE
2186 char suffix[1024];
2187 if (bs->format == BLOBSTORE_FORMAT_DIRECTORY) {
2188 snprintf(suffix, sizeof(suffix), "/%s", blobstore_metadata_suffixes[i]);
2189 } else {
2190 snprintf(suffix, sizeof(suffix), ".%s", blobstore_metadata_suffixes[i]);
2191 }
2192 unsigned int s_len = strlen(suffix);
2193 const char *sp = suffix + s_len - 1; // last char of suffix
2194 const char *pp = rel_path + p_len - 1; // last char of (relative) path
2195 unsigned int matched;
2196 for (matched = 0; *sp == *pp; sp--, pp--) {
2197 matched++;
2198 if (sp == suffix)
2199 break;
2200 if (pp == rel_path)
2201 break;
2202 }
2203 if (matched == s_len // whole suffix matched
2204 && matched < p_len) { // there is more than the suffix
2205 if ((bb_id_size - 1) < (p_len - s_len)) // not enough room in bb_id
2206 return -1;
2207 strncpy(bb_id, rel_path, p_len - s_len); // extract the name, without the suffix
2208 bb_id[p_len - s_len] = '\0'; // terminate the string
2209 return i;
2210 }
2211 }
2212 return 0;
2213 }
2214
2215 //!
2216 //!
2217 //!
2218 //! @param[in] bs
2219 //! @param[in] bb_id
2220 //!
2221 //! @return the number of files and directories deleted as part of removing the
2222 //! blob (thus, 0 means there was nothing to delete)
2223 //!
2224 //! @pre
2225 //!
2226 //! @note
2227 //!
2228 static int delete_blockblob_files(const blobstore * bs, const char *bb_id)
2229 {
2230 int count = 0;
2231
2232 for (int path_t = 1; path_t < BLOCKBLOB_PATH_TOTAL; path_t++) { // go through all types of blob-related files...
2233 char path[PATH_MAX];
2234 set_blockblob_metadata_path((blockblob_path_t) path_t, bs, bb_id, path, sizeof(path));
2235 if (unlink(path) == 0) // ...and try deleting them
2236 count++;
2237 }
2238
2239 // delete blob's subdirectories if there are any
2240 char path[PATH_MAX];
2241 snprintf(path, sizeof(path), "%s/%s%s", bs->path, bb_id, bs->format == BLOBSTORE_FORMAT_DIRECTORY ? "/" : "");
2242 for (int i = strlen(path) - 1; i > 0; i--) {
2243 if (path[i] == '/') {
2244 path[i] = '\0';
2245 if (rmdir(path) == 0) {
2246 count++;
2247 } else {
2248 break;
2249 }
2250 }
2251 }
2252
2253 return count;
2254 }
2255
2256 //!
2257 //! Helper for ensuring a directory required by blob exists
2258 //!
2259 //! @param[in] bs
2260 //! @param[in] bb_id
2261 //!
2262 //! @return 0 = already existed, 1 = created OK, -1 = error
2263 //!
2264 //! @pre
2265 //!
2266 //! @note
2267 //!
2268 static int ensure_blockblob_metadata_path(const blobstore * bs, const char *bb_id)
2269 {
2270 char base[PATH_MAX];
2271 snprintf(base, sizeof(base), "%s/%s", bs->path, bb_id);
2272 return ensure_directories_exist(base, !(bs->format == BLOBSTORE_FORMAT_DIRECTORY), NULL, NULL, BLOBSTORE_DIRECTORY_PERM);
2273 }
2274
2275 //!
2276 //!
2277 //!
2278 //! @param[in] bbs
2279 //!
2280 //! @pre
2281 //!
2282 //! @note
2283 //!
2284 static void free_bbs(blockblob * bbs)
2285 {
2286 while (bbs) {
2287 blockblob *next_bb = bbs->next;
2288 EUCA_FREE(bbs);
2289 bbs = next_bb;
2290 }
2291 }
2292
2293 //!
2294 //!
2295 //!
2296 //! @param[in] bs
2297 //! @param[in] bb_id
2298 //! @param[in] timeout_usec
2299 //!
2300 //! @return
2301 //!
2302 //! @pre
2303 //!
2304 //! @note
2305 //!
2306 static unsigned int check_in_use(blobstore * bs, const char *bb_id, long long timeout_usec)
2307 {
2308 unsigned int in_use = 0;
2309 char path[PATH_MAX];
2310
2311 // determine the path of the .lock file for this blob
2312 set_blockblob_metadata_path(BLOCKBLOB_PATH_LOCK, bs, bb_id, path, sizeof(path));
2313
2314 _err_off(); // do not complain if metadata files do not exist
2315 int fd = open_and_lock(path, BLOBSTORE_FLAG_RDWR, timeout_usec, BLOBSTORE_FILE_PERM); // try opening to see what happens
2316 if (fd != -1) {
2317 struct stat s;
2318 if (fstat(fd, &s) == 0) {
2319 if (s.st_size > 0) { // lock file was not truncated before being released => file not properly closed
2320 in_use |= BLOCKBLOB_STATUS_ABANDONED;
2321 }
2322 }
2323 close_and_unlock(fd);
2324 } else {
2325 in_use |= BLOCKBLOB_STATUS_OPENED; //! @TODO check if open failed for other reason?
2326 }
2327
2328 if (read_blockblob_metadata_path(BLOCKBLOB_PATH_REFS, bs, bb_id, path, sizeof(path)) > 0) {
2329 in_use |= BLOCKBLOB_STATUS_MAPPED;
2330 }
2331
2332 if (read_blockblob_metadata_path(BLOCKBLOB_PATH_DEPS, bs, bb_id, path, sizeof(path)) > 0) {
2333 in_use |= BLOCKBLOB_STATUS_BACKED;
2334 }
2335
2336 if (read_blockblob_metadata_path(BLOCKBLOB_PATH_DM, bs, bb_id, path, sizeof(path)) > 0) {
2337 in_use |= BLOCKBLOB_STATUS_BACKED;
2338 }
2339 _err_on();
2340
2341 return in_use;
2342 }
2343
2344 //!
2345 //!
2346 //!
2347 //! @param[in] bb
2348 //!
2349 //! @pre
2350 //!
2351 //! @note
2352 //!
2353 static void set_device_path(blockblob * bb)
2354 {
2355 char **dm_devs = NULL;
2356 int dm_devs_size = 0;
2357
2358 _err_off(); // do not care if .dm file does not exist
2359 read_array_blockblob_metadata_path(BLOCKBLOB_PATH_DM, bb->store, bb->id, &dm_devs, &dm_devs_size);
2360 _err_on();
2361
2362 if (dm_devs_size > 0) { // .dm is there => set device_path to the device-mapper path
2363 snprintf(bb->device_path, sizeof(bb->device_path), DM_FORMAT, dm_devs[dm_devs_size - 1]); // main device is the last one
2364 euca_strncpy(bb->dm_name, dm_devs[dm_devs_size - 1], sizeof(bb->dm_name));
2365 for (int i = 0; i < dm_devs_size; i++) {
2366 EUCA_FREE(dm_devs[i]);
2367 }
2368 EUCA_FREE(dm_devs);
2369 } else { // .dm is not there => set device_path to loopback
2370 char lo_dev[PATH_MAX] = "";
2371 _err_off(); // do not care if loopback file does not exist
2372 read_blockblob_metadata_path(BLOCKBLOB_PATH_LOOPBACK, bb->store, bb->id, lo_dev, sizeof(lo_dev));
2373 _err_on();
2374 euca_strncpy(bb->device_path, lo_dev, sizeof(bb->device_path));
2375 }
2376 }
2377
2378 //!
2379 //! Given a directory that may contain both blobstore files and
2380 //! non-blobstore files (e.g., instance metadata and soft-links),
2381 //! this deletes all files not managed by the blobstore.
2382 //!
2383 //! @param[in] bs blobstore that may contains blobs under dir_path
2384 //! @param[in] dir_path directory in which to delete non-blob files
2385 //!
2386 //! @return count of files that the function tried to delete or -1 on error
2387 //!
2388 //!
2389 int blobstore_delete_nonblobs(blobstore * bs, const char *dir_path)
2390 {
2391 int ndeleted = 0;
2392
2393 DIR *dir;
2394 if ((dir = opendir(dir_path)) == NULL) {
2395 return -1;
2396 }
2397
2398 struct dirent *dir_entry;
2399 while ((dir_entry = readdir(dir)) != NULL) {
2400 char *entry_name = dir_entry->d_name;
2401
2402 if (!strcmp(".", entry_name) || !strcmp("..", entry_name) || !strcmp(BLOBSTORE_METADATA_FILE, entry_name))
2403 continue; // ignore known unrelated files
2404
2405 // get the path of the directory item
2406 char entry_path[BLOBSTORE_MAX_PATH];
2407 snprintf(entry_path, sizeof(entry_path), "%s/%s", dir_path, entry_name);
2408
2409 char blob_id[BLOBSTORE_MAX_PATH];
2410 if (typeof_blockblob_metadata_path(bs, entry_path, blob_id, sizeof(blob_id)) > 0)
2411 continue; // ignore all blobstore files
2412
2413 char *base_name = strdup(dir_path);
2414 LOGDEBUG("[%s] removing %s\n", basename(base_name), entry_name);
2415 free(base_name);
2416 unlink(entry_path);
2417 ndeleted++;
2418 }
2419
2420 closedir(dir);
2421 return ndeleted;
2422 }
2423
2424 //!
2425 //!
2426 //!
2427 //! @param[in] bs
2428 //! @param[in] dir_path
2429 //! @param[in] tail_bb
2430 //! @param[in] bb_to_avoid
2431 //!
2432 //! @return
2433 //!
2434 //! @pre
2435 //!
2436 //! @note
2437 //!
2438 static blockblob **walk_bs(blobstore * bs, const char *dir_path, blockblob ** tail_bb, const blockblob * bb_to_avoid)
2439 {
2440 DIR *dir;
2441 if ((dir = opendir(dir_path)) == NULL) {
2442 return tail_bb; // ignore access errors in blobstore directory
2443 }
2444
2445 struct dirent *dir_entry;
2446 while ((dir_entry = readdir(dir)) != NULL) {
2447 char *entry_name = dir_entry->d_name;
2448
2449 if (!strcmp(".", entry_name) || !strcmp("..", entry_name) || !strcmp(BLOBSTORE_METADATA_FILE, entry_name))
2450 continue; // ignore known unrelated files
2451
2452 // get the path of the directory item
2453 char entry_path[BLOBSTORE_MAX_PATH];
2454 snprintf(entry_path, sizeof(entry_path), "%s/%s", dir_path, entry_name);
2455 struct stat sb;
2456 if (stat(entry_path, &sb) == -1) {
2457 // ignore access errors in the blobstore directory
2458 //! @TODO is this wise?
2459 continue;
2460 }
2461 // recurse if this is a directory
2462 if (S_ISDIR(sb.st_mode)) {
2463 tail_bb = walk_bs(bs, entry_path, tail_bb, bb_to_avoid);
2464 if (tail_bb == NULL) {
2465 closedir(dir);
2466 return NULL;
2467 }
2468 continue;
2469 }
2470
2471 char blob_id[BLOBSTORE_MAX_PATH];
2472 if (typeof_blockblob_metadata_path(bs, entry_path, blob_id, sizeof(blob_id)) != BLOCKBLOB_PATH_BLOCKS)
2473 continue; // ignore all files except .blocks file
2474
2475 if (bb_to_avoid != NULL && strncmp(blob_id, bb_to_avoid->id, sizeof(blob_id)) == 0)
2476 continue; // avoid that particular blockblob
2477
2478 blockblob *bb = EUCA_ZALLOC(1, sizeof(blockblob));
2479 if (bb == NULL) {
2480 goto free;
2481 }
2482 *tail_bb = bb; // add to LL
2483 tail_bb = &(bb->next);
2484
2485 // fill out the struct
2486 bb->store = bs;
2487 euca_strncpy(bb->id, blob_id, sizeof(bb->id));
2488 euca_strncpy(bb->blocks_path, entry_path, sizeof(bb->blocks_path));
2489 set_device_path(bb); // read .dm and .loopback and set bb->device_path accordingly
2490 bb->size_bytes = sb.st_size;
2491 bb->blocks_allocated = sb.st_blocks;
2492 bb->last_accessed = sb.st_atime;
2493 bb->last_modified = sb.st_mtime;
2494 bb->snapshot_type = BLOBSTORE_FORMAT_ANY; // it is not necessary to know whether this is a snapshot
2495 bb->in_use = check_in_use(bs, bb->id, 0);
2496
2497 // see if it's hollow
2498 char buf[64];
2499 if (read_blockblob_metadata_path(BLOCKBLOB_PATH_HOLLOW, bb->store, bb->id, buf, sizeof(buf)) != -1) {
2500 bb->is_hollow = TRUE;
2501 }
2502 // if there is a .refs file, subtract the mapped blocks, if any, from the size
2503 char **array = NULL;
2504 int array_size = 0;
2505 if (read_array_blockblob_metadata_path(BLOCKBLOB_PATH_DEPS, bb->store, bb->id, &array, &array_size) != -1) {
2506 for (int i = 0; i < array_size; i++) {
2507 char *store_path = NULL;
2508 char *blob_id = NULL;
2509 char *rel_type = NULL;
2510 char *start_block = NULL;
2511 char *len_blocks = NULL;
2512
2513 store_path = strtok(array[i], " ");
2514 blob_id = strtok(NULL, " ");
2515 rel_type = strtok(NULL, " ");
2516 start_block = strtok(NULL, " ");
2517 len_blocks = strtok(NULL, " ");
2518 if (rel_type && len_blocks && strcmp(rel_type, blobstore_relation_type_name[BLOBSTORE_MAP]) == 0) {
2519 bb->size_bytes -= strtoull(len_blocks, NULL, 0) * 512LL;
2520 }
2521 }
2522 }
2523
2524 if (array) {
2525 for (int i = 0; i < array_size; i++)
2526 EUCA_FREE(array[i]);
2527 EUCA_FREE(array);
2528 }
2529 }
2530
2531 free:
2532 closedir(dir);
2533 return tail_bb;
2534 }
2535
2536 //!
2537 //! Runs through the blobstore and puts all found blockblobs into a linked list, returning its head
2538 //!
2539 //! @param[in] bs
2540 //! @param[in] bb_to_avoid
2541 //!
2542 //! @return A pointer to the head of a linked list containing all found blockblobs
2543 //!
2544 //! @pre
2545 //!
2546 //! @note
2547 //!
2548 static blockblob *scan_blobstore(blobstore * bs, const blockblob * bb_to_avoid)
2549 {
2550 blockblob *bbs = NULL;
2551 if (walk_bs(bs, bs->path, &bbs, bb_to_avoid) == NULL) {
2552 if (bbs)
2553 free_bbs(bbs);
2554 bbs = NULL;
2555 }
2556
2557 return bbs;
2558 }
2559
2560 //!
2561 //!
2562 //!
2563 //! @param[in] bb1
2564 //! @param[in] bb2
2565 //!
2566 //! @return
2567 //!
2568 //! @pre
2569 //!
2570 //! @note
2571 //!
2572 static int compare_bbs(const void *bb1, const void *bb2)
2573 {
2574 return (int)((*(blockblob **) bb1)->last_modified - (*(blockblob **) bb2)->last_modified);
2575 }
2576
2577 //!
2578 //!
2579 //!
2580 //! @param[in] bs
2581 //! @param[in] bb_list
2582 //! @param[in] need_blocks
2583 //!
2584 //! @return
2585 //!
2586 //! @pre
2587 //!
2588 //! @note
2589 //!
2590 static long long purge_blockblobs_lru(blobstore * bs, blockblob * bb_list, long long need_blocks)
2591 {
2592 int list_length = 0;
2593 long long purged = 0;
2594
2595 for (blockblob * bb = bb_list; bb; bb = bb->next) {
2596 list_length++;
2597 }
2598
2599 if (list_length) {
2600 blockblob *bb;
2601 int i;
2602
2603 blockblob **bb_array = (blockblob **) EUCA_ZALLOC(list_length, sizeof(blockblob *));
2604 if (!bb_array)
2605 return purged;
2606
2607 for (i = 0, bb = bb_list; bb; bb = bb->next, i++) {
2608 bb_array[i] = bb;
2609 }
2610
2611 qsort(bb_array, list_length, sizeof(blockblob *), compare_bbs);
2612
2613 int iteration = 0;
2614 int deleted;
2615 do {
2616 // iterate multiple times in case there are dependencies
2617 //! @TODO unify with _fsck's iteration code?
2618 deleted = 0; // deleted in this round
2619 for (i = 0; i < list_length; i++) {
2620 bb = bb_array[i];
2621 if (bb == NULL) // was either deleted or deemed undeletable on previous iteration
2622 continue;
2623 bb->in_use = check_in_use(bs, bb->id, 0); // record in-use status
2624
2625 char code = '?';
2626 if (bb->in_use & BLOCKBLOB_STATUS_MAPPED) {
2627 // mapped blobs have children, thus cannot be deleted at this iteration
2628 code = 'C';
2629
2630 } else if (bb->in_use & BLOCKBLOB_STATUS_OPENED) {
2631 bb_array[i] = NULL; // mark it to skip in the future
2632 code = 'O';
2633
2634 } else if (delete_blob_state(bb, BLOBSTORE_DELETE_TIMEOUT_USEC, 1) == -1) {
2635 bb_array[i] = NULL; // mark it to skip in the future
2636 code = '!';
2637
2638 } else {
2639 purged += round_up_sec(bb->size_bytes) / 512;
2640 bb_array[i] = NULL; // mark it to skip in the future
2641 code = 'D';
2642 deleted++;
2643 }
2644 LOGDEBUG("LRU %d %08lld: %29s %c%c%c%c %c %9llu %s", iteration, purged, bb->id, (bb->in_use & BLOCKBLOB_STATUS_OPENED) ? ('o') : ('-'), // o = open
2645 (bb->in_use & BLOCKBLOB_STATUS_BACKED) ? ('p') : ('-'), // p = has parents
2646 (bb->in_use & BLOCKBLOB_STATUS_MAPPED) ? ('c') : ('-'), // c = has children
2647 (bb->in_use & BLOCKBLOB_STATUS_ABANDONED) ? ('a') : ('-'), // a = was abandoned
2648 code, // outcome codes: D=deleted, else C=children, !=undeletable, O=open
2649 bb->size_bytes / 512L, // size is in sectors
2650 ctime(&(bb->last_modified))); // ctime adds a newline
2651 if (purged >= need_blocks)
2652 break;
2653 }
2654 iteration++;
2655 } while (deleted && (purged < need_blocks));
2656 EUCA_FREE(bb_array);
2657 }
2658
2659 return purged;
2660 }
2661
2662 //!
2663 //!
2664 //!
2665 //! @param[in] bs
2666 //! @param[in] meta
2667 //!
2668 //! @return
2669 //!
2670 //! @pre
2671 //!
2672 //! @note
2673 //!
2674 int blobstore_stat(blobstore * bs, blobstore_meta * meta)
2675 {
2676 int ret = 0;
2677
2678 if (blobstore_lock(bs, BLOBSTORE_LOCK_TIMEOUT_USEC) == -1) { // lock it so we can traverse blobstore safely
2679 return EUCA_ERROR;
2680 }
2681 // put existing items in the blobstore into a LL
2682 _blobstore_errno = BLOBSTORE_ERROR_OK;
2683 blockblob *bbs = scan_blobstore(bs, NULL);
2684 if (bbs == NULL) {
2685 if (_blobstore_errno != BLOBSTORE_ERROR_OK) {
2686 goto unlock;
2687 }
2688 }
2689 // analyze the LL, calculating sizes
2690 meta->blocks_allocated = 0;
2691 meta->blocks_unlocked = 0;
2692 meta->blocks_locked = 0;
2693 meta->num_blobs = 0;
2694 for (blockblob * abb = bbs; abb;) {
2695 //! @TODO unify this with locked/unlocked calculation in open()
2696 long long abb_size_blocks = round_up_sec(abb->size_bytes) / 512;
2697 if (abb->in_use & BLOCKBLOB_STATUS_OPENED) {
2698 // these can't be purged if we need space
2699 //! @TODO look into recursive purging of unused references?
2700 meta->blocks_locked += abb_size_blocks;
2701 } else {
2702 // these potentially can be purged, unless they are depended on by locked ones
2703 meta->blocks_unlocked += abb_size_blocks;
2704 }
2705 meta->blocks_allocated += abb->blocks_allocated;
2706 meta->num_blobs++;
2707
2708 // free this node and move the pointer
2709 blockblob *old_bb = abb;
2710 abb = abb->next;
2711 EUCA_FREE(old_bb);
2712 }
2713
2714 unlock:
2715
2716 if (blobstore_unlock(bs) == -1) {
2717 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to unlock the blobstore");
2718 }
2719
2720 euca_strncpy(meta->id, bs->id, sizeof(meta->id));
2721 meta->revocation_policy = bs->revocation_policy;
2722 meta->snapshot_policy = bs->snapshot_policy;
2723 meta->format = bs->format;
2724 meta->blocks_limit = bs->limit_blocks;
2725 if (realpath(bs->path, meta->path) == NULL) {
2726 LOGERROR("failed to resolve the blobstore path %s\n", bs->path);
2727 ret = EUCA_ERROR;
2728 }
2729
2730 return ret;
2731 }
2732
2733 //!
2734 //! Read .refs file content and return any entries that point to blobs that no longer exist
2735 //!
2736 //! @param[in] bb
2737 //! @param[out] refs
2738 //!
2739 //! @return size of the array placed into *refs, which caller must free, or -1 on error
2740 //!
2741 //! @pre
2742 //!
2743 //! @note
2744 //!
2745 static int get_stale_refs(const blockblob * bb, char ***refs)
2746 {
2747 blobstore *bs = bb->store;
2748 char **array = NULL;
2749 int array_size = 0;
2750 int stale_refs = 0;
2751
2752 if (read_array_blockblob_metadata_path(BLOCKBLOB_PATH_REFS, bb->store, bb->id, &array, &array_size) != -1) {
2753 for (int i = 0; i < array_size; i++) {
2754 char ref[BLOBSTORE_MAX_PATH + MAX_DM_NAME + 1];
2755 euca_strncpy(ref, array[i], sizeof(ref));
2756
2757 char *store_path = strtok(array[i], " ");
2758 char *blob_id = strtok(NULL, " "); // the remaining entries in array[i] are ignored
2759 char ref_exists = 0;
2760
2761 if (strlen(store_path) < 1 || strlen(blob_id) < 1)
2762 goto stale_ref;
2763
2764 blobstore *ref_bs = bs;
2765 if (strcmp(bs->path, store_path)) { // if deleting reference in a different blobstore
2766 // need to open it
2767 ref_bs = blobstore_open(store_path, 0, BLOBSTORE_FLAG_CREAT, BLOBSTORE_FORMAT_ANY, BLOBSTORE_REVOCATION_ANY, BLOBSTORE_SNAPSHOT_ANY);
2768 if (ref_bs == NULL) // blobstore with a child blob does not exist
2769 goto stale_ref;
2770 }
2771
2772 blockblob *ref_bb = blockblob_open(ref_bs, blob_id, 0, 0, NULL, BLOBSTORE_FIND_TIMEOUT_USEC);
2773 if (ref_bb) {
2774 blockblob_close(ref_bb);
2775 ref_exists = 1;
2776 } else {
2777 if (_blobstore_errno != BLOBSTORE_ERROR_NOENT) // conservatively assume that unless the error says otherwise, the blob exists
2778 ref_exists = 1;
2779 }
2780 if (ref_bs != bs) {
2781 blobstore_close(ref_bs);
2782 }
2783
2784 stale_ref:
2785
2786 if (ref_exists) {
2787 EUCA_FREE(array[i]); // free names of refs that exist
2788 } else {
2789 strcpy(array[i], ref); // since strtok() clobbered the original value
2790 stale_refs++;
2791 }
2792 }
2793 }
2794
2795 if (stale_refs > 0) {
2796 if (refs) {
2797 *refs = EUCA_ZALLOC(stale_refs, sizeof(char *));
2798 if (*refs == NULL) {
2799 stale_refs = -1; // OOM error
2800 }
2801 }
2802 for (int i = 0, j = 0; i < array_size; i++) {
2803 if (array[i]) { // ref does not exist
2804 if (refs && *refs) {
2805 (*refs)[j++] = array[i];
2806 assert(j <= stale_refs);
2807 } else {
2808 EUCA_FREE(array[i]);
2809 }
2810 }
2811 }
2812 }
2813
2814 if (array_size > 0)
2815 EUCA_FREE(array);
2816
2817 return stale_refs;
2818 }
2819
2820 //!
2821 //! Checks the integrity check of the blobstore. With a non-NULL examiner(), each found
2822 //! blob is passed to it for examination and the blob is deleted if function returns non-zero
2823 //!
2824 //! @param[in] bs
2825 //! @param[in] examiner
2826 //!
2827 //! @return
2828 //!
2829 //! @pre
2830 //!
2831 //! @note
2832 //!
2833 int blobstore_fsck(blobstore * bs, int (*examiner) (const blockblob * bb))
2834 {
2835 int ret = 0;
2836
2837 if (blobstore_lock(bs, BLOBSTORE_LOCK_TIMEOUT_USEC) == -1) { // lock it so we can traverse blobstore safely
2838 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to lock the blobstore");
2839 return -1;
2840 }
2841 // put existing items in the blobstore into a LL
2842 _blobstore_errno = BLOBSTORE_ERROR_OK;
2843 blockblob *bbs = scan_blobstore(bs, NULL);
2844
2845 if (blobstore_unlock(bs) == -1) {
2846 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to unlock the blobstore");
2847 ret = -1;
2848 goto free;
2849 }
2850
2851 if (bbs == NULL) {
2852 if (_blobstore_errno != BLOBSTORE_ERROR_OK) {
2853 ret = -1;
2854 }
2855 goto free;
2856 }
2857
2858 { // check objects in the blobstore
2859
2860 unsigned int num_blobs = 0;
2861 unsigned int blobs_deleted = 0;
2862 unsigned int blobs_undeletable = 0;
2863 unsigned int blobs_unopenable = 0;
2864 unsigned int to_delete_prev = 0;
2865 unsigned int iterations = 1;
2866 for (; iterations < 10; iterations++) { // outer loop for multiple iterations over the list
2867 unsigned int to_delete = 0;
2868
2869 // run through LL, examining each blockblob
2870 for (blockblob * abb = bbs; abb; abb = abb->next) {
2871 if (iterations == 1)
2872 num_blobs++; // count all blobs on the first iteration
2873
2874 if (abb->store == NULL) // these were cleared or condemned on a previous iteration
2875 continue;
2876
2877 // examiner(), if specified, tell us whether to delete the blob
2878 if (blockblob_check(abb) || // blob state is inconsistent
2879 (examiner && examiner(abb))) { // blobstore user condemned the blob
2880
2881 blockblob *bb = blockblob_open(bs, abb->id, 0, 0, NULL, BLOBSTORE_FIND_TIMEOUT_USEC);
2882 if (bb != NULL) {
2883 if (bb->in_use & BLOCKBLOB_STATUS_MAPPED) {
2884
2885 // Since we are checking integrity, do not trust .refs file blindly,
2886 // but ensure that the entries -- blobs depending on this one -- exist
2887
2888 char **stale_refs;
2889 int num_stale_refs = get_stale_refs(bb, &stale_refs);
2890 if (num_stale_refs > 0) {
2891 for (int i = 0; i < num_stale_refs; i++) {
2892 // update the .refs file to remove this entry
2893 LOGINFO("removing stale/corrupted reference in blob %s to %s\n", bb->id, stale_refs[i]);
2894 update_entry_blockblob_metadata_path(BLOCKBLOB_PATH_REFS, bb->store, bb->id, stale_refs[i], 1);
2895 EUCA_FREE(stale_refs[i]);
2896 }
2897 EUCA_FREE(stale_refs);
2898 }
2899 // mapped blobs have children, thus cannot be deleted at this iteration
2900 blockblob_close(bb);
2901 to_delete++;
2902
2903 } else if (blockblob_delete(bb, BLOBSTORE_DELETE_TIMEOUT_USEC, 1) == -1) {
2904 LOGWARN("WARNING: failed to delete blockblob %s\n", abb->id);
2905 blockblob_close(bb);
2906 abb->store = NULL; // so it will get skipped on next iteration
2907 blobs_undeletable++;
2908
2909 } else {
2910 LOGINFO("deleted stale/corrupted blob %s\n", abb->id);
2911 abb->store = NULL; // so it will get skipped on next iteration
2912 blobs_deleted++;
2913 }
2914 } else {
2915 LOGWARN("could not open blockblob %s (it may be in use)\n", abb->id);
2916 abb->store = NULL; // so it will get skipped on next iteration
2917 blobs_unopenable++;
2918 }
2919 }
2920 }
2921 assert(iterations < 11);
2922
2923 if (to_delete == to_delete_prev) // could not delete anything new this iteration
2924 break;
2925 to_delete_prev = to_delete;
2926 if (to_delete == 0)
2927 break;
2928 }
2929
2930 if (num_blobs > 0)
2931 LOGINFO("%s: examined %d blob(s) in %d iteration(s): "
2932 "deleted %d, failed on %d + %d, failed to open %d\n", bs->path, num_blobs, iterations, blobs_deleted, to_delete_prev, blobs_undeletable, blobs_unopenable);
2933 }
2934 free:
2935 if (bbs) {
2936 free_bbs(bbs);
2937 }
2938
2939 return ret;
2940 }
2941
2942 //!
2943 //!
2944 //!
2945 //! @param[in] bs
2946 //! @param[in] regex
2947 //! @param[out] results
2948 //!
2949 //! @return
2950 //!
2951 //! @pre
2952 //!
2953 //! @note
2954 //!
2955 int blobstore_search(blobstore * bs, const char *regex, blockblob_meta ** results)
2956 {
2957 blockblob_meta *head = NULL;
2958 blockblob *bbs = NULL;
2959 int ret = 0;
2960 regex_t re;
2961
2962 if (regcomp(&re, regex, REG_NOSUB) != 0) {
2963 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to parse search regular expression");
2964 return -1;
2965 }
2966
2967 int blobstore_locked = 0;
2968 if (blobstore_lock(bs, BLOBSTORE_LOCK_TIMEOUT_USEC) == -1) { // lock it so we can traverse blobstore safely
2969 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to lock the blobstore");
2970 ret = -1;
2971 goto free;
2972 } else {
2973 blobstore_locked = 1;
2974 }
2975 // put existing items in the blobstore into a LL
2976 _blobstore_errno = BLOBSTORE_ERROR_OK;
2977 bbs = scan_blobstore(bs, NULL);
2978 if (bbs == NULL) {
2979 if (_blobstore_errno != BLOBSTORE_ERROR_OK) {
2980 ret = -1;
2981 goto free;
2982 }
2983 }
2984 // run through LL, looking for matches
2985 unsigned int num_blobs = 0;
2986 unsigned int blobs_matched = 0;
2987 blockblob_meta *prev = NULL;
2988 for (blockblob * abb = bbs; abb; abb = abb->next) {
2989 num_blobs++;
2990 if (regexec(&re, abb->id, 0, NULL, 0) != 0)
2991 continue;
2992 blobs_matched++;
2993
2994 blockblob_meta *bm = EUCA_ZALLOC(1, sizeof(blockblob_meta));
2995 if (bm == NULL) {
2996 ERR(BLOBSTORE_ERROR_NOMEM, NULL);
2997 ret = -1;
2998 goto free;
2999 }
3000
3001 euca_strncpy(bm->id, abb->id, sizeof(bm->id));
3002 bm->bs = bs;
3003 bm->size_bytes = abb->size_bytes;
3004 bm->in_use = abb->in_use;
3005 bm->is_hollow = abb->is_hollow;
3006 bm->last_accessed = abb->last_accessed;
3007 bm->last_modified = abb->last_modified;
3008 if (head == NULL) {
3009 head = bm;
3010 } else {
3011 prev->next = bm;
3012 bm->prev = prev;
3013 }
3014 prev = bm;
3015 }
3016
3017 ret = blobs_matched;
3018
3019 free:
3020 regfree(&re); // free the regular expression
3021 if (bbs)
3022 free_bbs(bbs); // free the blockblobs LL returned by the search function
3023
3024 if (blobstore_locked && blobstore_unlock(bs) == -1) {
3025 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to unlock the blobstore");
3026 ret = -1;
3027 }
3028
3029 if (ret < 0) { // there were problems, so free the partial linked list, if any
3030 for (blockblob_meta * bm = head; bm;) {
3031 blockblob_meta *next = bm->next;
3032 EUCA_FREE(bm);
3033 bm = next;
3034 }
3035 } else {
3036 *results = head;
3037 }
3038
3039 return ret;
3040 }
3041
3042 //!
3043 //!
3044 //!
3045 //! @param[in] bs
3046 //! @param[in] regex
3047 //!
3048 //! @return
3049 //!
3050 //! @pre
3051 //!
3052 //! @note
3053 //!
3054 int blobstore_delete_regex(blobstore * bs, const char *regex)
3055 {
3056 blockblob_meta *matches = NULL;
3057 int found = blobstore_search(bs, regex, &matches);
3058 int left_to_delete = found;
3059 int deleted;
3060 do {
3061 // iterate multiple times in case there are dependencies
3062 //! @TODO unify with _fsck's iteration code?
3063 deleted = 0; // deleted in this round
3064 for (blockblob_meta * bm = matches; bm; bm = bm->next) {
3065 blockblob *bb = blockblob_open(bs, bm->id, 0, 0, NULL, BLOBSTORE_FIND_TIMEOUT_USEC);
3066 if (bb != NULL) {
3067 if (bb->in_use & BLOCKBLOB_STATUS_MAPPED) {
3068 // mapped blobs have children, thus cannot be deleted at this iteration
3069 blockblob_close(bb);
3070 continue;
3071 }
3072 if (blockblob_delete(bb, BLOBSTORE_DELETE_TIMEOUT_USEC, 0) == -1) {
3073 blockblob_close(bb);
3074 } else {
3075 deleted++;
3076 }
3077 }
3078 }
3079 } while (deleted && (left_to_delete -= deleted));
3080
3081 // free the search results
3082 for (blockblob_meta * bm = matches; bm;) {
3083 blockblob_meta *next = bm->next;
3084 EUCA_FREE(bm);
3085 bm = next;
3086 }
3087
3088 return (left_to_delete == 0) ? (found) : (-1);
3089 }
3090
3091 //!
3092 //!
3093 //!
3094 //! @param[in] bs
3095 //! @param[in] id can be NULL if creating, in which case blobstore will pick a random ID
3096 //! @param[in] size_bytes on create: reserve this size; on open: verify the size, unless set to 0
3097 //! @param[in] flags BLOBSTORE_FLAG_CREAT | BLOBSTORE_FLAG_EXCL - same semantcs as for open() flags, BLOBSTORE_FLAG_HOLLOW - when creating
3098 //! @param[in] sig if non-NULL, on create sig is recorded, on open it is verified
3099 //! @param[in] timeout_usec maximum wait, in microseconds
3100 //!
3101 //! @return
3102 //!
3103 //! @pre
3104 //!
3105 //! @note
3106 //!
3107 blockblob *blockblob_open(blobstore * bs, const char *id, unsigned long long size_bytes, unsigned int flags, const char *sig, unsigned long long timeout_usec)
3108 {
3109 long long size_blocks = round_up_sec(size_bytes) / 512;
3110 if (flags & ~(BLOBSTORE_FLAG_CREAT | BLOBSTORE_FLAG_EXCL | BLOBSTORE_FLAG_HOLLOW)) {
3111 ERR(BLOBSTORE_ERROR_INVAL, "only _CREAT, _EXCL, & _HOLLOW flags are allowed");
3112 return NULL;
3113 }
3114 if (id == NULL && !(flags & BLOBSTORE_FLAG_CREAT)) {
3115 ERR(BLOBSTORE_ERROR_INVAL, "NULL id is only allowed with _CREAT");
3116 return NULL;
3117 }
3118 if (size_blocks == 0 && (flags & BLOBSTORE_FLAG_CREAT)) {
3119 ERR(BLOBSTORE_ERROR_INVAL, "size_blocks can be 0 only without _CREAT");
3120 return NULL;
3121 }
3122 if (size_blocks != 0 && (flags & BLOBSTORE_FLAG_CREAT) && (size_blocks > bs->limit_blocks) && !(flags && BLOBSTORE_FLAG_HOLLOW)) {
3123 ERR(BLOBSTORE_ERROR_NOSPC, NULL);
3124 return NULL;
3125 }
3126
3127 LOGTRACE("{%u} blockblob_open: opening blob id=%s flags=%d timeout=%lld\n", (unsigned int)pthread_self(), id, flags, timeout_usec);
3128
3129 blockblob *bbs = NULL; // a temp LL of blockblobs, used for computing free space and for purging
3130 blockblob *bb = EUCA_ZALLOC(1, sizeof(blockblob));
3131 if (bb == NULL) {
3132 ERR(BLOBSTORE_ERROR_NOMEM, NULL);
3133 goto out;
3134 }
3135
3136 bb->store = bs;
3137 if (id) {
3138 euca_strncpy(bb->id, id, sizeof(bb->id));
3139 } else {
3140 gen_id(bb->id, sizeof(bb->id));
3141 }
3142 bb->fd_lock = -1;
3143 bb->fd_blocks = -1;
3144 bb->size_bytes = size_bytes;
3145 set_blockblob_metadata_path(BLOCKBLOB_PATH_BLOCKS, bs, bb->id, bb->blocks_path, sizeof(bb->blocks_path));
3146
3147 int blobstore_locked = 0;
3148 if (blobstore_lock(bs, timeout_usec) == -1) { // lock it so we can create blob's file atomically
3149 goto free; // failed to obtain a lock on the blobstore
3150 } else {
3151 blobstore_locked = 1;
3152 }
3153
3154 //! @TODO maybe don't create directories needlessly if flags==0?
3155 int created_directory = ensure_blockblob_metadata_path(bs, bb->id);
3156 if (created_directory == -1) {
3157 PROPAGATE_ERR(BLOBSTORE_ERROR_UNKNOWN);
3158 goto unlock;
3159 }
3160 if (blobstore_unlock(bs) == -1) {
3161 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to unlock the blobstore");
3162 goto free;
3163 }
3164 blobstore_locked = 0;
3165
3166 int created_blob = 0;
3167 char lpath[PATH_MAX];
3168 set_blockblob_metadata_path(BLOCKBLOB_PATH_LOCK, bs, bb->id, lpath, sizeof(lpath));
3169 bb->fd_lock = open_and_lock(lpath, flags | BLOBSTORE_FLAG_RDWR, timeout_usec, BLOBSTORE_FILE_PERM); // blobs are always opened with exclusive write access
3170 if (bb->fd_lock == -1) {
3171 // failed to open/create and lock the blockblob
3172 goto clean;
3173 }
3174 char thread_id[512];
3175 int thread_id_len = 0;
3176 snprintf(thread_id, sizeof(thread_id), "%d/%u", getpid(), (unsigned int)pthread_self());
3177 thread_id_len = strlen(thread_id);
3178 if (write(bb->fd_lock, thread_id, thread_id_len) != thread_id_len) {
3179 // Fail to write our thread indentifier in the lock file.
3180 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to write to the blobstore");
3181 goto clean;
3182 }
3183 // convert BLOBSTORE_* flags into standard Posix open() flags and open/create the blocks file
3184 int o_flags = 0;
3185 if (flags & BLOBSTORE_FLAG_RDONLY) {
3186 o_flags |= O_RDONLY;
3187 } else if ((flags & BLOBSTORE_FLAG_RDWR) || (flags & BLOBSTORE_FLAG_CREAT)) {
3188 o_flags |= O_RDWR;
3189 if (flags & BLOBSTORE_FLAG_CREAT) {
3190 o_flags |= O_CREAT;
3191 // intentionally ignore _EXCL supplied without _CREAT
3192 if (flags & BLOBSTORE_FLAG_EXCL)
3193 o_flags |= O_EXCL;
3194 }
3195 }
3196 bb->fd_blocks = open(bb->blocks_path, o_flags, BLOBSTORE_FILE_PERM);
3197 if (bb->fd_blocks == -1) { // failed to open/create the content file
3198 PROPAGATE_ERR(BLOBSTORE_ERROR_UNKNOWN);
3199 goto clean;
3200 }
3201
3202 struct stat sb;
3203 if (fstat(bb->fd_blocks, &sb) == -1) {
3204 goto clean;
3205 }
3206
3207 if (sb.st_size == 0) { // new blob
3208 created_blob = 1;
3209
3210 if (blobstore_lock(bs, timeout_usec) == -1) { // lock it so we can traverse blobstore safely
3211 goto clean; // failed to obtain a lock on the blobstore
3212 } else {
3213 blobstore_locked = 1;
3214 }
3215
3216 // put existing items in the blobstore into a LL
3217 _blobstore_errno = BLOBSTORE_ERROR_OK;
3218 bbs = scan_blobstore(bs, bb);
3219 if (bbs == NULL) {
3220 if (_blobstore_errno != BLOBSTORE_ERROR_OK) {
3221 goto clean;
3222 }
3223 }
3224 // a bit of a hack: HOLLOW blobs skip the blobstore limit check upon creation
3225 if (flags & BLOBSTORE_FLAG_HOLLOW) {
3226 bb->is_hollow = TRUE;
3227 if (write_blockblob_metadata_path(BLOCKBLOB_PATH_HOLLOW, bs, bb->id, "this blob is hollow\n"))
3228 goto clean;
3229
3230 } else { // enforce blobstore limits
3231
3232 // analyze the LL, calculating sizes
3233 long long blocks_unlocked = 0;
3234 long long blocks_locked = 0;
3235 unsigned int num_blobs = 0;
3236 for (blockblob * abb = bbs; abb; abb = abb->next) {
3237 long long abb_size_blocks = round_up_sec(abb->size_bytes) / 512;
3238 if (abb->is_hollow)
3239 abb_size_blocks = 0;
3240 if (abb->in_use & BLOCKBLOB_STATUS_OPENED) {
3241 // these can't be purged if we need space
3242 //! @TODO look into recursive purging of unused references?
3243 blocks_locked += abb_size_blocks;
3244 } else {
3245 blocks_unlocked += abb_size_blocks; // these potentially can be purged, unless they are depended on by locked ones
3246 }
3247 num_blobs++;
3248 }
3249
3250 long long blocks_free = bs->limit_blocks - (blocks_unlocked + blocks_locked);
3251 if (blocks_free < size_blocks) {
3252 if (!(bs->revocation_policy == BLOBSTORE_REVOCATION_LRU) // not allowed to purge
3253 || (blocks_free + blocks_unlocked) < size_blocks) { // not enough purgeable material
3254 ERR(BLOBSTORE_ERROR_NOSPC, NULL);
3255 goto clean;
3256 }
3257 long long blocks_needed = size_blocks - blocks_free;
3258 _err_off(); // do not care about errors duing purging
3259 long long blocks_freed = purge_blockblobs_lru(bs, bbs, blocks_needed);
3260 _err_on();
3261 if (blocks_freed < blocks_needed) {
3262 ERR(BLOBSTORE_ERROR_NOSPC, "could not purge enough from cache");
3263 goto clean;
3264 }
3265 }
3266 }
3267
3268 if (lseek(bb->fd_blocks, size_bytes - 1, SEEK_CUR) == (off_t) - 1) { // create a file with a hole
3269 PROPAGATE_ERR(BLOBSTORE_ERROR_UNKNOWN);
3270 goto clean;
3271 }
3272 if (write(bb->fd_blocks, zero_buf, 1) != (ssize_t) 1) {
3273 PROPAGATE_ERR(BLOBSTORE_ERROR_UNKNOWN);
3274 goto clean;
3275 }
3276 if (sig)
3277 if (write_blockblob_metadata_path(BLOCKBLOB_PATH_SIG, bs, bb->id, sig)) {
3278 goto clean;
3279 }
3280 bb->snapshot_type = BLOBSTORE_SNAPSHOT_NONE; // just created, so not a snapshot
3281
3282 if (blobstore_unlock(bs) == -1) {
3283 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to unlock the blobstore");
3284 }
3285 blobstore_locked = 0;
3286
3287 } else { // blob existed
3288
3289 char buf[BLOBSTORE_SIG_MAX];
3290
3291 if (bb->size_bytes == 0) { // find out the size from the file size
3292 bb->size_bytes = sb.st_size;
3293 } else if (bb->size_bytes != sb.st_size) { // verify the size specified by the user
3294 LOGERROR("{%u} encountered a size mismatch when opening a blob (requested %lld, found %ld)\n", (unsigned int)pthread_self(), bb->size_bytes, sb.st_size);
3295 ERR(BLOBSTORE_ERROR_SIGNATURE, "size of the existing blockblob does not match");
3296 goto clean;
3297 }
3298 // determine whether this blob is a map of another,
3299 // in which case the blocks are backing and should
3300 // not be accessed directly
3301 if (read_blockblob_metadata_path(BLOCKBLOB_PATH_DM, bs, bb->id, buf, sizeof(buf)) > 0) {
3302 bb->snapshot_type = BLOBSTORE_SNAPSHOT_DM;
3303 } else {
3304 bb->snapshot_type = BLOBSTORE_SNAPSHOT_NONE;
3305 }
3306
3307 // check if its hollow
3308 if (read_blockblob_metadata_path(BLOCKBLOB_PATH_HOLLOW, bs, bb->id, buf, sizeof(buf)) != -1) {
3309 bb->is_hollow = TRUE;
3310 }
3311
3312 if (sig && (strlen(sig) > 0)) { // check the signature, if there
3313 int sig_size;
3314 if ((sig_size = read_blockblob_metadata_path(BLOCKBLOB_PATH_SIG, bs, bb->id, buf, sizeof(buf))) != strlen(sig)
3315 || (strncmp(sig, buf, sig_size) != 0)) {
3316 LOGERROR("{%u} encountered signature mismatch when opening a blob (requested size [%ld], found [%d])\n", (unsigned int)pthread_self(), strlen(sig), sig_size);
3317 ERR(BLOBSTORE_ERROR_SIGNATURE, NULL);
3318 goto clean;
3319 }
3320 }
3321 // check its in-use status
3322 bb->in_use = check_in_use(bs, bb->id, 0);
3323 }
3324
3325 { // create a loopback device, if there isn't a valid one already (this may happen whether the blob is new or old)
3326 char lo_dev[PATH_MAX] = "";
3327 struct stat sb;
3328
3329 _err_off(); // do not care if loopback file does not exist
3330 read_blockblob_metadata_path(BLOCKBLOB_PATH_LOOPBACK, bs, bb->id, lo_dev, sizeof(lo_dev));
3331 _err_on();
3332 if ((strlen(lo_dev) < 1) // nothing in .loopback file
3333 || (stat(lo_dev, &sb) == -1) // something in .loopback that does not exist
3334 || (!S_ISBLK(sb.st_mode))) { // something in .loopback that is not block device
3335
3336 if (diskutil_loop(bb->blocks_path, 0, lo_dev, sizeof(lo_dev))) {
3337 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to obtain a loopback device for a blockblob");
3338 goto clean;
3339 }
3340 write_blockblob_metadata_path(BLOCKBLOB_PATH_LOOPBACK, bs, bb->id, lo_dev);
3341 }
3342 }
3343
3344 set_device_path(bb); // read .dm and .loopback and set bb->device_path accordingly
3345
3346 goto out; // all is well
3347
3348 clean:
3349 {
3350 int saved_errno = _blobstore_errno; // save it because close_and_unlock() or delete_blockblob_files() may reset it
3351 if (bb->fd_lock != -1) {
3352 if (ftruncate(bb->fd_lock, 0) != 0) {
3353 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to truncate the blobstore lock file.");
3354 }
3355 close_and_unlock(bb->fd_lock);
3356 }
3357 if (bb->fd_blocks != -1) {
3358 close(bb->fd_blocks);
3359 }
3360 if (created_directory || created_blob) { // only delete disk state if we created it
3361 delete_blockblob_files(bs, bb->id);
3362 }
3363 if (saved_errno) {
3364 _blobstore_errno = saved_errno;
3365 }
3366 }
3367
3368 unlock:
3369 {
3370 int saved_errno = _blobstore_errno;
3371 if (blobstore_locked && blobstore_unlock(bs) == -1) {
3372 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to unlock the blobstore");
3373 if (saved_errno) {
3374 _blobstore_errno = saved_errno;
3375 }
3376 }
3377 }
3378
3379 free:
3380 EUCA_FREE(bb);
3381
3382 out:
3383 LOGTRACE("{%u} blockblob_open: done with blob id=%s ret=%p\n", (unsigned int)pthread_self(), id, bb);
3384 if (bb == NULL) {
3385 LOGTRACE("{%u} blockblob_open: errno=%d msg=%s\n", (unsigned int)pthread_self(), _blobstore_errno, blobstore_get_last_msg());
3386 }
3387
3388 free_bbs(bbs);
3389 return bb;
3390 }
3391
3392 //!
3393 //!
3394 //!
3395 //! @param[in] bs
3396 //! @param[in] bb_id
3397 //!
3398 //! @return
3399 //!
3400 //! @pre
3401 //!
3402 //! @note
3403 //!
3404 static int loop_remove(blobstore * bs, const char *bb_id)
3405 {
3406 char path[PATH_MAX] = "";
3407 int ret = 0;
3408
3409 _err_off(); // do not care if loopback file does not exist
3410 read_blockblob_metadata_path(BLOCKBLOB_PATH_LOOPBACK, bs, bb_id, path, sizeof(path)); // loads path of /dev/loop?
3411 _err_on();
3412
3413 if (strlen(path)) {
3414 if (diskutil_unloop(path)) {
3415 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to remove loopback device for blockblob");
3416 ret = -1;
3417 } else {
3418 set_blockblob_metadata_path(BLOCKBLOB_PATH_LOOPBACK, bs, bb_id, path, sizeof(path)); // load path of .../loopback file itself
3419 unlink(path);
3420 }
3421 }
3422
3423 return ret;
3424 }
3425
3426 //!
3427 //! releases the blob locks, allowing others to open() it, and frees the blockblob handle
3428 //!
3429 //! @param[in] bb
3430 //!
3431 //! @return
3432 //!
3433 //! @pre
3434 //!
3435 //! @note
3436 //!
3437 int blockblob_close(blockblob * bb)
3438 {
3439 if (bb == NULL) {
3440 ERR(BLOBSTORE_ERROR_INVAL, NULL);
3441 return -1;
3442 }
3443 int ret = 0;
3444 LOGTRACE("{%u} blockblob_close: closing blob id=%s\n", (unsigned int)pthread_self(), bb->id);
3445
3446 // do not remove /dev/loop* if it is used by device mapper
3447 // (we do not care about BLOCKBLOB_STATUS_OPENED because
3448 // it should be only this thread that has the blob open)
3449 int in_use = check_in_use(bb->store, bb->id, 0);
3450 if (!(in_use & (BLOCKBLOB_STATUS_MAPPED | BLOCKBLOB_STATUS_BACKED))) {
3451 ret = loop_remove(bb->store, bb->id);
3452 }
3453 ret |= close(bb->fd_blocks);
3454 if (ftruncate(bb->fd_lock, 0) != 0) {
3455 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to truncate the blobstore lock file.");
3456 }
3457 ret |= close_and_unlock(bb->fd_lock);
3458 EUCA_FREE(bb); // we free the blob regardless of whether closing succeeds or not
3459 return ret;
3460 }
3461
3462 //!
3463 //!
3464 //!
3465 //! @param[in] dev_name
3466 //!
3467 //! @return
3468 //!
3469 //! @pre
3470 //!
3471 //! @note
3472 //!
3473 static int dm_suspend_resume(const char *dev_name)
3474 {
3475 int ret = EUCA_OK;
3476
3477 if ((ret = euca_execlp(NULL, helpers_path[ROOTWRAP], helpers_path[DMSETUP], "suspend", dev_name, NULL)) != EUCA_OK) {
3478 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to suspend device with 'dmsetup'");
3479 return (-1);
3480 }
3481
3482 if ((ret = euca_execlp(NULL, helpers_path[ROOTWRAP], helpers_path[DMSETUP], "resume", dev_name, NULL)) != EUCA_OK) {
3483 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to resume device with 'dmsetup'");
3484 return (-1);
3485 }
3486
3487 return (0);
3488 }
3489
3490 //!
3491 //!
3492 //!
3493 //! @param[in] dev_name
3494 //!
3495 //! @return
3496 //!
3497 //! @pre
3498 //!
3499 //! @note
3500 //!
3501 static int dm_check_device(const char *dev_name)
3502 {
3503 // see if the device exists
3504 char dm_path[MAX_DM_PATH];
3505 snprintf(dm_path, sizeof(dm_path), DM_PATH "%s", dev_name);
3506 return check_path(dm_path); // we do not use check_block() because /dev/mapper/... entries can be sym links
3507 }
3508
3509 //!
3510 //!
3511 //!
3512 //! @param[in] dev_name
3513 //!
3514 //! @return
3515 //!
3516 //! @pre
3517 //!
3518 //! @note
3519 //!
3520 static int dm_delete_device(const char *dev_name)
3521 {
3522 int ret = 0;
3523 int retries = 1;
3524 char dm_path[MAX_DM_PATH] = "";
3525
3526 // see if the device to delete exists
3527 snprintf(dm_path, sizeof(dm_path), DM_PATH "%s", dev_name);
3528 errno = 0;
3529 if (check_path(dm_path) && (errno == ENOENT)) // we do not use check_block() because /dev/mapper/... entries can be sym links
3530 return (0);
3531
3532 try_again:
3533 myprintf(EUCA_LOG_INFO, "removing device %s (retries=%d)\n", dev_name, retries);
3534 if ((euca_execlp(NULL, helpers_path[ROOTWRAP], helpers_path[DMSETUP], "remove", dev_name, NULL)) != EUCA_OK) {
3535 if (retries--) {
3536 usleep(100);
3537 goto try_again;
3538 }
3539 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to remove device mapper device with 'dmsetup'");
3540 ret = -1;
3541 }
3542 return (ret);
3543 }
3544
3545 //!
3546 //!
3547 //!
3548 //! @param[in] dev_names
3549 //! @param[in] size
3550 //!
3551 //! @return
3552 //!
3553 //! @pre
3554 //!
3555 //! @note
3556 //!
3557 static int dm_delete_devices(char *dev_names[], int size)
3558 {
3559 if (size < 1)
3560 return 0;
3561 int ret = 0;
3562
3563 // construct list of device names in the order that they should be removed
3564 int devices = 0;
3565 char **dev_names_removable = EUCA_ZALLOC(size, sizeof(char *));
3566 if (dev_names_removable == NULL) {
3567 ERR(BLOBSTORE_ERROR_NOMEM, NULL);
3568 return -1;
3569 }
3570 for (int i = size - 1; i >= 0; i--) {
3571 char *name = dev_names[i];
3572 int seen = 0;
3573 for (int j = i + 1; j < size; j++) {
3574 if (!strcmp(name, dev_names[j])) {
3575 seen = 1;
3576 break;
3577 }
3578 }
3579 if (!seen) {
3580 dev_names_removable[devices++] = name;
3581 }
3582 }
3583
3584 // run through devices and remove them
3585 for (int i = 0; i < devices; i++) {
3586
3587 // some of these devices may have children devices that were created
3588 // by GNU parted for each of the partitions inside; here we look for
3589 // those devices and remove them so the main device is not 'busy'.
3590 for (int j = 1; j < 10; j++) {
3591 char name_p[1024]; // device mapper name of a potential partition entry
3592 char path_p[1024]; // path to the device mapper file
3593 // just append 'pN' to the name, e.g., sda -> sdap1
3594 snprintf(name_p, sizeof(name_p), "%sp%d", dev_names_removable[i], j);
3595 snprintf(path_p, sizeof(path_p), DM_FORMAT, name_p);
3596 if (check_path(path_p) == 0) {
3597 dm_delete_device(name_p);
3598 }
3599 // also try appending just 'N', since that may be the name format, too
3600 snprintf(name_p, sizeof(name_p), "%s%d", dev_names_removable[i], j);
3601 snprintf(path_p, sizeof(path_p), DM_FORMAT, name_p);
3602 if (check_path(path_p) == 0) {
3603 dm_delete_device(name_p);
3604 }
3605 }
3606 ret = dm_delete_device(dev_names_removable[i]);
3607 }
3608 EUCA_FREE(dev_names_removable);
3609
3610 return ret;
3611 }
3612
3613 //!
3614 //!
3615 //!
3616 //! @param[in] dev_names
3617 //! @param[in] dm_tables
3618 //! @param[in] size
3619 //!
3620 //! @return
3621 //!
3622 //! @pre
3623 //!
3624 //! @note
3625 //!
3626 static int dm_create_devices(char *dev_names[], char *dm_tables[], int size)
3627 {
3628 int i = 0;
3629 int fd = 0;
3630 int status = 0;
3631 int rc = EUCA_OK;
3632 int rbytes = 0;
3633 pid_t cpid = 0;
3634 char tmpfile[EUCA_MAX_PATH] = "";
3635 char dm_path[MAX_DM_PATH] = "";
3636
3637 for (i = 0; i < size; i++) {
3638 // create devices one by one
3639 myprintf(EUCA_LOG_INFO, "creating device %s\n", dev_names[i]);
3640
3641 if ((cpid = fork()) < 0) {
3642 // fork error
3643 PROPAGATE_ERR(BLOBSTORE_ERROR_UNKNOWN);
3644 goto cleanup;
3645 } else if (cpid == 0) {
3646 // child process - runs `dmsetup` using system()
3647 bzero(tmpfile, sizeof(tmpfile));
3648 snprintf(tmpfile, sizeof(tmpfile) - 1, "/tmp/dmsetup.XXXXXX");
3649 if ((fd = safe_mkstemp(tmpfile)) >= 0) {
3650 if ((rbytes = write(fd, dm_tables[i], strlen(dm_tables[i]))) != strlen(dm_tables[i])) {
3651 // if write error
3652 LOGERROR("{%u} error: dm_create_devices: write returned number of bytes != write buffer: %d/%ld\n", (unsigned int)pthread_self(), rbytes, strlen(dm_tables[i]));
3653 unlink(tmpfile);
3654 exit(1);
3655 }
3656 close(fd);
3657 } else {
3658 // couldn't get fd
3659 LOGERROR("{%u} error: dm_create_devices: couldn't open temporary file %s: %s\n", (unsigned int)pthread_self(), tmpfile, strerror(errno));
3660 unlink(tmpfile);
3661 exit(1);
3662 }
3663
3664 // invoke `dmsetup create ...`
3665 rc = euca_execlp(&status, helpers_path[ROOTWRAP], helpers_path[DMSETUP], "create", dev_names[i], tmpfile, NULL);
3666
3667 // free out temp file
3668 unlink(tmpfile);
3669
3670 // pass back dmsetup's return code
3671 exit(WEXITSTATUS(status));
3672 }
3673 // parent - waits for child, reacts to status
3674 if ((rc = timewait(cpid, &status, BLOBSTORE_DMSETUP_TIMEOUT_SEC)) <= 0) {
3675 LOGERROR("{%u} error: dm_create_devices: bad exit from dmsetup child: %d\n", (unsigned int)pthread_self(), rc);
3676 PROPAGATE_ERR(BLOBSTORE_ERROR_UNKNOWN);
3677 goto cleanup;
3678 }
3679
3680 if (WEXITSTATUS(status) != 0) {
3681 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to set up device mapper table with 'dmsetup'");
3682 myprintf(EUCA_LOG_INFO, "{%u} command: %s %s create %s\n", (unsigned int)pthread_self(), helpers_path[ROOTWRAP], helpers_path[DMSETUP], dev_names[i]);
3683 myprintf(EUCA_LOG_INFO, "{%u} input: %s", (unsigned int)pthread_self(), dm_tables[i]);
3684 goto cleanup;
3685 }
3686
3687 snprintf(dm_path, sizeof(dm_path), DM_PATH "%s", dev_names[i]);
3688 if (diskutil_ch(dm_path, get_username(), NULL, BLOBSTORE_FILE_PERM) != EUCA_OK) {
3689 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to change permissions on the device mapper file\n");
3690 goto cleanup;
3691 }
3692 }
3693
3694 return (0);
3695 cleanup:
3696 _err_off();
3697 dm_delete_devices(dev_names, i + 1);
3698 _err_on();
3699 return (-1);
3700 }
3701
3702 //!
3703 //!
3704 //!
3705 //! @return
3706 //!
3707 //! @pre
3708 //!
3709 //! @note
3710 //!
3711 static char *dm_get_zero(void)
3712 {
3713 static char dev_zero[] = DM_PATH EUCA_ZERO;
3714
3715 struct stat sb;
3716 int tried = 0;
3717 while (stat(dev_zero, &sb) == -1) {
3718 if (tried) {
3719 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to create blockblob zero block device");
3720 return NULL;
3721 }
3722
3723 char *dm_tables[1] = { "0 " EUCA_ZERO_SIZE " zero" };
3724 char *dm_names[1] = { EUCA_ZERO };
3725 dm_create_devices(dm_names, dm_tables, 1);
3726
3727 tried = 1;
3728 }
3729
3730 if (!S_ISBLK(sb.st_mode)) {
3731 ERR(BLOBSTORE_ERROR_UNKNOWN, "blockblob zero is not a block device");
3732 return NULL;
3733 }
3734
3735 return dev_zero;
3736 }
3737
3738 //!
3739 //!
3740 //!
3741 //! @param[in] bb
3742 //!
3743 //! @return
3744 //!
3745 //! @pre
3746 //!
3747 //! @note
3748 //!
3749 static int blockblob_check(const blockblob * bb)
3750 {
3751 char **array = NULL;
3752 int array_size = 0;
3753 int err = 0;
3754 _err_off(); // do not care if metadata files do not exist
3755
3756 // check on dm devices listed in .dm of this blob, if any
3757 if (read_array_blockblob_metadata_path(BLOCKBLOB_PATH_DM, bb->store, bb->id, &array, &array_size) != -1) {
3758 for (int i = 0; i < array_size; i++) {
3759 if (dm_check_device(array[i]))
3760 err++;
3761 EUCA_FREE(array[i]);
3762 }
3763 EUCA_FREE(array);
3764 }
3765 // check on the loop device listed in .loopback of the blob, if any
3766 char lo_dev[PATH_MAX] = "";
3767 read_blockblob_metadata_path(BLOCKBLOB_PATH_LOOPBACK, bb->store, bb->id, lo_dev, sizeof(lo_dev));
3768 if (strlen(lo_dev) > 0) {
3769 struct stat sb;
3770 if (stat(lo_dev, &sb) == -1) {
3771 err++;
3772 } else if (!S_ISBLK(sb.st_mode)) {
3773 err++;
3774 } else if (diskutil_loop_check(bb->blocks_path, lo_dev)) {
3775 err++;
3776 }
3777 }
3778 // check on .refs that point to blobs that no longer exist
3779 if (get_stale_refs(bb, NULL) > 0)
3780 err++;
3781
3782 // check on .lock files that are non-zero => blobs that were not closed properly
3783 if (bb->in_use & BLOCKBLOB_STATUS_ABANDONED)
3784 err++;
3785
3786 _err_on();
3787 return err;
3788 }
3789
3790 //!
3791 //!
3792 //!
3793 //! @param[in] bb
3794 //! @param[in] timeout_usec
3795 //! @param[in] do_force
3796 //!
3797 //! @return
3798 //!
3799 //! @pre
3800 //!
3801 //! @note
3802 //!
3803 static int delete_blob_state(blockblob * bb, long long timeout_usec, char do_force)
3804 {
3805 blobstore *bs = bb->store;
3806 char **array = NULL;
3807 int array_size = 0;
3808 int ret = 0;
3809
3810 // delete dm devices listed in .dm of this blob
3811 if (read_array_blockblob_metadata_path(BLOCKBLOB_PATH_DM, bb->store, bb->id, &array, &array_size) == -1 || dm_delete_devices(array, array_size) == -1) {
3812 if (!do_force) {
3813 ret = -1;
3814 goto free;
3815 }
3816 }
3817 for (int i = 0; i < array_size; i++) {
3818 EUCA_FREE(array[i]);
3819 }
3820 EUCA_FREE(array);
3821 array_size = 0;
3822 array = NULL;
3823
3824 // Read in .deps (blobs that this blob depends on),
3825 // so as to update their .refs (blobs depending on them).
3826 if (read_array_blockblob_metadata_path(BLOCKBLOB_PATH_DEPS, bb->store, bb->id, &array, &array_size) == -1) {
3827 ret = -1;
3828 if (!do_force) {
3829 ret = -1;
3830 goto free;
3831 }
3832 }
3833 char my_ref[BLOBSTORE_MAX_PATH + MAX_DM_NAME + 1];
3834 snprintf(my_ref, sizeof(my_ref), "%s %s", bb->store->path, bb->id);
3835 for (int i = 0; i < array_size; i++) {
3836 char *store_path = strtok(array[i], " ");
3837 char *blob_id = strtok(NULL, " "); // the remaining entries in array[i] are ignored
3838
3839 if (strlen(store_path) < 1 || strlen(blob_id) < 1) {
3840 continue; //! @TODO print a warning about store/blob corruption?
3841 }
3842
3843 blobstore *dep_bs = bs;
3844 if (strcmp(bs->path, store_path)) { // if deleting reference in a different blobstore
3845 // need to open it
3846 dep_bs = blobstore_open(store_path, 0, BLOBSTORE_FLAG_CREAT, BLOBSTORE_FORMAT_ANY, BLOBSTORE_REVOCATION_ANY, BLOBSTORE_SNAPSHOT_ANY);
3847 if (dep_bs == NULL)
3848 continue; //! @TODO print a warning about store/blob corruption?
3849 if (blobstore_lock(dep_bs, timeout_usec) == -1) { // lock this (different) blobstore, too, so .refs are updated atomically
3850 blobstore_close(dep_bs);
3851 continue; //! @TODO print a warning about store/blob corruption?
3852 }
3853 }
3854 // update .refs file on each of the dependencies
3855 if (update_entry_blockblob_metadata_path(BLOCKBLOB_PATH_REFS, dep_bs, blob_id, my_ref, 1) == -1) {
3856 //! @TODO print a warning about store/blob corruption?
3857 }
3858
3859 if (!(check_in_use(dep_bs, blob_id, 0) & ~(BLOCKBLOB_STATUS_ABANDONED))) { // in use except abandoned
3860 loop_remove(dep_bs, blob_id); //! @TODO do we care about errors?
3861 }
3862 if (dep_bs != bs) {
3863 blobstore_unlock(dep_bs);
3864 blobstore_close(dep_bs);
3865 }
3866 }
3867
3868 // remove the loopback entry for this blob
3869 if (loop_remove(bs, bb->id) == -1) {
3870 ret = -1;
3871 }
3872 // remove the files, data and metadata, for of this blob
3873 if (delete_blockblob_files(bs, bb->id) < 1) {
3874 ret = -1;
3875 }
3876
3877 free:
3878 for (int i = 0; i < array_size; i++) {
3879 EUCA_FREE(array[i]);
3880 }
3881 EUCA_FREE(array);
3882
3883 return ret;
3884 }
3885
3886 //!
3887 //! If no outside references to the blob exist, and blob is not protected,
3888 //! deletes the blob and its metadata
3889 //!
3890 //! @param[in] bb
3891 //! @param[in] timeout_usec
3892 //! @param[in] do_force
3893 //!
3894 //! @return 0 if cleanup was successful and frees the blockblob handle, -1 otherwise,
3895 //! and DOES NOT free the blockblob handle (so that it can be closed and freed
3896 //! with blockblob_close)
3897 //!
3898 //! @pre
3899 //!
3900 //! @note
3901 //!
3902 int blockblob_delete(blockblob * bb, long long timeout_usec, char do_force)
3903 {
3904 if (bb == NULL) {
3905 ERR(BLOBSTORE_ERROR_INVAL, NULL);
3906 return -1;
3907 }
3908 blobstore *bs = bb->store;
3909 int ret = 0;
3910 if (blobstore_lock(bs, timeout_usec) == -1) { // lock it so we can traverse it
3911 return -1; // failed to obtain a lock on the blobstore
3912 }
3913 // do not delete the blob if it is used by another one
3914 bb->in_use = check_in_use(bs, bb->id, 0); // update in_use status
3915 // if in use other than opened (by this thread), backed, or abandoned
3916 if (!do_force && (bb->in_use & ~(BLOCKBLOB_STATUS_OPENED | BLOCKBLOB_STATUS_BACKED | BLOCKBLOB_STATUS_ABANDONED))) {
3917 ERR(BLOBSTORE_ERROR_AGAIN, NULL);
3918 ret = -1;
3919 } else {
3920 ret = delete_blob_state(bb, timeout_usec, do_force); // do the bulk of the cleanup
3921
3922 // close the open file descriptors
3923 if (ftruncate(bb->fd_lock, 0) != 0) {
3924 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to truncate the blobstore lock file.");
3925 }
3926
3927 if (close_and_unlock(bb->fd_lock) == -1) {
3928 ret = -1;
3929 } else {
3930 bb->fd_lock = 0; //! @TODO needed? maybe -1?
3931 }
3932
3933 if (close(bb->fd_blocks) == -1) {
3934 ret = -1;
3935 } else {
3936 bb->fd_blocks = 0; //! @TODO needed? maybe -1?
3937 }
3938
3939 // free the blob struct if everything above was OK
3940 if (ret == 0) {
3941 EUCA_FREE(bb);
3942 }
3943 }
3944
3945 int saved_errno = 0;
3946 saved_errno = _blobstore_errno; // save it because blobstore_unlock may overwrite it
3947 if (blobstore_unlock(bs) == -1) {
3948 ERR(BLOBSTORE_ERROR_UNKNOWN, "failed to unlock the blobstore");
3949 }
3950 if (saved_errno) {
3951 _blobstore_errno = saved_errno;
3952 }
3953
3954 return ret;
3955 }
3956
3957 //!
3958 //!
3959 //!
3960 //! @param[in] bb
3961 //! @param[in] min_size_bytes
3962 //!
3963 //! @return
3964 //!
3965 //! @pre
3966 //!
3967 //! @note
3968 //!
3969 static int verify_bb(const blockblob * bb, unsigned long long min_size_bytes)
3970 {
3971 if (bb->fd_lock == -1) {
3972 ERR(BLOBSTORE_ERROR_INVAL, "blockblob lock involved in operation is not open");
3973 return -1;
3974 }
3975 if (bb->fd_blocks == -1) {
3976 ERR(BLOBSTORE_ERROR_INVAL, "blockblob involved in operation is not open");
3977 return -1;
3978 }
3979 struct stat sb;
3980 if (fstat(bb->fd_blocks, &sb) == -1) {
3981 PROPAGATE_ERR(BLOBSTORE_ERROR_NOENT);
3982 return -1;
3983 }
3984 if (sb.st_size < bb->size_bytes) {
3985 ERR(BLOBSTORE_ERROR_UNKNOWN, "blockblob involved in operation has backing of unexpected size");
3986 LOGERROR("sb.st_size=%ld bb->size_bytes=%lld\n", sb.st_size, bb->size_bytes);
3987 return -1;
3988 }
3989 if (sb.st_size < min_size_bytes) {
3990 ERR(BLOBSTORE_ERROR_INVAL, "blockblob involved in operation has backing that is too small");
3991 return -1;
3992 }
3993 if (stat(bb->device_path, &sb) == -1) {
3994 PROPAGATE_ERR(BLOBSTORE_ERROR_NOENT);
3995 return -1;
3996 }
3997 if (!S_ISBLK(sb.st_mode)) {
3998 ERR(BLOBSTORE_ERROR_INVAL, "blockblob involved in operation is missing a loopback block device");
3999 return -1;
4000 }
4001 return 0;
4002 }
4003
4004 //!
4005 //!
4006 //!
4007 //! @param[in] src_bb pointer to source blob to copy data from
4008 //! @param[in] src_offset_bytes start offset in source
4009 //! @param[in] dst_bb pointer to destination blob to copy data to
4010 //! @param[in] dst_offset_bytes start offset in destination
4011 //! @param[in] len_bytes 0 = copy until EOF of source
4012 //!
4013 //! @return
4014 //!
4015 //! @pre
4016 //!
4017 //! @note
4018 //!
4019 int blockblob_copy(blockblob * src_bb, unsigned long long src_offset_bytes, blockblob * dst_bb, unsigned long long dst_offset_bytes, unsigned long long len_bytes) //
4020 {
4021 int ret = 0;
4022
4023 if (src_bb == NULL || dst_bb == NULL) {
4024 ERR(BLOBSTORE_ERROR_INVAL, "blockblob pointer is NULL");
4025 return -1;
4026 }
4027
4028 long long copy_len_bytes = len_bytes;
4029 if (copy_len_bytes == 0) {
4030 copy_len_bytes = src_bb->size_bytes - src_offset_bytes;
4031 }
4032 if (copy_len_bytes < 1) {
4033 ERR(BLOBSTORE_ERROR_INVAL, "copy source offset outside of range");
4034 return -1;
4035 }
4036 // make sure both source and destination blobs are in good shape and big enough
4037 if (verify_bb(src_bb, src_offset_bytes + copy_len_bytes) || verify_bb(dst_bb, dst_offset_bytes + copy_len_bytes)) {
4038 return -1;
4039 }
4040 // determine the largest acceptable block size for dd, all the way down to a byte possibly
4041 int granularity = 4096;
4042 while (src_offset_bytes % granularity || dst_offset_bytes % granularity || copy_len_bytes % granularity) {
4043 granularity /= 2;
4044 }
4045
4046 // do the copy (with block devices dd will silently omit to copy bytes outside the block boundary, so we use paths for uncloned blobs)
4047 const char *src_path = (src_bb->snapshot_type == BLOBSTORE_SNAPSHOT_DM) ? (blockblob_get_dev(src_bb)) : (blockblob_get_file(src_bb));
4048 const char *dst_path = (dst_bb->snapshot_type == BLOBSTORE_SNAPSHOT_DM) ? (blockblob_get_dev(dst_bb)) : (blockblob_get_file(dst_bb));
4049 mode_t old_umask = umask(~BLOBSTORE_FILE_PERM);
4050 int error = diskutil_dd2(src_path, dst_path, granularity, copy_len_bytes / granularity, dst_offset_bytes / granularity, src_offset_bytes / granularity);
4051 umask(old_umask);
4052 if (error) {
4053 ERR(BLOBSTORE_ERROR_INVAL, "failed to copy a section");
4054 return -1;
4055 }
4056
4057 return ret;
4058 }
4059
//!
//! Sorts the device mapper table string sent to dmsetup. In some cases, the table is
//! sent in partition ordering rather than start block ordering, which causes dmsetup to
//! report errors. For example, the following table will cause some errors:
//!     \li 0 63 linear /dev/mapper/euca-dsk-3AE63D3B-d6320e89-p0-snap 0
//!     \li 204863 2764800 linear /dev/loop0 0
//!     \li 2969663 6516 linear /dev/loop1 0
//!     \li 2976179 1024 linear /dev/loop2 0
//!     \li 63 204800 linear /dev/loop3 0
//! This function will take the previous table and re-order it in starting block order
//! as in the following:
//!     \li 0 63 linear /dev/mapper/euca-dsk-3AE63D3B-d6320e89-p0-snap 0
//!     \li 63 204800 linear /dev/loop3 0
//!     \li 204863 2764800 linear /dev/loop0 0
//!     \li 2969663 6516 linear /dev/loop1 0
//!     \li 2976179 1024 linear /dev/loop2 0
//!
//! The table is split in place on newline characters without strtok(), so no hidden
//! static tokenizer state is involved. Lines are appended to the result directly rather
//! than through a fixed-size line buffer, so long entries are never truncated.
//!
//! @param[in,out] pOldTable the table string to sort
//!
//! @return a pointer to the newly allocated table string if successful or NULL if any
//!         error occurred.
//!
//! @pre The provided table field must not be NULL and must contain more than 1 entry
//!      separated by the newline character.
//!
//! @post The given table is always freed and (*pOldTable) is set to the returned value
//!       (NULL on failure). A table with a single line, or with DM_MAX_LINES or more
//!       lines, is returned as an unsorted duplicate of the original.
//!
//! @note A line that does not start with an unsigned number is never selected and is
//!       therefore absent from the sorted result.
//!
static char *dm_sort_table(char **pOldTable)
{
#define DM_MAX_LINES                             32

    unsigned int i = 0;
    unsigned int j = 0;
    unsigned int count = 0;
    unsigned int lineId = UINT32_MAX;
    unsigned long long minVal = UINT64_MAX;
    unsigned long long curVal = 0;
    char *aLines[DM_MAX_LINES] = { NULL };  //!< TODO: Turn this into a dynamic re-alloc'ed array?
    char *pCursor = NULL;
    char *pNewTable = NULL;
    char *pDupTable = NULL;

    if (pOldTable == NULL)
        return (NULL);

    // Make sure our given table isn't NULL.
    if ((*pOldTable) != NULL) {
        // Duplicate the original table in case we need it later: the split below writes
        // NUL characters into the original. If the duplication fails, the cases that need
        // the duplicate end up returning NULL.
        pDupTable = strdup((*pOldTable));

        // Split in lines and count. Empty lines are skipped. At most DM_MAX_LINES - 1
        // lines are split; when one more line exists, it is left in aLines[count] as the
        // "too many lines" marker tested below.
        pCursor = (*pOldTable);
        for (;;) {
            pCursor += strspn(pCursor, "\n");
            if ((*pCursor) == '\0') {
                aLines[count] = NULL;
                break;
            }

            aLines[count] = pCursor;
            if (count == (DM_MAX_LINES - 1)) {
                break;
            }

            pCursor += strcspn(pCursor, "\n");
            if ((*pCursor) != '\0') {
                (*pCursor) = '\0';
                pCursor++;
            }
            count++;
        }

        // Will we need to sort?
        if (aLines[count] != NULL) {
            // We have DM_MAX_LINES lines or more: just return the table as is
            pNewTable = pDupTable;
        } else if (count == 1) {
            // A single line needs no sorting. Because the split altered the original
            // table, return the duplicate version of the original
            pNewTable = pDupTable;
        } else {
            // Selection sort: on every pass pick, among the lines not yet emitted, the
            // first one with the lowest starting block and append it to the new table.
            for (i = 0; i < count; i++) {
                lineId = UINT32_MAX;
                minVal = UINT64_MAX;
                for (j = 0; j < count; j++) {
                    // As we pick lines from the array, they become NULLs
                    if (aLines[j] == NULL)
                        continue;

                    // The starting block number is the first item on the line
                    if ((sscanf(aLines[j], "%llu", &curVal) == 1) && (curVal < minVal)) {
                        lineId = j;
                        minVal = curVal;
                    }
                }

                // lineId stays at UINT32_MAX when nothing was selected on this pass
                if (lineId < count) {
                    // Append the line, then the newline removed by the split
                    if (((pNewTable = euca_strdupcat(pNewTable, aLines[lineId])) == NULL) || ((pNewTable = euca_strdupcat(pNewTable, "\n")) == NULL)) {
                        EUCA_FREE(pDupTable);
                        EUCA_FREE((*pOldTable));
                        return (NULL);
                    }
                    // Let's no longer consider this line.
                    aLines[lineId] = NULL;
                }
            }

            // The duplicate is only needed when the table is returned unsorted
            EUCA_FREE(pDupTable);
        }
    }
    // Free our given table and return the new one.
    EUCA_FREE((*pOldTable));

    // Set our in/out parameter properly on our way out
    (*pOldTable) = pNewTable;
    return (pNewTable);

#undef DM_MAX_LINES
}
4181
4182 //!
4183 //!
4184 //!
4185 //! @param[in] bb pointer to destination blob, which blocks may be used as backing
4186 //! @param[in] map pointer to map of blocks from other blobs/devices to be copied/mapped/snapshotted
4187 //! @param[in] map_size size of the map[]
4188 //!
4189 //! @return
4190 //!
4191 //! @pre
4192 //!
4193 //! @note
4194 //!
4195 int blockblob_clone(blockblob * bb, const blockmap * map, unsigned int map_size)
4196 {
4197 int ret = 0;
4198 if (bb == NULL) {
4199 ERR(BLOBSTORE_ERROR_INVAL, "blockblob pointer is NULL");
4200 return -1;
4201 }
4202
4203 if (map == NULL || map_size < 1 || map_size > MAX_BLOCKMAP_SIZE) {
4204 ERR(BLOBSTORE_ERROR_INVAL, "invalid blockbmap or its size");
4205 return -1;
4206 }
4207 long long bb_size_blocks = round_down_sec(bb->size_bytes) / 512; // dmsetup will not map partial blocks, so we conservatively round down
4208
4209 // verify dependencies (block devices present, blob sizes make sense, zero device present)
4210 char *zero_dev = NULL;
4211 for (int i = 0; i < map_size; i++) {
4212 const blockmap *m = map + i;
4213 if (m->relation_type != BLOBSTORE_COPY && bb->store->snapshot_policy != BLOBSTORE_SNAPSHOT_DM) {
4214 ERR(BLOBSTORE_ERROR_INVAL, "relation type is incompatible with snapshot policy");
4215 return -1;
4216 }
4217
4218 switch (m->source_type) {
4219 case BLOBSTORE_DEVICE:{
4220 const char *path = m->source.device_path;
4221 if (path == NULL) {
4222 ERR(BLOBSTORE_ERROR_INVAL, "one of the device paths is NULL");
4223 return -1;
4224 }
4225 struct stat sb;
4226 if (stat(path, &sb) == -1) {
4227 PROPAGATE_ERR(BLOBSTORE_ERROR_NOENT);
4228 return -1;
4229 }
4230 if (!S_ISBLK(sb.st_mode)) {
4231 ERR(BLOBSTORE_ERROR_INVAL, "one of the device paths is not a block device");
4232 return -1;
4233 }
4234 break;
4235 }
4236 case BLOBSTORE_BLOCKBLOB:{
4237 const blockblob *sbb = m->source.blob;
4238 if (sbb == NULL) {
4239 ERR(BLOBSTORE_ERROR_INVAL, "one of the source blockblob pointers is NULL");
4240 return -1;
4241 }
4242 long long sbb_size_blocks = round_down_sec(sbb->size_bytes) / 512; // dmsetup will not map partial blocks, so we conservatively round down
4243 if (verify_bb(sbb, sbb_size_blocks)) {
4244 return -1;
4245 }
4246 if (sbb_size_blocks < (m->first_block_src + m->len_blocks)) {
4247 LOGWARN("source size = %lld mappped size = %lld\n", sbb_size_blocks, (m->first_block_src + m->len_blocks));
4248 ERR(BLOBSTORE_ERROR_INVAL, "one of the source blockblobs is too small for the map");
4249 return -1;
4250 }
4251 if (bb_size_blocks < (m->first_block_dst + m->len_blocks)) {
4252 ERR(BLOBSTORE_ERROR_INVAL, "the destination blockblob is too small for the map");
4253 return -1;
4254 }
4255 if (m->relation_type == BLOBSTORE_SNAPSHOT && m->len_blocks < MIN_BLOCKS_SNAPSHOT) {
4256 ERR(BLOBSTORE_ERROR_INVAL, "snapshot size is too small");
4257 return -1;
4258 }
4259 break;
4260 }
4261 case BLOBSTORE_ZERO:
4262 zero_dev = dm_get_zero();
4263 if (zero_dev == NULL) {
4264 return -1;
4265 }
4266
4267 break;
4268 default:
4269 ERR(BLOBSTORE_ERROR_INVAL, "invalid map entry type");
4270 return -1;
4271 }
4272 }
4273
4274 // compute the base name of the device mapper device
4275 char dm_base[MAX_DM_LINE];
4276 snprintf(dm_base, sizeof(dm_base), "euca-%s", bb->id);
4277 for (char *c = dm_base; *c != '\0'; c++) {
4278 if (*c == '/') // if the ID has slashes,
4279 *c = '-'; // replace them with hyphens
4280 }
4281
4282 int devices = 0;
4283 int mapped_or_snapshotted = 0;
4284 char buf[MAX_DM_LINE];
4285 char *main_dm_table = NULL;
4286 char **dev_names = EUCA_ZALLOC(map_size * 4 + 1, sizeof(char *)); // for device mapper dev names we will create
4287 if (dev_names == NULL) {
4288 ERR(BLOBSTORE_ERROR_NOMEM, NULL);
4289 return -1;
4290 }
4291 char **dm_tables = EUCA_ZALLOC(map_size * 4 + 1, sizeof(char *)); // for device mapper tables
4292 if (dm_tables == NULL) {
4293 ERR(BLOBSTORE_ERROR_NOMEM, NULL);
4294 EUCA_FREE(dev_names);
4295 return -1;
4296 }
4297 // either does copies or computes the device mapper tables
4298 for (int i = 0; i < map_size; i++) {
4299 const blockmap *m = map + i;
4300 const char *dev;
4301
4302 switch (m->source_type) {
4303 case BLOBSTORE_DEVICE:
4304 dev = m->source.device_path;
4305 break;
4306 case BLOBSTORE_BLOCKBLOB:
4307 dev = m->source.blob->device_path;
4308 break;
4309 case BLOBSTORE_ZERO:
4310 dev = zero_dev;
4311 break;
4312 default:
4313 ERR(BLOBSTORE_ERROR_INVAL, "invalid device map source type");
4314 ret = -1;
4315 goto free;
4316 }
4317
4318 long long first_block_src = m->first_block_src;
4319 switch (m->relation_type) {
4320 case BLOBSTORE_COPY:
4321 // do the copy
4322 if (diskutil_dd2(dev, bb->device_path, 512, m->len_blocks, m->first_block_dst, m->first_block_src)) {
4323 ERR(BLOBSTORE_ERROR_INVAL, "failed to copy a section");
4324 ret = -1;
4325 goto free;
4326 }
4327 // append to the main dm table (we do this here even if we never end up using the device mapper because all segments were copied)
4328 snprintf(buf, sizeof(buf), "%lld %lld linear %s %lld\n", m->first_block_dst, m->len_blocks, bb->device_path, m->first_block_dst);
4329 main_dm_table = euca_strdupcat(main_dm_table, buf);
4330 break;
4331
4332 case BLOBSTORE_SNAPSHOT:{
4333 int granularity = 16; // coarser granularity does not work
4334 while (m->len_blocks % granularity) { // do we need to do this?
4335 granularity /= 2;
4336 }
4337
4338 // with a linear map, create a backing device for the snapshot
4339 snprintf(buf, sizeof(buf), "%s-p%d-back", dm_base, i);
4340 dev_names[devices] = strdup(buf);
4341 char *backing_dev = dev_names[devices];
4342 snprintf(buf, sizeof(buf), "0 %lld linear %s %lld\n", m->len_blocks, bb->device_path, m->first_block_dst);
4343 dm_tables[devices] = strdup(buf);
4344 devices++;
4345
4346 // if there is an offset in the source device, create another map (since snapshots cannot be done at offsets)
4347 const char *snapshotted_dev = dev;
4348 if (m->first_block_src > 0 && m->source_type != BLOBSTORE_ZERO) {
4349 snprintf(buf, sizeof(buf), "%s-p%d-real", dm_base, i);
4350 dev_names[devices] = strdup(buf);
4351 snapshotted_dev = dev_names[devices];
4352 snprintf(buf, sizeof(buf), "0 %lld linear %s %lld\n", m->len_blocks, ((dev) ? dev : 0), m->first_block_src);
4353 dm_tables[devices] = strdup(buf);
4354 devices++;
4355 }
4356 // take a snapshot of the source
4357 snprintf(buf, sizeof(buf), "%s-p%d-snap", dm_base, i);
4358 dev_names[devices] = strdup(buf);
4359 dev = dev_names[devices];
4360 // We use 'n' for a non-persistent snapshot, which will not persist across a reboot.
4361 // With 'p' we could get a persistent snapshot at the cost of 0.2-3.0% overhead in
4362 // disk space, depending on chunksize [1-16], but then we would need to rebuild
4363 // device mapper entries and change space management to accommodate the overhead.
4364 snprintf(buf, sizeof(buf), "0 %lld snapshot %s%s " DM_PATH "%s n %d\n", m->len_blocks, snapshotted_dev[0] == 'e' ? DM_PATH : "",
4365 snapshotted_dev, backing_dev, granularity);
4366 dm_tables[devices] = strdup(buf);
4367 devices++;
4368
4369 first_block_src = 0; // for snapshots the mapping goes from the -snap device at offset 0
4370 // yes, fall through
4371 }
4372
4373 case BLOBSTORE_MAP:
4374 // append to the main dm table
4375 snprintf(buf, sizeof(buf), "%lld %lld linear %s%s %lld\n", m->first_block_dst, m->len_blocks, dev[0] == 'e' ? DM_PATH : "", dev, first_block_src);
4376 main_dm_table = euca_strdupcat(main_dm_table, buf);
4377 mapped_or_snapshotted++;
4378 break;
4379
4380 default:
4381 ERR(BLOBSTORE_ERROR_INVAL, "invalid device map source type");
4382 ret = -1;
4383 goto free;
4384 }
4385 }
4386
4387 if (mapped_or_snapshotted) { // we must use the device mapper
4388 if ((main_dm_table = dm_sort_table(&main_dm_table)) == NULL) {
4389 ret = -1;
4390 goto free;
4391 }
4392
4393 euca_strncpy(bb->dm_name, dm_base, sizeof(bb->dm_name));
4394 dev_names[devices] = strdup(dm_base);
4395 dm_tables[devices] = main_dm_table;
4396 devices++;
4397
4398 // change device_path from loopback to the device-mapper path
4399 snprintf(bb->device_path, sizeof(bb->device_path), DM_FORMAT, dm_base);
4400
4401 if (dm_create_devices(dev_names, dm_tables, devices)) {
4402 ret = -1;
4403 goto free;
4404 }
4405 // record new devices in .dm of this blob
4406 if (write_array_blockblob_metadata_path(BLOCKBLOB_PATH_DM, bb->store, bb->id, dev_names, devices) == -1) {
4407 ret = -1;
4408 goto cleanup;
4409 }
4410 bb->snapshot_type = BLOBSTORE_SNAPSHOT_DM; // remember that blobstore uses device mapper
4411
4412 // update .refs on dependencies and create .deps for this blob
4413 char my_ref[BLOBSTORE_MAX_PATH + MAX_DM_NAME + 1];
4414 snprintf(my_ref, sizeof(my_ref), "%s %s", bb->store->path, bb->id); //! @TODO use store ID to proof against moving blobstore?
4415 for (int i = 0; i < map_size; i++) {
4416 const blockmap *m = map + i;
4417 const blockblob *sbb = m->source.blob;
4418
4419 if (m->source_type != BLOBSTORE_BLOCKBLOB) // only blobstores have references
4420 continue;
4421
4422 if (m->relation_type == BLOBSTORE_COPY) // copies do not create references
4423 continue;
4424
4425 if (blobstore_lock(sbb->store, BLOBSTORE_LOCK_TIMEOUT_USEC) == -1) { // lock the source blobstore so the .refs are updated atomically
4426 LOGERROR("{%u} error: timed out on a blobstore lock while attempting to update .refs\n", (unsigned int)pthread_self());
4427 ret = -1;
4428 goto cleanup; //! @TODO remove .refs entries from this batch that succeeded, if any?
4429 }
4430 // update .refs
4431 if (update_entry_blockblob_metadata_path(BLOCKBLOB_PATH_REFS, sbb->store, sbb->id, my_ref, 0) == -1) {
4432 ret = -1;
4433 goto cleanup; //! @TODO remove .refs entries from this batch that succeeded, if any?
4434 }
4435
4436 if (blobstore_unlock(sbb->store) == -1) {
4437 ret = -1;
4438 goto cleanup; //! @TODO remove .refs entries from this batch that succeeded, if any?
4439 }
4440 // record the dependency in .deps (redundant entries will be filtered out)
4441 char dep_ref[BLOBSTORE_MAX_PATH + MAX_DM_NAME + 1];
4442 snprintf(dep_ref, sizeof(dep_ref), "%s %s %s %llu %llu", sbb->store->path, sbb->id, blobstore_relation_type_name[m->relation_type], m->first_block_dst, m->len_blocks);
4443 if (update_entry_blockblob_metadata_path(BLOCKBLOB_PATH_DEPS, bb->store, bb->id, dep_ref, 0) == -1) {
4444 ret = -1;
4445 goto cleanup; // ditto
4446 }
4447 }
4448 } else {
4449 EUCA_FREE(main_dm_table);
4450 }
4451
4452 goto free;
4453
4454 cleanup: // this is failure cleanup code path
4455 {
4456 int saved_errno;
4457
4458 saved_errno = _blobstore_errno; // save it because dm_delete_devices may overwrite it
4459 LOGERROR("error: blockblob_clone: %s (%d)\n", blobstore_get_last_msg(), _blobstore_errno);
4460
4461 // remove dm devices that may have been created
4462 if (dm_delete_devices(dev_names, devices) == 0) {
4463
4464 // remove the .dm file so that others do not
4465 // needlessly attempt to remove dm devices later
4466 char path[PATH_MAX];
4467 set_blockblob_metadata_path(BLOCKBLOB_PATH_DM, bb->store, bb->id, path, sizeof(path));
4468 unlink(path);
4469 }
4470 _blobstore_errno = saved_errno;
4471 }
4472
4473 free:
4474 // Only free main_dm_table if mapped_or_snapshotted is 0. If its greater than
4475 // 0, it would be assigned to the dm_tables array.
4476 if (mapped_or_snapshotted == 0) {
4477 EUCA_FREE(main_dm_table);
4478 }
4479
4480 for (int i = 0; i < devices; i++) {
4481 EUCA_FREE(dev_names[i]);
4482 EUCA_FREE(dm_tables[i]);
4483 }
4484 EUCA_FREE(dev_names);
4485 EUCA_FREE(dm_tables);
4486
4487 return ret;
4488 }
4489
4490 //!
4491 //! Retrieces a block device pointing to the blob
4492 //!
4493 //! @param[in] bb
4494 //!
4495 //! @return a block device pointing to the blob
4496 //!
4497 //! @pre
4498 //!
4499 //! @note
4500 //!
4501 const char *blockblob_get_dev(blockblob * bb)
4502 {
4503 if (bb == NULL) {
4504 ERR(BLOBSTORE_ERROR_INVAL, NULL);
4505 return NULL;
4506 }
4507 return bb->device_path;
4508 }
4509
4510 //!
4511 //! Retrieves a path to the file containg the blob, but only if snapshot_type is not DM
4512 //!
4513 //! @param[in] bb
4514 //!
4515 //! @return a path to the file containg the blob
4516 //!
4517 //! @pre
4518 //!
4519 //! @note
4520 //!
4521 const char *blockblob_get_file(blockblob * bb)
4522 {
4523 if (bb == NULL) {
4524 ERR(BLOBSTORE_ERROR_INVAL, NULL);
4525 return NULL;
4526 }
4527 if (bb->snapshot_type == BLOBSTORE_SNAPSHOT_DM) {
4528 ERR(BLOBSTORE_ERROR_INVAL, "file access only supported for uncloned blockblobs");
4529 return NULL;
4530 }
4531 return bb->blocks_path;
4532 }
4533
4534 //!
4535 //! Returns the blobstore of the blob
4536 //! @param[in] bb
4537 //!
4538 //! @return pointer to the blobstore
4539 //!
4540
4541 blobstore *blockblob_get_blobstore(blockblob * bb)
4542 {
4543 if (bb == NULL) {
4544 ERR(BLOBSTORE_ERROR_INVAL, NULL);
4545 return NULL;
4546 }
4547 return bb->store;
4548 }
4549
4550 //!
4551 //! Returns the directory in which the blob files are located
4552 //!
4553 //! @param[in] bb
4554 //! @param[in] buf
4555 //! @param[in] buflen
4556 //!
4557 //! @return success (0) or failure (-1)
4558 //!
4559 int blockblob_get_dir(blockblob * bb, char *buf, int buflen)
4560 {
4561 if (bb == NULL) {
4562 ERR(BLOBSTORE_ERROR_INVAL, NULL);
4563 return -1;
4564 }
4565 euca_strncpy(buf, bb->blocks_path, buflen);
4566 for (int i = (strlen(buf) - 1); i > 1; i--) {
4567 if (buf[i] == '/') {
4568 buf[i] = '\0';
4569 return 0;
4570 }
4571 }
4572 ERR(BLOBSTORE_ERROR_INVAL, NULL);
4573 return -1;
4574 }
4575
4576 //!
4577 //!
4578 //!
4579 //! @param[in] bb
4580 //!
4581 //! @return size of blob in blocks
4582 //!
4583 //! @pre
4584 //!
4585 //! @note
4586 //!
4587 unsigned long long blockblob_get_size_blocks(blockblob * bb)
4588 {
4589 if (bb == NULL) {
4590 ERR(BLOBSTORE_ERROR_INVAL, NULL);
4591 return 0;
4592 }
4593 return round_up_sec(bb->size_bytes) / 512;
4594 }
4595
4596 //!
4597 //!
4598 //!
4599 //! @param[in] bb
4600 //!
4601 //! @return size of blob in bytes
4602 //!
4603 //! @pre
4604 //!
4605 //! @note
4606 //!
4607 unsigned long long blockblob_get_size_bytes(blockblob * bb)
4608 {
4609 if (bb == NULL) {
4610 ERR(BLOBSTORE_ERROR_INVAL, NULL);
4611 return 0;
4612 }
4613 return bb->size_bytes;
4614 }
4615
4616 //!
4617 //! flushes outstanding I/O on:
4618 //! \li system's buffer cache
4619 //! \li dm device at dev_path (if specified)
4620 //! \li dm device pointing to the blob (if bb is specified)
4621 //!
4622 //! @param[in] dev_path
4623 //! @param[in] bb
4624 //!
4625 //! @return
4626 //!
4627 int blockblob_sync(const char *dev_path, const blockblob * bb)
4628 {
4629 int err = 0;
4630
4631 sync(); // ensure the whole buffer cache is flushed
4632
4633 if ((err == 0) && (dev_path != NULL)) {
4634 err = dm_suspend_resume(dev_path);
4635 }
4636
4637 if ((err == 0) && (bb != NULL)) {
4638 err = dm_suspend_resume(bb->device_path);
4639 }
4640
4641 return (err);
4642 }
4643
4644 #ifdef _UNIT_TEST
4645 //!
4646 //!
4647 //!
4648 //! @param[in] bb
4649 //! @param[in] c
4650 //! @param[in] use_file
4651 //!
4652 //! @return
4653 //!
4654 //! @pre
4655 //!
4656 //! @note
4657 //!
4658 static void _fill_blob(blockblob * bb, char c, int use_file)
4659 {
4660 const char *path;
4661 if (use_file) {
4662 path = blockblob_get_file(bb);
4663 } else {
4664 path = blockblob_get_dev(bb);
4665 }
4666
4667 char buf[1];
4668 buf[0] = c;
4669
4670 printf("filling out with dummy data %s\n", path);
4671 int fd = open(path, O_WRONLY);
4672 int failed_bytes = 0;
4673 if (fd != -1) {
4674 for (int i = 0; i < bb->size_bytes; i++) {
4675 if (write(fd, buf, 1) != 1)
4676 failed_bytes++;
4677 }
4678 }
4679 if (failed_bytes) {
4680 printf("WARNING: failed to fill %d byte(s) to path %s\n", failed_bytes, path);
4681 }
4682 if (fd >= 0) {
4683 fsync(fd);
4684 close(fd);
4685 }
4686 }
4687
4688 //!
4689 //!
4690 //!
4691 //! @param[in] size_blocks
4692 //! @param[in] base
4693 //! @param[in] name
4694 //! @param[in] format
4695 //! @param[in] revocation
4696 //! @param[in] snapshot
4697 //!
4698 //! @return
4699 //!
4700 //! @pre
4701 //!
4702 //! @note
4703 //!
4704 static blobstore *create_teststore(int size_blocks, const char *base, const char *name, blobstore_format_t format, blobstore_revocation_t revocation, blobstore_snapshot_t snapshot)
4705 {
4706 static int ts = 0;
4707 static int counter = 0;
4708
4709 if (ts == 0) {
4710 ts = ((int)time(NULL)) - 1292630988;
4711 //ts = (((int)time(NULL))<<24)>>24;
4712 }
4713
4714 char bs_path[PATH_MAX];
4715 snprintf(bs_path, sizeof(bs_path), "%s/test_blobstore_%05d_%s_%03d", base, ts, name, counter++);
4716 if (mkdir(bs_path, BLOBSTORE_DIRECTORY_PERM) == -1) {
4717 printf("failed to create %s\n", bs_path);
4718 return NULL;
4719 }
4720 printf("created %s\n", bs_path);
4721 blobstore *bs = blobstore_open(bs_path, size_blocks, BLOBSTORE_FLAG_CREAT, format, revocation, snapshot);
4722 if (bs == NULL) {
4723 printf("ERROR: %s\n", blobstore_get_error_str(blobstore_get_error()));
4724 return NULL;
4725 }
4726 return bs;
4727 }
4728
4729 //!
4730 //!
4731 //!
4732 //! @param[in] bb
4733 //! @param[in] seek
4734 //! @param[in] c
4735 //!
4736 //! @return
4737 //!
4738 //! @pre
4739 //!
4740 //! @note
4741 //!
4742 static int write_byte(blockblob * bb, int seek, char c)
4743 {
4744 const char *dev = blockblob_get_dev(bb);
4745 int fd = open(dev, O_WRONLY);
4746 if (fd == -1) {
4747 printf("ERROR: failed to open the blockblob dev %s\n", dev);
4748 return -1;
4749 }
4750 if (lseek(fd, seek, SEEK_SET) == -1) {
4751 printf("ERROR: failed to lseek in blockblob dev %s\n", dev);
4752 close(fd);
4753 return -1;
4754 }
4755 if (write(fd, &c, 1) != 1) {
4756 printf("ERROR: failed to write to blockblob dev %s\n", dev);
4757 close(fd);
4758 return -1;
4759 }
4760 fsync(fd);
4761 close(fd);
4762
4763 return 0;
4764 }
4765
4766 //!
4767 //!
4768 //!
4769 //! @param[in] bb
4770 //! @param[in] seek
4771 //!
4772 //! @return
4773 //!
4774 //! @pre
4775 //!
4776 //! @note
4777 //!
4778 static char read_byte(blockblob * bb, int seek)
4779 {
4780 const char *dev = blockblob_get_dev(bb);
4781 int fd = open(dev, O_RDONLY);
4782 if (fd == -1) {
4783 printf("ERROR: failed to open the blockblob dev %s\n", dev);
4784 return -1;
4785 }
4786 if (lseek(fd, seek, SEEK_SET) == -1) {
4787 printf("ERROR: failed to lseek in blockblob dev %s\n", dev);
4788 close(fd);
4789 return -1;
4790 }
4791 char buf[1];
4792 if (read(fd, buf, 1) != 1) {
4793 printf("ERROR: failed to write to blockblob dev %s\n", dev);
4794 close(fd);
4795 return -1;
4796 }
4797 close(fd);
4798
4799 return buf[0];
4800 }
4801
//!
//! Unit-test driver that stresses blockblob_clone(). It creates two test blobstores,
//! fills the first with blobs that are each cloned from a pair of blobs in the second
//! (one mapped, one snapshotted), verifies that data written to the sources is visible
//! through the clone, then repeatedly deletes random clones and re-creates them, and
//! finally drains both stores.
//!
//! @param[in] base directory under which the test blobstores are created
//! @param[in] name label embedded in the test blobstore directory names
//! @param[in] format not referenced in the body (both stores use BLOBSTORE_FORMAT_DIRECTORY)
//! @param[in] revocation not referenced in the body (BLOBSTORE_REVOCATION_NONE and BLOBSTORE_REVOCATION_LRU are used)
//! @param[in] snapshot not referenced in the body (both stores use BLOBSTORE_SNAPSHOT_DM)
//!
//! @return the number of errors encountered (0 means the test passed)
//!
//! @note The helper macros _OPENERR and _DELWARN are defined inside this function and
//!       are not undefined afterwards.
//!
static int do_clone_stresstest(const char *base, const char *name, blobstore_format_t format, blobstore_revocation_t revocation, blobstore_snapshot_t snapshot)
{
    int errors = 0;
    blobstore *bs1 = NULL;
    blobstore *bs2 = NULL;

    printf("commencing cloning stress-test...\n");

    // bs1 will hold the clones; it is opened without a revocation policy
    if ((bs1 = create_teststore(STRESS_BS_SIZE, base, name, BLOBSTORE_FORMAT_DIRECTORY, BLOBSTORE_REVOCATION_NONE, BLOBSTORE_SNAPSHOT_DM)) == NULL) {
        errors++;
        goto done;
    }

    // bs2 will hold the clone sources; it is opened with LRU revocation
    if ((bs2 = create_teststore(STRESS_BS_SIZE, base, name, BLOBSTORE_FORMAT_DIRECTORY, BLOBSTORE_REVOCATION_LRU, BLOBSTORE_SNAPSHOT_DM)) == NULL) {
        errors++;
        goto done;
    }

    // slot i of bbs1 is cloned from slots i and i + STRESS_BLOBS of bbs2
    blockblob *bbs1[STRESS_BLOBS];
    long long bbs1_sizes[STRESS_BLOBS];
    blockblob *bbs2[STRESS_BLOBS * 2];
    long long bbs2_sizes[STRESS_BLOBS * 2];

    // calculate sizes
    long long avg = STRESS_BS_SIZE / STRESS_BLOBS;
    if (avg < STRESS_MIN_BB * 2) {
        printf("ERROR: average blob size %lld for stress test is too small (<%d)\n", avg, STRESS_MIN_BB * 2);
        errors++;
        goto done;
    }
    // start with equal sizes and empty slots (all handles must be NULL before any 'goto drain')
    for (int i = 0; i < STRESS_BLOBS; i++) {
        bbs1_sizes[i] = avg;
        bbs1[i] = NULL;
        bbs2[i] = NULL;
        bbs2[i + STRESS_BLOBS] = NULL;
    }
    // randomize the sizes: move a random amount between slot j and slot k, which keeps
    // the total unchanged, and split each bbs1 size into the two bbs2 sizes backing it
    for (int i = 0; i < STRESS_BLOBS * 3; i++) {    // run over the array a few times
        int j = i % (STRESS_BLOBS / 2); // modify pairs from array
        int k = j + (STRESS_BLOBS / 2);
        long long max_delta = MIN(bbs1_sizes[j] - STRESS_MIN_BB, bbs1_sizes[k] - STRESS_MIN_BB);
        long long delta = max_delta * (((double)random() / RAND_MAX) - 0.5);
        bbs1_sizes[j] -= delta;
        bbs2_sizes[j] = bbs1_sizes[j] / 2;
        bbs2_sizes[j + STRESS_BLOBS] = bbs1_sizes[j] - bbs1_sizes[j] / 2;

        bbs1_sizes[k] += delta;
        bbs2_sizes[k] = bbs1_sizes[k] / 2;
        bbs2_sizes[k + STRESS_BLOBS] = bbs1_sizes[k] - bbs1_sizes[k] / 2;
    }
    // NOTE(review): j and k only reach indices below 2 * (STRESS_BLOBS / 2), so this
    // appears to assume that STRESS_BLOBS is even -- confirm against its definition
    long long bbs1_totals = 0;
    for (int i = 0; i < STRESS_BLOBS; i++) {
        bbs1_totals += bbs1_sizes[i];
        long long pair = bbs2_sizes[i] + bbs2_sizes[i + STRESS_BLOBS];
        assert(pair == bbs1_sizes[i]);
        printf("%lld ", bbs1_sizes[i]);
    }
    assert(bbs1_totals == STRESS_BS_SIZE);
    printf("\n");

    // fill the stores
    for (int i = 0; i < STRESS_BLOBS; i++) {
        // _OPENERR creates a new blob of BBSIZE blocks (BBSIZE * 512 bytes) in store BS and
        // stores the handle in BB; on failure it counts an error and jumps to 'drain'
#define _OPENERR(BS,BB,BBSIZE) \
        BB = blockblob_open (BS, NULL, BBSIZE*512, BLOBSTORE_FLAG_CREAT | BLOBSTORE_FLAG_EXCL, NULL, 1000); \
        if (BB == NULL) { \
            printf ("ERROR: failed to create blockblob i=%d\n", i); \
            errors++; \
            goto drain; \
        }
        printf("allocating slot %d\n", i);
        _OPENERR(bs1, bbs1[i], bbs1_sizes[i]);
        _OPENERR(bs2, bbs2[i], bbs2_sizes[i]);
        _OPENERR(bs2, bbs2[i + STRESS_BLOBS], bbs2_sizes[i + STRESS_BLOBS]);
        write_byte(bbs2[i + STRESS_BLOBS], 0, 'b'); // write a byte into beginning of blob that will be snapshotted
        // the clone is the first source mapped, followed by a snapshot of the second source
        blockmap map[] = {
            {BLOBSTORE_MAP, BLOBSTORE_BLOCKBLOB, {blob:bbs2[i]}, 0, 0, bbs2_sizes[i]},
            {BLOBSTORE_SNAPSHOT, BLOBSTORE_BLOCKBLOB, {blob:bbs2[i + STRESS_BLOBS]}, 0, bbs2_sizes[i], bbs2_sizes[i + STRESS_BLOBS]},
        };
        if (blockblob_clone(bbs1[i], map, 2) == -1) {
            printf("ERROR: failed to clone on iteration %i\n", i);
            errors++;
            goto drain;
        }
        // verify that mapping works
        write_byte(bbs2[i], bbs2_sizes[i] * 512 - 1, 'a');  // write a byte into the end of the blob that is being mapped
        dm_suspend_resume(bbs1[i]->dm_name);
        char c1 = read_byte(bbs1[i], bbs2_sizes[i] * 512 - 1);  // read that byte back via bbs1
        char c2 = read_byte(bbs1[i], bbs2_sizes[i] * 512);  // read the byte written before the snapshot
        if (c1 != 'a' || c2 != 'b') {
            printf("ERROR: clone verification failed (c1=='%c', c2=='%c')\n", c1, c2);
            errors++;
            goto drain;
        }
    }

    // induce churn in stores
    for (int k = 0; k < STRESS_BLOBS * 1; k++) {
        usleep(100);
        // randomly free a few random blobs
        int to_free = (int)((STRESS_BLOBS / 2) * ((double)random() / RAND_MAX));
        printf("will free %d random blobs\n", to_free);
        for (int j = 0; j < to_free; j++) {
            int i = (int)((STRESS_BLOBS - 1) * ((double)random() / RAND_MAX));
            if (bbs1[i] != NULL) {
                printf("freeing slot %d\n", i);
                // _DELWARN deletes blob BB if it is non-NULL, warns when the deletion fails,
                // and resets BB to NULL in either case
#define _DELWARN(BB) if (BB && blockblob_delete (BB, 1000, 0) == -1) { printf ("WARNING: failed to delete blockblob %s i=%d\n", BB->id, i); } BB=NULL
                _DELWARN(bbs1[i]);
                blockblob_close(bbs2[i]);   // so it can be purged with LRU
                bbs2[i] = NULL;
                blockblob_close(bbs2[i + STRESS_BLOBS]);    // so it can be purged with LRU
                bbs2[i + STRESS_BLOBS] = NULL;
            }
        }

        // re-allocate those sizes
        for (int i = 0; i < STRESS_BLOBS; i++) {
            if (bbs1[i] != NULL)
                continue;
            printf("allocating slot %d\n", i);
            _OPENERR(bs1, bbs1[i], bbs1_sizes[i]);
            _OPENERR(bs2, bbs2[i], bbs2_sizes[i]);
            _OPENERR(bs2, bbs2[i + STRESS_BLOBS], bbs2_sizes[i + STRESS_BLOBS]);
            write_byte(bbs2[i + STRESS_BLOBS], 0, 'b'); // write a byte into beginning of blob that will be snapshotted
            // same two-entry map as in the initial fill above
            blockmap map[] = {
                {BLOBSTORE_MAP, BLOBSTORE_BLOCKBLOB, {blob:bbs2[i]}
                 , 0, 0, bbs2_sizes[i]}
                ,
                {BLOBSTORE_SNAPSHOT, BLOBSTORE_BLOCKBLOB, {blob:bbs2[i + STRESS_BLOBS]}
                 , 0, bbs2_sizes[i], bbs2_sizes[i + STRESS_BLOBS]}
                ,
            };
            if (blockblob_clone(bbs1[i], map, 2) == -1) {
                printf("ERROR: failed to clone on iteration %i\n", i);
                errors++;
                goto drain;
            }
            // verify that mapping works
            write_byte(bbs2[i], bbs2_sizes[i] * 512 - 1, 'a');  // write a byte into the end of the blob that is being mapped
            dm_suspend_resume(bbs1[i]->dm_name);
            char c1 = read_byte(bbs1[i], bbs2_sizes[i] * 512 - 1);  // read that byte back via bbs1
            char c2 = read_byte(bbs1[i], bbs2_sizes[i] * 512);  // read the byte written before the snapshot
            if (c1 != 'a' || c2 != 'b') {
                printf("ERROR: clone verification failed (c1=='%c', c2=='%c')\n", c1, c2);
                errors++;
                goto drain;
            }
        }
    }

drain:
    // drain the stores (reached both after a complete run and after any error above)
    printf("resting before draining...\n");
    sleep(1);
    for (int i = 0; i < STRESS_BLOBS; i++) {
        printf("freeing slot %d\n", i);
        _DELWARN(bbs1[i]);
        _DELWARN(bbs2[i]);
        _DELWARN(bbs2[i + STRESS_BLOBS]);
    }

    printf("completed cloning stress-test\n");
done:
    // close whichever test stores were opened
    if (bs1 != NULL)
        blobstore_close(bs1);
    if (bs2 != NULL)
        blobstore_close(bs2);
    return errors;
}
4984
4985 //!
4986 //!
4987 //!
4988 //! @param[in] bb4
4989 //! @param[in] op
4990 //!
4991 //! @return
4992 //!
4993 //! @pre
4994 //!
4995 //! @note
4996 //!
4997 static int check_destination(blockblob * bb4, char *op)
4998 {
4999 int errors = 0;
5000 const char *dev = blockblob_get_dev(bb4);
5001 if (dev != NULL) {
5002 int fd = open(dev, O_RDONLY);
5003 if (fd != -1) {
5004 for (int i = 1; i < 4; i++) {
5005 for (int j = 0; j < CBB_SIZE; j++) {
5006 char buf[512];
5007 int r = read(fd, buf, sizeof(buf));
5008 if (r < 1) {
5009 printf("ERROR: failed to read bock device %s\n", dev);
5010 errors++;
5011 goto stop_comparing;
5012 }
5013 if (buf[0] != '0' + i) {
5014 printf("ERROR: block device %s has unexpected data ('%c' (%d) != '%c')\n", dev, buf[0], buf[0], '0' + i);
5015 errors++;
5016 goto stop_comparing;
5017 }
5018 }
5019 }
5020 stop_comparing:
5021 close(fd);
5022 } else {
5023 printf("ERROR: failed to open block device %s for the %s\n", dev, op);
5024 errors++;
5025 }
5026 } else {
5027 printf("ERROR: failed to get a block device for the %s\n", op);
5028 errors++;
5029 }
5030
5031 return errors;
5032 }
5033
5034 //!
5035 //!
5036 //!
5037 //! @param[in] base
5038 //! @param[in] name
5039 //!
5040 //! @return
5041 //!
5042 //! @pre
5043 //!
5044 //! @note
5045 //!
5046 static int do_copy_test(const char *base, const char *name)
5047 {
5048 int ret;
5049 int errors = 0;
5050 printf("commencing copy test\n");
5051
5052 blobstore *bs = create_teststore(CBB_SIZE * 7, base, name, BLOBSTORE_FORMAT_DIRECTORY, BLOBSTORE_REVOCATION_ANY, BLOBSTORE_SNAPSHOT_ANY);
5053 if (bs == NULL) {
5054 errors++;
5055 goto done;
5056 }
5057
5058 blockblob *bb1, *bb2, *bb3, *bb4;
5059
5060 // these are to be copied to another
5061 _OPENBBb(bb1, B1, CBB_SIZE * 512 * 7 + 1, NULL, _CBB, 0, -1); // too big for bs
5062 if (errors)
5063 goto done;
5064 _OPENBBb(bb1, B1, CBB_SIZE * 512, NULL, _CBB, 0, 0); // bs size: 1
5065 _fill_blob(bb1, '1', TRUE);
5066 _OPENBBb(bb2, B2, CBB_SIZE * 512 + 1, NULL, _CBB, 0, 0); // bs size: 3
5067 _fill_blob(bb2, '2', TRUE);
5068 _OPENBBb(bb3, B3, CBB_SIZE * 512 - 2, NULL, _CBB, 0, 0); // bs size: 4
5069 _fill_blob(bb3, '3', TRUE);
5070
5071 // this is to be the destination of the copy
5072 _OPENBB(bb4, B4, CBB_SIZE * 3, NULL, _CBB, 0, 0); // bs size: 7
5073 _COPYBB(bb1, 0, bb4, 0, 0, 0); // check that len=0 works and that right block size is chosen
5074 _COPYBB(bb2, 0, bb4, CBB_SIZE * 512, CBB_SIZE * 512 + 1, 0);
5075 _COPYBB(bb3, 0, bb4, CBB_SIZE * 512 * 2, CBB_SIZE * 512 - 2, 0);
5076 _COPYBB(bb3, 0, bb4, CBB_SIZE * 512 * 3 - 2, 2, 0);
5077 _COPYBB(bb3, 0, bb4, CBB_SIZE * 512 * 2, CBB_SIZE * 512, -1); // source is too small
5078 _COPYBB(bb3, 2, bb4, CBB_SIZE * 512 * 2, CBB_SIZE * 512, -1); // source is too small
5079 _COPYBB(bb3, 0, bb4, CBB_SIZE * 512 * 3 - 1, 2, -1); // destination is too small
5080
5081 // see if copy worked
5082 errors += check_destination(bb4, "copy");
5083
5084 _DELEBB(bb1, B1, 0);
5085 _DELEBB(bb2, B2, 0);
5086 _DELEBB(bb3, B3, 0);
5087 _DELEBB(bb4, B4, 0);
5088 blobstore_close(bs);
5089
5090 printf("completed copy test\n");
5091 done:
5092 return errors;
5093 }
5094
5095 //!
5096 //!
5097 //!
5098 //! @param[in] base
5099 //! @param[in] name
5100 //! @param[in] format
5101 //! @param[in] revocation
5102 //! @param[in] snapshot
5103 //! @param[in] copy_or_snapshot
5104 //!
5105 //! @return
5106 //!
5107 //! @pre
5108 //!
5109 //! @note
5110 //!
5111 static int do_clone_test(const char *base, const char *name, blobstore_format_t format, blobstore_revocation_t revocation, blobstore_snapshot_t snapshot, int copy_or_snapshot)
5112 {
5113 int ret;
5114 int errors = 0;
5115 printf("commencing cloning test\n");
5116
5117 blobstore *bs = create_teststore(CBB_SIZE * 6, base, name, BLOBSTORE_FORMAT_DIRECTORY, BLOBSTORE_REVOCATION_ANY, BLOBSTORE_SNAPSHOT_ANY);
5118 if (bs == NULL) {
5119 errors++;
5120 goto done;
5121 }
5122
5123 blockblob *bb1, *bb2, *bb3, *bb4, *bb5;
5124
5125 // these are to be mapped to others
5126 _OPENBB(bb1, B1, CBB_SIZE, NULL, _CBB, 0, 0); // bs size: 1
5127 _fill_blob(bb1, '1', FALSE