cfengine  3.15.4
About: CFEngine is a configuration management system for configuring and maintaining Unix-like computers (using its own high-level policy language). Community version.
  Fossies Dox: cfengine-3.15.4.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

repair.c
Go to the documentation of this file.
1 #include <platform.h>
2 #include <repair.h>
3 #include <logging.h>
4 
5 #if defined(__MINGW32__) || !defined(LMDB)
6 
7 int repair_main(ARG_UNUSED int argc, ARG_UNUSED const char *const *const argv)
8 {
10  "cf-check repair not available on this platform/build");
11  return 1;
12 }
13 
15 {
17  "database repair not available on this platform/build");
18  return 0;
19 }
20 
21 #else
22 
23 #include <stdio.h>
24 #include <errno.h>
25 #include <signal.h>
26 #include <lmdump.h>
27 #include <lmdb.h>
28 #include <diagnose.h>
29 #include <backup.h>
30 #include <sequence.h>
31 #include <utilities.h>
32 #include <diagnose.h>
33 #include <string_lib.h>
34 #include <file_lib.h>
35 #include <replicate_lmdb.h>
36 
/**
 * Print the command line help of `cf-check repair` to standard output.
 *
 * Every line, including the last one, is newline-terminated so that
 * whatever the caller prints next starts on a fresh line.
 */
static void print_usage(void)
{
    printf("Usage: cf-check repair [-f] [FILE ...]\n");
    printf("Example: cf-check repair /var/cfengine/state/cf_lastseen.lmdb\n");
    printf("Options: -f|--force repair LMDB files that look OK\n");
}
43 
44 int remove_files(Seq *files)
45 {
46  assert(files != NULL);
47 
48  size_t corruptions = SeqLength(files);
49  int failures = 0;
50 
51  for (size_t i = 0; i < corruptions; ++i)
52  {
53  const char *filename = SeqAt(files, i);
54  assert(filename != NULL);
55  Log(LOG_LEVEL_INFO, "Removing: '%s'", filename);
56 
57  if (unlink(filename) != 0)
58  {
60  "Failed to remove '%s' (%d - %s)",
61  filename,
62  errno,
63  strerror(errno));
64  ++failures;
65  continue;
66  }
67 
68  char *lock_file = StringConcatenate(2, filename, ".lock");
69  unlink(lock_file);
70  free(lock_file);
71 
72  lock_file = StringConcatenate(2, filename, "-lock");
73  unlink(lock_file);
74  free(lock_file);
75  }
76  if (failures != 0)
77  {
78  Log(LOG_LEVEL_ERR, "Failed to remove %d files", failures);
79  }
80  return failures;
81 }
82 
/**
 * Store the current time in the repair timestamp file.
 *
 * Rewinds #fd_tstamp to its beginning and writes the raw bytes of a time_t
 * holding the current time, overwriting a previously stored timestamp.
 *
 * @param fd_tstamp Open, writable FD of the repair timestamp file
 * @return true if the complete timestamp was written, false if the rewind
 *         or the write failed
 */
static bool record_repair_timestamp(int fd_tstamp)
{
    const time_t this_timestamp = time(NULL);

    /* Without a successful rewind the timestamp would land at whatever
     * offset the FD currently has instead of replacing the old value. */
    if (lseek(fd_tstamp, 0, SEEK_SET) == (off_t) -1)
    {
        return false;
    }

    const ssize_t n_written = write(fd_tstamp, &this_timestamp, sizeof(this_timestamp));

    /* A short write should never happen for such a small buffer. */
    return (n_written == (ssize_t) sizeof(this_timestamp));
}
95 
96 
97 /**
98  * @param file LMDB file to repair
99  * @param fd_tstamp An open FD to the repair timestamp file or -1
100  *
101  * @note If #fd_tstamp != -1 then it is expected to be open and with file locks
102  * taken care of. If #fd_tstamp == -1, this function opens the repair
103  * timestamp file on its own and takes care of the file locks.
104  */
105 int repair_lmdb_file(const char *file, int fd_tstamp)
106 {
107  int ret;
108  char *dest_file = StringFormat("%s"REPAIR_FILE_EXTENSION, file);
109 
110  FileLock lock = EMPTY_FILE_LOCK;
111  if (fd_tstamp == -1)
112  {
113  char *tstamp_file = StringFormat("%s.repaired", file);
114  int lock_ret = ExclusiveFileLockPath(&lock, tstamp_file, true); /* wait=true */
115  free(tstamp_file);
116  if (lock_ret < 0)
117  {
118  /* Should never happen because we tried to wait for the lock. */
120  "Failed to acquire lock for the '%s' DB repair timestamp file",
121  file);
122  ret = -1;
123  goto cleanup;
124  }
125  fd_tstamp = lock.fd;
126  }
127  pid_t child_pid = fork();
128  if (child_pid == 0)
129  {
130  /* child */
131  /* The process can receive a SIGBUS signal while trying to read a
132  * corrupted LMDB file. This has a special handling in cf-agent and
133  * other processes, but this child process should just die in case of
134  * SIGBUS (which is then detected by the parent and handled
135  * accordingly). */
136  signal(SIGBUS, SIG_DFL);
137  exit(replicate_lmdb(file, dest_file));
138  }
139  else
140  {
141  /* parent */
142  int status;
143  pid_t pid = waitpid(child_pid, &status, 0);
144  if (pid != child_pid)
145  {
146  /* real error that should never happen */
147  ret = -1;
148  goto cleanup;
149  }
150  if (WIFEXITED(status) && WEXITSTATUS(status) != CF_CHECK_OK
152  {
153  Log(LOG_LEVEL_ERR, "Failed to repair file '%s', removing", file);
154  if (unlink(file) != 0)
155  {
156  Log(LOG_LEVEL_ERR, "Failed to remove file '%s'", file);
157  ret = -1;
158  }
159  else
160  {
161  if (!record_repair_timestamp(fd_tstamp))
162  {
163  Log(LOG_LEVEL_ERR, "Failed to write the timestamp of repair of the '%s' file",
164  file);
165  }
166  ret = WEXITSTATUS(status);
167  }
168  goto cleanup;
169  }
170  else if (WIFSIGNALED(status))
171  {
172  Log(LOG_LEVEL_ERR, "Failed to repair file '%s', child process signaled (%d), removing",
173  file, WTERMSIG(status));
174  if (unlink(file) != 0)
175  {
176  Log(LOG_LEVEL_ERR, "Failed to remove file '%s'", file);
177  ret = -1;
178  }
179  else
180  {
181  if (!record_repair_timestamp(fd_tstamp))
182  {
183  Log(LOG_LEVEL_ERR, "Failed to write the timestamp of repair of the '%s' file",
184  file);
185  }
186  ret = signal_to_cf_check_code(WTERMSIG(status));
187  }
188  goto cleanup;
189  }
190  else
191  {
192  /* replication successfull */
193  Log(LOG_LEVEL_INFO, "Replacing '%s' with the new copy", file);
194  if (rename(dest_file, file) != 0)
195  {
197  "Failed to replace file '%s' with the repaired copy: %s",
198  file, strerror(errno));
199  unlink(dest_file);
200  ret = -1;
201  goto cleanup;
202  }
203  if (!record_repair_timestamp(fd_tstamp))
204  {
205  Log(LOG_LEVEL_ERR, "Failed to write the timestamp of repair of the '%s' file",
206  file);
207  }
208  ret = 0;
209  }
210  }
211  cleanup:
212  free(dest_file);
213  if (lock.fd != -1)
214  {
215  ExclusiveFileUnlock(&lock, true); /* close=true */
216  }
217  return ret;
218 }
219 
220 int repair_lmdb_files(Seq *files, bool force)
221 {
222  assert(files != NULL);
223  assert(SeqLength(files) > 0);
224 
225  Seq *corrupt;
226  if (force)
227  {
228  corrupt = files;
229  }
230  else
231  {
232  const int corruptions = diagnose_files(files, &corrupt, false, false, false);
233  if (corruptions != 0)
234  {
235  assert(corrupt != NULL);
237  "%d corrupt database%s to fix",
238  corruptions,
239  corruptions != 1 ? "s" : "");
240  }
241  else
242  {
243  Log(LOG_LEVEL_INFO, "No corrupted LMDB files - nothing to do");
244  return 0;
245  }
246  }
247 
248  int ret = 0;
249  const size_t length = SeqLength(corrupt);
250  assert(length > 0);
251  backup_files_copy(corrupt);
252  for (int i = 0; i < length; ++i)
253  {
254  const char *file = SeqAt(corrupt, i);
255  if (repair_lmdb_file(file, -1) == -1)
256  {
257  ret++;
258  }
259  }
260 
261  if (!force)
262  {
263  /* see 'if (force)' above */
264  SeqDestroy(corrupt);
265  }
266 
267  if (ret == 0)
268  {
269  Log(LOG_LEVEL_NOTICE, "Database repair successful");
270  }
271  else
272  {
273  Log(LOG_LEVEL_ERR, "Database repair failed");
274  }
275 
276  return ret;
277 }
278 
279 int repair_main(int argc, const char *const *const argv)
280 {
281  size_t offset = 1;
282  bool force = false;
283  if (argc > 1 && argv[1] != NULL && argv[1][0] == '-')
284  {
285  if (StringMatchesOption(argv[1], "--force", "-f"))
286  {
287  offset++;
288  force = true;
289  }
290  else
291  {
292  print_usage();
293  printf("Unrecognized option: '%s'\n", argv[1]);
294  return 1;
295  }
296  }
297  Seq *files = argv_to_lmdb_files(argc, argv, offset);
298  if (files == NULL || SeqLength(files) == 0)
299  {
300  Log(LOG_LEVEL_ERR, "No database files to repair");
301  return 1;
302  }
303  const int ret = repair_lmdb_files(files, force);
304  SeqDestroy(files);
305  return ret;
306 }
307 
308 int repair_lmdb_default(bool force)
309 {
310  // This function is used by cf-execd and cf-agent, not cf-check
311 
312  // Consistency checks are not enabled by default (--skip-db-check=yes)
313  // This log message can be changed to verbose if it happens by default:
314  Log(LOG_LEVEL_INFO, "Running internal DB (LMDB) consistency checks");
315 
316  Seq *files = default_lmdb_files();
317  if (files == NULL)
318  {
319  // Error message printed default_lmdb_files()
320  return 1;
321  }
322  if (SeqLength(files) == 0)
323  {
324  // First agent run - no LMDB files
325  Log(LOG_LEVEL_INFO, "Skipping local database repair, no lmdb files");
326  return 0;
327  }
328  const int ret = repair_lmdb_files(files, force);
329  SeqDestroy(files);
330 
331  if (ret != 0)
332  {
333  Log(LOG_LEVEL_ERR, "Something went wrong during database repair");
334  Log(LOG_LEVEL_ERR, "Try running `cf-check repair` manually");
335  }
336  return ret;
337 }
338 
339 #endif
int backup_files_copy(Seq *filenames)
Definition: backup.c:23
#define ARG_UNUSED
Definition: cf-net.c:47
void free(void *)
size_t diagnose_files(const Seq *filenames, Seq **corrupt, bool foreground, bool validate, bool test_write)
Definition: diagnose.c:15
@ CF_CHECK_LMDB_CORRUPT_PAGE
Definition: diagnose.h:80
@ CF_CHECK_OK
Definition: diagnose.h:80
int signal_to_cf_check_code(int sig)
static void cleanup(void *generic_data)
Definition: fchmodat.c:56
int ExclusiveFileLockPath(FileLock *lock, const char *fpath, bool wait)
Definition: file_lib.c:1808
int ExclusiveFileUnlock(FileLock *lock, bool close_fd)
Definition: file_lib.c:1684
#define EMPTY_FILE_LOCK
Definition: file_lib.h:204
int errno
#define NULL
Definition: getopt1.c:56
void Log(LogLevel level, const char *fmt,...)
Definition: logging.c:409
@ LOG_LEVEL_ERR
Definition: logging.h:42
@ LOG_LEVEL_NOTICE
Definition: logging.h:44
@ LOG_LEVEL_INFO
Definition: logging.h:45
#define WEXITSTATUS(s)
Definition: platform.h:141
#define WTERMSIG(s)
Definition: platform.h:150
#define WIFSIGNALED(s)
Definition: platform.h:147
#define WIFEXITED(s)
Definition: platform.h:144
int repair_main(int argc, const char *const *const argv)
Definition: repair.c:7
int repair_lmdb_default(bool force)
Definition: repair.c:14
#define REPAIR_FILE_EXTENSION
Definition: repair.h:4
int repair_lmdb_file(const char *file, int fd_tstamp)
int replicate_lmdb(const char *s_file, const char *d_file)
Definition: replicate_lmdb.c:7
size_t SeqLength(const Seq *seq)
Length of the sequence.
Definition: sequence.c:354
void SeqDestroy(Seq *seq)
Destroy an existing Sequence.
Definition: sequence.c:60
static void * SeqAt(const Seq *seq, int i)
Definition: sequence.h:57
char * strerror(int err)
Definition: strerror.c:35
bool StringMatchesOption(const char *const supplied, const char *const longopt, const char *const shortopt)
Check if a command line argument matches a short or long option.
Definition: string_lib.c:1585
char * StringConcatenate(size_t count, const char *first,...)
Definition: string_lib.c:348
char * StringFormat(const char *fmt,...)
Format string like sprintf and return formatted string allocated on heap as a return value.
Definition: string_lib.c:51
Sequence data-structure.
Definition: sequence.h:50
int fd
Definition: file_lib.h:201
Seq * argv_to_lmdb_files(const int argc, const char *const *const argv, const size_t offset)
Definition: utilities.c:22
Seq * default_lmdb_files()
Definition: utilities.c:11