Monitor.c (mdadm-4.1) | : | Monitor.c (mdadm-4.2) | ||
---|---|---|---|---|
skipping to change at line 32 | skipping to change at line 32 | |||
* Email: <neilb@suse.de> | * Email: <neilb@suse.de> | |||
*/ | */ | |||
#include "mdadm.h" | #include "mdadm.h" | |||
#include "md_p.h" | #include "md_p.h" | |||
#include "md_u.h" | #include "md_u.h" | |||
#include <sys/wait.h> | #include <sys/wait.h> | |||
#include <signal.h> | #include <signal.h> | |||
#include <limits.h> | #include <limits.h> | |||
#include <syslog.h> | #include <syslog.h> | |||
#ifndef NO_LIBUDEV | ||||
#include <libudev.h> | ||||
#endif | ||||
struct state { | struct state { | |||
char *devname; | char *devname; | |||
char devnm[32]; /* to sync with mdstat info */ | char devnm[32]; /* to sync with mdstat info */ | |||
unsigned int utime; | unsigned int utime; | |||
int err; | int err; | |||
char *spare_group; | char *spare_group; | |||
int active, working, failed, spare, raid; | int active, working, failed, spare, raid; | |||
int from_config; | int from_config; | |||
int from_auto; | int from_auto; | |||
skipping to change at line 66 | skipping to change at line 69 | |||
}; | }; | |||
struct alert_info { | struct alert_info { | |||
char *mailaddr; | char *mailaddr; | |||
char *mailfrom; | char *mailfrom; | |||
char *alert_cmd; | char *alert_cmd; | |||
int dosyslog; | int dosyslog; | |||
}; | }; | |||
static int make_daemon(char *pidfile); | static int make_daemon(char *pidfile); | |||
static int check_one_sharer(int scan); | static int check_one_sharer(int scan); | |||
static void write_autorebuild_pid(void); | ||||
static void alert(char *event, char *dev, char *disc, struct alert_info *info); | static void alert(char *event, char *dev, char *disc, struct alert_info *info); | |||
static int check_array(struct state *st, struct mdstat_ent *mdstat, | static int check_array(struct state *st, struct mdstat_ent *mdstat, | |||
int test, struct alert_info *info, | int test, struct alert_info *info, | |||
int increments, char *prefer); | int increments, char *prefer); | |||
static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist, | static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist, | |||
int test, struct alert_info *info); | int test, struct alert_info *info); | |||
static void try_spare_migration(struct state *statelist, struct alert_info *info); | static void try_spare_migration(struct state *statelist, struct alert_info *info); | |||
static void link_containers_with_subarrays(struct state *list); | static void link_containers_with_subarrays(struct state *list); | |||
#ifndef NO_LIBUDEV | ||||
static int check_udev_activity(void); | ||||
#endif | ||||
int Monitor(struct mddev_dev *devlist, | int Monitor(struct mddev_dev *devlist, | |||
char *mailaddr, char *alert_cmd, | char *mailaddr, char *alert_cmd, | |||
struct context *c, | struct context *c, | |||
int daemonise, int oneshot, | int daemonise, int oneshot, | |||
int dosyslog, char *pidfile, int increments, | int dosyslog, char *pidfile, int increments, | |||
int share) | int share) | |||
{ | { | |||
/* | /* | |||
* Every few seconds, scan every md device looking for changes | * Every few seconds, scan every md device looking for changes | |||
skipping to change at line 131 | skipping to change at line 138 | |||
* that appears in /proc/mdstat | * that appears in /proc/mdstat | |||
*/ | */ | |||
struct state *statelist = NULL; | struct state *statelist = NULL; | |||
struct state *st2; | struct state *st2; | |||
int finished = 0; | int finished = 0; | |||
struct mdstat_ent *mdstat = NULL; | struct mdstat_ent *mdstat = NULL; | |||
char *mailfrom; | char *mailfrom; | |||
struct alert_info info; | struct alert_info info; | |||
struct mddev_ident *mdlist; | struct mddev_ident *mdlist; | |||
int delay_for_event = c->delay; | ||||
if (!mailaddr) { | if (!mailaddr) { | |||
mailaddr = conf_get_mailaddr(); | mailaddr = conf_get_mailaddr(); | |||
if (mailaddr && ! c->scan) | if (mailaddr && ! c->scan) | |||
pr_err("Monitor using email address \"%s\" from config file\n", | pr_err("Monitor using email address \"%s\" from config file\n", | |||
mailaddr); | mailaddr); | |||
} | } | |||
mailfrom = conf_get_mailfrom(); | mailfrom = conf_get_mailfrom(); | |||
if (!alert_cmd) { | if (!alert_cmd) { | |||
skipping to change at line 155 | skipping to change at line 163 | |||
} | } | |||
if (c->scan && !mailaddr && !alert_cmd && !dosyslog) { | if (c->scan && !mailaddr && !alert_cmd && !dosyslog) { | |||
pr_err("No mail address or alert command - not monitoring.\n"); | pr_err("No mail address or alert command - not monitoring.\n"); | |||
return 1; | return 1; | |||
} | } | |||
info.alert_cmd = alert_cmd; | info.alert_cmd = alert_cmd; | |||
info.mailaddr = mailaddr; | info.mailaddr = mailaddr; | |||
info.mailfrom = mailfrom; | info.mailfrom = mailfrom; | |||
info.dosyslog = dosyslog; | info.dosyslog = dosyslog; | |||
if (share){ | ||||
if (check_one_sharer(c->scan)) | ||||
return 1; | ||||
} | ||||
if (daemonise) { | if (daemonise) { | |||
int rv = make_daemon(pidfile); | int rv = make_daemon(pidfile); | |||
if (rv >= 0) | if (rv >= 0) | |||
return rv; | return rv; | |||
} | } | |||
if (share) | if (share) | |||
if (check_one_sharer(c->scan)) | write_autorebuild_pid(); | |||
return 1; | ||||
if (devlist == NULL) { | if (devlist == NULL) { | |||
mdlist = conf_get_ident(NULL); | mdlist = conf_get_ident(NULL); | |||
for (; mdlist; mdlist = mdlist->next) { | for (; mdlist; mdlist = mdlist->next) { | |||
struct state *st; | struct state *st; | |||
if (mdlist->devname == NULL) | if (mdlist->devname == NULL) | |||
continue; | continue; | |||
if (strcasecmp(mdlist->devname, "<ignore>") == 0) | if (strcasecmp(mdlist->devname, "<ignore>") == 0) | |||
continue; | continue; | |||
skipping to change at line 215 | skipping to change at line 227 | |||
st->spare_group = xstrdup(mdlist->spare_group); | st->spare_group = xstrdup(mdlist->spare_group); | |||
} | } | |||
statelist = st; | statelist = st; | |||
} | } | |||
} | } | |||
while (!finished) { | while (!finished) { | |||
int new_found = 0; | int new_found = 0; | |||
struct state *st, **stp; | struct state *st, **stp; | |||
int anydegraded = 0; | int anydegraded = 0; | |||
int anyredundant = 0; | ||||
if (mdstat) | if (mdstat) | |||
free_mdstat(mdstat); | free_mdstat(mdstat); | |||
mdstat = mdstat_read(oneshot ? 0 : 1, 0); | mdstat = mdstat_read(oneshot ? 0 : 1, 0); | |||
if (!mdstat) | ||||
mdstat_close(); | ||||
for (st = statelist; st; st = st->next) | for (st = statelist; st; st = st->next) { | |||
if (check_array(st, mdstat, c->test, &info, | if (check_array(st, mdstat, c->test, &info, | |||
increments, c->prefer)) | increments, c->prefer)) | |||
anydegraded = 1; | anydegraded = 1; | |||
/* for external arrays, metadata is filled for | ||||
* containers only | ||||
*/ | ||||
if (st->metadata && st->metadata->ss->external) | ||||
continue; | ||||
if (st->err == 0 && !anyredundant) | ||||
anyredundant = 1; | ||||
} | ||||
/* now check if there are any new devices found in mdstat */ | /* now check if there are any new devices found in mdstat */ | |||
if (c->scan) | if (c->scan) | |||
new_found = add_new_arrays(mdstat, &statelist, c->test, | new_found = add_new_arrays(mdstat, &statelist, c->test, | |||
&info); | &info); | |||
/* If an array has active < raid && spare == 0 && spare_group != NULL | /* If an array has active < raid && spare == 0 && spare_group != NULL | |||
* Look for another array with spare > 0 and active == raid and same spare_group | * Look for another array with spare > 0 and active == raid and same spare_group | |||
* if found, choose a device and hotremove/hotadd | * if found, choose a device and hotremove/hotadd | |||
*/ | */ | |||
if (share && anydegraded) | if (share && anydegraded) | |||
try_spare_migration(statelist, &info); | try_spare_migration(statelist, &info); | |||
if (!new_found) { | if (!new_found) { | |||
if (oneshot) | if (oneshot) | |||
break; | break; | |||
else | else if (!anyredundant) { | |||
mdstat_wait(c->delay); | pr_err("No array with redundancy detected, stopping\n"); | |||
break; | ||||
} | ||||
else { | ||||
#ifndef NO_LIBUDEV | ||||
/* | ||||
* Wait for udevd to finish new devices | ||||
* processing. | ||||
*/ | ||||
if (mdstat_wait(delay_for_event) && | ||||
check_udev_activity()) | ||||
pr_err("Error while waiting for UDEV to complete new devices processing\n"); | ||||
#else | ||||
int wait_result = mdstat_wait(delay_for_event); | ||||
/* | ||||
* Give chance to process new device | ||||
*/ | ||||
if (wait_result != 0) { | ||||
if (c->delay > 5) | ||||
delay_for_event = 5; | ||||
} else | ||||
delay_for_event = c->delay; | ||||
#endif | ||||
mdstat_close(); | ||||
} | ||||
} | } | |||
c->test = 0; | c->test = 0; | |||
for (stp = &statelist; (st = *stp) != NULL; ) { | for (stp = &statelist; (st = *stp) != NULL; ) { | |||
if (st->from_auto && st->err > 5) { | if (st->from_auto && st->err > 5) { | |||
*stp = st->next; | *stp = st->next; | |||
free(st->devname); | free(st->devname); | |||
free(st->spare_group); | free(st->spare_group); | |||
free(st); | free(st); | |||
} else | } else | |||
skipping to change at line 279 | skipping to change at line 322 | |||
* -1 in the forked daemon | * -1 in the forked daemon | |||
* 0 in the parent | * 0 in the parent | |||
* 1 on error | * 1 on error | |||
* so a non-negative becomes the exit code. | * so a non-negative becomes the exit code. | |||
*/ | */ | |||
int pid = fork(); | int pid = fork(); | |||
if (pid > 0) { | if (pid > 0) { | |||
if (!pidfile) | if (!pidfile) | |||
printf("%d\n", pid); | printf("%d\n", pid); | |||
else { | else { | |||
FILE *pid_file; | FILE *pid_file = NULL; | |||
pid_file=fopen(pidfile, "w"); | int fd = open(pidfile, O_WRONLY | O_CREAT | O_TRUNC, | |||
0644); | ||||
if (fd >= 0) | ||||
pid_file = fdopen(fd, "w"); | ||||
if (!pid_file) | if (!pid_file) | |||
perror("cannot create pid file"); | perror("cannot create pid file"); | |||
else { | else { | |||
fprintf(pid_file,"%d\n", pid); | fprintf(pid_file,"%d\n", pid); | |||
fclose(pid_file); | fclose(pid_file); | |||
} | } | |||
} | } | |||
return 0; | return 0; | |||
} | } | |||
if (pid < 0) { | if (pid < 0) { | |||
perror("daemonise"); | perror("daemonise"); | |||
return 1; | return 1; | |||
} | } | |||
close(0); | manage_fork_fds(0); | |||
open("/dev/null", O_RDWR); | ||||
dup2(0, 1); | ||||
dup2(0, 2); | ||||
setsid(); | setsid(); | |||
return -1; | return -1; | |||
} | } | |||
static int check_one_sharer(int scan) | static int check_one_sharer(int scan) | |||
{ | { | |||
int pid, rv; | int pid; | |||
FILE *comm_fp; | ||||
FILE *fp; | FILE *fp; | |||
char dir[20]; | char comm_path[PATH_MAX]; | |||
char path[100]; | char path[PATH_MAX]; | |||
struct stat buf; | char comm[20]; | |||
sprintf(path, "%s/autorebuild.pid", MDMON_DIR); | sprintf(path, "%s/autorebuild.pid", MDMON_DIR); | |||
fp = fopen(path, "r"); | fp = fopen(path, "r"); | |||
if (fp) { | if (fp) { | |||
if (fscanf(fp, "%d", &pid) != 1) | if (fscanf(fp, "%d", &pid) != 1) | |||
pid = -1; | pid = -1; | |||
sprintf(dir, "/proc/%d", pid); | snprintf(comm_path, sizeof(comm_path), | |||
rv = stat(dir, &buf); | "/proc/%d/comm", pid); | |||
if (rv != -1) { | comm_fp = fopen(comm_path, "r"); | |||
if (scan) { | if (comm_fp) { | |||
pr_err("Only one autorebuild process allowed in scan mode, aborting\n"); | if (fscanf(comm_fp, "%19s", comm) && | |||
fclose(fp); | strncmp(basename(comm), Name, strlen(Name)) == 0) { | |||
return 1; | if (scan) { | |||
} else { | pr_err("Only one autorebuild process allowed in scan mode, aborting\n"); | |||
pr_err("Warning: One autorebuild process already running.\n"); | fclose(comm_fp); | |||
fclose(fp); | |||
return 1; | ||||
} else { | ||||
pr_err("Warning: One autorebuild process already running.\n"); | |||
} | ||||
} | } | |||
fclose(comm_fp); | ||||
} | } | |||
fclose(fp); | fclose(fp); | |||
} | } | |||
if (scan) { | return 0; | |||
if (mkdir(MDMON_DIR, S_IRWXU) < 0 && errno != EEXIST) { | } | |||
static void write_autorebuild_pid() | ||||
{ | ||||
char path[PATH_MAX]; | ||||
int pid; | ||||
FILE *fp = NULL; | ||||
sprintf(path, "%s/autorebuild.pid", MDMON_DIR); | ||||
if (mkdir(MDMON_DIR, 0700) < 0 && errno != EEXIST) { | ||||
pr_err("Can't create autorebuild.pid file\n"); | ||||
} else { | ||||
int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0700); | ||||
if (fd >= 0) | ||||
fp = fdopen(fd, "w"); | ||||
if (!fp) | ||||
pr_err("Can't create autorebuild.pid file\n"); | pr_err("Can't create autorebuild.pid file\n"); | |||
} else { | else { | |||
fp = fopen(path, "w"); | pid = getpid(); | |||
if (!fp) | fprintf(fp, "%d\n", pid); | |||
pr_err("Cannot create autorebuild.pidfile\n"); | fclose(fp); | |||
else { | ||||
pid = getpid(); | ||||
fprintf(fp, "%d\n", pid); | ||||
fclose(fp); | ||||
} | ||||
} | } | |||
} | } | |||
return 0; | ||||
} | } | |||
static void alert(char *event, char *dev, char *disc, struct alert_info *info) | static void alert(char *event, char *dev, char *disc, struct alert_info *info) | |||
{ | { | |||
int priority; | int priority; | |||
if (!info->alert_cmd && !info->mailaddr && !info->dosyslog) { | if (!info->alert_cmd && !info->mailaddr && !info->dosyslog) { | |||
time_t now = time(0); | time_t now = time(0); | |||
printf("%1.15s: %s on %s %s\n", ctime(&now) + 4, | printf("%1.15s: %s on %s %s\n", ctime(&now) + 4, | |||
skipping to change at line 537 | skipping to change at line 599 | |||
if (array.utime == 0) | if (array.utime == 0) | |||
/* external arrays don't update utime, so | /* external arrays don't update utime, so | |||
* just make sure it is always different. */ | * just make sure it is always different. */ | |||
array.utime = st->utime + 1;; | array.utime = st->utime + 1;; | |||
if (st->err) { | if (st->err) { | |||
/* New array appeared where previously had an error */ | /* New array appeared where previously had an error */ | |||
st->err = 0; | st->err = 0; | |||
st->percent = RESYNC_NONE; | st->percent = RESYNC_NONE; | |||
new_array = 1; | new_array = 1; | |||
alert("NewArray", st->devname, NULL, ainfo); | if (!is_container) | |||
alert("NewArray", st->devname, NULL, ainfo); | ||||
} | } | |||
if (st->utime == array.utime && st->failed == sra->array.failed_disks && | if (st->utime == array.utime && st->failed == sra->array.failed_disks && | |||
st->working == sra->array.working_disks && | st->working == sra->array.working_disks && | |||
st->spare == sra->array.spare_disks && | st->spare == sra->array.spare_disks && | |||
(mse == NULL || (mse->percent == st->percent))) { | (mse == NULL || (mse->percent == st->percent))) { | |||
if ((st->active < st->raid) && st->spare == 0) | if ((st->active < st->raid) && st->spare == 0) | |||
retval = 1; | retval = 1; | |||
goto out; | goto out; | |||
} | } | |||
skipping to change at line 671 | skipping to change at line 734 | |||
retval = 1; | retval = 1; | |||
out: | out: | |||
if (sra) | if (sra) | |||
sysfs_free(sra); | sysfs_free(sra); | |||
if (fd >= 0) | if (fd >= 0) | |||
close(fd); | close(fd); | |||
return retval; | return retval; | |||
disappeared: | disappeared: | |||
if (!st->err) | if (!st->err && !is_container) | |||
alert("DeviceDisappeared", dev, NULL, ainfo); | alert("DeviceDisappeared", dev, NULL, ainfo); | |||
st->err++; | st->err++; | |||
goto out; | goto out; | |||
} | } | |||
static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist, | static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist, | |||
int test, struct alert_info *info) | int test, struct alert_info *info) | |||
{ | { | |||
struct mdstat_ent *mse; | struct mdstat_ent *mse; | |||
int new_found = 0; | int new_found = 0; | |||
skipping to change at line 991 | skipping to change at line 1054 | |||
for (cont = list; cont; cont = cont->next) | for (cont = list; cont; cont = cont->next) | |||
if (!cont->err && cont->parent_devnm[0] == 0 && | if (!cont->err && cont->parent_devnm[0] == 0 && | |||
strcmp(cont->devnm, st->parent_devnm) == 0) { | strcmp(cont->devnm, st->parent_devnm) == 0) { | |||
st->parent = cont; | st->parent = cont; | |||
st->subarray = cont->subarray; | st->subarray = cont->subarray; | |||
cont->subarray = st; | cont->subarray = st; | |||
break; | break; | |||
} | } | |||
} | } | |||
#ifndef NO_LIBUDEV | ||||
/* function: check_udev_activity | ||||
* Description: Function waits for udev to finish | ||||
* events processing. | ||||
* Returns: | ||||
* 1 - detected error while opening udev | ||||
* 2 - timeout | ||||
* 0 - successful completion | ||||
*/ | ||||
static int check_udev_activity(void) | ||||
{ | ||||
struct udev *udev = NULL; | ||||
struct udev_queue *udev_queue = NULL; | ||||
int timeout_cnt = 30; | ||||
int rc = 0; | ||||
/* | ||||
* In rare cases systemd may not have udevm, | ||||
* in such cases just exit with rc 0 | ||||
*/ | ||||
if (!use_udev()) | ||||
goto out; | ||||
udev = udev_new(); | ||||
if (!udev) { | ||||
rc = 1; | ||||
goto out; | ||||
} | ||||
udev_queue = udev_queue_new(udev); | ||||
if (!udev_queue) { | ||||
rc = 1; | ||||
goto out; | ||||
} | ||||
if (udev_queue_get_queue_is_empty(udev_queue)) | ||||
goto out; | ||||
while (!udev_queue_get_queue_is_empty(udev_queue)) { | ||||
sleep(1); | ||||
if (timeout_cnt) | ||||
timeout_cnt--; | ||||
else { | ||||
rc = 2; | ||||
goto out; | ||||
} | ||||
} | ||||
out: | ||||
if (udev_queue) | ||||
udev_queue_unref(udev_queue); | ||||
if (udev) | ||||
udev_unref(udev); | ||||
return rc; | ||||
} | ||||
#endif | ||||
/* Not really Monitor but ... */ | /* Not really Monitor but ... */ | |||
int Wait(char *dev) | int Wait(char *dev) | |||
{ | { | |||
char devnm[32]; | char devnm[32]; | |||
dev_t rdev; | dev_t rdev; | |||
char *tmp; | char *tmp; | |||
int rv = 1; | int rv = 1; | |||
int frozen_remaining = 3; | int frozen_remaining = 3; | |||
if (!stat_is_blkdev(dev, &rdev)) | if (!stat_is_blkdev(dev, &rdev)) | |||
skipping to change at line 1058 | skipping to change at line 1179 | |||
} | } | |||
free_mdstat(ms); | free_mdstat(ms); | |||
return rv; | return rv; | |||
} | } | |||
free_mdstat(ms); | free_mdstat(ms); | |||
rv = 0; | rv = 0; | |||
mdstat_wait(5); | mdstat_wait(5); | |||
} | } | |||
} | } | |||
/* The state "broken" is used only for RAID0/LINEAR - it's the same as | ||||
* "clean", but used in case the array has one or more members missing. | ||||
*/ | ||||
static char *clean_states[] = { | static char *clean_states[] = { | |||
"clear", "inactive", "readonly", "read-auto", "clean", NULL }; | "clear", "inactive", "readonly", "read-auto", "clean", "broken", NULL }; | |||
int WaitClean(char *dev, int verbose) | int WaitClean(char *dev, int verbose) | |||
{ | { | |||
int fd; | int fd; | |||
struct mdinfo *mdi; | struct mdinfo *mdi; | |||
int rv = 1; | int rv = 1; | |||
char devnm[32]; | char devnm[32]; | |||
if (!stat_is_blkdev(dev, NULL)) | if (!stat_is_blkdev(dev, NULL)) | |||
return 2; | return 2; | |||
skipping to change at line 1119 | skipping to change at line 1243 | |||
/* minimize the safe_mode_delay and prepare to wait up to 5s | /* minimize the safe_mode_delay and prepare to wait up to 5s | |||
* for writes to quiesce | * for writes to quiesce | |||
*/ | */ | |||
sysfs_set_safemode(mdi, 1); | sysfs_set_safemode(mdi, 1); | |||
/* wait for array_state to be clean */ | /* wait for array_state to be clean */ | |||
while (1) { | while (1) { | |||
rv = read(state_fd, buf, sizeof(buf)); | rv = read(state_fd, buf, sizeof(buf)); | |||
if (rv < 0) | if (rv < 0) | |||
break; | break; | |||
if (sysfs_match_word(buf, clean_states) <= 4) | if (sysfs_match_word(buf, clean_states) < | |||
(int)ARRAY_SIZE(clean_states) - 1) | ||||
break; | break; | |||
rv = sysfs_wait(state_fd, &delay); | rv = sysfs_wait(state_fd, &delay); | |||
if (rv < 0 && errno != EINTR) | if (rv < 0 && errno != EINTR) | |||
break; | break; | |||
lseek(state_fd, 0, SEEK_SET); | lseek(state_fd, 0, SEEK_SET); | |||
} | } | |||
if (rv < 0) | if (rv < 0) | |||
rv = 1; | rv = 1; | |||
else if (ping_monitor(mdi->text_version) == 0) { | else if (ping_monitor(mdi->text_version) == 0) { | |||
/* we need to ping to close the window between array | /* we need to ping to close the window between array | |||
End of changes. 27 change blocks. | ||||
45 lines changed or deleted | 172 lines changed or added |