"Fossies" - the Fresh Open Source Software Archive 
Member "eucalyptus-4.4.2/node/handlers.c" (4 Aug 2017, 179099 Bytes) of package /linux/misc/eucalyptus-4.4.2.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "handlers.c" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
4.4.1_vs_4.4.2.
1 // -*- mode: C; c-basic-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
2 // vim: set softtabstop=4 shiftwidth=4 tabstop=4 expandtab:
3
4 /*************************************************************************
5 * (c) Copyright 2009-2017 Hewlett Packard Enterprise Development Company LP
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; version 3 of the License.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see http://www.gnu.org/licenses/.
18 *
19 * Please contact Eucalyptus Systems, Inc., 6755 Hollister Ave., Goleta
20 * CA 93117, USA or visit http://www.eucalyptus.com/licenses/ if you need
21 * additional information or have any questions.
22 *
23 * This file may incorporate work covered under the following copyright
24 * and permission notice:
25 *
26 * Software License Agreement (BSD License)
27 *
28 * Copyright (c) 2008, Regents of the University of California
29 * All rights reserved.
30 *
31 * Redistribution and use of this software in source and binary forms,
32 * with or without modification, are permitted provided that the
33 * following conditions are met:
34 *
35 * Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 *
38 * Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer
40 * in the documentation and/or other materials provided with the
41 * distribution.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
44 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
45 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
46 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
47 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
48 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
49 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
50 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
51 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
53 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
54 * POSSIBILITY OF SUCH DAMAGE. USERS OF THIS SOFTWARE ACKNOWLEDGE
55 * THE POSSIBLE PRESENCE OF OTHER OPEN SOURCE LICENSED MATERIAL,
56 * COPYRIGHTED MATERIAL OR PATENTED MATERIAL IN THIS SOFTWARE,
57 * AND IF ANY SUCH MATERIAL IS DISCOVERED THE PARTY DISCOVERING
58 * IT MAY INFORM DR. RICH WOLSKI AT THE UNIVERSITY OF CALIFORNIA,
59 * SANTA BARBARA WHO WILL THEN ASCERTAIN THE MOST APPROPRIATE REMEDY,
60 * WHICH IN THE REGENTS' DISCRETION MAY INCLUDE, WITHOUT LIMITATION,
61 * REPLACEMENT OF THE CODE SO IDENTIFIED, LICENSING OF THE CODE SO
62 * IDENTIFIED, OR WITHDRAWAL OF THE CODE CAPABILITY TO THE EXTENT
63 * NEEDED TO COMPLY WITH ANY SUCH LICENSES OR RIGHTS.
64 ************************************************************************/
65
66 //!
67 //! @file node/handlers.c
68 //! This implements the default operations handlers supported by all hypervisor.
69 //!
70
71 /*----------------------------------------------------------------------------*\
72 | |
73 | INCLUDES |
74 | |
75 \*----------------------------------------------------------------------------*/
76
77 #define _FILE_OFFSET_BITS 64 //!< so large-file support works on 32-bit systems
78 #include <stdio.h>
79 #include <stdlib.h>
80 #define __USE_GNU /* strnlen */
81 #include <string.h> /* strlen, strcpy */
82 #include <time.h>
83 #include <limits.h> /* INT_MAX */
84 #include <sys/unistd.h>
85 #include <sys/types.h> /* fork */
86 #include <sys/wait.h> /* waitpid */
87 #include <unistd.h>
88 #include <fcntl.h>
89 #include <assert.h>
90 #include <sys/errno.h>
91 #include <sys/stat.h>
92 #include <pthread.h>
93 #ifndef __DARWIN_UNIX03
94 #include <sys/vfs.h> /* statfs */
95 #endif /* ! __DARWIN_UNIX03 */
96 #include <signal.h> /* SIGINT */
97 #include <linux/limits.h>
98 #include <pwd.h> /* getpwuid_r */
99 #include <netdb.h>
100 #include <sys/socket.h>
101 #include <netinet/in.h>
102 #include <arpa/inet.h>
103
104 #include <eucalyptus.h>
105 #include <eucalyptus-config.h>
106 #include <ipc.h>
107 #include <misc.h>
108 #include <backing.h>
109 #include <diskutil.h>
110 #include <euca_auth.h>
111 #include <euca_axis.h>
112 #include <euca_network.h>
113 #include <euca_gni.h>
114
115 #include <vbr.h>
116 #include <iscsi.h>
117 #include <config.h>
118 #include <fault.h>
119 #include <log.h>
120 #include <euca_string.h>
121 #include <euca_system.h>
122
123 #define HANDLERS_FANOUT
124 #include "handlers.h"
125 #include "xml.h"
126 #include "hooks.h"
127 #include <ebs_utils.h>
128 #include "objectstorage.h"
129 #include "stats.h"
130 #include "message_sensor.h"
131 #include "message_stats.h"
132 #include "service_sensor.h"
133
134 /*----------------------------------------------------------------------------*\
135 | |
136 | DEFINES |
137 | |
138 \*----------------------------------------------------------------------------*/
139
140 #define MONITORING_PERIOD (5) //!< Instance state transition monitoring period in seconds.
141 #define MAX_CREATE_TRYS 5 //!< NOTE(review): presumably the retry limit for domain creation; not used in this part of the file -- confirm
142 #define CREATE_TIMEOUT_SEC 300 //!< NOTE(review): presumably the per-attempt domain creation timeout in seconds -- confirm
143 #define LIBVIRT_TIMEOUT_SEC 5 //!< NOTE(review): presumably the timeout for libvirt calls in seconds -- confirm
144 #define NETWORK_GATE_TIMEOUT_SEC 1200 //!< seconds; also the value of default_booting_envwait_threshold
145 #define PER_INSTANCE_BUFFER_MB 20 //!< by default reserve this much extra room (in MB) per instance (for kernel, ramdisk, and metadata overhead)
146 #define SEC_PER_MB ((1024 * 1024) / 512) //!< number of 512-byte units in one MB (2048); presumably disk sectors
147
148 #define MIN_BLOBSTORE_SIZE_MB 10 //!< even with boot-from-EBS one will need work space for kernel and ramdisk
149 #define FS_BUFFER_PERCENT 0.03 //!< leave 3% extra when deciding on blobstore sizes automatically
150 #define WORK_BS_PERCENT 0.33 //!< give a third of available space to work, the rest to cache
151 #define MAX_CONNECTION_ERRORS 5 //!< NOTE(review): presumably the tolerated number of hypervisor connection errors (see hypervisor_conn_errors) -- confirm
152
153 /*----------------------------------------------------------------------------*\
154 | |
155 | TYPEDEFS |
156 | |
157 \*----------------------------------------------------------------------------*/
158
159 /*----------------------------------------------------------------------------*\
160 | |
161 | ENUMERATIONS |
162 | |
163 \*----------------------------------------------------------------------------*/
164
165 /*----------------------------------------------------------------------------*\
166 | |
167 | STRUCTURES |
168 | |
169 \*----------------------------------------------------------------------------*/
170
171 /*----------------------------------------------------------------------------*\
172 | |
173 | EXTERNAL VARIABLES |
174 | |
175 \*----------------------------------------------------------------------------*/
176
177 /* Should preferably be handled in header file */
178
179 // declarations of available handlers: these handler tables are defined outside this file
// and are collected in the available_handlers[] array further down
180 extern struct handlers xen_libvirt_handlers;
181 extern struct handlers kvm_libvirt_handlers;
182 extern struct handlers default_libvirt_handlers;
183
184 /*----------------------------------------------------------------------------*\
185 | |
186 | GLOBAL VARIABLES |
187 | |
188 \*----------------------------------------------------------------------------*/
189
190 #ifndef NO_COMP
191 const char *euca_this_component_name = "nc"; //!< Name of this component
192 const char *euca_client_component_name = "cc"; //!< Name of this component's client
193 #endif /* NO_COMP */
194
195 /* used by lower level handlers */
196
197 sem *hyp_sem = NULL; //!< semaphore for serializing domain creation; also held around the migration key commands when the caller asks for it
198 sem *inst_sem = NULL; //!< guarding access to global instance structs
199 sem *inst_copy_sem = NULL; //!< NOTE(review): presumably guards global_instances_copy -- confirm against its users
200 sem *addkey_sem = NULL; //!< NOTE(review): purpose not visible in this part of the file (the previous comment was copied from inst_sem) -- confirm
201 sem *loop_sem = NULL; //!< created in diskutils.c for serializing 'losetup' invocations
202 sem *log_sem = NULL; //!< used by log.c
203 sem *service_state_sem = NULL; //!< Used to guard service state updates (i.e. topology updates)
204 sem *stats_sem = NULL; //!< Used to guard the internal message stats data on updates (taken by nc_lock_stats(), released by nc_unlock_stats())
205
206 bunchOfInstances *global_instances = NULL; //!< pointer to the instance list
207 bunchOfInstances *global_instances_copy = NULL; //!< pointer to the copied instance list
208
209 const int default_staging_cleanup_threshold = 60 * 60 * 2; //!< after this many seconds any STAGING domains will be cleaned up
210 const int default_booting_cleanup_threshold = 60; //!< after this many seconds any BOOTING domains will be cleaned up
211 const int default_booting_envwait_threshold = NETWORK_GATE_TIMEOUT_SEC; //!< after this many seconds an instance will fail to boot unless network environment is ready
212 const int default_bundling_cleanup_threshold = 60 * 60 * 2; //!< after this many seconds any BUNDLING domains will be cleaned up
213 const int default_createImage_cleanup_threshold = 60 * 60 * 2; //!< after this many seconds any CREATEIMAGE domains will be cleaned up
214 const int default_teardown_state_duration = 60 * 3; //!< after this many seconds in TEARDOWN state (no resources), we'll forget about the instance
215 const int default_migration_ready_threshold = 60 * 15; //!< after this many seconds ready (and waiting) to migrate, migration will terminate and roll back
216
217 struct nc_state_t nc_state = { 0 }; //!< Global NC state structure
218
//! Configuration keys with their default values; NULL-terminated.
//! NOTE(review): judging by the name, a change to these requires an NC restart -- confirm
219 configEntry configKeysRestartNC[] = {
220 {CONFIG_ENABLE_WS_SECURITY, "Y"},
221 {"EUCALYPTUS", "/"},
222 {NULL, NULL},
223 };
224
//! Configuration keys with their default values; NULL-terminated.
//! NOTE(review): judging by the name, these can change without an NC restart -- confirm
225 configEntry configKeysNoRestartNC[] = {
226 {"LOGLEVEL", "INFO"},
227 {"LOGROLLNUMBER", "10"},
228 {"LOGMAXSIZE", "104857600"},
229 {"LOGPREFIX", ""},
230 {"LOGFACILITY", ""},
231 {CONFIG_NC_CEPH_USER, DEFAULT_CEPH_USER},
232 {CONFIG_NC_CEPH_KEYS, DEFAULT_CEPH_KEYRING},
233 {CONFIG_NC_CEPH_CONF, DEFAULT_CEPH_CONF},
234 {SENSOR_LIST_CONF_PARAM_NAME, SENSOR_LIST_CONF_PARAM_DEFAULT},
235 {NULL, NULL},
236 };
237
238 int incoming_migrations_in_progress = 0; //!< NOTE(review): presumably counts migrations currently arriving at this node -- confirm
239 int outgoing_migrations_in_progress = 0; //!< NOTE(review): presumably counts migrations currently leaving this node -- confirm
240
241 /*----------------------------------------------------------------------------*\
242 | |
243 | STATIC VARIABLES |
244 | |
245 \*----------------------------------------------------------------------------*/
246
// Build timestamp string: EUCA_COMPILE_TIMESTAMP when the build defines it, empty otherwise
247 #ifdef EUCA_COMPILE_TIMESTAMP
248 static char *compile_timestamp_str = EUCA_COMPILE_TIMESTAMP;
249 #else /* EUCA_COMPILE_TIMESTAMP */
250 static char *compile_timestamp_str = "";
251 #endif /* EUCA_COMPILE_TIMESTAMP */
252
253 //! a NULL-terminated array of available handlers
254 static struct handlers *available_handlers[] = {
255 &default_libvirt_handlers,
256 &xen_libvirt_handlers,
257 &kvm_libvirt_handlers,
258 NULL,
259 };
260
261 static json_object *stats_json = NULL; //!< The json object that holds all of the internal message counters
262 static int stats_sensor_interval_sec; //!< Keeps the current value for sensor interval. Set by initialize_stats_system()
263 static int hypervisor_conn_errors = 0; //!< NOTE(review): presumably a running count of hypervisor connection failures (cf. MAX_CONNECTION_ERRORS) -- confirm
264
265 /*----------------------------------------------------------------------------*\
266 | |
267 | STATIC PROTOTYPES |
268 | |
269 \*----------------------------------------------------------------------------*/
270
271 static void *libvirt_thread(void *ptr);
272 static void refresh_instance_info(struct nc_state_t *nc, ncInstance * instance);
273 static void update_log_params(void);
274 static void update_ebs_params(void);
275 static void nc_signal_handler(int sig);
276 static int init(void);
277 static void updateServiceStateInfo(ncMetadata * pMeta, boolean authoritative); // copies service/topology info from request metadata into nc_state
278 static void printNCServiceStateInfo(void); // trace-level dump of the service lists held in nc_state
279 static void printMsgServiceStateInfo(ncMetadata * pMeta); // trace-level dump of the service lists carried by a request
280
281 //! Helpers for internal stats handling in the NC
282 static json_object **message_stats_getter(); // returns the address of stats_json
283 static void message_stats_setter(); // no-op for the NC apart from a trace message
284 static int initialize_stats_system(int interval_sec); // sets up the message sensor, service state sensor and stats system
285 static void *nc_run_stats(void *ignored_arg); // calls run_stats() with the configured sensor interval
286
287 /*----------------------------------------------------------------------------*\
288 | |
289 | MACROS |
290 | |
291 \*----------------------------------------------------------------------------*/
292
293 //! rejection of certain operations when NC is disabled: if nc_state.is_enabled is FALSE, logs an
//! error naming the enclosing function (__func__) and makes that function return EUCA_ERROR.
//! NOTE(review): expands to a bare { ... } block rather than do { ... } while (0), so it is only
//! safe as a statement of its own (not as the unbraced body of an if/else); the enclosing
//! function must be able to return EUCA_ERROR.
294 #define DISABLED_CHECK \
295 { \
296 if (nc_state.is_enabled == FALSE) { \
297 LOGERROR("operation %s is not allowed when node is DISABLED\n", __func__); \
298 return (EUCA_ERROR); \
299 } \
300 }
301
302 /*----------------------------------------------------------------------------*\
303 | |
304 | IMPLEMENTATION |
305 | |
306 \*----------------------------------------------------------------------------*/
307
308 /*----------------------------------------------------------------------------*\
309 | |
310 | IMPLEMENTATION |
311 | |
312 \*----------------------------------------------------------------------------*/
313
314 static void *nc_run_stats(void *ignored_arg)
315 {
316 LOGDEBUG("Starting stats subsystem execution. Will not terminate until service halts\n");
317 if (run_stats(FALSE, stats_sensor_interval_sec, NULL) != EUCA_OK) {
318 LOGERROR("Stats run call returned with error. Unexepcted. Should not have returned\n");
319 }
320 return NULL;
321 }
322
323 //! Runs a check on service and returns result in string form
324 //! for the stats sensor
325 static char *stats_service_check_call()
326 {
327 LOGTRACE("Invoking NC check function for internal stats\n");
328 if (nc_state.is_enabled) {
329 return SERVICE_CHECK_OK_MSG;
330 }
331 return SERVICE_CHECK_FAILED_MSG;
332 }
333
334 //! Gets the CC state as a string for use by the stats system
335 static char *stats_service_state_call()
336 {
337 LOGTRACE("Getting NC service state for internal stats\n");
338 if (nc_state.is_enabled) {
339 return "ENABLED";
340 } else {
341 return "DISABLED";
342 }
343 }
344
345 //! Gets the reference to the stats json object, basically a no-op for the NC
346 static json_object **message_stats_getter()
347 {
348 LOGTRACE("Fetching latest message stats from shared memory\n");
349 return &stats_json;
350 }
351
//!
//! Setter callback for the message stats. Nothing has to be written back on
//! the NC (as opposed to the CC) because updates are applied to stats_json in
//! place, so this only emits a trace message.
//!
static void message_stats_setter()
{
    // Intentionally a no-op apart from the trace line
    LOGTRACE("Updating latest message stats from shared memory\n");
}
359
360 void nc_lock_stats()
361 {
362 sem_p(stats_sem);
363 }
364
365 void nc_unlock_stats()
366 {
367 sem_v(stats_sem);
368 }
369
370 //! Update the message stat structure
371 //! Wraps the message stats update with the necessary caching copies and locking
372 int nc_update_message_stats(const char *message_name, long call_time, int msg_failed)
373 {
374 LOGTRACE("Updating message stats for message %s\n", message_name);
375
376 nc_lock_stats();
377 json_object **stats_state = message_stats_getter();
378
379 //Update the counters
380 update_message_stats(*stats_state, message_name, call_time, msg_failed);
381 message_stats_setter();
382
383 nc_unlock_stats();
384 LOGTRACE("Message stats update complete\n");
385 return EUCA_OK;
386 }
387
388 //! Provides NC-specific initializations for the stats system of
389 //! internal service sensors (state sensors, message statistics, etc)
390 //! @returns EUCA_OK on success, or error code on failure
391 static int initialize_stats_system(int interval_sec)
392 {
393 LOGDEBUG("Initializing stats subsystem for NC\n");
394 int ret = EUCA_OK;
395 int stats_ttl = interval_sec + 1;
396 stats_sensor_interval_sec = interval_sec;
397 nc_lock_stats();
398 {
399 //Init the message sensor with component-specific data
400 ret = initialize_message_sensor(euca_this_component_name, interval_sec, stats_ttl, message_stats_getter, message_stats_setter);
401 if (ret != EUCA_OK) {
402 LOGERROR("Error initializing internal message sensor: %d\n", ret);
403 goto cleanup;
404 } else {
405 json_object **tmp = message_stats_getter();
406 const char *tmp_out = json_object_to_json_string(*tmp);
407 LOGINFO("Initialized internal message stats: %s\n", tmp_out);
408
409 }
410
411 //Init the service state sensor with component-specific data
412 ret = initialize_service_state_sensor(euca_this_component_name, interval_sec, stats_ttl, stats_service_state_call, stats_service_check_call);
413 if (ret != EUCA_OK) {
414 LOGERROR("Error initializing internal service state sensor: %d\n", ret);
415 goto cleanup;
416 }
417
418 ret = init_stats(nc_state.home, euca_this_component_name, nc_lock_stats, nc_unlock_stats);
419 if (ret != EUCA_OK) {
420 LOGERROR("Could not initialize CC stats system: %d\n", ret);
421 goto cleanup;
422 }
423 }
424
425 if (!ret) {
426 LOGINFO("Stats subsystem initialized\n");
427 } else {
428 LOGERROR("Stat subsystem init failed: %d\n", ret);
429 }
430 cleanup:
431 nc_unlock_stats();
432 return ret;
433 }
434
435
436 //!
437 //! Deauthorize all migration keys on destination host
438 //! @param[in] lock_hyp_sem set to true to hold the 'lock_hyp_sem' semaphore
439 //!
440 //! @return EUCA_OK, EUCA_SYSTEM_ERROR
441 //!
442 int deauthorize_migration_keys(boolean lock_hyp_sem)
443 {
444 int rc = 0;
445 char euca_rootwrap[EUCA_MAX_PATH] = "";
446 char command[EUCA_MAX_PATH] = "";
447 char *euca_base = getenv(EUCALYPTUS_ENV_VAR_NAME);
448
449 snprintf(command, EUCA_MAX_PATH, EUCALYPTUS_AUTHORIZE_MIGRATION_KEYS, NP(euca_base));
450 snprintf(euca_rootwrap, EUCA_MAX_PATH, EUCALYPTUS_ROOTWRAP, NP(euca_base));
451
452 LOGDEBUG("migration key de-authorization command: '%s %s %s %s'\n", euca_rootwrap, command, "-D", "-r");
453 if (lock_hyp_sem == TRUE) {
454 sem_p(hyp_sem);
455 }
456
457 rc = euca_execlp(NULL, euca_rootwrap, command, "-D", "-r", NULL);
458
459 if (lock_hyp_sem == TRUE) {
460 sem_v(hyp_sem);
461 }
462
463 if (rc != EUCA_OK) {
464 LOGERROR("'%s %s %s %s' failed. rc=%d\n", euca_rootwrap, command, "-D", "-r", rc);
465 return (EUCA_SYSTEM_ERROR);
466 } else {
467 LOGDEBUG("migration key deauthorization succeeded\n");
468 }
469 return (EUCA_OK);
470 }
471
472 //!
473 //! Authorize migration keys on destination host.
474 //!
475 //! @param[in] host hostname (IP address) to authorize
476 //! @param[in] credentials shared secret to authorize
477 //! @param[in] instance pointer to instance struct for logging information (optional--can be NULL)
478 //! @param[in] lock_hyp_sem set to true to hold the 'lock_hyp_sem' semaphore
479 //!
480 //! @return EUCA_OK, EUCA_INVALID_ERROR, or EUCA_SYSTEM_ERROR
481 //!
482 int authorize_migration_keys(char *host, char *credentials, ncInstance * instance, boolean lock_hyp_sem)
483 {
484 int rc = 0;
485 char euca_rootwrap[EUCA_MAX_PATH] = "";
486 char command[EUCA_MAX_PATH] = "";
487 char *euca_base = getenv(EUCALYPTUS_ENV_VAR_NAME);
488 char *instanceId = instance ? instance->instanceId : "UNSET";
489
490 if (!host && !credentials) {
491 LOGERROR("[%s] called with invalid arguments: host=%s, creds=%s\n", SP(instanceId), SP(host), (credentials == NULL) ? "UNSET" : "present");
492 return (EUCA_INVALID_ERROR);
493 }
494
495 snprintf(command, EUCA_MAX_PATH, EUCALYPTUS_AUTHORIZE_MIGRATION_KEYS, NP(euca_base));
496 snprintf(euca_rootwrap, EUCA_MAX_PATH, EUCALYPTUS_ROOTWRAP, NP(euca_base));
497 LOGDEBUG("[%s] migration key authorization command: '%s %s %s %s %s'\n", SP(instanceId), euca_rootwrap, command, "-a", NP(host), NP(credentials));
498 if (lock_hyp_sem == TRUE) {
499 sem_p(hyp_sem);
500 }
501
502 rc = euca_execlp(NULL, euca_rootwrap, command, "-a", NP(host), NP(credentials), NULL);
503
504 if (lock_hyp_sem == TRUE) {
505 sem_v(hyp_sem);
506 }
507
508 if (rc != EUCA_OK) {
509 LOGERROR("[%s] '%s %s %s %s %s' failed. rc=%d\n", SP(instanceId), euca_rootwrap, command, "-a", NP(host), NP(credentials), rc);
510 return (EUCA_SYSTEM_ERROR);
511 } else {
512 LOGDEBUG("[%s] migration key authorization succeeded\n", SP(instanceId));
513 }
514 return (EUCA_OK);
515 }
516
517 //!
518 //! Configure libvirtd to not use polkitd by default.
519 //!
520 //! Only needs to be run during init() as a one time operation. In most cases
521 //! this will check the config and not restart libvirt if everything is ok.
522 //!
523 //! @param[in] use_polkit set 1, will enable polkit, 0 will disable (default)
524 //! @return EUCA_OK, EUCA_INVALID_ERROR, or EUCA_SYSTEM_ERROR
525 //!
526 int config_polkit(int use_polkit)
527 {
528 int rc = 0;
529 char euca_rootwrap[EUCA_MAX_PATH] = "";
530 char command[EUCA_MAX_PATH] = "";
531 char *euca_base = getenv(EUCALYPTUS_ENV_VAR_NAME);
532
533 snprintf(command, EUCA_MAX_PATH, EUCALYPTUS_CONFIG_NO_POLKIT, NP(euca_base));
534 snprintf(euca_rootwrap, EUCA_MAX_PATH, EUCALYPTUS_ROOTWRAP, NP(euca_base));
535 LOGDEBUG("config-no-polkit command: '%s %s'\n", euca_rootwrap, command);
536
537 if (use_polkit)
538 rc = euca_execlp(NULL, euca_rootwrap, command, "-e", NULL); // enable
539 else
540 rc = euca_execlp(NULL, euca_rootwrap, command, NULL); // disable - default
541
542 if (rc != EUCA_OK) {
543 LOGERROR("%s %s' failed. rc=%d\n",euca_rootwrap, command, rc);
544 return (EUCA_SYSTEM_ERROR);
545 } else {
546 LOGDEBUG("Libvirtd polkit configuration succeeded\n");
547 }
548 return (EUCA_OK);
549 }
550
551 //!
552 //! Copies the url string of the ENABLED service of the requested type into dest_buffer.
553 //! dest_buffer MUST be the same size as the services uri array length, 512.
554 //!
555 //! @param[in] service_type
556 //! @param[in] nc
557 //! @param[in] dest_buffer
558 //! @return EUCA_OK on success, EUCA_ERROR on failure.
559 //! @pre
560 //!
561 //! @post
562 //!
563 int get_service_url(const char *service_type, struct nc_state_t *nc, char *dest_buffer)
564 {
565 int i = 0;
566 boolean found = FALSE;
567
568 if (service_type == NULL || nc == NULL || dest_buffer == NULL) {
569 LOGERROR("Invalid input parameters. At least one is NULL.\n");
570 return (EUCA_ERROR);
571 }
572
573 sem_p(service_state_sem);
574
575 for (i = 0; i < 16; i++) {
576 if (!strcmp(service_type, nc->services[i].type)) {
577 //Winner!
578 if (nc->services[i].urisLen > 0) {
579 euca_strncpy(dest_buffer, nc->services[i].uris[0], 512);
580 found = TRUE;
581 }
582 }
583 }
584 sem_v(service_state_sem);
585
586 if (found) {
587 LOGTRACE("Found enabled service URI for service type %s as %s\n", service_type, dest_buffer);
588 return (EUCA_OK);
589 }
590
591 dest_buffer[0] = '\0'; //Ensure 0 length string
592 LOGTRACE("No enabled service found for service type %s\n", service_type);
593 return (EUCA_ERROR);
594 }
595
596 //!
597 //!
598 //!
599 //! @pre
600 //!
601 //! @post
602 //!
603 static void printNCServiceStateInfo(void)
604 {
605 int i = 0;
606 //Don't bother if not at trace logging
607 if (log_level_get() <= EUCA_LOG_TRACE) {
608 sem_p(service_state_sem);
609 LOGTRACE("Printing %d services\n", nc_state.servicesLen);
610 LOGTRACE("Epoch %d\n", nc_state.ncStatus.localEpoch);
611 for (i = 0; i < nc_state.servicesLen; i++) {
612 LOGTRACE("Service - %s %s %s %s\n", nc_state.services[i].name, nc_state.services[i].partition, nc_state.services[i].type, nc_state.services[i].uris[0]);
613 }
614 for (i = 0; i < nc_state.disabledServicesLen; i++) {
615 LOGTRACE("Disabled Service - %s %s %s %s\n", nc_state.disabledServices[i].name, nc_state.disabledServices[i].partition, nc_state.disabledServices[i].type,
616 nc_state.disabledServices[i].uris[0]);
617 }
618 for (i = 0; i < nc_state.servicesLen; i++) {
619 LOGTRACE("Notready Service - %s %s %s %s\n", nc_state.notreadyServices[i].name, nc_state.notreadyServices[i].partition, nc_state.notreadyServices[i].type,
620 nc_state.notreadyServices[i].uris[0]);
621 }
622 sem_v(service_state_sem);
623 }
624 }
625
626 //!
627 //!
628 //!
629 //! @param[in] pMeta
630 //!
631 //! @pre
632 //!
633 //! @post
634 //!
635 static void printMsgServiceStateInfo(ncMetadata * pMeta)
636 {
637 int i = 0;
638 //Don't bother if not at trace logging
639 if (log_level_get() <= EUCA_LOG_TRACE) {
640 LOGTRACE("Printing %d services\n", pMeta->servicesLen);
641 LOGTRACE("Msg-Meta epoch %d\n", pMeta->epoch);
642
643 for (i = 0; i < pMeta->servicesLen; i++) {
644 LOGTRACE("Msg-Meta: Service - %s %s %s %s\n", pMeta->services[i].name, pMeta->services[i].partition, pMeta->services[i].type, pMeta->services[i].uris[0]);
645 }
646
647 for (i = 0; i < pMeta->disabledServicesLen; i++) {
648 LOGTRACE("Msg-Meta: Disabled Service - %s %s %s %s\n", pMeta->disabledServices[i].name, pMeta->disabledServices[i].partition, pMeta->disabledServices[i].type,
649 pMeta->disabledServices[i].uris[0]);
650 }
651
652 for (i = 0; i < pMeta->servicesLen; i++) {
653 LOGTRACE("Msg-Meta: Notready Service - %s %s %s %s\n", pMeta->notreadyServices[i].name, pMeta->notreadyServices[i].partition, pMeta->notreadyServices[i].type,
654 pMeta->notreadyServices[i].uris[0]);
655 }
656 }
657 }
658
659 //!
660 //! Update the state of the services and topology as received from the CC
661 //!
662 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
663 //! @param[in] authoritative indicates whether this request is allowed to reset epoch
664 //! @pre
665 //!
666 //! @note
667 //!
668 static void updateServiceStateInfo(ncMetadata * pMeta, boolean authoritative)
669 {
670 int i = 0;
671 char scURL[512];
672 if ((pMeta != NULL) && (pMeta->servicesLen > 0)) {
673 LOGTRACE("Updating NC's topology/service state info: pMeta: userId=%s\n", pMeta->userId);
674
675 // store information from CLC that needs to be kept up-to-date in the NC
676 sem_p(service_state_sem);
677
678 if (pMeta->epoch >= nc_state.ncStatus.localEpoch || // we have updates ('=' is there in case CC does not bump epoch numbers)
679 authoritative // trust the authoritative requests and always take their services info, even if epoch goes backward
680 ) {
681 //Update the epoch first
682 nc_state.ncStatus.localEpoch = pMeta->epoch;
683
684 //Copy new services info wholesale
685 memcpy(nc_state.services, pMeta->services, sizeof(serviceInfoType) * 16);
686 memcpy(nc_state.disabledServices, pMeta->disabledServices, sizeof(serviceInfoType) * 16);
687 memcpy(nc_state.notreadyServices, pMeta->notreadyServices, sizeof(serviceInfoType) * 16);
688 nc_state.servicesLen = pMeta->servicesLen;
689 nc_state.disabledServicesLen = pMeta->disabledServicesLen;
690 nc_state.notreadyServicesLen = pMeta->notreadyServicesLen;
691
692 //Make a copy of the SC url to use outside of the semaphore
693 for (i = 0; i < nc_state.servicesLen; i++) {
694 if (!strcmp(nc_state.services[i].type, "storage")) {
695 if (nc_state.services[i].urisLen > 0) {
696 memcpy(scURL, nc_state.services[i].uris[0], 512);
697 break;
698 }
699 }
700 }
701 }
702 sem_v(service_state_sem);
703
704 LOGTRACE("Updating VBR localhost config sc url to: %s\n", scURL);
705 //Push the change to the vbr code
706 vbr_update_hostconfig_scurl(scURL);
707
708 } else {
709 LOGTRACE("Cannot update service infos, null found\n");
710 return;
711 }
712
713 //Log the results...
714 printNCServiceStateInfo();
715 printMsgServiceStateInfo(pMeta);
716 }
717
718 //!
719 //! Utilitarian functions used in the lower level handlers. This scans the string buffer
720 //! 's' for a matching parameter 'name' to fill in the 'valp' value.
721 //!
722 //! @param[in] s a non NULL string buffer
723 //! @param[in] name the name of the parameter we're looking for
724 //! @param[out] valp a pointer to the integer returned if we found the parameter in 's'
725 //!
726 //! @return EUCA_OK on success; EUCA_ERROR if any parameters are invalid; or EUCA_NO_FOUND_ERROR
727 //! if the 'name' parameter is not found in 's'. In any error case, 'valp' will remain
728 //! invalid and could be modified.
729 //!
730 int get_value(char *s, const char *name, long long *valp)
731 {
732 char buf[CHAR_BUFFER_SIZE] = "";
733
734 if ((s == NULL) || (name == NULL) || (valp == NULL))
735 return (EUCA_ERROR);
736 snprintf(buf, CHAR_BUFFER_SIZE, "%s=%%lld", name);
737 return ((euca_lscanf(s, buf, valp) == 1) ? EUCA_OK : EUCA_NOT_FOUND_ERROR);
738 }
739
740 //!
741 //! Handles the logging of libvirt errors
742 //!
743 //! @param[in] userData (UNUSED)
744 //! @param[in] error a pointer to the libvirt error information
745 //!
746 void libvirt_err_handler(void *userData, virErrorPtr error)
747 {
748 boolean ignore_error = FALSE;
749
750 if (error == NULL) {
751 LOGERROR("libvirt error handler was given a NULL pointer\n");
752 return;
753 }
754
755 if (error->code == VIR_ERR_NO_DOMAIN) {
756 char *instanceId = euca_strestr(error->message, "'", "'"); // try to find instance ID in the message
757 if (instanceId) {
758 // NOTE: sem_p/v(inst_sem) cannot be used as this err_handler can be called in refresh_instance_info's context
759 ncInstance *instance = find_instance(&global_instances, instanceId);
760 if (instance && (instance->terminationRequestedTime // termination of this instance was requested
761 || (instance->state == BOOTING) // it is booting or rebooting
762 || (instance->state == BUNDLING_SHUTDOWN || instance->state == BUNDLING_SHUTOFF)
763 || (instance->state == CREATEIMAGE_SHUTDOWN || instance->state == CREATEIMAGE_SHUTOFF))) {
764 ignore_error = TRUE;
765 }
766 free(instanceId);
767 }
768 }
769
770 if (!ignore_error) {
771 EUCALOG(EUCA_LOG_ERROR, "libvirt: %s (code=%d)\n", error->message, error->code);
772 }
773 }
774
775 //!
776 //! converts 'dev' into canonical form (e.g., "sda" of "/dev/sda") unless
777 //! it is already in canonical form
778 //!
779 //! @param[in] dev the device name string (e.g. /dev/sda or sda)
780 //! @param[out] cdev the device name in canonical form (without /dev/)
781 //! @param[in] cdev_len length of the cdev buffer in bytes
782 //!
783 //! @return EUCA_OK on success or EUCA_ERROR on failure
784 //!
785 int canonicalize_dev(const char *dev, char *cdev, int cdev_len)
786 {
787 char cdev_local[128];
788 euca_strncpy(cdev_local, dev, sizeof(cdev_local));
789
790 const char *s = cdev_local;
791 if (strstr(dev, "/dev/") == dev) {
792 s = s + strlen("/dev/");
793 }
794 if (strchr(s, '/')) {
795 LOGERROR("device name string of unexpected format (must be /dev/XXX)\n");
796 return EUCA_ERROR;
797 }
798 if (strlen(s) > (cdev_len - 1)) {
799 LOGERROR("buffer size (%d) exceeded for device name string\n", cdev_len);
800 return EUCA_ERROR;
801 }
802 euca_strncpy(cdev, s, cdev_len);
803
804 return EUCA_OK;
805 }
806
807 //!
808 //! This updates the 'aliases' of sensor 'dimensions' that store sensor data for specific
809 //! block devices. Dimensions are strings like 'root', 'ephemeral0', 'vol-XYZ', etc. The
810 //! purpose of aliases is to map block device statistics returned by getstats.pl script,
811 //! which use guest block device names, such as 'sda' or 'vdb', into dimensions. To deduce
812 //! the mapping, we use .xml files that are passed to libvirt. This is somewhat awkward, but
813 //! it gets us the guest device actually used by the hypervisor. (The device we request may
814 //! be modified by XSL transforms and NC hooks.)
815 //!
816 //! @param[in] instance a pointer to the instance
817 //!
818 //! @return Always return EUCA_OK
819 //!
820 int update_disk_aliases(ncInstance * instance)
821 {
822 int i = 0;
823 int j = 0;
824 char *volumeId = NULL;
825 char **devs = NULL;
826 char lpath[EUCA_MAX_PATH] = "";
827 boolean saw_ephemeral0 = FALSE;
828 boolean saw_root = FALSE;
829 ncVolume *volume = NULL;
830
831 // update block devices from instance XML file
832 if ((devs = get_xpath_content(instance->libvirtFilePath, "/domain/devices/disk/target[@dev]/@dev")) != NULL) {
833 for (i = 0; devs[i]; i++) {
834 volumeId = NULL;
835 if (strstr(devs[i], "da1")) { // regexp: [hsvx]v?da1?
836 volumeId = "root";
837 saw_root = TRUE;
838 } else if (strstr(devs[i], "da2")) {
839 if (saw_ephemeral0) {
840 LOGERROR("[%s] unexpected disk layout in instance", instance->instanceId);
841 } else {
842 volumeId = "ephemeral0";
843 saw_ephemeral0 = TRUE;
844 }
845 } else if (strstr(devs[i], "da")) {
846 volumeId = "root";
847 saw_root = TRUE;
848 } else if (strstr(devs[i], "db")) {
849 if (saw_ephemeral0) {
850 LOGERROR("[%s] unexpected disk layout in instance", instance->instanceId);
851 } else {
852 volumeId = "ephemeral0";
853 saw_ephemeral0 = TRUE;
854 }
855 } else if (strstr(devs[i], "dc")) {
856 volumeId = "ephemeral1";
857 } else if (strstr(devs[i], "dd")) {
858 volumeId = "ephemeral2";
859 } else if (strstr(devs[i], "de")) {
860 volumeId = "ephemeral3";
861 }
862
863 if (volumeId) {
864 ebs_volume_data *vol_data = NULL;
865
866 if (strcmp("root", volumeId) == 0) {
867 if (instance->params.root->locationType == NC_LOCATION_SC) {
868 if (deserialize_volume(instance->params.root->resourceLocation, &vol_data) == 0) {
869 volumeId = vol_data->volumeId;
870 }
871 }
872 }
873 sensor_set_volume(instance->instanceId, volumeId, devs[i]);
874
875 EUCA_FREE(vol_data);
876 }
877 EUCA_FREE(devs[i]);
878 }
879 EUCA_FREE(devs);
880 }
881
882 if (!saw_root) {
883 LOGWARN("[%s] failed to find 'dev' entry for root\n", instance->instanceId);
884 }
885 // now update attached or detached volumes, if any
886 for (i = 0; i < EUCA_MAX_VOLUMES; ++i) {
887 volume = &instance->volumes[i];
888 if (strlen(volume->volumeId) == 0)
889 continue;
890
891 snprintf(lpath, sizeof(lpath), EUCALYPTUS_VOLUME_LIBVIRT_XML_PATH_FORMAT, instance->instancePath, volume->volumeId); // vol-XXX-libvirt.xml
892 if ((devs = get_xpath_content(lpath, "/disk/target[@dev]/@dev")) != NULL) {
893 if (devs[0] && devs[1] == NULL) {
894 sensor_set_volume(instance->instanceId, volume->volumeId, devs[0]);
895 } else {
896 LOGWARN("[%s] failed to find 'dev' entry in %s\n", lpath, instance->instanceId);
897 }
898
899 for (j = 0; devs[j]; j++) {
900 EUCA_FREE(devs[j]);
901 }
902 EUCA_FREE(devs);
903 } else {
904 sensor_set_volume(instance->instanceId, volume->volumeId, NULL);
905 }
906 }
907
908 return EUCA_OK;
909 }
910
911 //!
912 //! Logs the currently running domains
913 //!
914 void print_running_domains(void)
915 {
916 ncInstance *instance = NULL;
917 bunchOfInstances *head = NULL;
918 char buf[CHAR_BUFFER_SIZE] = "";
919
920 sem_p(inst_sem);
921 {
922 for (head = global_instances; head; head = head->next) {
923 instance = head->instance;
924 if (instance->state == STAGING || instance->state == BOOTING || instance->state == RUNNING || instance->state == BLOCKED || instance->state == PAUSED) {
925 strcat(buf, " ");
926 strcat(buf, instance->instanceId);
927 }
928 }
929 }
930 sem_v(inst_sem);
931 LOGINFO("currently running/booting: %s\n", buf);
932 }
933
934 //!
935 //!
936 //!
937 //! @param[in] ptr
938 //!
939 static void *libvirt_thread(void *ptr)
940 {
941 int rc = 0;
942 sigset_t mask = { {0} };
943
944 // allow SIGUSR1 signal to be delivered to this thread and its children
945 sigemptyset(&mask);
946 sigaddset(&mask, SIGUSR1);
947 sigprocmask(SIG_UNBLOCK, &mask, NULL);
948
949 if (nc_state.conn) {
950 if ((rc = virConnectClose(nc_state.conn)) != 0) {
951 LOGDEBUG("refcount on close was non-zero: %d\n", rc);
952 }
953 }
954 nc_state.conn = virConnectOpen(nc_state.uri);
955 return (NULL);
956 }
957
958 //!
959 //! Checks and reset the hypervisor connection.
960 //!
961 //! @return a pointer to the hypervisor connection structure or NULL if we failed.
962 //!
963 virConnectPtr lock_hypervisor_conn()
964 {
965 int rc = 0;
966 int status = 0;
967 pid_t cpid = 0;
968 pthread_t thread = { 0 };
969 long long thread_par = 0L;
970 boolean bail = FALSE;
971 //boolean try_again = FALSE;
972 struct timespec ts = { 0 };
973 virConnectPtr tmp_conn = NULL;
974
975 // Acquire our hypervisor semaphore
976 sem_p(hyp_sem);
977
978 if (call_hooks(NC_EVENT_PRE_HYP_CHECK, nc_state.home)) {
979 LOGFATAL("hooks prevented check on the hypervisor\n");
980 sem_v(hyp_sem);
981 return NULL;
982 }
983 // Fork off a process just to open and immediately close a libvirt connection.
984 // The purpose is to try to identify periods when open or close calls block indefinitely.
985 // Success in the child process does not guarantee success in the parent process, but
986 // hopefully it will flag certain bad conditions and will allow the parent to avoid them.
987
988 if ((cpid = fork()) < 0) { // fork error
989 LOGERROR("failed to fork to check hypervisor connection\n");
990 bail = TRUE; // we are in big trouble if we cannot fork
991 } else if (cpid == 0) { // child process - checks on the connection
992 if ((tmp_conn = virConnectOpen(nc_state.uri)) == NULL)
993 exit(1);
994 virConnectClose(tmp_conn);
995 exit(0);
996 } else { // parent process - waits for the child, kills it if necessary
997 if ((rc = timewait(cpid, &status, LIBVIRT_TIMEOUT_SEC)) < 0) {
998 LOGERROR("failed to wait for forked process: %s\n", strerror(errno));
999 bail = TRUE;
1000 } else if (rc == 0) {
1001 LOGERROR("timed out waiting for hypervisor checker pid=%d\n", cpid);
1002 bail = TRUE;
1003 } else if (WEXITSTATUS(status) != 0) {
1004 LOGERROR("child process failed to connect to hypervisor\n");
1005 bail = TRUE;
1006 }
1007 // terminate the child, if any
1008 killwait(cpid);
1009 }
1010
1011 if (bail) {
1012 sem_v(hyp_sem);
1013 return NULL; // better fail the operation than block the whole NC
1014 }
1015
1016 LOGTRACE("process check for libvirt succeeded\n");
1017
1018 // At this point, the check for libvirt done in a separate process was
1019 // successful, so we proceed to close and reopen the connection in a
1020 // separate thread, which we will try to wake up with SIGUSR1 if it
1021 // blocks for too long (as a last-resource effort). The reason we reset
1022 // the connection so often is because libvirt operations have a
1023 // tendency to block indefinitely if we do not do this.
1024
1025 if (pthread_create(&thread, NULL, libvirt_thread, (void *)&thread_par) != 0) {
1026 LOGERROR("failed to create the libvirt refreshing thread\n");
1027 bail = TRUE;
1028 } else {
1029 for (;;) {
1030 if (clock_gettime(CLOCK_REALTIME, &ts) == -1) {
1031 LOGERROR("failed to obtain time\n");
1032 bail = TRUE;
1033 break;
1034 }
1035
1036 ts.tv_sec += LIBVIRT_TIMEOUT_SEC;
1037 if ((rc = pthread_timedjoin_np(thread, NULL, &ts)) == 0)
1038 break; // all is well
1039
1040 if (rc != ETIMEDOUT) { // error other than timeout
1041 LOGERROR("failed to wait for libvirt refreshing thread (rc=%d)\n", rc);
1042 bail = TRUE;
1043 break;
1044 }
1045
1046 LOGERROR("timed out on libvirt refreshing thread\n");
1047 pthread_kill(thread, SIGUSR1);
1048 sleep(1);
1049 }
1050 }
1051
1052 if (bail) {
1053 sem_v(hyp_sem);
1054 return NULL;
1055 }
1056 LOGTRACE("thread check for libvirt succeeded\n");
1057
1058 if (nc_state.conn == NULL) {
1059 LOGERROR("failed to connect to %s\n", nc_state.uri);
1060 sem_v(hyp_sem);
1061 return NULL;
1062 }
1063 return nc_state.conn;
1064 }
1065
//!
//! Releases the hypervisor semaphore (hyp_sem), which lock_hypervisor_conn()
//! leaves held when it returns a connection. The libvirt connection itself is
//! not closed here.
//!
void unlock_hypervisor_conn()
{
    sem_v(hyp_sem);
}
1073
1074 //!
1075 //! Instance state state machine.
1076 //!
1077 //! @param[in] instance a pointer to the instance to modify
1078 //! @param[in] state the new instance state
1079 //!
1080 void change_state(ncInstance * instance, instance_states state)
1081 {
1082 int old_state = instance->state;
1083
1084 instance->state = ((int)state);
1085 switch (state) { /* mapping from NC's internal states into external ones */
1086 case STAGING:
1087 case CANCELED:
1088 // Mark primary and secondary network interfaces as attached
1089 euca_strncpy(instance->ncnet.stateName, VOL_STATE_ATTACHED, sizeof(instance->ncnet.stateName)); // primary nic
1090 for (int i = 0; i < EUCA_MAX_NICS; i++) { // secondary nics in VPC mode only
1091 if (strlen(instance->secNetCfgs[i].interfaceId) == 0)
1092 continue; // empty slot, move on
1093 else
1094 euca_strncpy(instance->secNetCfgs[i].stateName, VOL_STATE_ATTACHED, sizeof(instance->secNetCfgs[i].stateName));
1095 }
1096 instance->stateCode = PENDING;
1097 break;
1098 case BOOTING:
1099 case RUNNING:
1100 case BLOCKED:
1101 case PAUSED:
1102 instance->stateCode = EXTANT;
1103 instance->retries = LIBVIRT_QUERY_RETRIES;
1104 break;
1105 case CRASHED:
1106 case BUNDLING_SHUTDOWN:
1107 case BUNDLING_SHUTOFF:
1108 case CREATEIMAGE_SHUTDOWN:
1109 case CREATEIMAGE_SHUTOFF:
1110 case SHUTDOWN:
1111 case SHUTOFF:
1112 if (instance->stateCode != EXTANT) {
1113 instance->stateCode = PENDING;
1114 }
1115 instance->retries = LIBVIRT_QUERY_RETRIES;
1116 break;
1117 case TEARDOWN:
1118 // Mark primary and secondary network interfaces as detached
1119 euca_strncpy(instance->ncnet.stateName, VOL_STATE_DETACHED, sizeof(instance->ncnet.stateName)); // primary nic
1120 for (int i = 0; i < EUCA_MAX_NICS; i++) { // secondary nics in VPC mode only
1121 if (strlen(instance->secNetCfgs[i].interfaceId) == 0)
1122 continue; // empty slot, move on
1123 else
1124 euca_strncpy(instance->secNetCfgs[i].stateName, VOL_STATE_DETACHED, sizeof(instance->secNetCfgs[i].stateName));
1125 }
1126 instance->stateCode = TEARDOWN;
1127 break;
1128 default:
1129 LOGERROR("[%s] unexpected state (%d)\n", instance->instanceId, instance->state);
1130 return;
1131 }
1132
1133 euca_strncpy(instance->stateName, instance_state_names[instance->stateCode], CHAR_BUFFER_SIZE);
1134 if (old_state != state) {
1135 LOGDEBUG("[%s] state change for instance: %s -> %s (%s)\n",
1136 instance->instanceId, instance_state_names[old_state], instance_state_names[instance->state], instance_state_names[instance->stateCode]);
1137 }
1138 }
1139
1140 //!
1141 //! waits indefinitely until a state transition takes place (timeouts are implemented in the
1142 //! monitoring thread) and returns 0 if from_state->to_state transition takes place and 1 otherwise
1143 //!
1144 //! @param[in] instance a pointer to the instance we're monitoring
1145 //! @param[in] from_state the starting state of the transition
1146 //! @param[in] to_state the ending state of the transition
1147 //!
1148 //! @return EUCA_OK on success or EUCA_ERROR on failure.
1149 //!
1150 int wait_state_transition(ncInstance * instance, instance_states from_state, instance_states to_state)
1151 {
1152 instance_states current_state = NO_STATE;
1153
1154 while (1) {
1155 current_state = instance->state;
1156 if (current_state == to_state)
1157 return (EUCA_OK);
1158
1159 if (current_state != from_state)
1160 return (EUCA_ERROR);
1161
1162 // no point in checking more frequently
1163 sleep(MONITORING_PERIOD);
1164 }
1165 return (EUCA_ERROR);
1166 }
1167
//!
//! Refresh instance information: asks the hypervisor about the domain named after
//! the instance ID and updates the instance state, migration bookkeeping, private
//! IP address and guest power state accordingly.
//!
//! (This is called while holding inst_sem.)
//!
//! Instances in TEARDOWN, STAGING, BUNDLING_SHUTOFF or CREATEIMAGE_SHUTOFF state
//! are skipped without contacting the hypervisor.
//!
//! @param[in] nc a pointer to the global NC state structure.
//! @param[in] instance a pointer to the instance being refreshed
//!
static void refresh_instance_info(struct nc_state_t *nc, ncInstance * instance)
{
    int error = 0;
    int rc = 0;
    char *ip = NULL;
    virDomainInfo info = { 0 };
    instance_states new_state = NO_STATE;
    instance_states old_state = instance->state;

    // no need to bug for domains without state on Hypervisor
    if (old_state == TEARDOWN || old_state == STAGING || old_state == BUNDLING_SHUTOFF || old_state == CREATEIMAGE_SHUTOFF)
        return;

    {                                  // all this is done while holding the hypervisor lock, with a valid connection
        virConnectPtr conn = lock_hypervisor_conn();
        if (conn == NULL) {
            // lock_hypervisor_conn() has already released the hypervisor
            // semaphore on failure, so there is nothing to unlock here
            hypervisor_conn_errors++;
            // This is the last resort: restarting libvirtd
            if (hypervisor_conn_errors >= MAX_CONNECTION_ERRORS) {
                LOGWARN("Got %d connection errors to libvirt. Restarting libvirtd service...\n", hypervisor_conn_errors);
                euca_execlp(NULL, nc_state.rootwrap_cmd_path, "/sbin/service", "libvirtd", "restart", NULL);
                sleep(LIBVIRT_TIMEOUT_SEC);
            }
            return;
        } else {
            hypervisor_conn_errors = 0;
        }

        // from here on every return path must call unlock_hypervisor_conn()
        virDomainPtr dom = virDomainLookupByName(conn, instance->instanceId);

        if (dom == NULL) {             // hypervisor doesn't know about it
            if (old_state == BUNDLING_SHUTDOWN) {
                LOGINFO("[%s] detected disappearance of bundled domain\n", instance->instanceId);
                change_state(instance, BUNDLING_SHUTOFF);
            } else if (old_state == CREATEIMAGE_SHUTDOWN) {
                LOGINFO("[%s] detected disappearance of createImage domain\n", instance->instanceId);
                change_state(instance, CREATEIMAGE_SHUTOFF);
            } else if (old_state == RUNNING || old_state == BLOCKED || old_state == PAUSED || old_state == SHUTDOWN) {
                // If we just finished migration, then this is normal.
                //
                // Could this be a bad assumption if the
                // virDomainLookupByName() call above returns NULL for
                // some transient reason rather than because hypervisor
                // doesn't know of the domain any more?
                if (is_migration_src(instance)) {
                    if (instance->migration_state == MIGRATION_IN_PROGRESS) {
                        // This usually occurs when there has been some
                        // glitch in the migration: an i/o error or
                        // reset connection. When that happens, we do
                        // *not* want to shut off the instance!
                        //
                        // It can also happen absent an anomaly, such as
                        // when refresh_instance_info() is called right
                        // as the migration is completing (there's a race).
                        LOGDEBUG("[%s] possible migration anomaly, not yet assuming completion\n", instance->instanceId);
                        unlock_hypervisor_conn();
                        return;
                    }
                    LOGINFO("[%s] migration completed (state='%s'), cleaning up\n", instance->instanceId, migration_state_names[instance->migration_state]);
                    change_state(instance, SHUTOFF);
                    // NOTE(review): this path returns without calling
                    // save_instance_struct(), unlike the paths below
                    unlock_hypervisor_conn();
                    return;
                }
                // most likely the user has shut it down from the inside
                if (instance->stop_requested) {
                    LOGDEBUG("[%s] ignoring domain in stopped state\n", instance->instanceId);
                } else if (instance->terminationRequestedTime) {
                    LOGDEBUG("[%s] hypervisor not finding the terminating domain\n", instance->instanceId);
                } else if (instance->retries) {
                    // tolerate a limited number of failed lookups before giving up on the domain
                    LOGWARN("[%s] hypervisor failed to find domain, will retry %d more time(s)\n", instance->instanceId, instance->retries);
                    instance->retries--;
                } else {
                    LOGWARN("[%s] hypervisor failed to find domain, assuming it was shut off\n", instance->instanceId);
                    change_state(instance, SHUTOFF);
                }
            }
            // else 'old_state' stays in SHUTOFF, BOOTING, CANCELED, or CRASHED

            // set guest power state
            strncpy(instance->guestStateName, GUEST_STATE_POWERED_OFF, CHAR_BUFFER_SIZE);

            // persist state updates to disk
            save_instance_struct(instance);

            unlock_hypervisor_conn();
            return;
        }

        error = virDomainGetInfo(dom, &info);
        if ((error < 0) || (info.state == VIR_DOMAIN_NOSTATE)) {
            LOGWARN("[%s] failed to get information for domain\n", instance->instanceId);
            // what to do? hopefully we'll find out more later
            virDomainFree(dom);
            unlock_hypervisor_conn();
            return;
        }

        // the state reported by libvirt is used directly as an NC instance state
        // (assumes the numeric values of the two enumerations line up -- TODO confirm)
        new_state = info.state;
        switch (old_state) {
        case BOOTING:
        case RUNNING:
        case BLOCKED:
        case PAUSED:
            // migration-related logic
            if (is_migration_dst(instance)) {
                if (old_state == BOOTING && new_state == PAUSED) {
                    // the incoming domain has shown up in paused state: migration has started
                    incoming_migrations_in_progress++;
                    LOGINFO("[%s] incoming (%s < %s) migration in progress (1 of %d)\n", instance->instanceId, instance->migration_dst, instance->migration_src,
                            incoming_migrations_in_progress);
                    instance->migration_state = MIGRATION_IN_PROGRESS;
                    LOGDEBUG("[%s] incoming (%s < %s) migration_state set to '%s'\n", instance->instanceId,
                             instance->migration_dst, instance->migration_src, migration_state_names[instance->migration_state]);

                    if (!strcmp(nc->pEucaNet->sMode, NETMODE_VPCMIDO)) {
                        bridge_instance_interfaces_remove(nc, instance);
                    }
                    if (!strcmp(nc->pEucaNet->sMode, NETMODE_EDGE)) {
                        // interface name is "vn_" followed by the instance ID, cut to fit 16 bytes
                        char iface[16];
                        snprintf(iface, 16, "vn_%s", instance->instanceId);
                        bridge_interface_set_hairpin(nc, instance, iface);
                    }
                } else if ((old_state == BOOTING || old_state == PAUSED)
                           && (new_state == RUNNING || new_state == BLOCKED)) {
                    LOGINFO("[%s] completing incoming (%s < %s) migration...\n", instance->instanceId, instance->migration_dst, instance->migration_src);
                    // wipe all migration bookkeeping on the instance
                    instance->migration_state = NOT_MIGRATING;  // done!
                    bzero(instance->migration_src, HOSTNAME_SIZE);
                    bzero(instance->migration_dst, HOSTNAME_SIZE);
                    bzero(instance->migration_credentials, CREDENTIAL_SIZE);
                    instance->migrationTime = 0;
                    save_instance_struct(instance);
                    // copy_instances is called upon return in monitoring_thread().
                    incoming_migrations_in_progress--;
                    LOGINFO("[%s] incoming migration complete (%d other incoming migration[s] actively in progress)\n", instance->instanceId, incoming_migrations_in_progress);
                    // If no remaining incoming or pending migrations, deauthorize all clients.
                    // TO-DO: Consolidate with similar sequence in handlers_kvm.c into a utility function?
                    if (!incoming_migrations_in_progress) {
                        int incoming_migrations_pending = 0;
                        int incoming_migrations_counted = 0;
                        LOGINFO("no remaining active incoming migrations -- checking to see if there are any pending migrations\n");
                        bunchOfInstances *head = NULL;
                        for (head = global_instances; head; head = head->next) {
                            if ((head->instance->migration_state == MIGRATION_PREPARING) || (head->instance->migration_state == MIGRATION_READY)) {
                                LOGINFO("[%s] is pending migration, migration_state='%s', deferring deauthorization of migration keys\n", head->instance->instanceId,
                                        migration_state_names[head->instance->migration_state]);
                                incoming_migrations_pending++;
                            }
                            // Belt and suspenders...
                            if ((head->instance->migration_state == MIGRATION_IN_PROGRESS) && !strcmp(nc_state.ip, head->instance->migration_dst)) {
                                LOGWARN("[%s] Possible internal bug detected: instance migration_state='%s', but incoming_migrations_in_progress=%d\n", head->instance->instanceId,
                                        migration_state_names[head->instance->migration_state], incoming_migrations_in_progress);
                                incoming_migrations_counted++;
                            }
                        }
                        if (incoming_migrations_counted != incoming_migrations_in_progress) {
                            LOGWARN("Possible internal bug detected: incoming_migrations_in_progress=%d, but %d incoming migrations counted\n", incoming_migrations_in_progress,
                                    incoming_migrations_counted);
                        }
                        if (!incoming_migrations_pending) {
                            LOGINFO("no remaining incoming or pending migrations -- deauthorizing all migration client keys\n");
                            deauthorize_migration_keys(FALSE);
                        }
                    } else {
                        // Verify that our count of incoming_migrations_in_progress matches our version of reality.
                        bunchOfInstances *head = NULL;
                        int incoming_migrations_counted = 0;
                        for (head = global_instances; head; head = head->next) {
                            if ((head->instance->migration_state == MIGRATION_IN_PROGRESS) && !strcmp(nc_state.ip, head->instance->migration_dst)) {
                                incoming_migrations_counted++;
                            }
                        }
                        if (incoming_migrations_counted != incoming_migrations_in_progress) {
                            LOGWARN("Possible internal bug detected: incoming_migrations_in_progress=%d, but %d incoming migrations counted\n", incoming_migrations_in_progress,
                                    incoming_migrations_counted);
                        }
                    }
                } else if (new_state == SHUTOFF || new_state == SHUTDOWN) {
                    // this is normal at the beginning of incoming migration, before a domain is created in PAUSED state
                    // (the break leaves the switch, so the instance state is not changed)
                    break;
                }
            }

            if (new_state == SHUTOFF || new_state == SHUTDOWN || new_state == CRASHED) {
                LOGWARN("[%s] hypervisor reported previously running domain as %s\n", instance->instanceId, instance_state_names[new_state]);
            }
            // change to state, whatever it happens to be
            change_state(instance, new_state);
            break;
        case SHUTDOWN:
        case SHUTOFF:
        case CRASHED:
            if (new_state == RUNNING || new_state == BLOCKED || new_state == PAUSED) {
                // cannot go back!
                LOGWARN("[%s] detected prodigal domain, terminating it\n", instance->instanceId);
                virDomainDestroy(dom);
            } else {
                change_state(instance, new_state);
            }
            break;
        case BUNDLING_SHUTDOWN:
        case CREATEIMAGE_SHUTDOWN:
            // only logged: the instance state is left as it is
            LOGDEBUG("[%s] hypervisor state for bundle/createImage domain is %s\n", instance->instanceId, instance_state_names[new_state]);
            break;
        default:
            LOGERROR("[%s] unexpected state (%d) in refresh\n", instance->instanceId, old_state);
        }

        virDomainFree(dom);
        unlock_hypervisor_conn();
    }

    // if instance is running, try to find out its IP address
    if (instance->state == RUNNING || instance->state == BLOCKED || instance->state == PAUSED) {
        ip = NULL;

        // "0.0.0.0" stands for a private IP that has not been discovered yet
        if (!strncmp(instance->ncnet.privateIp, "0.0.0.0", INET_ADDR_LEN)) {
            rc = MAC2IP(instance->ncnet.privateMac, &ip);
            if (!rc && ip) {
                LOGINFO("[%s] discovered private IP %s for instance\n", instance->instanceId, ip);
                euca_strncpy(instance->ncnet.privateIp, ip, INET_ADDR_LEN);
                EUCA_FREE(ip);
            }
        }
        // set guest power state
        strncpy(instance->guestStateName, GUEST_STATE_POWERED_ON, CHAR_BUFFER_SIZE);
    } else {
        strncpy(instance->guestStateName, GUEST_STATE_POWERED_OFF, CHAR_BUFFER_SIZE);
    }

    // persist state updates to disk
    save_instance_struct(instance);
}
1407
1408 //!
1409 //! copying the linked list for use by Describe* requests
1410 //!
1411 void copy_instances(void)
1412 {
1413 ncInstance *instance = NULL;
1414 ncInstance *src_instance = NULL;
1415 ncInstance *dst_instance = NULL;
1416 bunchOfInstances *head = NULL;
1417 bunchOfInstances *container = NULL;
1418
1419 sem_p(inst_copy_sem);
1420 {
1421 // free the old linked list copy
1422 for (head = global_instances_copy; head;) {
1423 container = head;
1424 instance = head->instance;
1425 head = head->next;
1426 EUCA_FREE(instance);
1427 EUCA_FREE(container);
1428 }
1429
1430 global_instances_copy = NULL;
1431
1432 // make a fresh copy
1433 for (head = global_instances; head; head = head->next) {
1434 src_instance = head->instance;
1435 dst_instance = (ncInstance *) EUCA_ALLOC(1, sizeof(ncInstance));
1436 memcpy(dst_instance, src_instance, sizeof(ncInstance));
1437 add_instance(&global_instances_copy, dst_instance);
1438 }
1439 }
1440 sem_v(inst_copy_sem);
1441 }
1442
//!
//! Helper that is used during initialization and by the monitoring thread: reads
//! the logging parameters from the configuration and pushes them into the
//! logging subsystem.
//!
static void update_log_params(void)
{
    int level = 0;
    int roll_number = 0;
    long max_size_bytes = 0;
    char *prefix = NULL;

    // read log params from config file and update in-memory configuration
    configReadLogParams(&level, &roll_number, &max_size_bytes, &prefix);

    // reconfigure the logging subsystem to use the new values, if any
    log_params_set(level, roll_number, max_size_bytes);
    log_prefix_set(prefix);
    EUCA_FREE(prefix);

    // the log facility is optional; an empty value is ignored
    char *facility = configFileValue("LOGFACILITY");
    if (facility == NULL) {
        return;
    }

    if (facility[0] != '\0') {
        log_facility_set(facility, "nc");
    }
    EUCA_FREE(facility);
}
1469
1470 //!
1471 //! helper that is used during initialization and by monitornig thread
1472 //!
1473 static void update_ebs_params(void)
1474 {
1475 char *ceph_user = getConfString(nc_state.configFiles, 2, CONFIG_NC_CEPH_USER);
1476 char *ceph_keys = getConfString(nc_state.configFiles, 2, CONFIG_NC_CEPH_KEYS);
1477 char *ceph_conf = getConfString(nc_state.configFiles, 2, CONFIG_NC_CEPH_CONF);
1478 init_iscsi(nc_state.home,
1479 (ceph_user == NULL) ? (DEFAULT_CEPH_USER) : (ceph_user),
1480 (ceph_keys == NULL) ? (DEFAULT_CEPH_KEYRING) : (ceph_keys), (ceph_conf == NULL) ? (DEFAULT_CEPH_CONF) : (ceph_conf));
1481 EUCA_FREE(ceph_user);
1482 EUCA_FREE(ceph_keys);
1483 EUCA_FREE(ceph_conf);
1484 }
1485
1486 //!
1487 //! This defines the NC monitoring thread
1488 //!
1489 //! @param[in] arg a transparent pointer to the global NC state structure
1490 //!
1491 //! @return Always return NULL
1492 //!
1493 void *monitoring_thread(void *arg)
1494 {
1495 #define EUCANETD_PID_FILE "%s/var/run/eucalyptus/eucanetd.pid"
1496 #define EUCANETD_SERVICE_NAME "eucanetd"
1497
1498 int i = 0;
1499 int tmpint = 0;
1500 int left = 0;
1501 int cleaned_up = 0;
1502 int destroy_files = 0;
1503 char *psPid = NULL;
1504 char sPidFile[EUCA_MAX_PATH] = "";
1505 char nfile[EUCA_MAX_PATH] = "";
1506 char nfilefinal[EUCA_MAX_PATH] = "";
1507 char URL[EUCA_MAX_PATH] = "";
1508 char ccHost[EUCA_MAX_PATH] = "";
1509 char clcHost[EUCA_MAX_PATH] = "";
1510 char tmpbuf[EUCA_MAX_PATH] = "";
1511 long long iteration = 0;
1512 long long work_fs_size_mb = 0;
1513 long long work_fs_avail_mb = 0;
1514 long long cache_fs_size_mb = 0;
1515 long long cache_fs_avail_mb = 0;
1516 FILE *FP = NULL;
1517 time_t now = 0;
1518 struct nc_state_t *nc = NULL;
1519 bunchOfInstances *head = NULL;
1520 bunchOfInstances *vnhead = NULL;
1521 ncInstance *instance = NULL;
1522 ncInstance *vninstance = NULL;
1523
1524 LOGINFO("spawning monitoring thread\n");
1525 if (arg == NULL) {
1526 LOGFATAL("internal error (NULL parameter to monitoring_thread)\n");
1527 return NULL;
1528 }
1529
1530 nc = ((struct nc_state_t *)arg);
1531
1532 for (iteration = 0; TRUE; iteration++) {
1533 now = time(NULL);
1534
1535 // EUCA-10056 we need to check if EUCANETD is running when in EDGE of VPC mode
1536 if (!strcmp(nc_state.pEucaNet->sMode, NETMODE_EDGE)) {
1537 snprintf(sPidFile, EUCA_MAX_PATH, EUCANETD_PID_FILE, nc_state.home);
1538 if ((psPid = file2str(sPidFile)) != NULL) {
1539 // Is the
1540 if (euca_is_running(atoi(psPid), EUCANETD_SERVICE_NAME)) {
1541 if (nc_state.isEucanetdEnabled == FALSE)
1542 LOGDEBUG("Service %s detected and running.\n", EUCANETD_SERVICE_NAME);
1543 nc_state.isEucanetdEnabled = TRUE;
1544 } else if (nc_state.isEucanetdEnabled) {
1545 // EUCANETD isn't running... Throw a fault for the user to correct
1546 LOGERROR("Service %s not running (even if PID file is detected).\n", EUCANETD_SERVICE_NAME);
1547 nc_state.isEucanetdEnabled = FALSE;
1548 log_eucafault("1008", "daemon", EUCANETD_SERVICE_NAME, NULL);
1549 }
1550 EUCA_FREE(psPid);
1551 } else if (nc_state.isEucanetdEnabled) {
1552 // EUCANETD isn't running... Throw a fault for the user to correct
1553 LOGERROR("Service %s not running.\n", EUCANETD_SERVICE_NAME);
1554 nc_state.isEucanetdEnabled = FALSE;
1555 log_eucafault("1008", "daemon", EUCANETD_SERVICE_NAME, NULL);
1556 }
1557 }
1558
1559 sem_p(inst_sem);
1560
1561 snprintf(nfile, EUCA_MAX_PATH, EUCALYPTUS_LOG_DIR "/local-net.stage", nc_state.home);
1562 snprintf(nfilefinal, EUCA_MAX_PATH, EUCALYPTUS_LOG_DIR "/local-net", nc_state.home);
1563 if ((FP = fopen(nfile, "w")) == NULL) {
1564 LOGWARN("could not open file %s for writing\n", nfile);
1565 } else {
1566 // print out latest CC and CLC IP addr to the local-net file
1567 URL[0] = ccHost[0] = clcHost[0] = '\0';
1568
1569 for (i = 0; i < nc_state.servicesLen; i++) {
1570 if (!strcmp(nc_state.services[i].type, "cluster")) {
1571 if (nc_state.services[i].urisLen > 0) {
1572 memcpy(URL, nc_state.services[i].uris[0], 512);
1573 if (strlen(URL)) {
1574 if (tokenize_uri(URL, tmpbuf, ccHost, &tmpint, tmpbuf)) {
1575 snprintf(ccHost, EUCA_MAX_PATH, "0.0.0.0");
1576 }
1577 }
1578 }
1579 } else if (!strcmp(nc_state.services[i].type, "eucalyptus")) {
1580 if (nc_state.services[i].urisLen > 0) {
1581 memcpy(URL, nc_state.services[i].uris[0], 512);
1582 if (strlen(URL)) {
1583 if (tokenize_uri(URL, tmpbuf, clcHost, &tmpint, tmpbuf)) {
1584 snprintf(clcHost, EUCA_MAX_PATH, "0.0.0.0");
1585 }
1586 }
1587 }
1588 }
1589 }
1590
1591 if (strlen(ccHost)) {
1592 fprintf(FP, "CCIP=%s\n", ccHost);
1593 }
1594
1595 if (strlen(clcHost)) {
1596 fprintf(FP, "CLCIP=%s\n", clcHost);
1597 }
1598 fflush(FP);
1599 }
1600
1601 cleaned_up = 0;
1602 for (head = global_instances; head; head = head->next) {
1603 instance = head->instance;
1604
1605 // query for current state, if any
1606 refresh_instance_info(nc, instance);
1607
1608 // time out logic for migration-ready instances
1609 if (!strcmp(instance->stateName, "Extant") && ((instance->migration_state == MIGRATION_READY) || (instance->migration_state == MIGRATION_PREPARING))
1610 && ((now - instance->migrationTime) > nc_state.migration_ready_threshold)) {
1611 if (instance->migrationTime) {
1612 if (outgoing_migrations_in_progress) {
1613 LOGINFO("[%s] has been in migration state '%s' on source for %d seconds (threshold is %d), but not rolling back due to %d ongoing outgoing migration[s]\n",
1614 instance->instanceId, migration_state_names[instance->migration_state], (int)(now - instance->migrationTime), nc_state.migration_ready_threshold,
1615 outgoing_migrations_in_progress);
1616 continue;
1617 }
1618
1619 LOGWARN("[%s] has been in migration state '%s' on source for %d seconds (threshold is %d), rolling back [%d].\n",
1620 instance->instanceId, migration_state_names[instance->migration_state], (int)(now - instance->migrationTime), nc_state.migration_ready_threshold,
1621 instance->migrationTime);
1622 migration_rollback(instance);
1623 continue;
1624 } else {
1625 if (instance->state == BOOTING) {
1626 // Assume destination node. (Is this a safe assumption?)
1627 LOGDEBUG("[%s] destination node ready: instance in booting state with no migrationTime.\n", instance->instanceId);
1628 } else {
1629 LOGWARN("[%s] in instance state '%s' is ready to migrate but has a zero instance migrationTime.\n",
1630 instance->instanceId, instance_state_names[instance->state]);
1631 migration_rollback(instance);
1632 }
1633 }
1634 }
1635 // don't touch running or canceled threads
1636 if (instance->state != STAGING && instance->state != BOOTING &&
1637 instance->state != SHUTOFF &&
1638 instance->state != SHUTDOWN &&
1639 instance->state != BUNDLING_SHUTDOWN &&
1640 instance->state != BUNDLING_SHUTOFF && instance->state != CREATEIMAGE_SHUTDOWN && instance->state != CREATEIMAGE_SHUTOFF && instance->state != TEARDOWN) {
1641
1642 if (FP && !strcmp(instance->stateName, "Extant")) {
1643 //! @TODO is this still being used?
1644 //! @TODO yes! for EDGE networking
1645 // have a running instance, write its information to local state file
1646 fprintf(FP, "%s %s %s %d %s %s %s\n",
1647 SP(instance->instanceId), SP(nc_state.pEucaNet->sPublicDevice), "NA", instance->ncnet.vlan, SP(instance->ncnet.privateMac),
1648 SP(instance->ncnet.publicIp), SP(instance->ncnet.privateIp));
1649 fflush(FP);
1650 }
1651 continue;
1652 }
1653
1654 if (instance->state == TEARDOWN) {
1655 // it's been long enough, we can forget the instance
1656 if ((now - instance->terminationTime) > nc_state.teardown_state_duration) {
1657 remove_instance(&global_instances, instance);
1658 LOGINFO("[%s] forgetting about instance\n", instance->instanceId);
1659 free_instance(&instance);
1660 break; // need to get out since the list changed
1661 }
1662 continue;
1663 }
1664 // time out logic for STAGING or BOOTING or BUNDLING instances
1665 if (instance->state == STAGING && (now - instance->launchTime) < nc_state.staging_cleanup_threshold)
1666 continue; // hasn't been long enough, spare it
1667
1668 if (instance->state == BOOTING && (now - instance->bootTime) < nc_state.booting_cleanup_threshold)
1669 continue;
1670
1671 if ((instance->state == BUNDLING_SHUTDOWN || instance->state == BUNDLING_SHUTOFF)
1672 && (now - instance->bundlingTime) < nc_state.bundling_cleanup_threshold)
1673 continue;
1674
1675 if ((instance->state == CREATEIMAGE_SHUTDOWN || instance->state == CREATEIMAGE_SHUTOFF)
1676 && (now - instance->createImageTime) < nc_state.createImage_cleanup_threshold)
1677 continue;
1678
1679 // terminate a booting instance as a special case, though not if it's an incoming migration
1680 if (instance->state == BOOTING) {
1681 if ((instance->migration_state == MIGRATION_PREPARING) || (instance->migration_state == MIGRATION_READY)) {
1682 LOGDEBUG("[%s] instance has exceeded BOOTING cleanup threshold of %d seconds, but has migration_state=%s, so not terminating\n", instance->instanceId,
1683 nc_state.booting_cleanup_threshold, migration_state_names[instance->migration_state]);
1684 continue;
1685 } else {
1686 LOGDEBUG("[%s] finding and terminating BOOTING instance, which has exceeded cleanup threshold of %d seconds\n", instance->instanceId,
1687 nc_state.booting_cleanup_threshold);
1688
1689 // do the shutdown in a thread
1690 pthread_attr_t tattr;
1691 pthread_t tid;
1692 pthread_attr_init(&tattr);
1693 pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_DETACHED);
1694 void *param = (void *)strdup(instance->instanceId);
1695 if (pthread_create(&tid, &tattr, terminating_thread, (void *)param) != 0) {
1696 LOGERROR("[%s] failed to start VM termination thread\n", instance->instanceId);
1697 }
1698 }
1699 }
1700
1701 if (cleaned_up < nc_state.concurrent_cleanup_ops) {
1702 // ok, it's been condemned => destroy the files
1703 cleaned_up++;
1704 destroy_files = !nc_state.save_instance_files;
1705 if (call_hooks(NC_EVENT_PRE_CLEAN, instance->instancePath)) {
1706 if (destroy_files) {
1707 LOGERROR("[%s] cancelled instance cleanup via hooks\n", instance->instanceId);
1708 destroy_files = 0;
1709 }
1710 }
1711
1712 LOGINFO("[%s] cleaning up state for instance%s\n", instance->instanceId, (destroy_files) ? ("") : (" (but keeping the files)"));
1713 if (destroy_instance_backing(instance, destroy_files)) {
1714 LOGWARN("[%s] failed to cleanup instance state\n", instance->instanceId);
1715 }
1716 // check to see if this is the last instance running on vlan, handle local networking information drop
1717 left = 0;
1718 for (vnhead = global_instances; vnhead; vnhead = vnhead->next) {
1719 vninstance = vnhead->instance;
1720 if (vninstance->ncnet.vlan == (instance->ncnet).vlan && strcmp(instance->instanceId, vninstance->instanceId)) {
1721 left++;
1722 }
1723 }
1724
1725 change_state(instance, TEARDOWN); // TEARDOWN = no more resources
1726 instance->terminationTime = time(NULL);
1727 }
1728 }
1729
1730 if (FP) {
1731 fclose(FP);
1732 rename(nfile, nfilefinal);
1733 }
1734
1735 copy_instances(); // copy global_instances to global_instances_copy
1736 sem_v(inst_sem);
1737
1738 if (head) {
1739 // we got out because of modified list, no need to sleep now
1740 continue;
1741 }
1742
1743 sleep(MONITORING_PERIOD);
1744
1745 // do this on every iteration (every MONITORING_PERIOD seconds)
1746 if ((iteration % 1) == 0) {
1747 // see if config file has changed and react to those changes
1748 if (isConfigModified(nc_state.configFiles, 2) > 0) { // config modification time has changed
1749 if (readConfigFile(nc_state.configFiles, 2)) {
1750 // something has changed that can be read in
1751 LOGINFO("configuration file has been modified, ingressing new options\n");
1752
1753 // log-related options
1754 update_log_params();
1755
1756 // EBS-related options
1757 update_ebs_params();
1758
1759 //! @todo pick up other NC options dynamically?
1760 }
1761 }
1762 }
1763 // do this every 10th iteration (every 10*MONITORING_PERIOD seconds)
1764 if ((iteration % 10) == 0) {
1765 //! @todo 3.2 change 1 to 10
1766
1767 // check file system state and blobstore state
1768 blobstore_meta work_meta, cache_meta;
1769 if (stat_backing_store(NULL, &work_meta, &cache_meta) == EUCA_OK) {
1770 work_fs_size_mb = (long long)(work_meta.fs_bytes_size / MEGABYTE);
1771 work_fs_avail_mb = (long long)(work_meta.fs_bytes_available / MEGABYTE);
1772 cache_fs_size_mb = (long long)(cache_meta.fs_bytes_size / MEGABYTE);
1773 cache_fs_avail_mb = (long long)(cache_meta.fs_bytes_available / MEGABYTE);
1774
1775 if (work_fs_avail_mb < ((work_fs_size_mb * DISK_TOO_LOW_PERCENT) / 100)) {
1776 log_eucafault("1003", "component", euca_this_component_name, "file", work_meta.path, NULL);
1777 }
1778
1779 if (cache_fs_size_mb > 0 && cache_fs_avail_mb < ((cache_fs_size_mb * DISK_TOO_LOW_PERCENT) / 100)) {
1780 log_eucafault("1003", "component", euca_this_component_name, "file", cache_meta.path, NULL);
1781 }
1782 //! @todo add more faults (cache or work reserved exceeds available space on file system)
1783 }
1784 }
1785 }
1786
1787 return NULL;
1788
1789 #undef EUCANETD_PID_FILE
1790 #undef EUCANETD_SERVICE_NAME
1791 }
1792
1793 //!
1794 //! Fills in some of the fields of instance struct
1795 //!
1796 //! @param[in] instance struct to fill in
1797 //!
1798 void set_instance_params(ncInstance * instance)
1799 {
1800 char *s = NULL;
1801
1802 if (nc_state.config_use_virtio_net) {
1803 instance->params.nicType = NIC_TYPE_VIRTIO;
1804 } else {
1805 if (strstr(instance->platform, "windows")) {
1806 instance->params.nicType = NIC_TYPE_WINDOWS;
1807 } else {
1808 instance->params.nicType = NIC_TYPE_LINUX;
1809 }
1810 }
1811
1812 euca_strncpy(instance->hypervisorType, nc_state.H->name, sizeof(instance->hypervisorType)); // set the hypervisor type
1813
1814 instance->hypervisorCapability = nc_state.capability; // set the cap (xen/hw/hw+xen)
1815 if ((s = system_output("getconf LONG_BIT")) != NULL) {
1816 int bitness = atoi(s);
1817 if (bitness == 32 || bitness == 64) {
1818 instance->hypervisorBitness = bitness;
1819 } else {
1820 LOGWARN("[%s] can't determine the host's bitness (%s, assuming 64)\n", instance->instanceId, s);
1821 instance->hypervisorBitness = 64;
1822 }
1823 EUCA_FREE(s);
1824 } else {
1825 LOGWARN("[%s] can't determine the host's bitness (assuming 64)\n", instance->instanceId);
1826 instance->hypervisorBitness = 64;
1827 }
1828 instance->combinePartitions = nc_state.convert_to_disk;
1829 instance->do_inject_key = nc_state.do_inject_key;
1830 }
1831
1832 //!
1833 //! Defines the instance startup thread
1834 //!
1835 //! @param[in] arg a transparent pointer to the instance structure to start
1836 //!
1837 //! @return Always return NULL
1838 //!
1839 void *startup_thread(void *arg)
1840 {
1841 int i = 0;
1842 int error = EUCA_OK;
1843 int status = 0;
1844 int rc = 0;
1845 int create_timedout = 0;
1846 char *xml = NULL;
1847 char brname[IF_NAME_LEN] = "";
1848 pid_t cpid = 0;
1849 boolean try_killing = FALSE;
1850 boolean created = FALSE;
1851 ncInstance *instance = ((ncInstance *) arg);
1852 virDomainPtr dom = NULL;
1853
1854 LOGDEBUG("[%s] spawning startup thread\n", instance->instanceId);
1855 virConnectPtr conn = lock_hypervisor_conn();
1856 if (conn == NULL) {
1857 LOGERROR("[%s] could not contact the hypervisor, abandoning the instance\n", instance->instanceId);
1858 hypervisor_conn_errors++;
1859 goto shutoff;
1860 }
1861 unlock_hypervisor_conn(); // unlock right away, since we are just checking on it
1862
1863 // set up networking
1864 snprintf(brname, IF_NAME_LEN, "%s", nc_state.pEucaNet->sBridgeDevice);
1865
1866 euca_strncpy(instance->params.guestNicDeviceName, brname, sizeof(instance->params.guestNicDeviceName));
1867
1868 // set parameters like hypervisor type, bitness, NIC type, key injection, etc.
1869 set_instance_params(instance);
1870
1871 if ((error = create_instance_backing(instance, FALSE)) // do the heavy lifting on the disk
1872 || (error = gen_instance_xml(instance)) // create euca-specific instance XML file
1873 || (error = gen_libvirt_instance_xml(instance))) { // transform euca-specific XML into libvirt XML
1874 LOGERROR("[%s] failed to prepare images for instance (error=%d)\n", instance->instanceId, error);
1875 goto shutoff;
1876 }
1877
1878 if (instance->state == TEARDOWN) { // timed out in STAGING
1879 goto free;
1880 }
1881
1882 if (instance->state == CANCELED) {
1883 LOGERROR("[%s] cancelled instance startup\n", instance->instanceId);
1884 goto shutoff;
1885 }
1886
1887 if (call_hooks(NC_EVENT_PRE_BOOT, instance->instancePath)) {
1888 LOGERROR("[%s] cancelled instance startup via hooks\n", instance->instanceId);
1889 goto shutoff;
1890 }
1891
1892 if (instance_network_gate(instance, nc_state.booting_envwait_threshold)) {
1893 LOGERROR("[%s] cancelled instance startup via network_gate\n", instance->instanceId);
1894 goto shutoff;
1895 }
1896
1897 xml = file2str(instance->libvirtFilePath);
1898
1899 save_instance_struct(instance); // to enable NC recovery
1900 sensor_add_resource(instance->instanceId, "instance", instance->uuid);
1901 sensor_set_resource_alias(instance->instanceId, instance->ncnet.privateIp);
1902 update_disk_aliases(instance);
1903
1904 // serialize domain creation as hypervisors can get confused with
1905 // too many simultaneous create requests
1906 LOGTRACE("[%s] instance about to boot\n", instance->instanceId);
1907
1908 for (i = 0; i < MAX_CREATE_TRYS; i++) { // retry loop
1909 // TODO: CHUCK -----> Find better
1910 if (i == 0) {
1911 sleep(10);
1912 }
1913
1914 if (i > 0) {
1915 LOGINFO("[%s] attempt %d of %d to create the instance\n", instance->instanceId, i + 1, MAX_CREATE_TRYS);
1916 }
1917
1918 { // all this is done while holding the hypervisor lock, with a valid connection
1919 virConnectPtr conn = lock_hypervisor_conn();
1920 if (conn == NULL) { // get a new connection for each loop iteration
1921 LOGERROR("[%s] could not contact the hypervisor, abandoning the instance\n", instance->instanceId);
1922 hypervisor_conn_errors++;
1923 goto shutoff;
1924 }
1925
1926 sem_p(loop_sem);
1927
1928 if (i > 0 && create_timedout == 1) {
1929 dom = virDomainLookupByName(conn, instance->instanceId);
1930 if (dom) {
1931
1932 // a forked process failed to return in a timely manner, yet the instance
1933 // launched. Since we can't verify the validity of the instance, terminate and
1934 // let the NC clean up.
1935 LOGERROR("[%s] failed to launch cleanly after %d seconds, destroying instance\n", instance->instanceId, CREATE_TIMEOUT_SEC);
1936 error = virDomainDestroy(dom);
1937 LOGINFO("[%s] instance destroyed - return: %d\n", instance->instanceId, error);
1938
1939 virDomainFree(dom);
1940 sem_v(loop_sem);
1941 unlock_hypervisor_conn();
1942
1943 goto shutoff;
1944 }
1945 }
1946
1947 // We have seen virDomainCreateLinux() on occasion block indefinitely,
1948 // which freezes all activity on the NC since hyp_sem and loop_sem are
1949 // being held by the thread. (This is on Lucid with AppArmor enabled.)
1950 // To protect against that, we invoke the function in a process and
1951 // terminate it after CREATE_TIMEOUT_SEC seconds.
1952 //
1953 // #0 0x00007f359f0b1f93 in poll () from /lib/libc.so.6
1954 // #1 0x00007f359a9a44e2 in ?? () from /usr/lib/libvirt.so.0
1955 // #2 0x00007f359a9a5060 in ?? () from /usr/lib/libvirt.so.0
1956 // #3 0x00007f359a9ac159 in ?? () from /usr/lib/libvirt.so.0
1957 // #4 0x00007f359a98d65b in virDomainCreateXML () from /usr/lib/libvirt.so.0
1958 // #5 0x00007f359b053c8e in startup_thread (arg=0x7f358813bf40) at handlers.c:644
1959 // #6 0x00007f359f3619ca in start_thread () from /lib/libpthread.so.0
1960 // #7 0x00007f359f0be70d in clone () from /lib/libc.so.6
1961 // #8 0x0000000000000000 in ?? ()
1962
1963 if ((cpid = fork()) < 0) { // fork error
1964 LOGERROR("[%s] failed to fork to start instance\n", instance->instanceId);
1965 } else if (cpid == 0) { // child process - creates the domain
1966 if ((dom = virDomainCreateLinux(conn, xml, 0)) != NULL) {
1967 virDomainFree(dom); // To be safe. Docs are not clear on whether the handle exists outside the process.
1968
1969 if (!strcmp(nc_state.pEucaNet->sMode, NETMODE_VPCMIDO)) {
1970 bridge_instance_interfaces_remove(&nc_state, instance);
1971 }
1972 // Fix for EUCA-12608
1973 if (!strcmp(nc_state.pEucaNet->sMode, NETMODE_EDGE)) {
1974 char iface[16];
1975 snprintf(iface, 16, "vn_%s", instance->instanceId);
1976 bridge_interface_set_hairpin(&nc_state, instance, iface);
1977 }
1978
1979 exit(0);
1980 } else {
1981 exit(1);
1982 }
1983 } else {
1984 // parent process - waits for the child, kills it if necessary
1985 try_killing = FALSE;
1986 if ((rc = timewait(cpid, &status, CREATE_TIMEOUT_SEC)) < 0) {
1987 LOGERROR("[%s] failed to wait for forked process: %s\n", instance->instanceId, strerror(errno));
1988 try_killing = TRUE;
1989 } else if (rc == 0) {
1990 LOGERROR("[%s] timed out waiting for forked process pid=%d\n", instance->instanceId, cpid);
1991 create_timedout = 1; // Sometimes a timeout can occur but the instance is running...
1992 try_killing = TRUE;
1993 } else if (WEXITSTATUS(status) != 0) {
1994 LOGERROR("[%s] hypervisor failed to create the instance\n", instance->instanceId);
1995 } else {
1996 created = TRUE;
1997 }
1998
1999 if (try_killing) {
2000 killwait(cpid);
2001 }
2002 }
2003
2004 sem_v(loop_sem);
2005 unlock_hypervisor_conn(); // guard against libvirtd connection badness
2006 }
2007
2008 if (created)
2009 break;
2010
2011 sleep(1);
2012 }
2013
2014 if (!created) {
2015 goto shutoff;
2016 }
2017 //! @TODO bring back correlationId
2018 eventlog("NC", instance->userId, "", "instanceBoot", "begin");
2019
2020 { // make instance state changes while under lock
2021 sem_p(inst_sem);
2022 // check one more time for cancellation
2023 if (instance->state == TEARDOWN) {
2024 // timed out in BOOTING
2025 } else if (instance->state == CANCELED || instance->state == SHUTOFF) {
2026 LOGERROR("[%s] startup of instance was cancelled\n", instance->instanceId);
2027 change_state(instance, SHUTOFF);
2028 } else {
2029 LOGINFO("[%s] booting\n", instance->instanceId);
2030 instance->bootTime = time(NULL);
2031 change_state(instance, BOOTING);
2032 }
2033 copy_instances();
2034 sem_v(inst_sem);
2035 }
2036 goto free;
2037
2038 shutoff: // escape point for error conditions
2039 change_state(instance, SHUTOFF);
2040
2041 free:
2042 EUCA_FREE(xml);
2043 unset_corrid(get_corrid());
2044 return NULL;
2045 }
2046
2047 //!
2048 //! Defines the termination thread.
2049 //!
2050 //! @param[in] arg a transparent pointer to the argument passed to this thread handler
2051 //!
2052 //! @return Always return NULL
2053 //!
2054 void *terminating_thread(void *arg)
2055 {
2056 char *instanceId = (char *)arg;
2057
2058 LOGDEBUG("[%s] spawning terminating thread\n", instanceId);
2059
2060 int err = find_and_terminate_instance(instanceId);
2061 if (err != EUCA_OK) {
2062 goto free;
2063 }
2064
2065 {
2066 sem_p(inst_sem);
2067 ncInstance *instance = find_instance(&global_instances, instanceId);
2068 if (instance == NULL) {
2069 sem_v(inst_sem);
2070 goto free;
2071 }
2072 // change the state and let the monitoring_thread clean up state
2073 if (instance->state != TEARDOWN && instance->state != CANCELED) {
2074 // do not leave TEARDOWN (cleaned up) or CANCELED (already trying to terminate)
2075 if (instance->state == STAGING) {
2076 change_state(instance, CANCELED);
2077 } else {
2078 change_state(instance, SHUTOFF);
2079 }
2080 }
2081 copy_instances();
2082 sem_v(inst_sem);
2083 }
2084 free:
2085 EUCA_FREE(arg);
2086 unset_corrid(get_corrid());
2087 return NULL;
2088 }
2089
2090 //!
2091 //! On startup, adopt instance found running on the hypervisor.
2092 //!
2093 void adopt_instances()
2094 {
2095 int dom_ids[MAXDOMS] = { 0 };
2096 int num_doms = 0;
2097 int i = 0;
2098 int error = 0;
2099 int err = 0;
2100 virDomainInfo info = { 0 };
2101 const char *dom_name = NULL;
2102 ncInstance *instance = NULL;
2103 virDomainPtr dom = NULL;
2104 virConnectPtr conn = NULL;
2105
2106 conn = lock_hypervisor_conn();
2107 while (conn == NULL) {
2108 LOGERROR("Can't get connection to libvirt. Restarting libvirtd service...\n");
2109 euca_execlp(NULL, nc_state.rootwrap_cmd_path, "/sbin/service", "libvirtd", "restart", NULL);
2110 sleep(LIBVIRT_TIMEOUT_SEC);
2111 LOGINFO("Trying to re-connect");
2112 conn = lock_hypervisor_conn();
2113 }
2114
2115 LOGINFO("looking for existing domains\n");
2116 virSetErrorFunc(NULL, libvirt_err_handler);
2117
2118 num_doms = virConnectListDomains(conn, dom_ids, MAXDOMS);
2119 if (num_doms == 0) {
2120 LOGINFO("no currently running domains to adopt\n");
2121 unlock_hypervisor_conn();
2122 return;
2123 }
2124 if (num_doms < 0) {
2125 LOGWARN("failed to find out about running domains\n");
2126 unlock_hypervisor_conn();
2127 return;
2128 }
2129 // WARNING: be sure to call virDomainFree when necessary so as to avoid leaking the virDomainPtr
2130 for (i = 0; i < num_doms; i++) {
2131 dom = virDomainLookupByID(conn, dom_ids[i]);
2132 if (!dom) {
2133 LOGWARN("failed to lookup running domain #%d, ignoring it\n", dom_ids[i]);
2134 continue;
2135 }
2136 error = virDomainGetInfo(dom, &info);
2137 if ((error < 0) || (info.state == VIR_DOMAIN_NOSTATE)) {
2138 LOGWARN("failed to get info on running domain #%d, ignoring it\n", dom_ids[i]);
2139 virDomainFree(dom);
2140 continue;
2141 }
2142
2143 if (info.state == VIR_DOMAIN_SHUTDOWN || info.state == VIR_DOMAIN_SHUTOFF || info.state == VIR_DOMAIN_CRASHED) {
2144 LOGDEBUG("ignoring non-running domain #%d\n", dom_ids[i]);
2145 virDomainFree(dom);
2146 continue;
2147 }
2148
2149 if ((dom_name = virDomainGetName(dom)) == NULL) {
2150 LOGWARN("failed to get name of running domain #%d, ignoring it\n", dom_ids[i]);
2151 virDomainFree(dom);
2152 continue;
2153 }
2154 if (!strcmp(dom_name, "Domain-0")) {
2155 virDomainFree(dom);
2156 continue;
2157 }
2158
2159 if ((instance = load_instance_struct(dom_name)) == NULL) {
2160 LOGWARN("failed to recover Eucalyptus metadata of running domain %s, ignoring it\n", dom_name);
2161 virDomainFree(dom);
2162 continue;
2163 }
2164
2165 virDomainFree(dom);
2166
2167 if (call_hooks(NC_EVENT_ADOPTING, instance->instancePath)) {
2168 LOGINFO("[%s] ignoring running domain due to hooks\n", instance->instanceId);
2169 free_instance(&instance);
2170 continue;
2171 }
2172
2173 change_state(instance, info.state);
2174 sem_p(inst_sem);
2175 {
2176 err = add_instance(&global_instances, instance);
2177 }
2178 sem_v(inst_sem);
2179
2180 if (err) {
2181 free_instance(&instance);
2182 continue;
2183 }
2184
2185 sensor_add_resource(instance->instanceId, "instance", instance->uuid); // ensure the sensor system monitors this instance
2186 sensor_set_resource_alias(instance->instanceId, instance->ncnet.privateIp);
2187 update_disk_aliases(instance);
2188
2189 //! @TODO try to re-check IPs?
2190 LOGINFO("[%s] - adopted running domain from user %s\n", instance->instanceId, instance->userId);
2191 }
2192 unlock_hypervisor_conn();
2193
2194 sem_p(inst_sem);
2195 {
2196 copy_instances(); // copy global_instances to global_instances_copy
2197 }
2198 sem_v(inst_sem);
2199 }
2200
//!
//! Signal handler that does nothing except log, at debug level, the number of
//! the signal it was invoked for.
//!
//! NOTE(review): logging from a signal handler may not be async-signal-safe;
//! confirm that LOGDEBUG() is acceptable in this context.
//!
//! @param[in] sig the number of the signal that was caught
//!
static void nc_signal_handler(int sig)
{
    LOGDEBUG("signal handler caught %d\n", sig);
}
2210
2211 //!
2212 //! Initialize the NC handlers
2213 //!
2214 //! @return EUCA_OK on success or proper error code. Known error code returned include EUCA_ERROR,
2215 //! EUCA_FATAL_ERROR
2216 //!
2217 static int init(void)
2218 {
2219 #define GET_VAR_INT(_var, _name, _def) \
2220 { \
2221 s = getConfString(nc_state.configFiles, 2, (_name)); \
2222 if (s) { \
2223 (_var) = atoi(s); \
2224 EUCA_FREE(s); \
2225 } else { \
2226 (_var) = (_def); \
2227 } \
2228 }
2229
2230 static int initialized = 0;
2231 int do_warn = 0, i;
2232 char logFile[EUCA_MAX_PATH] = "";
2233 char logFileReqTrack[EUCA_MAX_PATH] = "";
2234 char *bridge = NULL;
2235 char *s = NULL;
2236 char *tmp = NULL;
2237 char *pubinterface = NULL;
2238 struct stat mystat = { 0 };
2239 struct handlers **h = NULL;
2240 sigset_t mask = { {0} };
2241 struct sigaction act = { {0} };
2242
2243 // 0 => hasn't run, -1 => failed, 1 => ok
2244 if (initialized > 0)
2245 return EUCA_OK;
2246 else if (initialized < 0)
2247 return EUCA_ERROR;
2248
2249 // ensure that MAXes are zeroed out
2250 bzero(&nc_state, sizeof(struct nc_state_t));
2251 strncpy(nc_state.version, EUCA_VERSION, sizeof(nc_state.version)); // set the version
2252 nc_state.is_enabled = TRUE; // NC is enabled unless disk state will say otherwise
2253
2254 // configure signal handling for this thread and its children:
2255 // - ignore SIGALRM, which may be used in libraries we depend on
2256 // - deliver SIGUSR1 to a no-op signal handler, as a way to unblock 'stuck' system calls in libraries we depend on
2257 {
2258 // add SIGUSR1 & SIGALRM to the list of signals blocked by this thread and all of its children threads
2259 sigemptyset(&mask);
2260 sigaddset(&mask, SIGUSR1);
2261 sigaddset(&mask, SIGALRM);
2262 sigprocmask(SIG_BLOCK, &mask, NULL);
2263
2264 // establish function nc_signal_handler() as the handler for delivery of SIGUSR1, in whatever thread
2265 bzero(&act, sizeof(struct sigaction));
2266 act.sa_handler = nc_signal_handler;
2267 act.sa_flags = 0;
2268 sigemptyset(&act.sa_mask);
2269 sigaction(SIGUSR1, &act, NULL);
2270 }
2271
2272 // read in configuration - this should be first!
2273
2274 // determine home ($EUCALYPTUS)
2275 if ((tmp = getenv(EUCALYPTUS_ENV_VAR_NAME)) == NULL) {
2276 nc_state.home[0] = '\0'; // empty string means '/'
2277 do_warn = 1;
2278 } else {
2279 strncpy(nc_state.home, tmp, EUCA_MAX_PATH - 1);
2280 }
2281
2282 //Set the SC client policy file path
2283 char policyFile[EUCA_MAX_PATH];
2284 bzero(policyFile, EUCA_MAX_PATH);
2285 snprintf(policyFile, EUCA_MAX_PATH, EUCALYPTUS_POLICIES_DIR "/sc-client-policy.xml", nc_state.home);
2286 euca_strncpy(nc_state.config_sc_policy_file, policyFile, EUCA_MAX_PATH);
2287
2288 // set the minimum log for now
2289 snprintf(logFile, EUCA_MAX_PATH, EUCALYPTUS_LOG_DIR "/nc.log", nc_state.home);
2290 snprintf(logFileReqTrack, EUCA_MAX_PATH, EUCALYPTUS_LOG_DIR "/nc-tracking.log", nc_state.home);
2291 log_file_set(logFile, logFileReqTrack);
2292 LOGINFO("spawning Eucalyptus node controller v%s %s\n", nc_state.version, compile_timestamp_str);
2293 if (do_warn)
2294 LOGWARN("env variable %s not set, using /\n", EUCALYPTUS_ENV_VAR_NAME);
2295
2296 // search for the config file
2297 snprintf(nc_state.configFiles[1], EUCA_MAX_PATH, EUCALYPTUS_CONF_LOCATION, nc_state.home);
2298 if (stat(nc_state.configFiles[1], &mystat)) {
2299 LOGFATAL("could not open configuration file %s\n", nc_state.configFiles[1]);
2300 return (EUCA_ERROR);
2301 }
2302 snprintf(nc_state.configFiles[0], EUCA_MAX_PATH, EUCALYPTUS_CONF_OVERRIDE_LOCATION, nc_state.home);
2303 LOGINFO("NC is looking for configuration in %s,%s\n", nc_state.configFiles[1], nc_state.configFiles[0]);
2304
2305 configInitValues(configKeysRestartNC, configKeysNoRestartNC); // initialize config subsystem
2306 readConfigFile(nc_state.configFiles, 2);
2307 update_log_params();
2308 LOGINFO("running as user '%s'\n", get_username());
2309
2310 // set default in the paths. the driver will override
2311 nc_state.config_network_path[0] = '\0';
2312 nc_state.xm_cmd_path[0] = '\0';
2313 nc_state.virsh_cmd_path[0] = '\0';
2314 nc_state.get_info_cmd_path[0] = '\0';
2315 snprintf(nc_state.libvirt_xslt_path, EUCA_MAX_PATH, EUCALYPTUS_LIBVIRT_XSLT, nc_state.home); // for now, this must be set before anything in xml.c is invoked
2316 snprintf(nc_state.rootwrap_cmd_path, EUCA_MAX_PATH, EUCALYPTUS_ROOTWRAP, nc_state.home);
2317
2318 { // determine the hypervisor to use
2319 char *hypervisor = getConfString(nc_state.configFiles, 2, CONFIG_HYPERVISOR);
2320 if (!hypervisor) {
2321 LOGFATAL("value %s is not set in the config file\n", CONFIG_HYPERVISOR);
2322 return (EUCA_FATAL_ERROR);
2323 }
2324 // let's look for the right hypervisor driver
2325 for (h = available_handlers; *h; h++) {
2326 if (!strncmp((*h)->name, "default", CHAR_BUFFER_SIZE))
2327 nc_state.D = *h;
2328
2329 if (!strncmp((*h)->name, hypervisor, CHAR_BUFFER_SIZE))
2330 nc_state.H = *h;
2331
2332 if (!strncmp((*h)->name, "kvm", CHAR_BUFFER_SIZE) && !strcmp(hypervisor, "qemu")) {
2333 nc_state.H = *h;
2334 strcpy(nc_state.H->name, "qemu"); // TODO: kind of a hack, to make instance->hypervisorType right
2335 }
2336 }
2337
2338 if (nc_state.H == NULL) {
2339 LOGFATAL("requested hypervisor type (%s) is not available\n", hypervisor);
2340 EUCA_FREE(hypervisor);
2341 return (EUCA_FATAL_ERROR);
2342 }
2343 // only load virtio config for kvm
2344 if (!strncmp("kvm", hypervisor, CHAR_BUFFER_SIZE) || !strncmp("qemu", hypervisor, CHAR_BUFFER_SIZE) || !strncmp("KVM", hypervisor, CHAR_BUFFER_SIZE)) {
2345 GET_VAR_INT(nc_state.config_use_virtio_net, CONFIG_USE_VIRTIO_NET, 0); // for now, these three Virtio settings must be set before anything in xml.c is invoked
2346 GET_VAR_INT(nc_state.config_use_virtio_disk, CONFIG_USE_VIRTIO_DISK, 0);
2347 GET_VAR_INT(nc_state.config_use_virtio_root, CONFIG_USE_VIRTIO_ROOT, 0);
2348 }
2349 EUCA_FREE(hypervisor);
2350 }
2351
2352 GET_VAR_INT(nc_state.config_cpu_passthrough, CONFIG_CPU_PASSTHROUGH, 0);
2353 LOGINFO("CPU passthrough to instance: %s\n", (nc_state.config_cpu_passthrough) ? ("enabled") : ("disabled"));
2354
2355 {
2356 // load NC's state from disk, if any
2357 struct nc_state_t nc_state_disk = { 0 };
2358
2359 // allocate temporary network struct (we cannot put vnetConfig on the stack, it is large: 102MB)
2360 if ((nc_state_disk.pEucaNet = EUCA_ZALLOC(1, sizeof(euca_network))) == NULL) {
2361 LOGFATAL("Cannot allocate network configuration structure!\n");
2362 return (EUCA_FATAL_ERROR);
2363 }
2364 // Allocate our network structure
2365 if ((nc_state.pEucaNet = EUCA_ZALLOC(1, sizeof(euca_network))) == NULL) {
2366 LOGFATAL("Cannot allocate network configuration structure!\n");
2367 EUCA_FREE(nc_state_disk.pEucaNet);
2368 return (EUCA_FATAL_ERROR);
2369 }
2370
2371 if (read_nc_xml(&nc_state_disk) == EUCA_OK) {
2372 //! @TODO currently read_nc_xml() relies on nc_state.libvirt_xslt_path and virtio flags being set, which is brittle - fix init() in xml.c
2373 LOGINFO("loaded NC state from previous invocation\n");
2374
2375 // check on the version, in case it has changed
2376 if (strcmp(nc_state_disk.version, nc_state.version) != 0 && nc_state_disk.version[0] != '\0') {
2377 LOGINFO("found state from NC v%s while starting NC v%s\n", nc_state_disk.version, nc_state.version);
2378 // any NC upgrade/downgrade-related code can go here
2379 }
2380 // check on the state
2381 if (nc_state_disk.is_enabled == FALSE) {
2382 LOGINFO("NC will start up as DISABLED based on disk state\n");
2383 nc_state.is_enabled = FALSE;
2384 }
2385 } else {
2386 // there is no disk state, so create it
2387 if (gen_nc_xml(&nc_state) != EUCA_OK) {
2388 LOGERROR("failed to update NC state on disk\n");
2389 } else {
2390 LOGINFO("wrote NC state to disk\n");
2391 }
2392 }
2393 }
2394
2395 {
2396 /* Initialize libvirtd.conf, since some buggy versions of libvirt
2397 * require it. At least two versions of libvirt have had this issue,
2398 * most recently the version in RHEL 6.1. Note that this happens
2399 * at each startup of the NC mainly because the location of the
2400 * required file depends on the process owner's home directory, which
2401 * may change after the initial installation.
2402 */
2403 int use_polkit = 0;
2404 char libVirtConf[EUCA_MAX_PATH];
2405 uid_t uid = geteuid();
2406 struct passwd *pw;
2407 FILE *fd;
2408 struct stat lvcstat;
2409 pw = getpwuid(uid);
2410 errno = 0;
2411 if (pw != NULL) {
2412 snprintf(libVirtConf, EUCA_MAX_PATH, "%s/.libvirt/libvirtd.conf", pw->pw_dir);
2413 if (access(libVirtConf, R_OK) == -1 && errno == ENOENT) {
2414 libVirtConf[strlen(libVirtConf) - strlen("/libvirtd.conf")] = '\0';
2415 errno = 0;
2416 if (stat(libVirtConf, &lvcstat) == -1 && errno == ENOENT) {
2417 mkdir(libVirtConf, 0755);
2418 } else if (errno) {
2419 LOGINFO("Failed to stat %s/.libvirt\n", pw->pw_dir);
2420 }
2421 libVirtConf[strlen(libVirtConf)] = '/';
2422 errno = 0;
2423 fd = fopen(libVirtConf, "a");
2424 if (fd == NULL) {
2425 LOGINFO("Failed to open %s, error code %d\n", libVirtConf, errno);
2426 } else {
2427 fclose(fd);
2428 }
2429 } else if (errno) {
2430 LOGINFO("Failed to access libvirtd.conf, error code %d\n", errno);
2431 }
2432 } else {
2433 LOGINFO("Cannot get EUID, not creating libvirtd.conf\n");
2434 }
2435
2436 //
2437 // Configure libvirtd polkit authentication on the libvirt sockets
2438 // by default we *disable* polkit authentication due to stability issues.
2439 // If the configuration parameter is set to -1 we won't touch the configuration
2440 //
2441 GET_VAR_INT(use_polkit, CONFIG_LIBVIRT_USE_POLICY_KIT, 0);
2442 if (use_polkit >= 0) {
2443 if (config_polkit(use_polkit) != EUCA_OK) {
2444 LOGERROR("Unable to %s polkitd for libvirtd.\n", use_polkit ? "enable" : "disable");
2445 } else {
2446 LOGINFO("libvirtd configured to %s polkitd.\n", use_polkit ? "use" : "not use");
2447 }
2448 } else {
2449 LOGDEBUG("Skipping libvirt policy kit configuration\n");
2450 }
2451 }
2452 { // initialize hooks if their directory looks ok
2453 char dir[EUCA_MAX_PATH];
2454 snprintf(dir, sizeof(dir), EUCALYPTUS_NC_HOOKS_DIR, nc_state.home);
2455 // if 'dir' does not exist, init_hooks() will silently fail,
2456 // and all future call_hooks() will silently succeed
2457 init_hooks(nc_state.home, dir);
2458
2459 if (call_hooks(NC_EVENT_PRE_INIT, nc_state.home)) {
2460 LOGFATAL("hooks prevented initialization\n");
2461 return (EUCA_FATAL_ERROR);
2462 }
2463 }
2464
2465 GET_VAR_INT(nc_state.config_max_mem, CONFIG_MAX_MEM, 0);
2466 GET_VAR_INT(nc_state.config_max_cores, CONFIG_MAX_CORES, 0);
2467 GET_VAR_INT(nc_state.save_instance_files, CONFIG_SAVE_INSTANCES, 0);
2468 GET_VAR_INT(nc_state.concurrent_disk_ops, CONFIG_CONCURRENT_DISK_OPS, 4);
2469 GET_VAR_INT(nc_state.sc_request_timeout_sec, CONFIG_SC_REQUEST_TIMEOUT, 45);
2470 GET_VAR_INT(nc_state.concurrent_cleanup_ops, CONFIG_CONCURRENT_CLEANUP_OPS, 30);
2471 GET_VAR_INT(nc_state.disable_snapshots, CONFIG_DISABLE_SNAPSHOTS, 0);
2472 GET_VAR_INT(nc_state.shutdown_grace_period_sec, CONFIG_SHUTDOWN_GRACE_PERIOD_SEC, 60);
2473
2474 strcpy(nc_state.admin_user_id, EUCALYPTUS_ADMIN);
2475 GET_VAR_INT(nc_state.staging_cleanup_threshold, CONFIG_NC_STAGING_CLEANUP_THRESHOLD, default_staging_cleanup_threshold);
2476 GET_VAR_INT(nc_state.booting_cleanup_threshold, CONFIG_NC_BOOTING_CLEANUP_THRESHOLD, default_booting_cleanup_threshold);
2477 GET_VAR_INT(nc_state.booting_envwait_threshold, CONFIG_NC_BOOTING_ENVWAIT_THRESHOLD, default_booting_envwait_threshold);
2478 GET_VAR_INT(nc_state.bundling_cleanup_threshold, CONFIG_NC_BUNDLING_CLEANUP_THRESHOLD, default_bundling_cleanup_threshold);
2479 GET_VAR_INT(nc_state.createImage_cleanup_threshold, CONFIG_NC_CREATEIMAGE_CLEANUP_THRESHOLD, default_createImage_cleanup_threshold);
2480 GET_VAR_INT(nc_state.teardown_state_duration, CONFIG_NC_TEARDOWN_STATE_DURATION, default_teardown_state_duration);
2481 GET_VAR_INT(nc_state.migration_ready_threshold, CONFIG_NC_MIGRATION_READY_THRESHOLD, default_migration_ready_threshold);
2482 // largest ephemeral volume that NC will cache; larger volumes will be created under 'work' blobstore
2483 GET_VAR_INT(nc_state.ephemeral_cache_highwater_gb, CONFIG_NC_EPHEMERAL_CACHE_HIGHWATER_GB, 0);
2484 int max_attempts;
2485 GET_VAR_INT(max_attempts, CONFIG_WALRUS_DOWNLOAD_MAX_ATTEMPTS, -1);
2486 if (max_attempts > 0 && max_attempts < 99)
2487 objectstorage_set_max_download_attempts(max_attempts);
2488
2489 // add three eucalyptus directories with executables to PATH of this process
2490 add_euca_to_path(nc_state.home);
2491
2492 // read in .pem files
2493 if (euca_init_cert()) {
2494 LOGWARN("no cryptographic certificates found: waiting for node to be registered...\n");
2495 // return (EUCA_FATAL_ERROR);
2496 }
2497 // check on dependencies (3rd-party programs that NC invokes)
2498 if (diskutil_init(0)) {
2499 LOGFATAL("failed to find required dependencies for disk operations\n");
2500 return (EUCA_FATAL_ERROR);
2501 }
2502 // check on the Imaging Toolkit readyness
2503 char node_pk_path[EUCA_MAX_PATH];
2504 snprintf(node_pk_path, sizeof(node_pk_path), EUCALYPTUS_KEYS_DIR "/node-pk.pem", nc_state.home);
2505 char cloud_cert_path[EUCA_MAX_PATH];
2506 snprintf(cloud_cert_path, sizeof(cloud_cert_path), EUCALYPTUS_KEYS_DIR "/cloud-cert.pem", nc_state.home);
2507 if (imaging_init(nc_state.home, cloud_cert_path, node_pk_path)) {
2508 LOGFATAL("failed to find required dependencies for image work\n");
2509 return (EUCA_FATAL_ERROR);
2510 }
2511
2512 //// from now on we have unrecoverable failure, so no point in retrying to re-init ////
2513 initialized = -1;
2514
2515 hyp_sem = sem_alloc(1, IPC_MUTEX_SEMAPHORE);
2516 inst_sem = sem_alloc(1, IPC_MUTEX_SEMAPHORE);
2517 inst_copy_sem = sem_alloc(1, IPC_MUTEX_SEMAPHORE);
2518 addkey_sem = sem_alloc(1, IPC_MUTEX_SEMAPHORE);
2519 log_sem = sem_alloc(1, IPC_MUTEX_SEMAPHORE);
2520 service_state_sem = sem_alloc(1, IPC_MUTEX_SEMAPHORE);
2521 stats_sem = sem_alloc(1, IPC_MUTEX_SEMAPHORE);
2522
2523 if (!hyp_sem || !inst_sem || !inst_copy_sem || !addkey_sem || !log_sem || !service_state_sem) {
2524 LOGFATAL("failed to create and initialize semaphores\n");
2525 return (EUCA_FATAL_ERROR);
2526 }
2527 if (log_sem_set(log_sem) != 0) {
2528 LOGFATAL("failed to set logging semaphore\n");
2529 return (EUCA_FATAL_ERROR);
2530 }
2531
2532 if ((loop_sem = diskutil_get_loop_sem()) == NULL) { // NC does not need GRUB for now
2533 LOGFATAL("failed to find all dependencies\n");
2534 return (EUCA_FATAL_ERROR);
2535 }
2536
2537 if (init_eucafaults(euca_this_component_name) == 0) {
2538 LOGFATAL("failed to initialize fault-logging subsystem\n");
2539 return (EUCA_FATAL_ERROR);
2540 }
2541
2542 if (init_ebs_utils(nc_state.sc_request_timeout_sec) != 0) {
2543 LOGFATAL("Failed to initialize ebs utils\n");
2544 return (EUCA_FATAL_ERROR);
2545 }
2546 // initialize the EBS subsystem
2547 update_ebs_params();
2548
2549 deauthorize_migration_keys(TRUE);
2550
2551 // NOTE: this is the only call which needs to be called on both
2552 // the default and the specific handler! All the others will be
2553 // either or
2554 i = nc_state.D->doInitialize(&nc_state);
2555 if (nc_state.H->doInitialize)
2556 i += nc_state.H->doInitialize(&nc_state);
2557
2558 if (i) {
2559 LOGFATAL("failed to initialized hypervisor driver!\n");
2560 return (EUCA_FATAL_ERROR);
2561 }
2562
2563 {
2564 // check on hypervisor and pull out capabilities
2565 virConnectPtr conn = lock_hypervisor_conn();
2566 if (conn == NULL) {
2567 // libvirt could be unresponsive for some time if there are log of instances after previous restart via deauthorize_migration_keys call
2568 // let's wait a bit and ask for a connection again
2569 sleep(LIBVIRT_TIMEOUT_SEC);
2570 conn = lock_hypervisor_conn();
2571 if (conn == NULL) {
2572 LOGFATAL("unable to contact hypervisor\n");
2573 return (EUCA_FATAL_ERROR);
2574 }
2575 }
2576 char *caps_xml = virConnectGetCapabilities(conn);
2577 if (caps_xml == NULL) {
2578 LOGFATAL("unable to obtain hypervisor capabilities\n");
2579 unlock_hypervisor_conn();
2580 return (EUCA_FATAL_ERROR);
2581 }
2582 unlock_hypervisor_conn();
2583 if (strstr(caps_xml, "<live/>") != NULL) {
2584 nc_state.migration_capable = 1;
2585 }
2586 EUCA_FREE(caps_xml);
2587 }
2588 LOGINFO("hypervisor %scapable of live migration\n", nc_state.migration_capable ? "" : "not ");
2589
2590 // now that hypervisor-specific initializers have discovered mem_max and cores_max,
2591 // adjust the values based on configuration parameters, if any
2592 if (nc_state.config_max_mem) {
2593 if (nc_state.config_max_mem > nc_state.phy_max_mem)
2594 LOGWARN("MAX_MEM value is set to %lldMB that is greater than the amount of physical memory: %lldMB\n", nc_state.config_max_mem, nc_state.phy_max_mem);
2595 nc_state.mem_max = nc_state.config_max_mem;
2596 } else {
2597 nc_state.mem_max = nc_state.phy_max_mem;
2598 }
2599
2600 if (nc_state.config_max_cores) {
2601 nc_state.cores_max = nc_state.config_max_cores;
2602 if (nc_state.cores_max > nc_state.phy_max_cores)
2603 LOGINFO("MAX_CORES value is set to %lld that is greater than the amount of physical cores: %lld\n", nc_state.cores_max, nc_state.phy_max_cores);
2604 } else {
2605 nc_state.cores_max = nc_state.phy_max_cores;
2606 }
2607
2608 LOGINFO("physical memory available for instances: %lldMB\n", nc_state.mem_max);
2609 LOGINFO("virtual cpu cores available for instances: %lld\n", nc_state.cores_max);
2610
2611 // sensor subsystem
2612 if (sensor_init(NULL, NULL, nc_state.cores_max, FALSE, NULL) != EUCA_OK) {
2613 LOGERROR("failed to initialize sensor subsystem in this process\n");
2614 return (EUCA_FATAL_ERROR);
2615 }
2616
2617 if (sensor_set_hyp_sem(hyp_sem) != 0) {
2618 LOGFATAL("failed to set hypervisor semaphore for the sensor subsystem\n");
2619 return (EUCA_FATAL_ERROR);
2620 }
2621
2622 {
2623 // backing store configuration
2624 init_backing_errors(); // configure backingstore/blobstore errors to log using the backing::bs_errors() function
2625
2626 char *instances_path = getConfString(nc_state.configFiles, 2, INSTANCE_PATH);
2627
2628 if (instances_path == NULL) {
2629 LOGERROR("%s is not set\n", INSTANCE_PATH);
2630 return (EUCA_FATAL_ERROR);
2631 }
2632 // create work and cache sub-directories so that stat_backing_store() below succeeds
2633 char cache_path[EUCA_MAX_PATH];
2634 snprintf(cache_path, sizeof(cache_path), "%s/cache", instances_path);
2635 if (ensure_directories_exist(cache_path, 0, NULL, NULL, BACKING_DIRECTORY_PERM) == -1) {
2636 EUCA_FREE(instances_path);
2637 return (EUCA_ERROR);
2638 }
2639
2640 char work_path[EUCA_MAX_PATH];
2641 snprintf(work_path, sizeof(work_path), "%s/work", instances_path);
2642 if (ensure_directories_exist(work_path, 0, NULL, NULL, BACKING_DIRECTORY_PERM) == -1) {
2643 EUCA_FREE(instances_path);
2644 return (EUCA_ERROR);
2645 }
2646 // determine how much is used/available in work and cache areas on the backing store
2647 blobstore_meta work_meta, cache_meta;
2648 stat_backing_store(instances_path, &work_meta, &cache_meta); // will zero-out work_ and cache_meta
2649 long long work_fs_size_mb = (long long)(work_meta.fs_bytes_size / MEGABYTE);
2650 long long work_fs_avail_mb = (long long)(work_meta.fs_bytes_available / MEGABYTE);
2651 long long cache_fs_size_mb = (long long)(cache_meta.fs_bytes_size / MEGABYTE);
2652 long long cache_fs_avail_mb = (long long)(cache_meta.fs_bytes_available / MEGABYTE);
2653 long long work_bs_size_mb = work_meta.blocks_limit ? (work_meta.blocks_limit / SEC_PER_MB) : (-1L); // convert sectors->MB
2654 long long work_bs_allocated_mb = work_meta.blocks_limit ? (work_meta.blocks_allocated / SEC_PER_MB) : 0;
2655 long long work_bs_reserved_mb = work_meta.blocks_limit ? ((work_meta.blocks_locked + work_meta.blocks_unlocked) / SEC_PER_MB) : 0;
2656 long long cache_bs_size_mb = cache_meta.blocks_limit ? (cache_meta.blocks_limit / SEC_PER_MB) : (-1L);
2657 long long cache_bs_allocated_mb = cache_meta.blocks_limit ? (cache_meta.blocks_allocated / SEC_PER_MB) : 0;
2658 long long cache_bs_reserved_mb = cache_meta.blocks_limit ? ((cache_meta.blocks_locked + cache_meta.blocks_unlocked) / SEC_PER_MB) : 0;
2659
2660 // sanity check
2661 if (work_fs_avail_mb < MIN_BLOBSTORE_SIZE_MB) {
2662 LOGERROR("insufficient available work space (%lld MB) under %s/work\n", work_fs_avail_mb, instances_path);
2663 EUCA_FREE(instances_path);
2664 return (EUCA_FATAL_ERROR);
2665 }
2666 // look up configuration file settings for work and cache size
2667 long long conf_work_size_mb;
2668 GET_VAR_INT(conf_work_size_mb, CONFIG_NC_WORK_SIZE, -1);
2669
2670 long long conf_cache_size_mb;
2671 GET_VAR_INT(conf_cache_size_mb, CONFIG_NC_CACHE_SIZE, -1);
2672
2673 long long conf_work_overhead_mb;
2674 GET_VAR_INT(conf_work_overhead_mb, CONFIG_NC_OVERHEAD_SIZE, PER_INSTANCE_BUFFER_MB);
2675
2676 { // accommodate legacy MAX_DISK setting by converting it
2677 int max_disk_gb;
2678 GET_VAR_INT(max_disk_gb, CONFIG_MAX_DISK, -1);
2679 if (max_disk_gb != -1) {
2680 if (conf_work_size_mb == -1) {
2681 LOGWARN("using deprecated setting %s for the new setting %s\n", CONFIG_MAX_DISK, CONFIG_NC_WORK_SIZE);
2682 if (max_disk_gb == 0) {
2683 conf_work_size_mb = -1; // change in semantics: 0 used to mean 'unlimited', now 'unset' or -1 means that
2684 } else {
2685 conf_work_size_mb = max_disk_gb * 1024;
2686 }
2687 } else {
2688 LOGWARN("ignoring deprecated setting %s in favor of the new setting %s\n", CONFIG_MAX_DISK, CONFIG_NC_WORK_SIZE);
2689 }
2690 }
2691 }
2692
2693 // decide what work and cache sizes should be, based on all the inputs
2694 long long work_size_mb = -1;
2695 long long cache_size_mb = -1;
2696
2697 // above all, try to respect user-specified limits for work and cache
2698 if (conf_work_size_mb != -1) {
2699 if (conf_work_size_mb < MIN_BLOBSTORE_SIZE_MB) {
2700 LOGWARN("ignoring specified work size (%s=%lld) that is below acceptable minimum (%d)\n", CONFIG_NC_WORK_SIZE, conf_work_size_mb, MIN_BLOBSTORE_SIZE_MB);
2701 } else {
2702 if (work_bs_size_mb != -1 && work_bs_size_mb != conf_work_size_mb) {
2703 LOGWARN("specified work size (%s=%lld) differs from existing work size (%lld), will try resizing\n", CONFIG_NC_WORK_SIZE, conf_work_size_mb, work_bs_size_mb);
2704 }
2705 work_size_mb = conf_work_size_mb;
2706 }
2707 }
2708
2709 if (conf_cache_size_mb != -1) { // respect user-specified limit
2710 if (conf_cache_size_mb < MIN_BLOBSTORE_SIZE_MB) {
2711 cache_size_mb = 0; // so it won't be used
2712 } else {
2713 if (cache_bs_size_mb != -1 && cache_bs_size_mb != conf_cache_size_mb) {
2714 LOGWARN("specified cache size (%s=%lld) differs from existing cache size (%lld), will try resizing\n",
2715 CONFIG_NC_CACHE_SIZE, conf_cache_size_mb, cache_bs_size_mb);
2716 }
2717 cache_size_mb = conf_cache_size_mb;
2718 }
2719 }
2720 // if the user did not specify sizes, try existing blobstores,
2721 // if any, whose limits would have been chosen earlier
2722 if (work_size_mb == -1 && work_bs_size_mb != -1)
2723 work_size_mb = work_bs_size_mb;
2724
2725 if (cache_size_mb == -1 && cache_bs_size_mb != -1)
2726 cache_size_mb = cache_bs_size_mb;
2727
2728 // if the user did not specify either or both of the sizes,
2729 // and blobstores do not exist yet, make reasonable choices
2730 if (memcmp(&work_meta.fs_id, &cache_meta.fs_id, sizeof(fsid_t)) == 0) { // cache and work are on the same file system
2731 long long fs_usable_mb = (long long)((double)work_fs_avail_mb - (double)(work_fs_avail_mb) * FS_BUFFER_PERCENT);
2732 if (work_size_mb == -1 && cache_size_mb == -1) {
2733 work_size_mb = (long long)((double)fs_usable_mb * WORK_BS_PERCENT);
2734 cache_size_mb = fs_usable_mb - work_size_mb;
2735 } else if (work_size_mb == -1) {
2736 work_size_mb = fs_usable_mb - cache_size_mb + cache_bs_allocated_mb;
2737 } else if (cache_size_mb == -1) {
2738 cache_size_mb = fs_usable_mb - work_size_mb + work_bs_allocated_mb;
2739 }
2740 // sanity check
2741 if ((cache_size_mb + work_size_mb - cache_bs_allocated_mb - work_bs_allocated_mb) > work_fs_avail_mb) {
2742 LOGWARN("sum of work and cache sizes exceeds available disk space\n");
2743 }
2744 } else { // cache and work are on different file systems
2745 if (work_size_mb == -1) {
2746 work_size_mb = (long long)((double)work_fs_avail_mb - (double)(work_fs_avail_mb) * FS_BUFFER_PERCENT);
2747 }
2748
2749 if (cache_size_mb == -1) {
2750 cache_size_mb = (long long)((double)cache_fs_avail_mb - (double)(cache_fs_avail_mb) * FS_BUFFER_PERCENT);
2751 }
2752 }
2753
2754 // sanity-check final results
2755 if (cache_size_mb < MIN_BLOBSTORE_SIZE_MB)
2756 cache_size_mb = 0;
2757
2758 if (work_size_mb < MIN_BLOBSTORE_SIZE_MB) {
2759 LOGERROR("insufficient disk space for virtual machines\n");
2760 EUCA_FREE(instances_path);
2761 return (EUCA_FATAL_ERROR);
2762 }
2763
2764 if (init_backing_store(instances_path, work_size_mb, cache_size_mb)) {
2765 LOGFATAL("failed to initialize backing store\n");
2766 EUCA_FREE(instances_path);
2767 return (EUCA_FATAL_ERROR);
2768 }
2769 // record the work-space limit for max_disk
2770 long long work_size_gb = (long long)(work_size_mb / MB_PER_DISK_UNIT);
2771 if (conf_work_overhead_mb < 0 || conf_work_overhead_mb > work_size_mb) { // sanity check work overhead
2772 conf_work_overhead_mb = PER_INSTANCE_BUFFER_MB;
2773 }
2774
2775 long long overhead_mb = work_size_gb * conf_work_overhead_mb; // work_size_gb is the theoretical max number of instances
2776 long long disk_max_mb = work_size_mb - overhead_mb;
2777 nc_state.disk_max = disk_max_mb / MB_PER_DISK_UNIT;
2778
2779 LOGINFO("disk space for instances: %s/work\n", instances_path);
2780 LOGINFO(" %06lldMB limit (%.1f%% of the file system) - %lldMB overhead = %lldMB = %lldGB\n",
2781 work_size_mb, ((double)work_size_mb / (double)work_fs_size_mb) * 100.0, overhead_mb, disk_max_mb, nc_state.disk_max);
2782 LOGINFO(" %06lldMB reserved for use (%.1f%% of limit)\n", work_bs_reserved_mb, ((double)work_bs_reserved_mb / (double)work_size_mb) * 100.0);
2783 LOGINFO(" %06lldMB allocated for use (%.1f%% of limit, %.1f%% of the file system)\n", work_bs_allocated_mb,
2784 ((double)work_bs_allocated_mb / (double)work_size_mb) * 100.0, ((double)work_bs_allocated_mb / (double)work_fs_size_mb) * 100.0);
2785
2786 if (cache_size_mb) {
2787 LOGINFO(" disk space for cache: %s/cache\n", instances_path);
2788 LOGINFO(" %06lldMB limit (%.1f%% of the file system)\n", cache_size_mb, ((double)cache_size_mb / (double)cache_fs_size_mb) * 100.0);
2789 LOGINFO(" %06lldMB reserved for use (%.1f%% of limit)\n", cache_bs_reserved_mb,
2790 ((double)cache_bs_reserved_mb / (double)cache_size_mb) * 100.0);
2791 LOGINFO(" %06lldMB allocated for use (%.1f%% of limit, %.1f%% of the file system)\n", cache_bs_allocated_mb,
2792 ((double)cache_bs_allocated_mb / (double)cache_size_mb) * 100.0, ((double)cache_bs_allocated_mb / (double)cache_fs_size_mb) * 100.0);
2793 } else {
2794 LOGWARN("disk cache will not be used\n");
2795 }
2796
2797 EUCA_FREE(instances_path);
2798 }
2799
2800 // adopt running instances -- do this before disk integrity check so we know what can be purged
2801 adopt_instances();
2802
2803 if (check_backing_store(&global_instances) != EUCA_OK) { // integrity check, cleanup of unused instances and shrinking of cache
2804 LOGFATAL("integrity check of the backing store failed");
2805 return (EUCA_FATAL_ERROR);
2806 }
2807 // setup the network
2808 snprintf(nc_state.config_network_path, EUCA_MAX_PATH, NC_NET_PATH_DEFAULT, nc_state.home);
2809
2810 tmp = getConfString(nc_state.configFiles, 2, "VNET_MODE");
2811 if (!tmp) {
2812 LOGWARN("VNET_MODE is not defined, defaulting to '%s'\n", NETMODE_INVALID);
2813 tmp = strdup(NETMODE_INVALID);
2814 if (!tmp) {
2815 LOGFATAL("Out of memory\n");
2816 return (EUCA_FATAL_ERROR);
2817 }
2818 }
2819
2820 int initFail = 0;
2821
2822 if (tmp && !(!strcmp(tmp, NETMODE_EDGE) || !strcmp(tmp, NETMODE_VPCMIDO))) {
2823 char errorm[256];
2824 memset(errorm, 0, 256);
2825 sprintf(errorm, "Invalid VNET_MODE setting: %s", tmp);
2826 LOGFATAL("%s\n", errorm);
2827 initFail = 1;
2828 }
2829
2830 if (tmp && (!strcmp(tmp, NETMODE_EDGE) || !strcmp(tmp, NETMODE_VPCMIDO))) {
2831 bridge = getConfString(nc_state.configFiles, 2, "VNET_BRIDGE");
2832 if (!bridge) {
2833 LOGFATAL("in 'EDGE' or 'VPCMIDO' network mode, you must specify a value for VNET_BRIDGE\n");
2834 initFail = 1;
2835 }
2836 }
2837
2838 if (tmp && !strcmp(tmp, NETMODE_EDGE)) {
2839 pubinterface = getConfString(nc_state.configFiles, 2, "VNET_PUBINTERFACE");
2840 if (!pubinterface)
2841 pubinterface = getConfString(nc_state.configFiles, 2, "VNET_INTERFACE");
2842
2843 if (!pubinterface) {
2844 LOGWARN("VNET_PUBINTERFACE is not defined, defaulting to 'eth0'\n");
2845 pubinterface = strdup("eth0");
2846 if (!pubinterface) {
2847 LOGFATAL("out of memory!\n");
2848 initFail = 1;
2849 }
2850 }
2851 }
2852
2853 snprintf(nc_state.pEucaNet->sMode, NETMODE_LEN, "%s", tmp);
2854 if (pubinterface)
2855 snprintf(nc_state.pEucaNet->sPublicDevice, IF_NAME_LEN, "%s", pubinterface);
2856
2857 if (bridge)
2858 snprintf(nc_state.pEucaNet->sBridgeDevice, IF_NAME_LEN, "%s", bridge);
2859
2860 EUCA_FREE(pubinterface);
2861 EUCA_FREE(bridge);
2862 EUCA_FREE(tmp);
2863
2864 if (initFail)
2865 return (EUCA_FATAL_ERROR);
2866
2867 // set NC helper path
2868 tmp = getConfString(nc_state.configFiles, 2, CONFIG_NC_BUNDLE_UPLOAD);
2869 if (tmp) {
2870 snprintf(nc_state.ncBundleUploadCmd, EUCA_MAX_PATH, "%s", tmp);
2871 EUCA_FREE(tmp);
2872 } else {
2873 snprintf(nc_state.ncBundleUploadCmd, EUCA_MAX_PATH, "%s", EUCALYPTUS_NC_BUNDLE_UPLOAD); // default value
2874 }
2875
2876 // set NC helper path
2877 tmp = getConfString(nc_state.configFiles, 2, CONFIG_NC_CHECK_BUCKET);
2878 if (tmp) {
2879 snprintf(nc_state.ncCheckBucketCmd, EUCA_MAX_PATH, "%s", tmp);
2880 EUCA_FREE(tmp);
2881 } else {
2882 snprintf(nc_state.ncCheckBucketCmd, EUCA_MAX_PATH, "%s", EUCALYPTUS_NC_CHECK_BUCKET); // default value
2883 }
2884
2885 // set NC helper path
2886 tmp = getConfString(nc_state.configFiles, 2, CONFIG_NC_DELETE_BUNDLE);
2887 if (tmp) {
2888 snprintf(nc_state.ncDeleteBundleCmd, EUCA_MAX_PATH, "%s", tmp);
2889 EUCA_FREE(tmp);
2890 } else {
2891 snprintf(nc_state.ncDeleteBundleCmd, EUCA_MAX_PATH, "%s", EUCALYPTUS_NC_DELETE_BUNDLE); // default value
2892 }
2893
2894 {
2895 // set enable ws-security
2896 tmp = getConfString(nc_state.configFiles, 2, CONFIG_ENABLE_WS_SECURITY);
2897 if (tmp && !strcmp(tmp, "N")) {
2898 LOGDEBUG("Configuring no use of WS-SEC as specified in config file by explicit 'no' value\n");
2899 nc_state.config_use_ws_sec = 0;
2900 EUCA_FREE(tmp);
2901 } else {
2902 LOGDEBUG("Configured to use WS-SEC by default\n");
2903 if (tmp)
2904 EUCA_FREE(tmp);
2905 nc_state.config_use_ws_sec = 1;
2906 }
2907 }
2908
2909 { // find and set iqn
2910 snprintf(nc_state.iqn, CHAR_BUFFER_SIZE, "UNSET");
2911 char *ptr = NULL, *iqn = NULL, *tmp = NULL, cmd[EUCA_MAX_PATH];
2912 snprintf(cmd, EUCA_MAX_PATH, "%s cat /etc/iscsi/initiatorname.iscsi", nc_state.rootwrap_cmd_path);
2913 ptr = system_output(cmd);
2914 if (ptr) {
2915 iqn = strstr(ptr, "InitiatorName=");
2916 if (iqn) {
2917 iqn += strlen("InitiatorName=");
2918 tmp = strstr(iqn, "\n");
2919 if (tmp)
2920 *tmp = '\0';
2921 snprintf(nc_state.iqn, CHAR_BUFFER_SIZE, "%s", iqn);
2922 }
2923 EUCA_FREE(ptr);
2924 }
2925 }
2926
2927 { // find and set IP
2928 char hostname[HOSTNAME_SIZE];
2929 if (gethostname(hostname, sizeof(hostname)) != 0) {
2930 LOGFATAL("failed to find hostname\n");
2931 return (EUCA_FATAL_ERROR);
2932 }
2933 LOGDEBUG("Searching for IP by hostname %s\n", hostname);
2934
2935 struct addrinfo hints, *servinfo, *p;
2936 struct sockaddr_in *h;
2937 memset(&hints, 0, sizeof hints);
2938 hints.ai_family = AF_INET;
2939 hints.ai_socktype = SOCK_STREAM;
2940 int rv;
2941 if ((rv = getaddrinfo(hostname, "http", &hints, &servinfo)) != 0) {
2942 LOGFATAL("getaddrinfo: %s\n", gai_strerror(rv));
2943 return (EUCA_FATAL_ERROR);
2944 }
2945 int found = 0;
2946 for(p = servinfo; !found && p != NULL; p = p->ai_next) {
2947 if (!found) {
2948 h = (struct sockaddr_in *) p->ai_addr;
2949 euca_strncpy(nc_state.ip, inet_ntoa(h->sin_addr), sizeof(nc_state.ip));
2950 found = 1;
2951 }
2952 }
2953 freeaddrinfo(servinfo);
2954 if (!found) {
2955 LOGFATAL("failed to obtain IP for %s\n", hostname);
2956 return (EUCA_FATAL_ERROR);
2957 }
2958 LOGINFO("using IP %s\n", nc_state.ip);
2959 LOGINFO("Initializing localhost info for vbr processing\n");
2960 if (vbr_init_hostconfig
2961 (nc_state.iqn, nc_state.ip, nc_state.config_sc_policy_file, nc_state.config_use_ws_sec, nc_state.config_use_virtio_root, nc_state.config_use_virtio_disk) != 0) {
2962 LOGFATAL("Error initializing vbr localhost configuration\n");
2963 return (EUCA_FATAL_ERROR);
2964 }
2965 }
2966
2967 {
2968 LOGINFO("Initializing service state and epoch\n");
2969 //Initialize the service state info.
2970 nc_state.ncStatus.localEpoch = 0;
2971 snprintf(nc_state.ncStatus.details, 1024, "ERRORS=0");
2972 snprintf(nc_state.ncStatus.serviceId.type, 32, "node");
2973 snprintf(nc_state.ncStatus.serviceId.name, 32, "self");
2974 snprintf(nc_state.ncStatus.serviceId.partition, 32, "unset");
2975 nc_state.ncStatus.serviceId.urisLen = 0;
2976 nc_state.servicesLen = 0;
2977 nc_state.disabledServicesLen = 0;
2978 nc_state.notreadyServicesLen = 0;
2979
2980 for (i = 0; i < 32 && nc_state.ncStatus.serviceId.urisLen < 8; i++) {
2981 if (nc_state.pEucaNet->aLocalIps[i]) {
2982 char *host;
2983 host = hex2dot(nc_state.pEucaNet->aLocalIps[i]);
2984 if (host) {
2985 snprintf(nc_state.ncStatus.serviceId.uris[nc_state.ncStatus.serviceId.urisLen], 512, "http://%s:8775/axis2/services/EucalyptusNC", host);
2986 nc_state.ncStatus.serviceId.urisLen++;
2987 EUCA_FREE(host);
2988 }
2989 }
2990 }
2991
2992 LOGINFO("Done initializing services state\n");
2993 }
2994
2995 { // start the monitoring thread
2996 pthread_t tcb;
2997 if (pthread_create(&tcb, NULL, monitoring_thread, &nc_state)) {
2998 LOGFATAL("failed to spawn a monitoring thread\n");
2999 return (EUCA_FATAL_ERROR);
3000 }
3001 if (pthread_detach(tcb)) {
3002 LOGFATAL("failed to detach the monitoring thread\n");
3003 return (EUCA_FATAL_ERROR);
3004 }
3005 }
3006
3007 {
3008
3009 if (initialize_stats_system(DEFAULT_SENSOR_INTERVAL_SEC) != EUCA_OK) {
3010 // if (init_stats(nc_state.home, euca_this_component_name, nc_stats_lock, nc_stats_unlock) != EUCA_OK) {
3011 LOGERROR("Could not initialize NC stats system\n");
3012 return EUCA_ERROR;
3013 }
3014 LOGDEBUG("Stats system initialized for NC\n");
3015
3016 //Stats thread. Independent of the monitoring thread because the monitoring thread fires irregularly
3017 pthread_t stats_thread;
3018 if (pthread_create(&stats_thread, NULL, nc_run_stats, &nc_state)) {
3019 LOGFATAL("Failed to spawn the internal stats thread\n");
3020 return (EUCA_FATAL_ERROR);
3021 }
3022 if (pthread_detach(stats_thread)) {
3023 LOGFATAL("Failed to detach the internal stats thread\n");
3024 return (EUCA_FATAL_ERROR);
3025 }
3026
3027 }
3028
3029 // post-init hook
3030 if (call_hooks(NC_EVENT_POST_INIT, nc_state.home)) {
3031 LOGFATAL("hooks prevented initialization\n");
3032 return (EUCA_FATAL_ERROR);
3033 }
3034
3035 initialized = 1;
3036 return (EUCA_OK);
3037
3038 #undef GET_VAR_INT
3039 }
3040
//!
//! Start-up entry point of the node controller: invokes init() and logs the outcome.
//!
//! A non-zero result from init() is reported as a warning only; the
//! "component started" message is logged in either case.
//!
//! @note this routine runs immediately when the process is started
//!
void doInitNC(void)
{
    const int rc = init();

    if (rc != 0) {
        LOGWARN("could not initialize\n");
    }

    LOGINFO("component started\n");
}
3053
3054 //!
3055 //! Handles the describe instance request
3056 //!
3057 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3058 //! @param[in] instIds a pointer the list of instance identifiers to retrieve data for
3059 //! @param[in] instIdsLen the number of instance identifiers in the instIds list
3060 //! @param[out] outInsts a pointer the list of instances for which we have data
3061 //! @param[out] outInstsLen the number of instances in the outInsts list.
3062 //!
3063 //! @return EUCA_OK on success or proper error code. Known error code returned include: EUCA_ERROR,
3064 //! EUCA_MEMORY_ERROR, EUCA_MEMORY_ERROR
3065 //!
3066 int doDescribeInstances(ncMetadata * pMeta, char **instIds, int instIdsLen, ncInstance *** outInsts, int *outInstsLen)
3067 {
3068 #define NC_MONIT_FILENAME EUCALYPTUS_RUN_DIR "/nc-stats"
3069
3070 int i = 0;
3071 int j = 0;
3072 int ret = EUCA_OK;
3073 int len = 0;
3074 char *s = "";
3075 char *file_name = NULL;
3076 char myName[CHAR_BUFFER_SIZE] = "";
3077 FILE *f = NULL;
3078 long long used_mem = 0;
3079 long long used_disk = 0;
3080 long long used_cores = 0;
3081 u_int vols_count = 0;
3082 u_int nics_count = 0;
3083
3084 if (init())
3085 return (EUCA_ERROR);
3086
3087 LOGTRACE("invoked\n"); // response will be at INFO, so this is TRACE
3088
3089 updateServiceStateInfo(pMeta, FALSE);
3090 if (nc_state.H->doDescribeInstances)
3091 ret = nc_state.H->doDescribeInstances(&nc_state, pMeta, instIds, instIdsLen, outInsts, outInstsLen);
3092 else
3093 ret = nc_state.D->doDescribeInstances(&nc_state, pMeta, instIds, instIdsLen, outInsts, outInstsLen);
3094
3095 if (ret)
3096 return ret;
3097
3098 for (i = 0; i < (*outInstsLen); i++) {
3099 char vols_str[128] = "";
3100 char vol_str[16] = "";
3101 char nics_str[128] = "";
3102 char nic_str[16] = "";
3103 char status_str[128] = "running";
3104 ncInstance *instance = (*outInsts)[i];
3105
3106 // construct a string summarizing the volumes attached to the instance
3107 vols_count = 0;
3108 for (j = 0; j < EUCA_MAX_VOLUMES; ++j) {
3109 ncVolume *volume = &instance->volumes[j];
3110 if (strlen(volume->volumeId) == 0)
3111 continue;
3112 vols_count++;
3113
3114 s = "";
3115 if (!strcmp(volume->stateName, VOL_STATE_ATTACHING))
3116 s = "a";
3117 else if (!strcmp(volume->stateName, VOL_STATE_ATTACHED))
3118 s = "A";
3119 else if (!strcmp(volume->stateName, VOL_STATE_ATTACHING_FAILED))
3120 s = "af";
3121 else if (!strcmp(volume->stateName, VOL_STATE_DETACHING))
3122 s = "d";
3123 else if (!strcmp(volume->stateName, VOL_STATE_DETACHED))
3124 s = "D";
3125 else if (!strcmp(volume->stateName, VOL_STATE_DETACHING_FAILED))
3126 s = "df";
3127
3128 snprintf(vol_str, sizeof(vol_str), "%s%s:%s", (vols_count > 1) ? (",") : (""), volume->volumeId, s);
3129 if ((strlen(vols_str) + strlen(vol_str)) < sizeof(vols_str)) {
3130 strcat(vols_str, vol_str);
3131 }
3132 }
3133
3134 nics_count = 0;
3135 for (j = 0; j < EUCA_MAX_NICS; ++j) {
3136 netConfig *net = &instance->secNetCfgs[j];
3137 if (strlen(net->interfaceId) == 0)
3138 continue;
3139 nics_count++;
3140
3141 s = "";
3142 if (!strcmp(net->stateName, VOL_STATE_ATTACHING))
3143 s = "a";
3144 else if (!strcmp(net->stateName, VOL_STATE_ATTACHED))
3145 s = "A";
3146 else if (!strcmp(net->stateName, VOL_STATE_ATTACHING_FAILED))
3147 s = "af";
3148 else if (!strcmp(net->stateName, VOL_STATE_DETACHING))
3149 s = "d";
3150 else if (!strcmp(net->stateName, VOL_STATE_DETACHED))
3151 s = "D";
3152 else if (!strcmp(net->stateName, VOL_STATE_DETACHING_FAILED))
3153 s = "df";
3154 else
3155 s = "U"; //unknown state
3156
3157 snprintf(nic_str, sizeof(nic_str), "%s%s:%s", (nics_count > 1) ? (",") : (""), net->interfaceId, s);
3158 if ((strlen(nics_str) + strlen(nic_str)) < sizeof(nics_str)) {
3159 strcat(nics_str, nic_str);
3160 }
3161 }
3162
3163 if (instance->migration_state != NOT_MIGRATING) { // construct migration status string
3164 char *peer = "?";
3165 char dir = '?';
3166 if (!strcmp(nc_state.ip, instance->migration_src)) {
3167 peer = instance->migration_dst;
3168 dir = '>';
3169 } else {
3170 peer = instance->migration_src;
3171 dir = '<';
3172 }
3173 snprintf(status_str, sizeof(status_str), "%s %c%s", migration_state_names[instance->migration_state], dir, peer);
3174 } else if (instance->terminationTime) {
3175 strncpy(status_str, "terminated", sizeof(status_str));
3176 } else if (instance->terminationRequestedTime) {
3177 strncpy(status_str, "terminating", sizeof(status_str));
3178 } else if (instance->state == BUNDLING_SHUTDOWN || instance->state == BUNDLING_SHUTOFF) {
3179 strncpy(status_str, "bundling", sizeof(status_str));
3180 } else if (instance->state == CREATEIMAGE_SHUTDOWN || instance->state == CREATEIMAGE_SHUTOFF) {
3181 strncpy(status_str, "creating image", sizeof(status_str));
3182 } else if (instance->bootTime == 0) {
3183 strncpy(status_str, "staging", sizeof(status_str));
3184 } // else it is "running"
3185
3186 if (nics_count > 0) {
3187 LOGDEBUG("[%s] %s (%s) pub=%s vols=%s nics=%s\n", instance->instanceId, instance->stateName, status_str, instance->ncnet.publicIp, vols_str, nics_str);
3188 } else {
3189 LOGDEBUG("[%s] %s (%s) pub=%s vols=%s\n", instance->instanceId, instance->stateName, status_str, instance->ncnet.publicIp, vols_str);
3190 }
3191 }
3192
3193 // allocate enough memory
3194 len = (strlen(EUCALYPTUS_CONF_LOCATION) > strlen(NC_MONIT_FILENAME)) ? strlen(EUCALYPTUS_CONF_LOCATION) : strlen(NC_MONIT_FILENAME);
3195 len += 2 + strlen(nc_state.home);
3196 if ((file_name = EUCA_ALLOC(1, sizeof(char) * len)) == NULL) {
3197 LOGERROR("Out of memory!\n");
3198 return (EUCA_MEMORY_ERROR);
3199 }
3200
3201 sprintf(file_name, NC_MONIT_FILENAME, nc_state.home);
3202 if (!strcmp(pMeta->userId, EUCALYPTUS_ADMIN)) {
3203 if ((f = fopen(file_name, "w")) == NULL) {
3204 if ((f = fopen(file_name, "w+")) == NULL) {
3205 LOGWARN("Cannot create %s!\n", file_name);
3206 } else {
3207 if ((len = fileno(f)) > 0)
3208 fchmod(len, S_IRUSR | S_IWUSR);
3209 }
3210 }
3211
3212 if (f) {
3213 fprintf(f, "version: %s\n", EUCA_VERSION);
3214 fprintf(f, "timestamp: %ld\n", time(NULL));
3215 if (gethostname(myName, CHAR_BUFFER_SIZE) == 0)
3216 fprintf(f, "node: %s\n", myName);
3217 fprintf(f, "hypervisor: %s\n", nc_state.H->name);
3218 fprintf(f, "network: %s\n", nc_state.pEucaNet->sMode);
3219
3220 used_disk = used_mem = used_cores = 0;
3221 for (i = 0; i < (*outInstsLen); i++) {
3222 ncInstance *instance = (*outInsts)[i];
3223 used_disk += instance->params.disk;
3224 used_mem += instance->params.mem;
3225 used_cores += instance->params.cores;
3226 }
3227
3228 fprintf(f, "memory (max/avail/used) MB: %lld/%lld/%lld\n", nc_state.mem_max, nc_state.mem_max - used_mem, used_mem);
3229 fprintf(f, "disk (max/avail/used) GB: %lld/%lld/%lld\n", nc_state.disk_max, nc_state.disk_max - used_disk, used_disk);
3230 fprintf(f, "cores (max/avail/used): %lld/%lld/%lld\n", nc_state.cores_max, nc_state.cores_max - used_cores, used_cores);
3231
3232 for (i = 0; i < (*outInstsLen); i++) {
3233 ncInstance *instance = (*outInsts)[i];
3234 fprintf(f, "id: %s", instance->instanceId);
3235 fprintf(f, " userId: %s", instance->userId);
3236 fprintf(f, " state: %s", instance->stateName);
3237 fprintf(f, " mem: %d", instance->params.mem);
3238 fprintf(f, " disk: %d", instance->params.disk);
3239 fprintf(f, " cores: %d", instance->params.cores);
3240 fprintf(f, " private: %s", instance->ncnet.privateIp);
3241 fprintf(f, " public: %s\n", instance->ncnet.publicIp);
3242 }
3243 fclose(f);
3244 }
3245 }
3246 EUCA_FREE(file_name);
3247
3248 LOGTRACE("done\n");
3249 return (EUCA_OK);
3250 }
3251
3252 //!
3253 //! Handles the broadcast network info request
3254 //!
3255 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3256 //! @param[in] networkInfo is a string
3257 //!
3258 //! @return EUCA_ERROR on failure or the result of the proper doBroadcastNetworkInfo() handler call.
3259 //!
3260 int doBroadcastNetworkInfo(ncMetadata * pMeta, char *networkInfo)
3261 {
3262 int ret = EUCA_OK;
3263
3264 if (init())
3265 return (EUCA_ERROR);
3266
3267 LOGDEBUG("invoked\n");
3268 LOGTRACE("invoked with networkInfo='%s'\n", SP(networkInfo));
3269
3270 if (nc_state.H->doBroadcastNetworkInfo)
3271 ret = nc_state.H->doBroadcastNetworkInfo(&nc_state, pMeta, networkInfo);
3272 else
3273 ret = nc_state.D->doBroadcastNetworkInfo(&nc_state, pMeta, networkInfo);
3274
3275 return ret;
3276 }
3277
3278 //!
3279 //! Handles the assign address request
3280 //!
3281 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3282 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
3283 //! @param[in] publicIp a string representation of the public IP to assign to the instance
3284 //!
3285 //! @return EUCA_ERROR on failure or the result of the proper doAssignAddress() handler call.
3286 //!
3287 int doAssignAddress(ncMetadata * pMeta, char *instanceId, char *publicIp)
3288 {
3289 int ret = EUCA_OK;
3290
3291 if (init())
3292 return (EUCA_ERROR);
3293
3294 LOGINFO("[%s] assigning address: [%s]\n", SP(instanceId), SP(publicIp));
3295 LOGDEBUG("[%s] invoked (publicIp=%s)\n", instanceId, publicIp);
3296
3297 if (nc_state.H->doAssignAddress)
3298 ret = nc_state.H->doAssignAddress(&nc_state, pMeta, instanceId, publicIp);
3299 else
3300 ret = nc_state.D->doAssignAddress(&nc_state, pMeta, instanceId, publicIp);
3301
3302 return ret;
3303 }
3304
3305 //!
3306 //! Handles the power down request.
3307 //!
3308 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3309 //!
3310 //! @return EUCA_ERROR on failure or the result of the proper doPowerDown() handler call.
3311 //!
3312 int doPowerDown(ncMetadata * pMeta)
3313 {
3314 int ret = EUCA_OK;
3315
3316 if (init())
3317 return (EUCA_ERROR);
3318
3319 LOGINFO("powering down\n");
3320 LOGDEBUG("invoked\n");
3321
3322 if (nc_state.H->doPowerDown)
3323 ret = nc_state.H->doPowerDown(&nc_state, pMeta);
3324 else
3325 ret = nc_state.D->doPowerDown(&nc_state, pMeta);
3326
3327 return ret;
3328 }
3329
3330 //!
3331 //! Handles the run instance request.
3332 //!
3333 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3334 //! @param[in] uuid unique user identifier string
3335 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
3336 //! @param[in] reservationId the reservation identifier string
3337 //! @param[in] params a pointer to the virtual machine parameters to use
3338 //! @param[in] imageId UNUSED
3339 //! @param[in] imageURL UNUSED
3340 //! @param[in] kernelId the kernel image identifier (eki-XXXXXXXX)
3341 //! @param[in] kernelURL the kernel image URL address
3342 //! @param[in] ramdiskId the ramdisk image identifier (eri-XXXXXXXX)
3343 //! @param[in] ramdiskURL the ramdisk image URL address
3344 //! @param[in] ownerId the owner identifier string
3345 //! @param[in] accountId the account identifier string
3346 //! @param[in] keyName the key name string
3347 //! @param[in] netparams a pointer to the network parameters string
3348 //! @param[in] userData the user data string
3349 //! @param[in] launchIndex the launch index string
3350 //! @param[in] platform the platform name string
3351 //! @param[in] expiryTime the reservation expiration time
3352 //! @param[in] groupNames a list of group name string
3353 //! @param[in] groupNamesSize the number of group name in the groupNames list
3354 //! @param[out] outInst the list of instances created by this request
3355 //!
3356 //! @return EUCA_ERROR on failure or the result of the proper doRunInstance() handler call.
3357 //!
3358 int doRunInstance(ncMetadata * pMeta, char *uuid, char *instanceId, char *reservationId, virtualMachine * params, char *imageId, char *imageURL,
3359 char *kernelId, char *kernelURL, char *ramdiskId, char *ramdiskURL, char *ownerId, char *accountId, char *keyName,
3360 netConfig * netparams, char *userData, char *credential, char *launchIndex, char *platform, int expiryTime, char **groupNames, int groupNamesSize,
3361 char *rootDirective, char **groupIds, int groupIdsSize, netConfig * secNetCfgs, int secNetCfgsLen, ncInstance ** outInst)
3362 {
3363 int ret = EUCA_OK;
3364
3365 if (init())
3366 return (EUCA_ERROR);
3367 DISABLED_CHECK;
3368
3369 LOGINFO("[%s] running instance groupId=%s cores=%d disk=%d memory=%d vlan=%d net=%d priMAC=%s privIp=%s plat=%s kernel=%s ramdisk=%s\n",
3370 instanceId, SP(groupIds[0]), params->cores, params->disk, params->mem, netparams->vlan, netparams->networkIndex, netparams->privateMac, netparams->privateIp, platform,
3371 kernelId, ramdiskId);
3372 if (vbr_legacy(instanceId, params, imageId, imageURL, kernelId, kernelURL, ramdiskId, ramdiskURL) != EUCA_OK)
3373 return (EUCA_ERROR);
3374 // spark: kernel and ramdisk id are required for linux bundle-instance, but are not in the runInstance request;
3375 if (!kernelId || !ramdiskId) {
3376 for (int i = 0; i < EUCA_MAX_VBRS && i < params->virtualBootRecordLen; i++) {
3377 virtualBootRecord *vbr = &(params->virtualBootRecord[i]);
3378 if (strlen(vbr->resourceLocation) > 0) {
3379 if (!strcmp(vbr->typeName, "kernel")) {
3380 // free our string if it was previously set
3381 EUCA_FREE(kernelId);
3382 kernelId = strdup(vbr->id);
3383 }
3384
3385 if (!strcmp(vbr->typeName, "ramdisk")) {
3386 // free our string if it was previously set
3387 EUCA_FREE(ramdiskId);
3388 ramdiskId = strdup(vbr->id);
3389 }
3390 } else {
3391 break;
3392 }
3393 }
3394 }
3395 if (nc_state.H->doRunInstance) {
3396 ret = nc_state.H->doRunInstance(&nc_state, pMeta, uuid, instanceId, reservationId, params, imageId, imageURL, kernelId, kernelURL, ramdiskId,
3397 ramdiskURL, ownerId, accountId, keyName, netparams, userData, credential, launchIndex, platform, expiryTime, groupNames, groupNamesSize,
3398 rootDirective, groupIds, groupIdsSize, secNetCfgs, secNetCfgsLen, outInst);
3399 } else {
3400 ret = nc_state.D->doRunInstance(&nc_state, pMeta, uuid, instanceId, reservationId, params, imageId, imageURL, kernelId, kernelURL, ramdiskId,
3401 ramdiskURL, ownerId, accountId, keyName, netparams, userData, credential, launchIndex, platform, expiryTime, groupNames, groupNamesSize,
3402 rootDirective, groupIds, groupIdsSize, secNetCfgs, secNetCfgsLen, outInst);
3403 }
3404 return ret;
3405 }
3406
3407 //!
3408 //! Finds and terminate an instance.
3409 //!
3410 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3411 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
3412 //! @param[in] force if set to 1 will force the termination of the instance
3413 //! @param[out] shutdownState the instance state code after the call to find_and_terminate_instance() if successful
3414 //! @param[out] previousState the instance state code after the call to find_and_terminate_instance() if successful
3415 //!
3416 //! @return EUCA_ERROR on failure or the result of the proper doTerminateInstance() handler call.
3417 //!
3418 int doTerminateInstance(ncMetadata * pMeta, char *instanceId, int force, int *shutdownState, int *previousState)
3419 {
3420 int ret = EUCA_OK;
3421
3422 if (init())
3423 return (EUCA_ERROR);
3424 DISABLED_CHECK;
3425
3426 LOGINFO("[%s] termination requested\n", instanceId);
3427
3428 if (nc_state.H->doTerminateInstance)
3429 ret = nc_state.H->doTerminateInstance(&nc_state, pMeta, instanceId, force, shutdownState, previousState);
3430 else
3431 ret = nc_state.D->doTerminateInstance(&nc_state, pMeta, instanceId, force, shutdownState, previousState);
3432
3433 return ret;
3434 }
3435
3436 //!
3437 //! Handles the reboot instance request
3438 //!
3439 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3440 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
3441 //!
3442 //! @return EUCA_ERROR on failure or the result of the proper doRebootInstance() handler call.
3443 //!
3444 int doRebootInstance(ncMetadata * pMeta, char *instanceId)
3445 {
3446 int ret = EUCA_OK;
3447
3448 if (init())
3449 return (EUCA_ERROR);
3450 DISABLED_CHECK;
3451
3452 LOGINFO("[%s] rebooting requested\n", SP(instanceId));
3453 LOGDEBUG("[%s] invoked\n", instanceId);
3454
3455 if (nc_state.H->doRebootInstance)
3456 ret = nc_state.H->doRebootInstance(&nc_state, pMeta, instanceId);
3457 else
3458 ret = nc_state.D->doRebootInstance(&nc_state, pMeta, instanceId);
3459
3460 return ret;
3461 }
3462
3463 //!
3464 //! Handles the get console output request
3465 //!
3466 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3467 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
3468 //! @param[out] consoleOutput a pointer to the unallocated string that will contain the output
3469 //!
3470 //! @return EUCA_ERROR on failure or the result of the proper doGetConsoleOutput() handler call.
3471 //!
3472 int doGetConsoleOutput(ncMetadata * pMeta, char *instanceId, char **consoleOutput)
3473 {
3474 int ret = EUCA_OK;
3475
3476 if (init())
3477 return 1;
3478
3479 LOGINFO("[%s] console output requested\n", instanceId);
3480
3481 if (nc_state.H->doGetConsoleOutput)
3482 ret = nc_state.H->doGetConsoleOutput(&nc_state, pMeta, instanceId, consoleOutput);
3483 else
3484 ret = nc_state.D->doGetConsoleOutput(&nc_state, pMeta, instanceId, consoleOutput);
3485
3486 return ret;
3487 }
3488
3489 //!
3490 //! Handles the describe resource request.
3491 //!
3492 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3493 //! @param[in] resourceType UNUSED
3494 //! @param[out] outRes a list of resources we retrieved data for
3495 //!
3496 //! @return EUCA_ERROR on failure or the result of the proper doDescribeResource() handler call.
3497 //!
3498 int doDescribeResource(ncMetadata * pMeta, char *resourceType, ncResource ** outRes)
3499 {
3500 int ret = EUCA_OK;
3501
3502 if (init())
3503 return (EUCA_ERROR);
3504
3505 updateServiceStateInfo(pMeta, TRUE);
3506
3507 if (nc_state.H->doDescribeResource)
3508 ret = nc_state.H->doDescribeResource(&nc_state, pMeta, resourceType, outRes);
3509 else
3510 ret = nc_state.D->doDescribeResource(&nc_state, pMeta, resourceType, outRes);
3511
3512 return ret;
3513 }
3514
3515 //!
3516 //! Starts the network process.
3517 //!
3518 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3519 //! @param[in] uuid a string containing the user unique identifier (UNUSED)
3520 //! @param[in] remoteHosts the list of remote hosts (UNUSED)
3521 //! @param[in] remoteHostsLen the number of hosts in the remoteHosts list (UNUSED)
3522 //! @param[in] port the port number to use for the network (UNUSED)
3523 //! @param[in] vlan the network vlan to use.
3524 //!
3525 //! @return EUCA_ERROR on failure or the result of the proper doStartNetwork() handler call.
3526 //!
3527 int doStartNetwork(ncMetadata * pMeta, char *uuid, char **remoteHosts, int remoteHostsLen, int port, int vlan)
3528 {
3529 int ret = EUCA_OK;
3530
3531 if (init())
3532 return (EUCA_ERROR);
3533
3534 LOGINFO("starting network (remoteHostsLen=%d port=%d vlan=%d)\n", remoteHostsLen, port, vlan);
3535 LOGDEBUG("invoked (remoteHostsLen=%d port=%d vlan=%d)\n", remoteHostsLen, port, vlan);
3536
3537 if (nc_state.H->doStartNetwork)
3538 ret = nc_state.H->doStartNetwork(&nc_state, pMeta, uuid, remoteHosts, remoteHostsLen, port, vlan);
3539 else
3540 ret = nc_state.D->doStartNetwork(&nc_state, pMeta, uuid, remoteHosts, remoteHostsLen, port, vlan);
3541
3542 return ret;
3543 }
3544
3545 //!
3546 //! Attach a given volume to an instance.
3547 //!
3548 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3549 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
3550 //! @param[in] volumeId the volume identifier string (vol-XXXXXXXX)
3551 //! @param[in] remoteDev the target device name
3552 //! @param[in] localDev the local device name
3553 //!
3554 //! @return EUCA_ERROR on failure or the result of the proper doAttachVolume() handler call.
3555 //!
3556 int doAttachVolume(ncMetadata * pMeta, char *instanceId, char *volumeId, char *remoteDev, char *localDev)
3557 {
3558 int ret = EUCA_OK;
3559
3560 if (init())
3561 return (EUCA_ERROR);
3562 DISABLED_CHECK;
3563
3564 LOGINFO("[%s][%s] attaching volume\n", instanceId, volumeId);
3565 LOGDEBUG("[%s][%s] volume attaching (remoteDev=%s localDev=%s)\n", instanceId, volumeId, remoteDev, localDev);
3566
3567 if (nc_state.H->doAttachVolume)
3568 ret = nc_state.H->doAttachVolume(&nc_state, pMeta, instanceId, volumeId, remoteDev, localDev);
3569 else
3570 ret = nc_state.D->doAttachVolume(&nc_state, pMeta, instanceId, volumeId, remoteDev, localDev);
3571
3572 return ret;
3573 }
3574
3575 //!
3576 //! Detach a given volume from an instance.
3577 //!
3578 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3579 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
3580 //! @param[in] volumeId the volume identifier string (vol-XXXXXXXX)
3581 //! @param[in] attachmentToken the target device name
3582 //! @param[in] localDev the local device name
3583 //! @param[in] force if set to 1, this will force the volume to detach
3584 //! @param[in] grab_inst_sem if set to 1, will require the usage of the instance semaphore
3585 //!
3586 //! @return EUCA_ERROR on failure or the result of the proper doDetachVolume() handler call.
3587 //!
3588 int doDetachVolume(ncMetadata * pMeta, char *instanceId, char *volumeId, char *attachmentToken, char *localDev, int force)
3589 {
3590 int ret = EUCA_OK;
3591
3592 if (init())
3593 return (EUCA_ERROR);
3594 DISABLED_CHECK;
3595
3596 LOGINFO("[%s][%s] detaching volume\n", instanceId, volumeId);
3597 LOGDEBUG("[%s][%s] volume detaching (localDev=%s force=%d)\n", instanceId, volumeId, localDev, force);
3598
3599 if (nc_state.H->doDetachVolume)
3600 ret = nc_state.H->doDetachVolume(&nc_state, pMeta, instanceId, volumeId, attachmentToken, localDev, force);
3601 else
3602 ret = nc_state.D->doDetachVolume(&nc_state, pMeta, instanceId, volumeId, attachmentToken, localDev, force);
3603
3604 return ret;
3605 }
3606
3607 //!
3608 //! Attach a given network interface to an instance (VPC mode only)
3609 //!
3610 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3611 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
3612 //! @param[in] netConfig the pointer to netConfig structure
3613 //!
3614 //! @return EUCA_ERROR on failure or the result of the proper doAttachNetworkInterface() handler call.
3615 //!
3616 int doAttachNetworkInterface(ncMetadata * pMeta, char *instanceId, netConfig *netCfg)
3617 {
3618 int ret = EUCA_OK;
3619
3620 if (init())
3621 return (EUCA_ERROR);
3622 DISABLED_CHECK;
3623
3624 LOGINFO("[%s][%s] attaching network interface\n", instanceId, netCfg->interfaceId);
3625 LOGDEBUG("[%s][%s] network interface attaching (vlan=%d networkIndex=%d privateMac=%s publicIp=%s privateIp=%s device=%d attachmentId=%s)\n",
3626 instanceId, netCfg->interfaceId, netCfg->vlan, netCfg->networkIndex, netCfg->privateMac, netCfg->publicIp,
3627 netCfg->privateIp, netCfg->device, netCfg->attachmentId);
3628
3629 if (nc_state.H->doAttachNetworkInterface)
3630 ret = nc_state.H->doAttachNetworkInterface(&nc_state, pMeta, instanceId, netCfg);
3631 else
3632 ret = nc_state.D->doAttachNetworkInterface(&nc_state, pMeta, instanceId, netCfg);
3633
3634 return ret;
3635 }
3636
3637 //!
3638 //! Detach a given network interface from an instance (VPC mode only)
3639 //!
3640 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3641 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
3642 //! @param[in] attachmentId the attachment ID string (eni-attach-XXXXXXXX)
3643 //! @param[in] force if set to 1, this will force the network interface to detach
3644 //!
3645 //! @return EUCA_ERROR on failure or the result of the proper doDetachNetworkInterface() handler call.
3646 //!
3647 int doDetachNetworkInterface(ncMetadata * pMeta, char *instanceId, char *attachmentId, int force)
3648 {
3649 int ret = EUCA_OK;
3650
3651 if (init())
3652 return (EUCA_ERROR);
3653 DISABLED_CHECK;
3654
3655 LOGINFO("[%s][%s] detaching network interface\n", instanceId, attachmentId);
3656
3657 if (nc_state.H->doDetachNetworkInterface)
3658 ret = nc_state.H->doDetachNetworkInterface(&nc_state, pMeta, instanceId, attachmentId, force);
3659 else
3660 ret = nc_state.D->doDetachNetworkInterface(&nc_state, pMeta, instanceId, attachmentId, force);
3661
3662 return ret;
3663 }
3664
3665 //!
3666 //! Handles the bundling instance request.
3667 //!
3668 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3669 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
3670 //! @param[in] bucketName the bucket name string to which the bundle will be saved
3671 //! @param[in] filePrefix the prefix name string of the bundle
3672 //! @param[in] objectStorageURL the objectstorage URL address string
3673 //! @param[in] userPublicKey the public key string
3674 //! @param[in] S3Policy the S3 engine policy
3675 //! @param[in] S3PolicySig the S3 engine policy signature
3676 //! @param[in] architecture image/instance architecture
3677 //!
3678 //! @return EUCA_ERROR on failure or the result of the proper doBundleInstance() handler call.
3679 //!
3680 int doBundleInstance(ncMetadata * pMeta, char *instanceId, char *bucketName, char *filePrefix, char *objectStorageURL, char *userPublicKey, char *S3Policy, char *S3PolicySig,
3681 char *architecture)
3682 {
3683 int ret = EUCA_OK;
3684
3685 if (init())
3686 return (EUCA_ERROR);
3687 DISABLED_CHECK;
3688
3689 LOGINFO("[%s] starting instance bundling into bucket %s\n", instanceId, bucketName);
3690 LOGDEBUG("[%s] bundling parameters: bucketName=%s filePrefix=%s objectStorageURL=%s userPublicKey=%s S3Policy=%s, S3PolicySig=%s, architecture=%s\n",
3691 instanceId, bucketName, filePrefix, objectStorageURL, userPublicKey, S3Policy, S3PolicySig, architecture);
3692
3693 if (nc_state.H->doBundleInstance)
3694 ret = nc_state.H->doBundleInstance(&nc_state, pMeta, instanceId, bucketName, filePrefix, objectStorageURL, userPublicKey, S3Policy, S3PolicySig, architecture);
3695 else
3696 ret = nc_state.D->doBundleInstance(&nc_state, pMeta, instanceId, bucketName, filePrefix, objectStorageURL, userPublicKey, S3Policy, S3PolicySig, architecture);
3697
3698 return ret;
3699 }
3700
3701 //!
3702 //! Handles the bundle restart request.
3703 //!
3704 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3705 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
3706 //!
3707 //! @return EUCA_ERROR on failure or the result of the proper doBundleRestartInstance() handler call.
3708 //!
3709 int doBundleRestartInstance(ncMetadata * pMeta, char *instanceId)
3710 {
3711 if (init())
3712 return (EUCA_ERROR);
3713 DISABLED_CHECK;
3714
3715 LOGINFO("[%s] restarting bundling instance\n", instanceId);
3716 if (nc_state.H->doBundleRestartInstance)
3717 return (nc_state.H->doBundleRestartInstance(&nc_state, pMeta, instanceId));
3718 return (nc_state.D->doBundleRestartInstance(&nc_state, pMeta, instanceId));
3719 }
3720
3721 //!
3722 //! Handles the cancel bundle task request.
3723 //!
3724 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3725 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
3726 //!
3727 //! @return EUCA_ERROR on failure or the result of the proper doCancelBundleTask() handler call.
3728 //!
3729 int doCancelBundleTask(ncMetadata * pMeta, char *instanceId)
3730 {
3731 int ret = EUCA_OK;
3732
3733 if (init())
3734 return (EUCA_ERROR);
3735 DISABLED_CHECK;
3736
3737 LOGINFO("[%s] canceling bundling instance\n", instanceId);
3738
3739 if (nc_state.H->doCancelBundleTask)
3740 ret = nc_state.H->doCancelBundleTask(&nc_state, pMeta, instanceId);
3741 else
3742 ret = nc_state.D->doCancelBundleTask(&nc_state, pMeta, instanceId);
3743
3744 return ret;
3745 }
3746
3747 //!
3748 //! Handles the describe bundle tasks request.
3749 //!
3750 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3751 //! @param[in] instIds a list of instance identifier string
3752 //! @param[in] instIdsLen the number of instance identifiers in the instIds list
3753 //! @param[out] outBundleTasks a pointer to the created bundle tasks list
3754 //! @param[out] outBundleTasksLen the number of bundle tasks in the outBundleTasks list
3755 //!
3756 //! @return EUCA_ERROR on failure or the result of the proper doDescribeBundleTasks() handler call.
3757 //!
3758 int doDescribeBundleTasks(ncMetadata * pMeta, char **instIds, int instIdsLen, bundleTask *** outBundleTasks, int *outBundleTasksLen)
3759 {
3760 int ret = EUCA_OK;
3761
3762 if (init())
3763 return (EUCA_ERROR);
3764 DISABLED_CHECK;
3765
3766 LOGINFO("describing bundle tasks (for %d instances)\n", instIdsLen);
3767
3768 if (nc_state.H->doDescribeBundleTasks)
3769 ret = nc_state.H->doDescribeBundleTasks(&nc_state, pMeta, instIds, instIdsLen, outBundleTasks, outBundleTasksLen);
3770 else
3771 ret = nc_state.D->doDescribeBundleTasks(&nc_state, pMeta, instIds, instIdsLen, outBundleTasks, outBundleTasksLen);
3772
3773 return ret;
3774 }
3775
3776 //!
3777 //! Handles the image creation request.
3778 //!
3779 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3780 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
3781 //! @param[in] volumeId the volume identifier string (vol-XXXXXXXX)
3782 //! @param[in] remoteDev the remote device name
3783 //!
3784 //! @return EUCA_ERROR on failure or the result of the proper doCreateImage() handler call.
3785 //!
3786 int doCreateImage(ncMetadata * pMeta, char *instanceId, char *volumeId, char *remoteDev)
3787 {
3788 int ret = EUCA_OK;
3789
3790 if (init())
3791 return (EUCA_ERROR);
3792 DISABLED_CHECK;
3793
3794 LOGINFO("[%s][%s] creating image\n", instanceId, volumeId);
3795
3796 if (nc_state.H->doCreateImage)
3797 ret = nc_state.H->doCreateImage(&nc_state, pMeta, instanceId, volumeId, remoteDev);
3798 else
3799 ret = nc_state.D->doCreateImage(&nc_state, pMeta, instanceId, volumeId, remoteDev);
3800
3801 return ret;
3802 }
3803
3804 //!
3805 //! Handles the describe sensors request.
3806 //!
3807 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3808 //! @param[in] historySize the size of the data history to retrieve
3809 //! @param[in] collectionIntervalTimeMs the data collection interval in milliseconds
3810 //! @param[in] instIds the list of instance identifiers string
3811 //! @param[in] instIdsLen the number of instance identifiers in the instIds list
3812 //! @param[in] sensorIds a list of sensor identifiers string
3813 //! @param[in] sensorIdsLen the number of sensor identifiers string in the sensorIds list
3814 //! @param[out] outResources a list of sensor resources created by this request
3815 //! @param[out] outResourcesLen the number of sensor resources contained in the outResources list
3816 //!
3817 //! @return EUCA_ERROR on failure or the result of the proper doDescribeSensors() handler call.
3818 //!
3819 int doDescribeSensors(ncMetadata * pMeta, int historySize, long long collectionIntervalTimeMs, char **instIds, int instIdsLen, char **sensorIds,
3820 int sensorIdsLen, sensorResource *** outResources, int *outResourcesLen)
3821 {
3822 int ret = EUCA_OK;
3823
3824 if (init())
3825 return (EUCA_ERROR);
3826
3827 LOGDEBUG("invoked (instIdsLen=%d sensorIdsLen=%d)\n", instIdsLen, sensorIdsLen);
3828
3829 if (nc_state.H->doDescribeSensors) {
3830 ret = nc_state.H->doDescribeSensors(&nc_state, pMeta, historySize, collectionIntervalTimeMs, instIds, instIdsLen, sensorIds, sensorIdsLen, outResources, outResourcesLen);
3831 } else {
3832 ret = nc_state.D->doDescribeSensors(&nc_state, pMeta, historySize, collectionIntervalTimeMs, instIds, instIdsLen, sensorIds, sensorIdsLen, outResources, outResourcesLen);
3833 }
3834
3835 return ret;
3836 }
3837
3838 //!
3839 //! Handles the modify node request.
3840 //!
3841 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3842 //! @param[in] stateName
3843 //!
3844 //! @return EUCA_OK on success or EUCA_ERROR on failure
3845 //!
3846 //! TODO: doxygen
3847 int doModifyNode(ncMetadata * pMeta, char *stateName)
3848 {
3849 int ret = EUCA_OK;
3850
3851 if (init())
3852 return (EUCA_ERROR);
3853
3854 LOGINFO("modifying node\n");
3855 LOGDEBUG("invoked (stateName=%s)\n", stateName);
3856
3857 if (nc_state.H->doModifyNode) {
3858 ret = nc_state.H->doModifyNode(&nc_state, pMeta, stateName);
3859 } else {
3860 ret = nc_state.D->doModifyNode(&nc_state, pMeta, stateName);
3861 }
3862
3863 return ret;
3864 }
3865
3866 //!
3867 //! Handles the instance migration request.
3868 //!
3869 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3870 //! @param[in] instances metadata for the instance to migrate to destination
3871 //! @param[in] instancesLen number of instances in the instance list
3872 //! @param[in] action IP of the destination Node Controller
3873 //! @param[in] credentials credentials that enable the migration
3874 //! @param[in] resourceLocations ID=URL list of self-signed URLs (only relevant for 'prepare' on source node)
3875 //! @param[in] resourceLocationsLen number of URLs in the list (only relevant for 'prepare' on source node)
3876 //!
3877 //! @return EUCA_OK on sucess or EUCA_ERROR on failure
3878 //!
3879 //! TODO: doxygen
3880 //!
3881 int doMigrateInstances(ncMetadata * pMeta, ncInstance ** instances, int instancesLen, char *action, char *credentials, char ** resourceLocations, int resourceLocationsLen)
3882 {
3883 int ret = EUCA_OK;
3884
3885 if (init())
3886 return (EUCA_ERROR);
3887
3888 LOGINFO("migrating %d instances\n", instancesLen);
3889 LOGTRACE("invoked\n");
3890
3891 LOGDEBUG("verifying %d instance[s] for migration...\n", instancesLen);
3892 for (int i = 0; i < instancesLen; i++) {
3893 LOGDEBUG("verifying instance # %d...\n", i);
3894 if (instances[i]) {
3895 LOGDEBUG("invoked (action=%s instance[%d].{id=%s src=%s dst=%s) creds=%s\n",
3896 action, i, instances[i]->instanceId, instances[i]->migration_src, instances[i]->migration_dst, (credentials == NULL) ? "UNSET" : "present");
3897 if (!strcmp(instances[i]->migration_src, instances[i]->migration_dst)) {
3898 if (strcmp(action, "rollback")) {
3899 // Anything but rollback.
3900 LOGERROR("[%s] rejecting proposed SAME-NODE migration from %s to %s\n", instances[i]->instanceId, instances[i]->migration_src, instances[i]->migration_dst);
3901 return (EUCA_UNSUPPORTED_ERROR);
3902 } else {
3903 // Ignore the fact src & dst are the same if a rollback--it doesn't matter.
3904 LOGDEBUG("[%s] ignoring apparent same-node migration hosts (%s > %s) for action '%s'\n", instances[i]->instanceId, instances[i]->migration_src,
3905 instances[i]->migration_dst, action);
3906 }
3907 }
3908 }
3909 }
3910
3911 if (nc_state.H->doMigrateInstances) {
3912 ret = nc_state.H->doMigrateInstances(&nc_state, pMeta, instances, instancesLen, action, credentials, resourceLocations, resourceLocationsLen);
3913 } else {
3914 ret = nc_state.D->doMigrateInstances(&nc_state, pMeta, instances, instancesLen, action, credentials, resourceLocations, resourceLocationsLen);
3915 }
3916
3917 LOGTRACE("done\n");
3918
3919 return ret;
3920 }
3921
3922 //!
3923 //! Handles the instance start request
3924 //!
3925 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3926 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
3927 //!
3928 //! @return EUCA_ERROR on failure or the result of the actual doStartInstance() call
3929 //!
3930 int doStartInstance(ncMetadata * pMeta, char *instanceId)
3931 {
3932 int ret = EUCA_OK;
3933
3934 if (init())
3935 return (EUCA_ERROR);
3936 DISABLED_CHECK;
3937
3938 LOGINFO("[%s] instance start requested\n", instanceId);
3939 if (nc_state.H->doStartInstance)
3940 ret = nc_state.H->doStartInstance(&nc_state, pMeta, instanceId);
3941 else
3942 ret = nc_state.D->doStartInstance(&nc_state, pMeta, instanceId);
3943
3944 return ret;
3945 }
3946
3947 //!
3948 //! Handles the instance stop request
3949 //!
3950 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
3951 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
3952 //!
3953 //! @return EUCA_ERROR on failure or the result of the actual doStopInstance() call
3954 //!
3955 int doStopInstance(ncMetadata * pMeta, char *instanceId)
3956 {
3957 int ret = EUCA_OK;
3958
3959 if (init())
3960 return (EUCA_ERROR);
3961 DISABLED_CHECK;
3962
3963 LOGINFO("[%s] instance shutdown requested\n", instanceId);
3964 if (nc_state.H->doStopInstance)
3965 ret = nc_state.H->doStopInstance(&nc_state, pMeta, instanceId);
3966 else
3967 ret = nc_state.D->doStopInstance(&nc_state, pMeta, instanceId);
3968
3969 return ret;
3970 }
3971
3972 //!
3973 //! Finds an instance in the global instance list
3974 //!
3975 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
3976 //!
3977 //! @return a pointer to the instance structure if found. Otherwise NULL is returned.
3978 //!
3979 ncInstance *find_global_instance(const char *instanceId)
3980 {
3981 return NULL;
3982 }
3983
3984 //!
3985 //! Predicate determining whether the instance is a migration destination
3986 //!
3987 //! @param[in] instance pointer to the instance struct
3988 //!
3989 //! @return true or false
3990 //!
3991 int is_migration_dst(const ncInstance * instance)
3992 {
3993 if (instance->migration_state != NOT_MIGRATING && !strcmp(instance->migration_dst, nc_state.ip))
3994 return TRUE;
3995 return FALSE;
3996 }
3997
3998 //!
3999 //! Predicate determining whether the instance is a migration source
4000 //!
4001 //! @param[in] instance pointer to the instance struct
4002 //!
4003 //! @return true or false
4004 //!
4005 int is_migration_src(const ncInstance * instance)
4006 {
4007 if (instance->migration_state != NOT_MIGRATING && !strcmp(instance->migration_src, nc_state.ip))
4008 return TRUE;
4009 return FALSE;
4010 }
4011
4012 //!
4013 //! Rollback a pending migration request on a source NC
4014 //!
4015 //! Currently only safe to call under the protection of inst_sem, such as from the migrating_thread().
4016 //!
4017 //! @param[in] instance pointer to the instance struct
4018 //!
4019 //! @return true or false
4020 //!
4021 int migration_rollback(ncInstance * instance)
4022 {
4023 // TO-DO: duplicated code in two parts of conditional. Refactor.
4024 if (is_migration_src(instance)) {
4025 LOGINFO("[%s] starting migration rollback of instance on source %s\n", instance->instanceId, instance->migration_src);
4026 instance->migration_state = NOT_MIGRATING;
4027 // Not zeroing out the src & dst for debugging purposes:
4028 // There's a problem with refresh_instances_info() not finding domains
4029 // and eventually shutting them down.
4030 //bzero(instance->migration_src, HOSTNAME_SIZE);
4031 //bzero(instance->migration_dst, HOSTNAME_SIZE);
4032 bzero(instance->migration_credentials, CREDENTIAL_SIZE);
4033 instance->migrationTime = 0;
4034 save_instance_struct(instance);
4035 copy_instances();
4036 LOGINFO("[%s] migration source rolled back\n", instance->instanceId);
4037 return TRUE;
4038 } else if (is_migration_dst(instance)) {
4039 // TO-DO: Do I want to protect this functionality by requiring something like a 'force' option be passed to this function?
4040 LOGWARN("[%s] resetting migration state '%s' to 'none' for an already-migrated (%s < %s) instance. Something went wrong somewhere...\n",
4041 instance->instanceId, migration_state_names[instance->migration_state], instance->migration_dst, instance->migration_src);
4042 instance->migration_state = NOT_MIGRATING;
4043 bzero(instance->migration_src, HOSTNAME_SIZE);
4044 bzero(instance->migration_dst, HOSTNAME_SIZE);
4045 bzero(instance->migration_credentials, CREDENTIAL_SIZE);
4046 instance->migrationTime = 0;
4047 save_instance_struct(instance);
4048 copy_instances();
4049 LOGINFO("[%s] migration state reset.\n", instance->instanceId);
4050 return TRUE;
4051 }
4052 // Neither source nor destination node?
4053 LOGERROR("[%s] request to roll back migration of instance on non-source/destination node %s\n", instance->instanceId, nc_state.ip);
4054 // We've seen this case caused by a bug in the migration code--one that left the migration_dst blank in the instance struct.
4055 // So if this happens, we'll assume the rollback request was valid, and we'll reset its state and time so that it will get cleaned up--rather than stuck!
4056 instance->migration_state = NOT_MIGRATING;
4057 instance->migrationTime = 0;
4058 save_instance_struct(instance);
4059 copy_instances();
4060 return FALSE;
4061 }
4062
4063
4064 // function that performs any local checks to determine that networking is in place enough to boot instance
4065 int instance_network_gate(ncInstance *instance, time_t timeout_seconds) {
4066 char *filebuf=NULL, path[EUCA_MAX_PATH], needle[EUCA_MAX_PATH];
4067 time_t max_time=0;
4068 int count = 1;
4069
4070 if (timeout_seconds == 0) {
4071 LOGDEBUG("skipping network gate (NC_BOOTING_ENVWAIT_THRESHOLD has been manually set to 0 seconds in eucalyptus.conf)\n");
4072 return(0);
4073 }
4074
4075 if (!instance || timeout_seconds < 0 || timeout_seconds > 3600) {
4076 LOGERROR("invalid input params\n");
4077 return(0);
4078 }
4079
4080 max_time = time(NULL) + timeout_seconds;
4081
4082 LOGDEBUG("[%s] waiting at most %d seconds for required instance networking to exist before booting instance\n", SP(instance->instanceId), (int)timeout_seconds);
4083 while(time(NULL) < max_time) {
4084
4085 LOGTRACE("[%s] instance state code %d\n", SP(instance->instanceId), instance->state);
4086
4087 if (instance == NULL) {
4088 LOGWARN("[%s] instance no longer valid - aborting instance gate\n", SP(instance->instanceId));
4089 return(0);
4090 }
4091
4092 LOGTRACE("[%s] instance state code new=%d orig=%d\n", SP(instance->instanceId), instance->state, instance->state);
4093
4094 if (instance->state != STAGING) {
4095 LOGINFO("[%s] returning from gate since instance is no longer STAGING\n", SP(instance->instanceId));
4096 return(0);
4097 }
4098
4099 if (!strcmp(nc_state.pEucaNet->sMode, NETMODE_EDGE)) {
4100 // check to ensure that dhcpd config contains the mac for the instance
4101 snprintf(path, EUCA_MAX_PATH, "%s/var/run/eucalyptus/net/euca-dhcp.conf", nc_state.home);
4102 snprintf(needle, EUCA_MAX_PATH, "node-%s ", instance->ncnet.privateIp);
4103 filebuf = file2str(path);
4104 if (filebuf && strstr(filebuf, needle)) {
4105 LOGDEBUG("[%s] local dhcpd config contains required instance record, continuing\n", SP(instance->instanceId));
4106 EUCA_FREE(filebuf);
4107 return(0);
4108 } else {
4109 LOGTRACE("[%s] local dhcpd config does not (yet) contain required instance record, waiting...(%d seconds remaining)\n", SP(instance->instanceId), (int)(max_time - time(NULL)));
4110 }
4111 EUCA_FREE(filebuf);
4112 } else if (!strcmp(nc_state.pEucaNet->sMode, NETMODE_VPCMIDO)) {
4113 char *fileBuf = NULL, *vers=NULL, *appvers=NULL, *startBuf=NULL;
4114 char xmlfile[EUCA_MAX_PATH] = "";
4115
4116 snprintf(xmlfile, EUCA_MAX_PATH, "%s/var/run/eucalyptus/global_network_info.xml", nc_state.home);
4117
4118 fileBuf = file2str(xmlfile);
4119 if (fileBuf) startBuf = strstr(fileBuf, "network-data");
4120
4121 if (startBuf) {
4122 vers = euca_gettok(startBuf, "version=\"");
4123 appvers = euca_gettok(startBuf, "applied-version=\"");
4124
4125 if (vers && appvers && !strcmp(vers, appvers)) {
4126 LOGDEBUG("[%s] version (%s) and applied version (%s) match\n", instance->instanceId, vers, appvers);
4127
4128 if (strstr(fileBuf, instance->instanceId)) {
4129 LOGDEBUG("[%s] global network config contains required instance record\n", SP(instance->instanceId));
4130 EUCA_FREE(vers);
4131 EUCA_FREE(appvers);
4132 EUCA_FREE(fileBuf);
4133 return(0);
4134 } else {
4135 LOGTRACE("[%s] global network config does not (yet) contain required instance record, waiting...(%d seconds remaining)\n", SP(instance->instanceId), (int)(max_time - time(NULL)));
4136 }
4137 } else {
4138 LOGDEBUG("[%s] version (%s) and applied version (%s) do not match (yet), waiting\n", instance->instanceId, vers, appvers);
4139 }
4140
4141 EUCA_FREE(vers);
4142 EUCA_FREE(appvers);
4143 } else {
4144 LOGDEBUG("[%s] cannot read valid global network view file '%s' (yet), waiting\n", instance->instanceId, xmlfile);
4145 }
4146 EUCA_FREE(fileBuf);
4147 } else {
4148 return(0);
4149 }
4150
4151 count++;
4152 sleep(1);
4153 }
4154
4155 LOGERROR("[%s] timed out waiting for instance network information to appear before booting instance\n", SP(instance->instanceId));
4156 return(1);
4157 }
4158
4159 /**
4160 * Removes instance NIC specified in the argument from bridge.
4161 * @param nc [in] pointer to nc_state data structure.
4162 * @param instance [in] pointer to ncInstance data structure of the instance of interest.
4163 * @param iface [in] pointer to string with the interface name of interest.
4164 * @return 0 on success. 1 otherwise.
4165 */
4166 int bridge_interface_remove(struct nc_state_t *nc, ncInstance *instance, char *iface) {
4167 char cmd[EUCA_MAX_PATH], obuf[256], ebuf[256], sPath[EUCA_MAX_PATH];
4168 int rc = 0;
4169
4170 if (!nc || !instance || !iface) {
4171 LOGWARN("Invalid argument: cannot remove NULL bridge interface.\n");
4172 return (1);
4173 }
4174 LOGTRACE("checking if VM interface is attached to a bridge (%s/%s)\n", iface, instance->params.guestNicDeviceName);
4175
4176 // If this device does not have a 'brport' path, this isn't a bridge device
4177 snprintf(sPath, EUCA_MAX_PATH, "/sys/class/net/%s/brport/", iface);
4178 if (!check_directory(sPath)) {
4179 LOGTRACE("VM interface is attached to a bridge (%s/%s)\n", iface, instance->params.guestNicDeviceName);
4180 snprintf(cmd, EUCA_MAX_PATH, "%s brctl delif %s %s", nc->rootwrap_cmd_path, instance->params.guestNicDeviceName, iface);
4181 rc = timeshell(cmd, obuf, ebuf, 256, 10);
4182 if (rc) {
4183 LOGERROR("unable to remove instance interface from bridge: instance will not be able to connect to midonet (will not connect to network): check bridge/libvirt/kvm health\n");
4184 LOGINFO("Failed to remove %s from %s\n", iface, instance->params.guestNicDeviceName);
4185 } else {
4186 LOGTRACE("VM interface removed from bridge (%s/%s)\n", iface, instance->params.guestNicDeviceName);
4187 }
4188 }
4189 return (rc);
4190 }
4191
4192 /**
4193 * Removes instance NIC(s) from bridge.
4194 * @param nc [in] pointer to nc_state data structure.
4195 * @param instance [in] pointer to ncInstance data structure of the instance of interest.
4196 * @return 0 on success. Positive integer otherwise.
4197 */
4198 int bridge_instance_interfaces_remove(struct nc_state_t *nc, ncInstance *instance) {
4199 char iface[16];
4200 int rc = 0;
4201
4202 if (!nc || !instance) {
4203 LOGWARN("Invalid argument: cannot remove NULL bridge interface.\n");
4204 return (1);
4205 }
4206 snprintf(iface, 16, "vn_%s", instance->instanceId);
4207 rc += bridge_interface_remove(nc, instance, iface);
4208
4209 // Repeat process for secondary interfaces as well
4210 for (int i = 0; i < EUCA_MAX_NICS; i++) {
4211 if (strlen(instance->secNetCfgs[i].interfaceId) == 0)
4212 continue;
4213
4214 snprintf(iface, 16, "vn_%s", instance->secNetCfgs[i].interfaceId);
4215 rc += bridge_interface_remove(nc, instance, iface);
4216 }
4217
4218 return (rc);
4219 }
4220
4221 /**
4222 * Enables hairpin mode of a linux bridge port (instance interface) - address EUCA-12608
4223 * @param nc [in] pointer to nc_state data structure.
4224 * @param instance [in] pointer to ncInstance data structure of the instance of interest.
4225 * @param iface [in] pointer to string with the interface name of interest.
4226 * @return 0 on success. 1 otherwise.
4227 */
4228 int bridge_interface_set_hairpin(struct nc_state_t *nc, ncInstance *instance, char *iface) {
4229 char cmd[EUCA_MAX_PATH], obuf[256], ebuf[256], sPath[EUCA_MAX_PATH];
4230 int rc = 0;
4231
4232 if (!nc || !instance || !iface) {
4233 LOGWARN("Invalid argument: cannot set hairpin on NULL bridge interface.\n");
4234 return (1);
4235 }
4236
4237 // Make sure that this is a bridge port and that hairpin mode is supported
4238 // RHEL7 bridge port has bpdu_guard parameter (RHEL6 does not)
4239 snprintf(sPath, EUCA_MAX_PATH, "/sys/class/net/%s/brport/bpdu_guard", iface);
4240 if (!check_file(sPath)) {
4241 snprintf(cmd, EUCA_MAX_PATH, "%s brctl hairpin %s %s on", nc->rootwrap_cmd_path, instance->params.guestNicDeviceName, iface);
4242 rc = timeshell(cmd, obuf, ebuf, 256, 10);
4243 if (rc) {
4244 LOGERROR("Unable to set hairpin mode for %s port on %s\n", iface, instance->params.guestNicDeviceName);
4245 LOGINFO("%s may suffer limited connectivity (EUCA-12608)\n", instance->instanceId);
4246 } else {
4247 LOGTRACE("%s/%s hairpin mode is on\n", iface, instance->params.guestNicDeviceName);
4248 }
4249 }
4250 return (rc);
4251 }