"Fossies" - the Fresh Open Source Software Archive

Member "eucalyptus-4.4.2/node/handlers.c" (4 Aug 2017, 179099 Bytes) of package /linux/misc/eucalyptus-4.4.2.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "handlers.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 4.4.1_vs_4.4.2.

    1 // -*- mode: C; c-basic-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
    2 // vim: set softtabstop=4 shiftwidth=4 tabstop=4 expandtab:
    3 
    4 /*************************************************************************
    5  * (c) Copyright 2009-2017 Hewlett Packard Enterprise Development Company LP 
    6  *
    7  * This program is free software: you can redistribute it and/or modify
    8  * it under the terms of the GNU General Public License as published by
    9  * the Free Software Foundation; version 3 of the License.
   10  *
   11  * This program is distributed in the hope that it will be useful,
   12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   14  * GNU General Public License for more details.
   15  *
   16  * You should have received a copy of the GNU General Public License
   17  * along with this program.  If not, see http://www.gnu.org/licenses/.
   18  *
   19  * Please contact Eucalyptus Systems, Inc., 6755 Hollister Ave., Goleta
   20  * CA 93117, USA or visit http://www.eucalyptus.com/licenses/ if you need
   21  * additional information or have any questions.
   22  *
   23  * This file may incorporate work covered under the following copyright
   24  * and permission notice:
   25  *
   26  *   Software License Agreement (BSD License)
   27  *
   28  *   Copyright (c) 2008, Regents of the University of California
   29  *   All rights reserved.
   30  *
   31  *   Redistribution and use of this software in source and binary forms,
   32  *   with or without modification, are permitted provided that the
   33  *   following conditions are met:
   34  *
   35  *     Redistributions of source code must retain the above copyright
   36  *     notice, this list of conditions and the following disclaimer.
   37  *
   38  *     Redistributions in binary form must reproduce the above copyright
   39  *     notice, this list of conditions and the following disclaimer
   40  *     in the documentation and/or other materials provided with the
   41  *     distribution.
   42  *
   43  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   44  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   45  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   46  *   FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   47  *   COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   48  *   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   49  *   BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   50  *   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   51  *   CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   52  *   LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   53  *   ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   54  *   POSSIBILITY OF SUCH DAMAGE. USERS OF THIS SOFTWARE ACKNOWLEDGE
   55  *   THE POSSIBLE PRESENCE OF OTHER OPEN SOURCE LICENSED MATERIAL,
   56  *   COPYRIGHTED MATERIAL OR PATENTED MATERIAL IN THIS SOFTWARE,
   57  *   AND IF ANY SUCH MATERIAL IS DISCOVERED THE PARTY DISCOVERING
   58  *   IT MAY INFORM DR. RICH WOLSKI AT THE UNIVERSITY OF CALIFORNIA,
   59  *   SANTA BARBARA WHO WILL THEN ASCERTAIN THE MOST APPROPRIATE REMEDY,
   60  *   WHICH IN THE REGENTS' DISCRETION MAY INCLUDE, WITHOUT LIMITATION,
   61  *   REPLACEMENT OF THE CODE SO IDENTIFIED, LICENSING OF THE CODE SO
   62  *   IDENTIFIED, OR WITHDRAWAL OF THE CODE CAPABILITY TO THE EXTENT
   63  *   NEEDED TO COMPLY WITH ANY SUCH LICENSES OR RIGHTS.
   64  ************************************************************************/
   65 
   66 //!
   67 //! @file node/handlers.c
   68 //! This implements the default operation handlers supported by all hypervisors.
   69 //!
   70 
   71 /*----------------------------------------------------------------------------*\
   72  |                                                                            |
   73  |                                  INCLUDES                                  |
   74  |                                                                            |
   75 \*----------------------------------------------------------------------------*/
   76 
   77 #define _FILE_OFFSET_BITS      64      //!< so large-file support works on 32-bit systems
   78 #include <stdio.h>
   79 #include <stdlib.h>
   80 #define __USE_GNU                      /* strnlen */
   81 #include <string.h>                    /* strlen, strcpy */
   82 #include <time.h>
   83 #include <limits.h>                    /* INT_MAX */
   84 #include <sys/unistd.h>
   85 #include <sys/types.h>                 /* fork */
   86 #include <sys/wait.h>                  /* waitpid */
   87 #include <unistd.h>
   88 #include <fcntl.h>
   89 #include <assert.h>
   90 #include <sys/errno.h>
   91 #include <sys/stat.h>
   92 #include <pthread.h>
   93 #ifndef __DARWIN_UNIX03
   94 #include <sys/vfs.h>                   /* statfs */
   95 #endif /* ! __DARWIN_UNIX03 */
   96 #include <signal.h>                    /* SIGINT */
   97 #include <linux/limits.h>
   98 #include <pwd.h>                       /* getpwuid_r */
   99 #include <netdb.h>
  100 #include <sys/socket.h>
  101 #include <netinet/in.h>
  102 #include <arpa/inet.h>
  103 
  104 #include <eucalyptus.h>
  105 #include <eucalyptus-config.h>
  106 #include <ipc.h>
  107 #include <misc.h>
  108 #include <backing.h>
  109 #include <diskutil.h>
  110 #include <euca_auth.h>
  111 #include <euca_axis.h>
  112 #include <euca_network.h>
  113 #include <euca_gni.h>
  114 
  115 #include <vbr.h>
  116 #include <iscsi.h>
  117 #include <config.h>
  118 #include <fault.h>
  119 #include <log.h>
  120 #include <euca_string.h>
  121 #include <euca_system.h>
  122 
  123 #define HANDLERS_FANOUT
  124 #include "handlers.h"
  125 #include "xml.h"
  126 #include "hooks.h"
  127 #include <ebs_utils.h>
  128 #include "objectstorage.h"
  129 #include "stats.h"
  130 #include "message_sensor.h"
  131 #include "message_stats.h"
  132 #include "service_sensor.h"
  133 
  134 /*----------------------------------------------------------------------------*\
  135  |                                                                            |
  136  |                                  DEFINES                                   |
  137  |                                                                            |
  138 \*----------------------------------------------------------------------------*/
  139 
  140 #define MONITORING_PERIOD                           (5) //!< Instance state transition monitoring period in seconds.
  141 #define MAX_CREATE_TRYS                              5 //!< NOTE(review): presumably the maximum number of domain creation attempts -- users are outside this view, confirm
  142 #define CREATE_TIMEOUT_SEC                           300 //!< NOTE(review): presumably a timeout in seconds for a single creation attempt -- confirm
  143 #define LIBVIRT_TIMEOUT_SEC                          5 //!< NOTE(review): presumably a timeout in seconds for libvirt calls -- confirm
  144 #define NETWORK_GATE_TIMEOUT_SEC                     1200 //!< in seconds; provides the value of default_booting_envwait_threshold
  145 #define PER_INSTANCE_BUFFER_MB                       20 //!< by default reserve this much extra room (in MB) per instance (for kernel, ramdisk, and metadata overhead)
  146 #define SEC_PER_MB                                   ((1024 * 1024) / 512) //!< count of 512-byte units in one MB (evaluates to 2048); presumably disk sectors
  147 
  148 #define MIN_BLOBSTORE_SIZE_MB                        10 //!< even with boot-from-EBS one will need work space for kernel and ramdisk
  149 #define FS_BUFFER_PERCENT                            0.03   //!< leave 3% extra when deciding on blobstore sizes automatically (expressed as a fraction, not 0-100)
  150 #define WORK_BS_PERCENT                              0.33   //!< give a third of available space to work, the rest to cache (expressed as a fraction, not 0-100)
  151 #define MAX_CONNECTION_ERRORS                        5 //!< NOTE(review): presumably the limit compared against hypervisor_conn_errors -- confirm
  152 
  153 /*----------------------------------------------------------------------------*\
  154  |                                                                            |
  155  |                                  TYPEDEFS                                  |
  156  |                                                                            |
  157 \*----------------------------------------------------------------------------*/
  158 
  159 /*----------------------------------------------------------------------------*\
  160  |                                                                            |
  161  |                                ENUMERATIONS                                |
  162  |                                                                            |
  163 \*----------------------------------------------------------------------------*/
  164 
  165 /*----------------------------------------------------------------------------*\
  166  |                                                                            |
  167  |                                 STRUCTURES                                 |
  168  |                                                                            |
  169 \*----------------------------------------------------------------------------*/
  170 
  171 /*----------------------------------------------------------------------------*\
  172  |                                                                            |
  173  |                             EXTERNAL VARIABLES                             |
  174  |                                                                            |
  175 \*----------------------------------------------------------------------------*/
  176 
  177 /* Should preferably be handled in header file */
  178 
  179 // handler sets declared extern here; each one is collected in available_handlers[] later in this file
  180 extern struct handlers xen_libvirt_handlers;
  181 extern struct handlers kvm_libvirt_handlers;
  182 extern struct handlers default_libvirt_handlers;
  183 
  184 /*----------------------------------------------------------------------------*\
  185  |                                                                            |
  186  |                              GLOBAL VARIABLES                              |
  187  |                                                                            |
  188 \*----------------------------------------------------------------------------*/
  189 
  190 #ifndef NO_COMP
  191 const char *euca_this_component_name = "nc";    //!< Name of this component (also passed to the stats sensors during their initialization)
  192 const char *euca_client_component_name = "cc";  //!< Name of this component's client
  193 #endif /* ! NO_COMP */
  194 
  195 /* used by lower level handlers */
  196 
  197 sem *hyp_sem = NULL;                   //!< semaphore for serializing domain creation; also held around the migration-key scripts when the caller asks for it
  198 sem *inst_sem = NULL;                  //!< guarding access to global instance structs
  199 sem *inst_copy_sem = NULL;             //!< NOTE(review): presumably guards global_instances_copy -- confirm against the users of this semaphore
  200 sem *addkey_sem = NULL;                //!< NOTE(review): earlier comment was a copy of inst_sem's; purpose is not visible here -- confirm
  201 sem *loop_sem = NULL;                  //!< created in diskutils.c for serializing 'losetup' invocations
  202 sem *log_sem = NULL;                   //!< used by log.c
  203 sem *service_state_sem = NULL;         //!< Used to guard service state (i.e. topology) updates and the reads done by get_service_url()
  204 sem *stats_sem = NULL;                 //!< Used to guard the internal message stats data; taken and released through nc_lock_stats() / nc_unlock_stats()
  205 
  206 bunchOfInstances *global_instances = NULL;  //!< pointer to the instance list (NULL until populated)
  207 bunchOfInstances *global_instances_copy = NULL; //!< pointer to the copied instance list; NOTE(review): presumably the data protected by inst_copy_sem -- confirm
  208 
  209 const int default_staging_cleanup_threshold = 60 * 60 * 2;  //!< after this many seconds (2 hours) any STAGING domains will be cleaned up
  210 const int default_booting_cleanup_threshold = 60;   //!< after this many seconds (1 minute) any BOOTING domains will be cleaned up
  211 const int default_booting_envwait_threshold = NETWORK_GATE_TIMEOUT_SEC;   //!< after this many seconds (1200, i.e. 20 minutes) an instance will fail to boot unless network environment is ready
  212 const int default_bundling_cleanup_threshold = 60 * 60 * 2; //!< after this many seconds (2 hours) any BUNDLING domains will be cleaned up
  213 const int default_createImage_cleanup_threshold = 60 * 60 * 2;  //!< after this many seconds (2 hours) any CREATEIMAGE domains will be cleaned up
  214 const int default_teardown_state_duration = 60 * 3; //!< after this many seconds (3 minutes) in TEARDOWN state (no resources), we'll forget about the instance
  215 const int default_migration_ready_threshold = 60 * 15;  //!< after this many seconds (15 minutes) ready (and waiting) to migrate, migration will terminate and roll back
  216 
  217 struct nc_state_t nc_state = { 0 };    //!< Global NC state structure; starts out zero-initialized
  218 
  219 configEntry configKeysRestartNC[] = { //!< table terminated by a {NULL, NULL} entry; presumably {key, default value} pairs whose change needs an NC restart -- confirm
  220     {CONFIG_ENABLE_WS_SECURITY, "Y"},
  221     {"EUCALYPTUS", "/"},
  222     {NULL, NULL},
  223 };
  224 
  225 configEntry configKeysNoRestartNC[] = { //!< table terminated by a {NULL, NULL} entry; presumably {key, default value} pairs that can change without an NC restart -- confirm
  226     {"LOGLEVEL", "INFO"},
  227     {"LOGROLLNUMBER", "10"},
  228     {"LOGMAXSIZE", "104857600"}, // 104857600 = 100 * 1024 * 1024
  229     {"LOGPREFIX", ""},
  230     {"LOGFACILITY", ""},
  231     {CONFIG_NC_CEPH_USER, DEFAULT_CEPH_USER},
  232     {CONFIG_NC_CEPH_KEYS, DEFAULT_CEPH_KEYRING},
  233     {CONFIG_NC_CEPH_CONF, DEFAULT_CEPH_CONF},
  234     {SENSOR_LIST_CONF_PARAM_NAME, SENSOR_LIST_CONF_PARAM_DEFAULT},
  235     {NULL, NULL},
  236 };
  237 
  238 int incoming_migrations_in_progress = 0; //!< NOTE(review): presumably the number of migrations currently arriving at this node -- updates are outside this view
  239 int outgoing_migrations_in_progress = 0; //!< NOTE(review): presumably the number of migrations currently leaving this node -- updates are outside this view
  240 
  241 /*----------------------------------------------------------------------------*\
  242  |                                                                            |
  243  |                              STATIC VARIABLES                              |
  244  |                                                                            |
  245 \*----------------------------------------------------------------------------*/
  246 
  247 #ifdef EUCA_COMPILE_TIMESTAMP
  248 static char *compile_timestamp_str = EUCA_COMPILE_TIMESTAMP; //!< taken from EUCA_COMPILE_TIMESTAMP when the build defines it
  249 #else /* ! EUCA_COMPILE_TIMESTAMP */
  250 static char *compile_timestamp_str = ""; //!< empty string when the build does not define EUCA_COMPILE_TIMESTAMP
  251 #endif /* EUCA_COMPILE_TIMESTAMP */
  252 
  253 //! a NULL-terminated array of available handlers (the default, Xen and KVM libvirt handler sets declared extern in this file)
  254 static struct handlers *available_handlers[] = {
  255     &default_libvirt_handlers,
  256     &xen_libvirt_handlers,
  257     &kvm_libvirt_handlers,
  258     NULL,
  259 };
  260 
  261 static json_object *stats_json = NULL; //!< The json object that holds all of the internal message counters; its address is what message_stats_getter() returns
  262 static int stats_sensor_interval_sec;  //!< Keeps the current value for sensor interval. Written by initialize_stats_system(), read by nc_run_stats()
  263 static int hypervisor_conn_errors = 0; //!< NOTE(review): presumably counts failed hypervisor connection attempts -- updates are outside this view
  264 
  265 /*----------------------------------------------------------------------------*\
  266  |                                                                            |
  267  |                              STATIC PROTOTYPES                             |
  268  |                                                                            |
  269 \*----------------------------------------------------------------------------*/
  270 
  271 static void *libvirt_thread(void *ptr);
  272 static void refresh_instance_info(struct nc_state_t *nc, ncInstance * instance);
  273 static void update_log_params(void);
  274 static void update_ebs_params(void);
  275 static void nc_signal_handler(int sig);
  276 static int init(void);
  277 static void updateServiceStateInfo(ncMetadata * pMeta, boolean authoritative); // copies the service lists and epoch from pMeta into nc_state
  278 static void printNCServiceStateInfo(void); // trace-logs the service lists held in nc_state
  279 static void printMsgServiceStateInfo(ncMetadata * pMeta); // trace-logs the service lists carried by pMeta
  280 
  281 //! Helpers for internal stats handling in the NC
  282 static json_object **message_stats_getter(); // returns the address of the file-local stats_json pointer
  283 static void message_stats_setter(); // only emits a trace log in the NC
  284 static int initialize_stats_system(int interval_sec); // sets up the message sensor, the service state sensor and the stats system
  285 static void *nc_run_stats(void *ignored_arg); // calls run_stats() with the configured sensor interval
  286 
  287 /*----------------------------------------------------------------------------*\
  288  |                                                                            |
  289  |                                   MACROS                                   |
  290  |                                                                            |
  291 \*----------------------------------------------------------------------------*/
  292 
  293 //! rejection of certain operations when NC is disabled: logs the enclosing function's name and makes that function return EUCA_ERROR (expands to a braced block, not do/while)
  294 #define DISABLED_CHECK                                                             \
  295 {                                                                                  \
  296     if (nc_state.is_enabled == FALSE) {                                            \
  297         LOGERROR("operation %s is not allowed when node is DISABLED\n", __func__); \
  298         return (EUCA_ERROR);                                                       \
  299     }                                                                              \
  300 }
  301 
  302 /*----------------------------------------------------------------------------*\
  303  |                                                                            |
  304  |                               IMPLEMENTATION                               |
  305  |                                                                            |
  306 \*----------------------------------------------------------------------------*/
  307 
  308 /*----------------------------------------------------------------------------*\
  309  |                                                                            |
  310  |                               IMPLEMENTATION                               |
  311  |                                                                            |
  312 \*----------------------------------------------------------------------------*/
  313 
  314 static void *nc_run_stats(void *ignored_arg)
  315 {
  316     LOGDEBUG("Starting stats subsystem execution. Will not terminate until service halts\n");
  317     if (run_stats(FALSE, stats_sensor_interval_sec, NULL) != EUCA_OK) {
  318         LOGERROR("Stats run call returned with error. Unexepcted. Should not have returned\n");
  319     }
  320     return NULL;
  321 }
  322 
  323 //! Runs a check on service and returns result in string form
  324 //! for the stats sensor
  325 static char *stats_service_check_call()
  326 {
  327     LOGTRACE("Invoking NC check function for internal stats\n");
  328     if (nc_state.is_enabled) {
  329         return SERVICE_CHECK_OK_MSG;
  330     }
  331     return SERVICE_CHECK_FAILED_MSG;
  332 }
  333 
  334 //! Gets the CC state as a string for use by the stats system
  335 static char *stats_service_state_call()
  336 {
  337     LOGTRACE("Getting NC service state for internal stats\n");
  338     if (nc_state.is_enabled) {
  339         return "ENABLED";
  340     } else {
  341         return "DISABLED";
  342     }
  343 }
  344 
  345 //! Gets the reference to the stats json object, basically a no-op for the NC
  346 static json_object **message_stats_getter()
  347 {
  348     LOGTRACE("Fetching latest message stats from shared memory\n");
  349     return &stats_json;
  350 }
  351 
  //!
  //! Setter callback for the message stats. Nothing has to be written back in
  //! the NC (as opposed to the CC), so apart from the trace log this is a no-op.
  //!
  static void message_stats_setter()
  {
      LOGTRACE("Updating latest message stats from shared memory\n");
  }
  359 
  360 void nc_lock_stats()
  361 {
  362     sem_p(stats_sem);
  363 }
  364 
  365 void nc_unlock_stats()
  366 {
  367     sem_v(stats_sem);
  368 }
  369 
  370 //! Update the message stat structure
  371 //! Wraps the message stats update with the necessary caching copies and locking
  372 int nc_update_message_stats(const char *message_name, long call_time, int msg_failed)
  373 {
  374     LOGTRACE("Updating message stats for message %s\n", message_name);
  375 
  376     nc_lock_stats();
  377     json_object **stats_state = message_stats_getter();
  378 
  379     //Update the counters
  380     update_message_stats(*stats_state, message_name, call_time, msg_failed);
  381     message_stats_setter();
  382 
  383     nc_unlock_stats();
  384     LOGTRACE("Message stats update complete\n");
  385     return EUCA_OK;
  386 }
  387 
  388 //! Provides NC-specific initializations for the stats system of
  389 //! internal service sensors (state sensors, message statistics, etc)
  390 //! @returns EUCA_OK on success, or error code on failure
  391 static int initialize_stats_system(int interval_sec)
  392 {
  393     LOGDEBUG("Initializing stats subsystem for NC\n");
  394     int ret = EUCA_OK;
  395     int stats_ttl = interval_sec + 1;
  396     stats_sensor_interval_sec = interval_sec;
  397     nc_lock_stats();
  398     {
  399         //Init the message sensor with component-specific data
  400         ret = initialize_message_sensor(euca_this_component_name, interval_sec, stats_ttl, message_stats_getter, message_stats_setter);
  401         if (ret != EUCA_OK) {
  402             LOGERROR("Error initializing internal message sensor: %d\n", ret);
  403             goto cleanup;
  404         } else {
  405             json_object **tmp = message_stats_getter();
  406             const char *tmp_out = json_object_to_json_string(*tmp);
  407             LOGINFO("Initialized internal message stats: %s\n", tmp_out);
  408 
  409         }
  410 
  411         //Init the service state sensor with component-specific data
  412         ret = initialize_service_state_sensor(euca_this_component_name, interval_sec, stats_ttl, stats_service_state_call, stats_service_check_call);
  413         if (ret != EUCA_OK) {
  414             LOGERROR("Error initializing internal service state sensor: %d\n", ret);
  415             goto cleanup;
  416         }
  417 
  418         ret = init_stats(nc_state.home, euca_this_component_name, nc_lock_stats, nc_unlock_stats);
  419         if (ret != EUCA_OK) {
  420             LOGERROR("Could not initialize CC stats system: %d\n", ret);
  421             goto cleanup;
  422         }
  423     }
  424 
  425     if (!ret) {
  426         LOGINFO("Stats subsystem initialized\n");
  427     } else {
  428         LOGERROR("Stat subsystem init failed: %d\n", ret);
  429     }
  430 cleanup:
  431     nc_unlock_stats();
  432     return ret;
  433 }
  434 
  435 
  436 //!
  437 //! Deauthorize all migration keys on destination host
  438 //! @param[in] lock_hyp_sem set to true to hold the 'lock_hyp_sem' semaphore
  439 //!
  440 //! @return EUCA_OK, EUCA_SYSTEM_ERROR
  441 //!
  442 int deauthorize_migration_keys(boolean lock_hyp_sem) 
  443 {
  444     int rc = 0;
  445     char euca_rootwrap[EUCA_MAX_PATH] = "";
  446     char command[EUCA_MAX_PATH] = "";
  447     char *euca_base = getenv(EUCALYPTUS_ENV_VAR_NAME);
  448 
  449     snprintf(command, EUCA_MAX_PATH, EUCALYPTUS_AUTHORIZE_MIGRATION_KEYS, NP(euca_base));
  450     snprintf(euca_rootwrap, EUCA_MAX_PATH, EUCALYPTUS_ROOTWRAP, NP(euca_base));
  451 
  452     LOGDEBUG("migration key de-authorization command: '%s %s %s %s'\n", euca_rootwrap, command, "-D", "-r");
  453     if (lock_hyp_sem == TRUE) {
  454         sem_p(hyp_sem);
  455     }
  456 
  457     rc = euca_execlp(NULL, euca_rootwrap, command, "-D", "-r", NULL);
  458 
  459     if (lock_hyp_sem == TRUE) {
  460         sem_v(hyp_sem);
  461     }
  462 
  463     if (rc != EUCA_OK) {
  464         LOGERROR("'%s %s %s %s' failed. rc=%d\n", euca_rootwrap, command, "-D", "-r", rc);
  465         return (EUCA_SYSTEM_ERROR);
  466     } else {
  467         LOGDEBUG("migration key deauthorization succeeded\n");
  468     }
  469     return (EUCA_OK);
  470 }
  471 
  472 //!
  473 //! Authorize migration keys on destination host.
  474 //!
  475 //! @param[in] host hostname (IP address) to authorize
  476 //! @param[in] credentials shared secret to authorize
  477 //! @param[in] instance pointer to instance struct for logging information (optional--can be NULL)
  478 //! @param[in] lock_hyp_sem set to true to hold the 'lock_hyp_sem' semaphore
  479 //!
  480 //! @return EUCA_OK, EUCA_INVALID_ERROR, or EUCA_SYSTEM_ERROR
  481 //!
  482 int authorize_migration_keys(char *host, char *credentials, ncInstance * instance, boolean lock_hyp_sem)
  483 {
  484     int rc = 0;
  485     char euca_rootwrap[EUCA_MAX_PATH] = "";
  486     char command[EUCA_MAX_PATH] = "";
  487     char *euca_base = getenv(EUCALYPTUS_ENV_VAR_NAME);
  488     char *instanceId = instance ? instance->instanceId : "UNSET";
  489 
  490     if (!host && !credentials) {
  491         LOGERROR("[%s] called with invalid arguments: host=%s, creds=%s\n", SP(instanceId), SP(host), (credentials == NULL) ? "UNSET" : "present");
  492         return (EUCA_INVALID_ERROR);
  493     }
  494 
  495     snprintf(command, EUCA_MAX_PATH, EUCALYPTUS_AUTHORIZE_MIGRATION_KEYS, NP(euca_base));
  496     snprintf(euca_rootwrap, EUCA_MAX_PATH, EUCALYPTUS_ROOTWRAP, NP(euca_base));
  497     LOGDEBUG("[%s] migration key authorization command: '%s %s %s %s %s'\n", SP(instanceId), euca_rootwrap, command, "-a", NP(host), NP(credentials));
  498     if (lock_hyp_sem == TRUE) {
  499         sem_p(hyp_sem);
  500     }
  501 
  502     rc = euca_execlp(NULL, euca_rootwrap, command, "-a", NP(host), NP(credentials), NULL);
  503 
  504     if (lock_hyp_sem == TRUE) {
  505         sem_v(hyp_sem);
  506     }
  507 
  508     if (rc != EUCA_OK) {
  509         LOGERROR("[%s] '%s %s %s %s %s' failed. rc=%d\n", SP(instanceId), euca_rootwrap, command, "-a", NP(host), NP(credentials), rc);
  510         return (EUCA_SYSTEM_ERROR);
  511     } else {
  512         LOGDEBUG("[%s] migration key authorization succeeded\n", SP(instanceId));
  513     }
  514     return (EUCA_OK);
  515 }
  516 
  517 //!
  518 //! Configure libvirtd to not use polkitd by default.
  519 //!
  520 //! Only needs to be run during init() as a one time operation. In most cases
  521 //! this will check the config and not restart libvirt if everything is ok.
  522 //!
  523 //! @param[in] use_polkit set 1, will enable polkit, 0 will disable (default)
  524 //! @return EUCA_OK, EUCA_INVALID_ERROR, or EUCA_SYSTEM_ERROR
  525 //!
  526 int config_polkit(int use_polkit)
  527 {
  528     int rc = 0;
  529     char euca_rootwrap[EUCA_MAX_PATH] = "";
  530     char command[EUCA_MAX_PATH] = "";
  531     char *euca_base = getenv(EUCALYPTUS_ENV_VAR_NAME);
  532 
  533     snprintf(command, EUCA_MAX_PATH, EUCALYPTUS_CONFIG_NO_POLKIT, NP(euca_base));
  534     snprintf(euca_rootwrap, EUCA_MAX_PATH, EUCALYPTUS_ROOTWRAP, NP(euca_base));
  535     LOGDEBUG("config-no-polkit command: '%s %s'\n", euca_rootwrap, command);
  536 
  537     if (use_polkit)
  538         rc = euca_execlp(NULL, euca_rootwrap, command, "-e", NULL); // enable
  539     else
  540         rc = euca_execlp(NULL, euca_rootwrap, command, NULL);       // disable - default
  541 
  542     if (rc != EUCA_OK) {
  543         LOGERROR("%s %s' failed. rc=%d\n",euca_rootwrap, command, rc);
  544         return (EUCA_SYSTEM_ERROR);
  545     } else {
  546         LOGDEBUG("Libvirtd polkit configuration succeeded\n");
  547     }
  548     return (EUCA_OK);
  549 }
  550 
  551 //!
  552 //! Copies the url string of the ENABLED service of the requested type into dest_buffer.
  553 //! dest_buffer MUST be the same size as the services uri array length, 512.
  554 //!
  555 //! @param[in] service_type
  556 //! @param[in] nc
  557 //! @param[in] dest_buffer
  558 //! @return EUCA_OK on success, EUCA_ERROR on failure.
  559 //! @pre
  560 //!
  561 //! @post
  562 //!
  563 int get_service_url(const char *service_type, struct nc_state_t *nc, char *dest_buffer)
  564 {
  565     int i = 0;
  566     boolean found = FALSE;
  567 
  568     if (service_type == NULL || nc == NULL || dest_buffer == NULL) {
  569         LOGERROR("Invalid input parameters. At least one is NULL.\n");
  570         return (EUCA_ERROR);
  571     }
  572 
  573     sem_p(service_state_sem);
  574 
  575     for (i = 0; i < 16; i++) {
  576         if (!strcmp(service_type, nc->services[i].type)) {
  577             //Winner!
  578             if (nc->services[i].urisLen > 0) {
  579                 euca_strncpy(dest_buffer, nc->services[i].uris[0], 512);
  580                 found = TRUE;
  581             }
  582         }
  583     }
  584     sem_v(service_state_sem);
  585 
  586     if (found) {
  587         LOGTRACE("Found enabled service URI for service type %s as %s\n", service_type, dest_buffer);
  588         return (EUCA_OK);
  589     }
  590 
  591     dest_buffer[0] = '\0';             //Ensure 0 length string
  592     LOGTRACE("No enabled service found for service type %s\n", service_type);
  593     return (EUCA_ERROR);
  594 }
  595 
  596 //!
  597 //!
  598 //!
  599 //! @pre
  600 //!
  601 //! @post
  602 //!
  603 static void printNCServiceStateInfo(void)
  604 {
  605     int i = 0;
  606     //Don't bother if not at trace logging
  607     if (log_level_get() <= EUCA_LOG_TRACE) {
  608         sem_p(service_state_sem);
  609         LOGTRACE("Printing %d services\n", nc_state.servicesLen);
  610         LOGTRACE("Epoch %d\n", nc_state.ncStatus.localEpoch);
  611         for (i = 0; i < nc_state.servicesLen; i++) {
  612             LOGTRACE("Service - %s %s %s %s\n", nc_state.services[i].name, nc_state.services[i].partition, nc_state.services[i].type, nc_state.services[i].uris[0]);
  613         }
  614         for (i = 0; i < nc_state.disabledServicesLen; i++) {
  615             LOGTRACE("Disabled Service - %s %s %s %s\n", nc_state.disabledServices[i].name, nc_state.disabledServices[i].partition, nc_state.disabledServices[i].type,
  616                      nc_state.disabledServices[i].uris[0]);
  617         }
  618         for (i = 0; i < nc_state.servicesLen; i++) {
  619             LOGTRACE("Notready Service - %s %s %s %s\n", nc_state.notreadyServices[i].name, nc_state.notreadyServices[i].partition, nc_state.notreadyServices[i].type,
  620                      nc_state.notreadyServices[i].uris[0]);
  621         }
  622         sem_v(service_state_sem);
  623     }
  624 }
  625 
  626 //!
  627 //!
  628 //!
  629 //! @param[in] pMeta
  630 //!
  631 //! @pre
  632 //!
  633 //! @post
  634 //!
  635 static void printMsgServiceStateInfo(ncMetadata * pMeta)
  636 {
  637     int i = 0;
  638     //Don't bother if not at trace logging
  639     if (log_level_get() <= EUCA_LOG_TRACE) {
  640         LOGTRACE("Printing %d services\n", pMeta->servicesLen);
  641         LOGTRACE("Msg-Meta epoch %d\n", pMeta->epoch);
  642 
  643         for (i = 0; i < pMeta->servicesLen; i++) {
  644             LOGTRACE("Msg-Meta: Service - %s %s %s %s\n", pMeta->services[i].name, pMeta->services[i].partition, pMeta->services[i].type, pMeta->services[i].uris[0]);
  645         }
  646 
  647         for (i = 0; i < pMeta->disabledServicesLen; i++) {
  648             LOGTRACE("Msg-Meta: Disabled Service - %s %s %s %s\n", pMeta->disabledServices[i].name, pMeta->disabledServices[i].partition, pMeta->disabledServices[i].type,
  649                      pMeta->disabledServices[i].uris[0]);
  650         }
  651 
  652         for (i = 0; i < pMeta->servicesLen; i++) {
  653             LOGTRACE("Msg-Meta: Notready Service - %s %s %s %s\n", pMeta->notreadyServices[i].name, pMeta->notreadyServices[i].partition, pMeta->notreadyServices[i].type,
  654                      pMeta->notreadyServices[i].uris[0]);
  655         }
  656     }
  657 }
  658 
  659 //!
  660 //! Update the state of the services and topology as received from the CC
  661 //!
  662 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
  663 //! @param[in] authoritative indicates whether this request is allowed to reset epoch
  664 //! @pre
  665 //!
  666 //! @note
  667 //!
  668 static void updateServiceStateInfo(ncMetadata * pMeta, boolean authoritative)
  669 {
  670     int i = 0;
  671     char scURL[512];
  672     if ((pMeta != NULL) && (pMeta->servicesLen > 0)) {
  673         LOGTRACE("Updating NC's topology/service state info: pMeta: userId=%s\n", pMeta->userId);
  674 
  675         // store information from CLC that needs to be kept up-to-date in the NC
  676         sem_p(service_state_sem);
  677 
  678         if (pMeta->epoch >= nc_state.ncStatus.localEpoch || // we have updates ('=' is there in case CC does not bump epoch numbers)
  679             authoritative              // trust the authoritative requests and always take their services info, even if epoch goes backward
  680             ) {
  681             //Update the epoch first
  682             nc_state.ncStatus.localEpoch = pMeta->epoch;
  683 
  684             //Copy new services info wholesale
  685             memcpy(nc_state.services, pMeta->services, sizeof(serviceInfoType) * 16);
  686             memcpy(nc_state.disabledServices, pMeta->disabledServices, sizeof(serviceInfoType) * 16);
  687             memcpy(nc_state.notreadyServices, pMeta->notreadyServices, sizeof(serviceInfoType) * 16);
  688             nc_state.servicesLen = pMeta->servicesLen;
  689             nc_state.disabledServicesLen = pMeta->disabledServicesLen;
  690             nc_state.notreadyServicesLen = pMeta->notreadyServicesLen;
  691 
  692             //Make a copy of the SC url to use outside of the semaphore
  693             for (i = 0; i < nc_state.servicesLen; i++) {
  694                 if (!strcmp(nc_state.services[i].type, "storage")) {
  695                     if (nc_state.services[i].urisLen > 0) {
  696                         memcpy(scURL, nc_state.services[i].uris[0], 512);
  697                         break;
  698                     }
  699                 }
  700             }
  701         }
  702         sem_v(service_state_sem);
  703 
  704         LOGTRACE("Updating VBR localhost config sc url to: %s\n", scURL);
  705         //Push the change to the vbr code
  706         vbr_update_hostconfig_scurl(scURL);
  707 
  708     } else {
  709         LOGTRACE("Cannot update service infos, null found\n");
  710         return;
  711     }
  712 
  713     //Log the results...
  714     printNCServiceStateInfo();
  715     printMsgServiceStateInfo(pMeta);
  716 }
  717 
  718 //!
  719 //! Utilitarian functions used in the lower level handlers. This scans the string buffer
  720 //! 's' for a matching parameter 'name' to fill in the 'valp' value.
  721 //!
  722 //! @param[in]  s a non NULL string buffer
  723 //! @param[in]  name the name of the parameter we're looking for
  724 //! @param[out] valp a pointer to the integer returned if we found the parameter in 's'
  725 //!
  726 //! @return EUCA_OK on success; EUCA_ERROR if any parameters are invalid; or EUCA_NO_FOUND_ERROR
  727 //!         if the 'name' parameter is not found in 's'. In any error case, 'valp' will remain
  728 //!         invalid and could be modified.
  729 //!
  730 int get_value(char *s, const char *name, long long *valp)
  731 {
  732     char buf[CHAR_BUFFER_SIZE] = "";
  733 
  734     if ((s == NULL) || (name == NULL) || (valp == NULL))
  735         return (EUCA_ERROR);
  736     snprintf(buf, CHAR_BUFFER_SIZE, "%s=%%lld", name);
  737     return ((euca_lscanf(s, buf, valp) == 1) ? EUCA_OK : EUCA_NOT_FOUND_ERROR);
  738 }
  739 
  740 //!
  741 //! Handles the logging of libvirt errors
  742 //!
  743 //! @param[in] userData (UNUSED)
  744 //! @param[in] error a pointer to the libvirt error information
  745 //!
  746 void libvirt_err_handler(void *userData, virErrorPtr error)
  747 {
  748     boolean ignore_error = FALSE;
  749 
  750     if (error == NULL) {
  751         LOGERROR("libvirt error handler was given a NULL pointer\n");
  752         return;
  753     }
  754 
  755     if (error->code == VIR_ERR_NO_DOMAIN) {
  756         char *instanceId = euca_strestr(error->message, "'", "'");  // try to find instance ID in the message
  757         if (instanceId) {
  758             // NOTE: sem_p/v(inst_sem) cannot be used as this err_handler can be called in refresh_instance_info's context
  759             ncInstance *instance = find_instance(&global_instances, instanceId);
  760             if (instance && (instance->terminationRequestedTime // termination of this instance was requested
  761                              || (instance->state == BOOTING)    // it is booting or rebooting
  762                              || (instance->state == BUNDLING_SHUTDOWN || instance->state == BUNDLING_SHUTOFF)
  763                              || (instance->state == CREATEIMAGE_SHUTDOWN || instance->state == CREATEIMAGE_SHUTOFF))) {
  764                 ignore_error = TRUE;
  765             }
  766             free(instanceId);
  767         }
  768     }
  769 
  770     if (!ignore_error) {
  771         EUCALOG(EUCA_LOG_ERROR, "libvirt: %s (code=%d)\n", error->message, error->code);
  772     }
  773 }
  774 
  775 //!
  776 //! converts 'dev' into canonical form (e.g., "sda" of "/dev/sda") unless
  777 //! it is already in canonical form
  778 //!
  779 //! @param[in]  dev the device name string (e.g. /dev/sda or sda)
  780 //! @param[out] cdev the device name in canonical form (without /dev/)
  781 //! @param[in]  cdev_len length of the cdev buffer in bytes
  782 //!
  783 //! @return EUCA_OK on success or EUCA_ERROR on failure
  784 //!
  785 int canonicalize_dev(const char *dev, char *cdev, int cdev_len)
  786 {
  787     char cdev_local[128];
  788     euca_strncpy(cdev_local, dev, sizeof(cdev_local));
  789 
  790     const char *s = cdev_local;
  791     if (strstr(dev, "/dev/") == dev) {
  792         s = s + strlen("/dev/");
  793     }
  794     if (strchr(s, '/')) {
  795         LOGERROR("device name string of unexpected format (must be /dev/XXX)\n");
  796         return EUCA_ERROR;
  797     }
  798     if (strlen(s) > (cdev_len - 1)) {
  799         LOGERROR("buffer size (%d) exceeded for device name string\n", cdev_len);
  800         return EUCA_ERROR;
  801     }
  802     euca_strncpy(cdev, s, cdev_len);
  803 
  804     return EUCA_OK;
  805 }
  806 
  807 //!
  808 //! This updates the 'aliases' of sensor 'dimensions' that store sensor data for specific
  809 //! block devices. Dimensions are strings like 'root', 'ephemeral0', 'vol-XYZ', etc. The
  810 //! purpose of aliases is to map block device statistics returned by getstats.pl script,
  811 //! which use guest block device names, such as 'sda' or 'vdb', into dimensions. To deduce
  812 //! the mapping, we use .xml files that are passed to libvirt. This is somewhat awkward, but
  813 //! it gets us the guest device actually used by the hypervisor. (The device we request may
  814 //! be modified by XSL transforms and NC hooks.)
  815 //!
  816 //! @param[in] instance a pointer to the instance
  817 //!
  818 //! @return Always return EUCA_OK
  819 //!
  820 int update_disk_aliases(ncInstance * instance)
  821 {
  822     int i = 0;
  823     int j = 0;
  824     char *volumeId = NULL;
  825     char **devs = NULL;
  826     char lpath[EUCA_MAX_PATH] = "";
  827     boolean saw_ephemeral0 = FALSE;
  828     boolean saw_root = FALSE;
  829     ncVolume *volume = NULL;
  830 
  831     // update block devices from instance XML file
  832     if ((devs = get_xpath_content(instance->libvirtFilePath, "/domain/devices/disk/target[@dev]/@dev")) != NULL) {
  833         for (i = 0; devs[i]; i++) {
  834             volumeId = NULL;
  835             if (strstr(devs[i], "da1")) {   // regexp: [hsvx]v?da1?
  836                 volumeId = "root";
  837                 saw_root = TRUE;
  838             } else if (strstr(devs[i], "da2")) {
  839                 if (saw_ephemeral0) {
  840                     LOGERROR("[%s] unexpected disk layout in instance", instance->instanceId);
  841                 } else {
  842                     volumeId = "ephemeral0";
  843                     saw_ephemeral0 = TRUE;
  844                 }
  845             } else if (strstr(devs[i], "da")) {
  846                 volumeId = "root";
  847                 saw_root = TRUE;
  848             } else if (strstr(devs[i], "db")) {
  849                 if (saw_ephemeral0) {
  850                     LOGERROR("[%s] unexpected disk layout in instance", instance->instanceId);
  851                 } else {
  852                     volumeId = "ephemeral0";
  853                     saw_ephemeral0 = TRUE;
  854                 }
  855             } else if (strstr(devs[i], "dc")) {
  856                 volumeId = "ephemeral1";
  857             } else if (strstr(devs[i], "dd")) {
  858                 volumeId = "ephemeral2";
  859             } else if (strstr(devs[i], "de")) {
  860                 volumeId = "ephemeral3";
  861             }
  862 
  863             if (volumeId) {
  864                 ebs_volume_data *vol_data = NULL;
  865 
  866                 if (strcmp("root", volumeId) == 0) {
  867                     if (instance->params.root->locationType == NC_LOCATION_SC) {
  868                         if (deserialize_volume(instance->params.root->resourceLocation, &vol_data) == 0) {
  869                             volumeId = vol_data->volumeId;
  870                         }
  871                     }
  872                 }
  873                 sensor_set_volume(instance->instanceId, volumeId, devs[i]);
  874 
  875                 EUCA_FREE(vol_data);
  876             }
  877             EUCA_FREE(devs[i]);
  878         }
  879         EUCA_FREE(devs);
  880     }
  881 
  882     if (!saw_root) {
  883         LOGWARN("[%s] failed to find 'dev' entry for root\n", instance->instanceId);
  884     }
  885     // now update attached or detached volumes, if any
  886     for (i = 0; i < EUCA_MAX_VOLUMES; ++i) {
  887         volume = &instance->volumes[i];
  888         if (strlen(volume->volumeId) == 0)
  889             continue;
  890 
  891         snprintf(lpath, sizeof(lpath), EUCALYPTUS_VOLUME_LIBVIRT_XML_PATH_FORMAT, instance->instancePath, volume->volumeId);    // vol-XXX-libvirt.xml
  892         if ((devs = get_xpath_content(lpath, "/disk/target[@dev]/@dev")) != NULL) {
  893             if (devs[0] && devs[1] == NULL) {
  894                 sensor_set_volume(instance->instanceId, volume->volumeId, devs[0]);
  895             } else {
  896                 LOGWARN("[%s] failed to find 'dev' entry in %s\n", lpath, instance->instanceId);
  897             }
  898 
  899             for (j = 0; devs[j]; j++) {
  900                 EUCA_FREE(devs[j]);
  901             }
  902             EUCA_FREE(devs);
  903         } else {
  904             sensor_set_volume(instance->instanceId, volume->volumeId, NULL);
  905         }
  906     }
  907 
  908     return EUCA_OK;
  909 }
  910 
  911 //!
  912 //! Logs the currently running domains
  913 //!
  914 void print_running_domains(void)
  915 {
  916     ncInstance *instance = NULL;
  917     bunchOfInstances *head = NULL;
  918     char buf[CHAR_BUFFER_SIZE] = "";
  919 
  920     sem_p(inst_sem);
  921     {
  922         for (head = global_instances; head; head = head->next) {
  923             instance = head->instance;
  924             if (instance->state == STAGING || instance->state == BOOTING || instance->state == RUNNING || instance->state == BLOCKED || instance->state == PAUSED) {
  925                 strcat(buf, " ");
  926                 strcat(buf, instance->instanceId);
  927             }
  928         }
  929     }
  930     sem_v(inst_sem);
  931     LOGINFO("currently running/booting: %s\n", buf);
  932 }
  933 
  934 //!
  935 //!
  936 //!
  937 //! @param[in] ptr
  938 //!
  939 static void *libvirt_thread(void *ptr)
  940 {
  941     int rc = 0;
  942     sigset_t mask = { {0} };
  943 
  944     // allow SIGUSR1 signal to be delivered to this thread and its children
  945     sigemptyset(&mask);
  946     sigaddset(&mask, SIGUSR1);
  947     sigprocmask(SIG_UNBLOCK, &mask, NULL);
  948 
  949     if (nc_state.conn) {
  950         if ((rc = virConnectClose(nc_state.conn)) != 0) {
  951             LOGDEBUG("refcount on close was non-zero: %d\n", rc);
  952         }
  953     }
  954     nc_state.conn = virConnectOpen(nc_state.uri);
  955     return (NULL);
  956 }
  957 
  958 //!
  959 //! Checks and reset the hypervisor connection.
  960 //!
  961 //! @return a pointer to the hypervisor connection structure or NULL if we failed.
  962 //!
  963 virConnectPtr lock_hypervisor_conn()
  964 {
  965     int rc = 0;
  966     int status = 0;
  967     pid_t cpid = 0;
  968     pthread_t thread = { 0 };
  969     long long thread_par = 0L;
  970     boolean bail = FALSE;
  971     //boolean try_again = FALSE;
  972     struct timespec ts = { 0 };
  973     virConnectPtr tmp_conn = NULL;
  974 
  975     // Acquire our hypervisor semaphore
  976     sem_p(hyp_sem);
  977 
  978     if (call_hooks(NC_EVENT_PRE_HYP_CHECK, nc_state.home)) {
  979         LOGFATAL("hooks prevented check on the hypervisor\n");
  980         sem_v(hyp_sem);
  981         return NULL;
  982     }
  983     // Fork off a process just to open and immediately close a libvirt connection.
  984     // The purpose is to try to identify periods when open or close calls block indefinitely.
  985     // Success in the child process does not guarantee success in the parent process, but
  986     // hopefully it will flag certain bad conditions and will allow the parent to avoid them.
  987 
  988     if ((cpid = fork()) < 0) {         // fork error
  989         LOGERROR("failed to fork to check hypervisor connection\n");
  990         bail = TRUE;                   // we are in big trouble if we cannot fork
  991     } else if (cpid == 0) {            // child process - checks on the connection
  992         if ((tmp_conn = virConnectOpen(nc_state.uri)) == NULL)
  993             exit(1);
  994         virConnectClose(tmp_conn);
  995         exit(0);
  996     } else {                           // parent process - waits for the child, kills it if necessary
  997         if ((rc = timewait(cpid, &status, LIBVIRT_TIMEOUT_SEC)) < 0) {
  998             LOGERROR("failed to wait for forked process: %s\n", strerror(errno));
  999             bail = TRUE;
 1000         } else if (rc == 0) {
 1001             LOGERROR("timed out waiting for hypervisor checker pid=%d\n", cpid);
 1002             bail = TRUE;
 1003         } else if (WEXITSTATUS(status) != 0) {
 1004             LOGERROR("child process failed to connect to hypervisor\n");
 1005             bail = TRUE;
 1006         }
 1007         // terminate the child, if any
 1008         killwait(cpid);
 1009     }
 1010 
 1011     if (bail) {
 1012         sem_v(hyp_sem);
 1013         return NULL;                   // better fail the operation than block the whole NC
 1014     }
 1015 
 1016     LOGTRACE("process check for libvirt succeeded\n");
 1017 
 1018     // At this point, the check for libvirt done in a separate process was
 1019     // successful, so we proceed to close and reopen the connection in a
 1020     // separate thread, which we will try to wake up with SIGUSR1 if it
 1021     // blocks for too long (as a last-resource effort). The reason we reset
 1022     // the connection so often is because libvirt operations have a
 1023     // tendency to block indefinitely if we do not do this.
 1024 
 1025     if (pthread_create(&thread, NULL, libvirt_thread, (void *)&thread_par) != 0) {
 1026         LOGERROR("failed to create the libvirt refreshing thread\n");
 1027         bail = TRUE;
 1028     } else {
 1029         for (;;) {
 1030             if (clock_gettime(CLOCK_REALTIME, &ts) == -1) {
 1031                 LOGERROR("failed to obtain time\n");
 1032                 bail = TRUE;
 1033                 break;
 1034             }
 1035 
 1036             ts.tv_sec += LIBVIRT_TIMEOUT_SEC;
 1037             if ((rc = pthread_timedjoin_np(thread, NULL, &ts)) == 0)
 1038                 break;                 // all is well
 1039 
 1040             if (rc != ETIMEDOUT) {     // error other than timeout
 1041                 LOGERROR("failed to wait for libvirt refreshing thread (rc=%d)\n", rc);
 1042                 bail = TRUE;
 1043                 break;
 1044             }
 1045 
 1046             LOGERROR("timed out on libvirt refreshing thread\n");
 1047             pthread_kill(thread, SIGUSR1);
 1048             sleep(1);
 1049         }
 1050     }
 1051 
 1052     if (bail) {
 1053         sem_v(hyp_sem);
 1054         return NULL;
 1055     }
 1056     LOGTRACE("thread check for libvirt succeeded\n");
 1057 
 1058     if (nc_state.conn == NULL) {
 1059         LOGERROR("failed to connect to %s\n", nc_state.uri);
 1060         sem_v(hyp_sem);
 1061         return NULL;
 1062     }
 1063     return nc_state.conn;
 1064 }
 1065 
 1066 //!
 1067 //! Closes the connection with the hypervisor
 1068 //!
 1069 void unlock_hypervisor_conn()
 1070 {
 1071     sem_v(hyp_sem);
 1072 }
 1073 
 1074 //!
 1075 //! Instance state state machine.
 1076 //!
 1077 //! @param[in] instance a pointer to the instance to modify
 1078 //! @param[in] state the new instance state
 1079 //!
 1080 void change_state(ncInstance * instance, instance_states state)
 1081 {
 1082     int old_state = instance->state;
 1083 
 1084     instance->state = ((int)state);
 1085     switch (state) {                   /* mapping from NC's internal states into external ones */
 1086     case STAGING:
 1087     case CANCELED:
 1088         // Mark primary and secondary network interfaces as attached
 1089         euca_strncpy(instance->ncnet.stateName, VOL_STATE_ATTACHED, sizeof(instance->ncnet.stateName)); // primary nic
 1090         for (int i = 0; i < EUCA_MAX_NICS; i++) { // secondary nics in VPC mode only
 1091             if (strlen(instance->secNetCfgs[i].interfaceId) == 0)
 1092                continue; // empty slot, move on
 1093             else
 1094                euca_strncpy(instance->secNetCfgs[i].stateName, VOL_STATE_ATTACHED, sizeof(instance->secNetCfgs[i].stateName));
 1095         }
 1096         instance->stateCode = PENDING;
 1097         break;
 1098     case BOOTING:
 1099     case RUNNING:
 1100     case BLOCKED:
 1101     case PAUSED:
 1102         instance->stateCode = EXTANT;
 1103         instance->retries = LIBVIRT_QUERY_RETRIES;
 1104         break;
 1105     case CRASHED:
 1106     case BUNDLING_SHUTDOWN:
 1107     case BUNDLING_SHUTOFF:
 1108     case CREATEIMAGE_SHUTDOWN:
 1109     case CREATEIMAGE_SHUTOFF:
 1110     case SHUTDOWN:
 1111     case SHUTOFF:
 1112         if (instance->stateCode != EXTANT) {
 1113             instance->stateCode = PENDING;
 1114         }
 1115         instance->retries = LIBVIRT_QUERY_RETRIES;
 1116         break;
 1117     case TEARDOWN:
 1118         // Mark primary and secondary network interfaces as detached
 1119         euca_strncpy(instance->ncnet.stateName, VOL_STATE_DETACHED, sizeof(instance->ncnet.stateName)); // primary nic
 1120         for (int i = 0; i < EUCA_MAX_NICS; i++) { // secondary nics in VPC mode only
 1121             if (strlen(instance->secNetCfgs[i].interfaceId) == 0)
 1122                continue; // empty slot, move on
 1123             else
 1124                euca_strncpy(instance->secNetCfgs[i].stateName, VOL_STATE_DETACHED, sizeof(instance->secNetCfgs[i].stateName));
 1125         }
 1126         instance->stateCode = TEARDOWN;
 1127         break;
 1128     default:
 1129         LOGERROR("[%s] unexpected state (%d)\n", instance->instanceId, instance->state);
 1130         return;
 1131     }
 1132 
 1133     euca_strncpy(instance->stateName, instance_state_names[instance->stateCode], CHAR_BUFFER_SIZE);
 1134     if (old_state != state) {
 1135         LOGDEBUG("[%s] state change for instance: %s -> %s (%s)\n",
 1136                  instance->instanceId, instance_state_names[old_state], instance_state_names[instance->state], instance_state_names[instance->stateCode]);
 1137     }
 1138 }
 1139 
 1140 //!
 1141 //! waits indefinitely until a state transition takes place  (timeouts are implemented in the
 1142 //! monitoring thread) and returns 0 if from_state->to_state transition takes place and 1 otherwise
 1143 //!
 1144 //! @param[in] instance a pointer to the instance we're monitoring
 1145 //! @param[in] from_state the starting state of the transition
 1146 //! @param[in] to_state the ending state of the transition
 1147 //!
 1148 //! @return EUCA_OK on success or EUCA_ERROR on failure.
 1149 //!
 1150 int wait_state_transition(ncInstance * instance, instance_states from_state, instance_states to_state)
 1151 {
 1152     instance_states current_state = NO_STATE;
 1153 
 1154     while (1) {
 1155         current_state = instance->state;
 1156         if (current_state == to_state)
 1157             return (EUCA_OK);
 1158 
 1159         if (current_state != from_state)
 1160             return (EUCA_ERROR);
 1161 
 1162         // no point in checking more frequently
 1163         sleep(MONITORING_PERIOD);
 1164     }
 1165     return (EUCA_ERROR);
 1166 }
 1167 
 1168 //!
 1169 //! Refresh instance information.
 1170 //!
 1171 //! (This is called while holding inst_sem.)
 1172 //!
 1173 //! @param[in] nc a pointer to the global NC state structure.
 1174 //! @param[in] instance a pointer to the instance being refreshed
 1175 //!
 1176 static void refresh_instance_info(struct nc_state_t *nc, ncInstance * instance)
 1177 {
 1178     int error = 0;
 1179     int rc = 0;
 1180     char *ip = NULL;
 1181     virDomainInfo info = { 0 };
 1182     instance_states new_state = NO_STATE;
 1183     instance_states old_state = instance->state;
 1184 
 1185     // no need to bug for domains without state on Hypervisor
 1186     if (old_state == TEARDOWN || old_state == STAGING || old_state == BUNDLING_SHUTOFF || old_state == CREATEIMAGE_SHUTOFF)
 1187         return;
 1188 
 1189     {                                  // all this is done while holding the hypervisor lock, with a valid connection
 1190         virConnectPtr conn = lock_hypervisor_conn();
 1191         if (conn == NULL) {
 1192             hypervisor_conn_errors++;
 1193             // This is last resort. restarting libvirtd
 1194             if (hypervisor_conn_errors >= MAX_CONNECTION_ERRORS) {
 1195                 LOGWARN("Got %d connection errors to libvirt. Restarting libvirtd service...\n", hypervisor_conn_errors);
 1196                 euca_execlp(NULL, nc_state.rootwrap_cmd_path, "/sbin/service", "libvirtd", "restart", NULL);
 1197                 sleep(LIBVIRT_TIMEOUT_SEC);
 1198             }
 1199             return;
 1200         } else {
 1201             hypervisor_conn_errors = 0;
 1202         }
 1203 
 1204         virDomainPtr dom = virDomainLookupByName(conn, instance->instanceId);
 1205 
 1206         if (dom == NULL) {             // hypervisor doesn't know about it
 1207             if (old_state == BUNDLING_SHUTDOWN) {
 1208                 LOGINFO("[%s] detected disappearance of bundled domain\n", instance->instanceId);
 1209                 change_state(instance, BUNDLING_SHUTOFF);
 1210             } else if (old_state == CREATEIMAGE_SHUTDOWN) {
 1211                 LOGINFO("[%s] detected disappearance of createImage domain\n", instance->instanceId);
 1212                 change_state(instance, CREATEIMAGE_SHUTOFF);
 1213             } else if (old_state == RUNNING || old_state == BLOCKED || old_state == PAUSED || old_state == SHUTDOWN) {
 1214                 // If we just finished migration, then this is normal.
 1215                 //
 1216                 // Could this be a bad assumption if the
 1217                 // virDomainLookupByName() call above returns NULL for
 1218                 // some transient reason rather than because hypervisor
 1219                 // doesn't know of the domain any more?
 1220                 if (is_migration_src(instance)) {
 1221                     if (instance->migration_state == MIGRATION_IN_PROGRESS) {
 1222                         // This usually occurs when there has been some
 1223                         // glitch in the migration: an i/o error or
 1224                         // reset connction.  When that happens, we do
 1225                         // *not* want to shut off the instance!
 1226                         //
 1227                         // It can also happen absent an anomaly, such as
 1228                         // when refresh_instance_info() is called right
 1229                         // as the migration is completing (there's a race).
 1230                         LOGDEBUG("[%s] possible migration anomaly, not yet assuming completion\n", instance->instanceId);
 1231                         unlock_hypervisor_conn();
 1232                         return;
 1233                     }
 1234                     LOGINFO("[%s] migration completed (state='%s'), cleaning up\n", instance->instanceId, migration_state_names[instance->migration_state]);
 1235                     change_state(instance, SHUTOFF);
 1236                     unlock_hypervisor_conn();
 1237                     return;
 1238                 }
 1239                 // most likely the user has shut it down from the inside
 1240                 if (instance->stop_requested) {
 1241                     LOGDEBUG("[%s] ignoring domain in stopped state\n", instance->instanceId);
 1242                 } else if (instance->terminationRequestedTime) {
 1243                     LOGDEBUG("[%s] hypervisor not finding the terminating domain\n", instance->instanceId);
 1244                 } else if (instance->retries) {
 1245                     LOGWARN("[%s] hypervisor failed to find domain, will retry %d more time(s)\n", instance->instanceId, instance->retries);
 1246                     instance->retries--;
 1247                 } else {
 1248                     LOGWARN("[%s] hypervisor failed to find domain, assuming it was shut off\n", instance->instanceId);
 1249                     change_state(instance, SHUTOFF);
 1250                 }
 1251             }
 1252             // else 'old_state' stays in SHUTFOFF, BOOTING, CANCELED, or CRASHED
 1253 
 1254             // set guest power state
 1255             strncpy(instance->guestStateName, GUEST_STATE_POWERED_OFF, CHAR_BUFFER_SIZE);
 1256 
 1257             // persist state updates to disk
 1258             save_instance_struct(instance);
 1259 
 1260             unlock_hypervisor_conn();
 1261             return;
 1262         }
 1263 
 1264         error = virDomainGetInfo(dom, &info);
 1265         if ((error < 0) || (info.state == VIR_DOMAIN_NOSTATE)) {
 1266             LOGWARN("[%s] failed to get information for domain\n", instance->instanceId);
 1267             // what to do? hopefully we'll find out more later
 1268             virDomainFree(dom);
 1269             unlock_hypervisor_conn();
 1270             return;
 1271         }
 1272 
 1273         new_state = info.state;
 1274         switch (old_state) {
 1275         case BOOTING:
 1276         case RUNNING:
 1277         case BLOCKED:
 1278         case PAUSED:
 1279             // migration-related logic
 1280             if (is_migration_dst(instance)) {
 1281                 if (old_state == BOOTING && new_state == PAUSED) {
 1282                     incoming_migrations_in_progress++;
 1283                     LOGINFO("[%s] incoming (%s < %s) migration in progress (1 of %d)\n", instance->instanceId, instance->migration_dst, instance->migration_src,
 1284                             incoming_migrations_in_progress);
 1285                     instance->migration_state = MIGRATION_IN_PROGRESS;
 1286                     LOGDEBUG("[%s] incoming (%s < %s) migration_state set to '%s'\n", instance->instanceId,
 1287                              instance->migration_dst, instance->migration_src, migration_state_names[instance->migration_state]);
 1288 
 1289                     if (!strcmp(nc->pEucaNet->sMode, NETMODE_VPCMIDO)) {
 1290                         bridge_instance_interfaces_remove(nc, instance);
 1291                     }
 1292                     if (!strcmp(nc->pEucaNet->sMode, NETMODE_EDGE)) {
 1293                         char iface[16];
 1294                         snprintf(iface, 16, "vn_%s", instance->instanceId);
 1295                         bridge_interface_set_hairpin(nc, instance, iface);
 1296                     } 
 1297                 } else if ((old_state == BOOTING || old_state == PAUSED)
 1298                            && (new_state == RUNNING || new_state == BLOCKED)) {
 1299                     LOGINFO("[%s] completing incoming (%s < %s) migration...\n", instance->instanceId, instance->migration_dst, instance->migration_src);
 1300                     instance->migration_state = NOT_MIGRATING;  // done!
 1301                     bzero(instance->migration_src, HOSTNAME_SIZE);
 1302                     bzero(instance->migration_dst, HOSTNAME_SIZE);
 1303                     bzero(instance->migration_credentials, CREDENTIAL_SIZE);
 1304                     instance->migrationTime = 0;
 1305                     save_instance_struct(instance);
 1306                     // copy_intances is called upon return in monitoring_thread().
 1307                     incoming_migrations_in_progress--;
 1308                     LOGINFO("[%s] incoming migration complete (%d other incoming migration[s] actively in progress)\n", instance->instanceId, incoming_migrations_in_progress);
 1309                     // If no remaining incoming or pending migrations, deauthorize all clients.
 1310                     // TO-DO: Consolidate with similar sequence in handlers_kvm.c into a utility function?
 1311                     if (!incoming_migrations_in_progress) {
 1312                         int incoming_migrations_pending = 0;
 1313                         int incoming_migrations_counted = 0;
 1314                         LOGINFO("no remaining active incoming migrations -- checking to see if there are any pending migrations\n");
 1315                         bunchOfInstances *head = NULL;
 1316                         for (head = global_instances; head; head = head->next) {
 1317                             if ((head->instance->migration_state == MIGRATION_PREPARING) || (head->instance->migration_state == MIGRATION_READY)) {
 1318                                 LOGINFO("[%s] is pending migration, migration_state='%s', deferring deauthorization of migration keys\n", head->instance->instanceId,
 1319                                         migration_state_names[head->instance->migration_state]);
 1320                                 incoming_migrations_pending++;
 1321                             }
 1322                             // Belt and suspenders...
 1323                             if ((head->instance->migration_state == MIGRATION_IN_PROGRESS) && !strcmp(nc_state.ip, head->instance->migration_dst)) {
 1324                                 LOGWARN("[%s] Possible internal bug detected: instance migration_state='%s', but incoming_migrations_in_progress=%d\n", head->instance->instanceId,
 1325                                         migration_state_names[head->instance->migration_state], incoming_migrations_in_progress);
 1326                                 incoming_migrations_counted++;
 1327                             }
 1328                         }
 1329                         if (incoming_migrations_counted != incoming_migrations_in_progress) {
 1330                             LOGWARN("Possible internal bug detected: incoming_migrations_in_progress=%d, but %d incoming migrations counted\n", incoming_migrations_in_progress,
 1331                                     incoming_migrations_counted);
 1332                         }
 1333                         if (!incoming_migrations_pending) {
 1334                             LOGINFO("no remaining incoming or pending migrations -- deauthorizing all migration client keys\n");
 1335                             deauthorize_migration_keys(FALSE);
 1336                         }
 1337                     } else {
 1338                         // Verify that our count of incoming_migrations_in_progress matches our version of reality.
 1339                         bunchOfInstances *head = NULL;
 1340                         int incoming_migrations_counted = 0;
 1341                         for (head = global_instances; head; head = head->next) {
 1342                             if ((head->instance->migration_state == MIGRATION_IN_PROGRESS) && !strcmp(nc_state.ip, head->instance->migration_dst)) {
 1343                                 incoming_migrations_counted++;
 1344                             }
 1345                         }
 1346                         if (incoming_migrations_counted != incoming_migrations_in_progress) {
 1347                             LOGWARN("Possible internal bug detected: incoming_migrations_in_progress=%d, but %d incoming migrations counted\n", incoming_migrations_in_progress,
 1348                                     incoming_migrations_counted);
 1349                         }
 1350                     }
 1351                 } else if (new_state == SHUTOFF || new_state == SHUTDOWN) {
 1352                     // this is normal at the beginning of incoming migration, before a domain is created in PAUSED state
 1353                     break;
 1354                 }
 1355             }
 1356 
 1357             if (new_state == SHUTOFF || new_state == SHUTDOWN || new_state == CRASHED) {
 1358                 LOGWARN("[%s] hypervisor reported previously running domain as %s\n", instance->instanceId, instance_state_names[new_state]);
 1359             }
 1360             // change to state, whatever it happens to be
 1361             change_state(instance, new_state);
 1362             break;
 1363         case SHUTDOWN:
 1364         case SHUTOFF:
 1365         case CRASHED:
 1366             if (new_state == RUNNING || new_state == BLOCKED || new_state == PAUSED) {
 1367                 // cannot go back!
 1368                 LOGWARN("[%s] detected prodigal domain, terminating it\n", instance->instanceId);
 1369                 virDomainDestroy(dom);
 1370             } else {
 1371                 change_state(instance, new_state);
 1372             }
 1373             break;
 1374         case BUNDLING_SHUTDOWN:
 1375         case CREATEIMAGE_SHUTDOWN:
 1376             LOGDEBUG("[%s] hypervisor state for bundle/createImage domain is %s\n", instance->instanceId, instance_state_names[new_state]);
 1377             break;
 1378         default:
 1379             LOGERROR("[%s] unexpected state (%d) in refresh\n", instance->instanceId, old_state);
 1380         }
 1381 
 1382         virDomainFree(dom);
 1383         unlock_hypervisor_conn();
 1384     }
 1385 
 1386     // if instance is running, try to find out its IP address
 1387     if (instance->state == RUNNING || instance->state == BLOCKED || instance->state == PAUSED) {
 1388         ip = NULL;
 1389 
 1390         if (!strncmp(instance->ncnet.privateIp, "0.0.0.0", INET_ADDR_LEN)) {
 1391             rc = MAC2IP(instance->ncnet.privateMac, &ip);
 1392             if (!rc && ip) {
 1393                 LOGINFO("[%s] discovered private IP %s for instance\n", instance->instanceId, ip);
 1394                 euca_strncpy(instance->ncnet.privateIp, ip, INET_ADDR_LEN);
 1395                 EUCA_FREE(ip);
 1396             }
 1397         }
 1398         // set guest power state
 1399         strncpy(instance->guestStateName, GUEST_STATE_POWERED_ON, CHAR_BUFFER_SIZE);
 1400     } else {
 1401         strncpy(instance->guestStateName, GUEST_STATE_POWERED_OFF, CHAR_BUFFER_SIZE);
 1402     }
 1403 
 1404     // persist state updates to disk
 1405     save_instance_struct(instance);
 1406 }
 1407 
 1408 //!
 1409 //! copying the linked list for use by Describe* requests
 1410 //!
 1411 void copy_instances(void)
 1412 {
 1413     ncInstance *instance = NULL;
 1414     ncInstance *src_instance = NULL;
 1415     ncInstance *dst_instance = NULL;
 1416     bunchOfInstances *head = NULL;
 1417     bunchOfInstances *container = NULL;
 1418 
 1419     sem_p(inst_copy_sem);
 1420     {
 1421         // free the old linked list copy
 1422         for (head = global_instances_copy; head;) {
 1423             container = head;
 1424             instance = head->instance;
 1425             head = head->next;
 1426             EUCA_FREE(instance);
 1427             EUCA_FREE(container);
 1428         }
 1429 
 1430         global_instances_copy = NULL;
 1431 
 1432         // make a fresh copy
 1433         for (head = global_instances; head; head = head->next) {
 1434             src_instance = head->instance;
 1435             dst_instance = (ncInstance *) EUCA_ALLOC(1, sizeof(ncInstance));
 1436             memcpy(dst_instance, src_instance, sizeof(ncInstance));
 1437             add_instance(&global_instances_copy, dst_instance);
 1438         }
 1439     }
 1440     sem_v(inst_copy_sem);
 1441 }
 1442 
 1443 //!
 1444 //! helper that is used during initialization and by monitornig thread
 1445 //!
 1446 static void update_log_params(void)
 1447 {
 1448     int log_level = 0;
 1449     int log_roll_number = 0;
 1450     long log_max_size_bytes = 0;
 1451     char *log_prefix = NULL;
 1452     char *log_facility = NULL;
 1453 
 1454     // read log params from config file and update in-memory configuration
 1455     configReadLogParams(&log_level, &log_roll_number, &log_max_size_bytes, &log_prefix);
 1456 
 1457     // reconfigure the logging subsystem to use the new values, if any
 1458     log_params_set(log_level, log_roll_number, log_max_size_bytes);
 1459     log_prefix_set(log_prefix);
 1460     EUCA_FREE(log_prefix);
 1461 
 1462     if ((log_facility = configFileValue("LOGFACILITY")) != NULL) {
 1463         if (strlen(log_facility) > 0) {
 1464             log_facility_set(log_facility, "nc");
 1465         }
 1466         EUCA_FREE(log_facility);
 1467     }
 1468 }
 1469 
 1470 //!
 1471 //! helper that is used during initialization and by monitornig thread
 1472 //!
 1473 static void update_ebs_params(void)
 1474 {
 1475     char *ceph_user = getConfString(nc_state.configFiles, 2, CONFIG_NC_CEPH_USER);
 1476     char *ceph_keys = getConfString(nc_state.configFiles, 2, CONFIG_NC_CEPH_KEYS);
 1477     char *ceph_conf = getConfString(nc_state.configFiles, 2, CONFIG_NC_CEPH_CONF);
 1478     init_iscsi(nc_state.home,
 1479                (ceph_user == NULL) ? (DEFAULT_CEPH_USER) : (ceph_user),
 1480                (ceph_keys == NULL) ? (DEFAULT_CEPH_KEYRING) : (ceph_keys), (ceph_conf == NULL) ? (DEFAULT_CEPH_CONF) : (ceph_conf));
 1481     EUCA_FREE(ceph_user);
 1482     EUCA_FREE(ceph_keys);
 1483     EUCA_FREE(ceph_conf);
 1484 }
 1485 
 1486 //!
 1487 //! This defines the NC monitoring thread
 1488 //!
 1489 //! @param[in] arg a transparent pointer to the global NC state structure
 1490 //!
 1491 //! @return Always return NULL
 1492 //!
 1493 void *monitoring_thread(void *arg)
 1494 {
 1495 #define EUCANETD_PID_FILE         "%s/var/run/eucalyptus/eucanetd.pid"
 1496 #define EUCANETD_SERVICE_NAME     "eucanetd"
 1497 
 1498     int i = 0;
 1499     int tmpint = 0;
 1500     int left = 0;
 1501     int cleaned_up = 0;
 1502     int destroy_files = 0;
 1503     char *psPid = NULL;
 1504     char sPidFile[EUCA_MAX_PATH] = "";
 1505     char nfile[EUCA_MAX_PATH] = "";
 1506     char nfilefinal[EUCA_MAX_PATH] = "";
 1507     char URL[EUCA_MAX_PATH] = "";
 1508     char ccHost[EUCA_MAX_PATH] = "";
 1509     char clcHost[EUCA_MAX_PATH] = "";
 1510     char tmpbuf[EUCA_MAX_PATH] = "";
 1511     long long iteration = 0;
 1512     long long work_fs_size_mb = 0;
 1513     long long work_fs_avail_mb = 0;
 1514     long long cache_fs_size_mb = 0;
 1515     long long cache_fs_avail_mb = 0;
 1516     FILE *FP = NULL;
 1517     time_t now = 0;
 1518     struct nc_state_t *nc = NULL;
 1519     bunchOfInstances *head = NULL;
 1520     bunchOfInstances *vnhead = NULL;
 1521     ncInstance *instance = NULL;
 1522     ncInstance *vninstance = NULL;
 1523 
 1524     LOGINFO("spawning monitoring thread\n");
 1525     if (arg == NULL) {
 1526         LOGFATAL("internal error (NULL parameter to monitoring_thread)\n");
 1527         return NULL;
 1528     }
 1529 
 1530     nc = ((struct nc_state_t *)arg);
 1531 
 1532     for (iteration = 0; TRUE; iteration++) {
 1533         now = time(NULL);
 1534 
 1535         // EUCA-10056 we need to check if EUCANETD is running when in EDGE of VPC mode
 1536         if (!strcmp(nc_state.pEucaNet->sMode, NETMODE_EDGE)) {
 1537             snprintf(sPidFile, EUCA_MAX_PATH, EUCANETD_PID_FILE, nc_state.home);
 1538             if ((psPid = file2str(sPidFile)) != NULL) {
 1539                 // Is the
 1540                 if (euca_is_running(atoi(psPid), EUCANETD_SERVICE_NAME)) {
 1541                     if (nc_state.isEucanetdEnabled == FALSE)
 1542                         LOGDEBUG("Service %s detected and running.\n", EUCANETD_SERVICE_NAME);
 1543                     nc_state.isEucanetdEnabled = TRUE;
 1544                 } else if (nc_state.isEucanetdEnabled) {
 1545                     // EUCANETD isn't running... Throw a fault for the user to correct
 1546                     LOGERROR("Service %s not running (even if PID file is detected).\n", EUCANETD_SERVICE_NAME);
 1547                     nc_state.isEucanetdEnabled = FALSE;
 1548                     log_eucafault("1008", "daemon", EUCANETD_SERVICE_NAME, NULL);
 1549                 }
 1550                 EUCA_FREE(psPid);
 1551             } else if (nc_state.isEucanetdEnabled) {
 1552                 // EUCANETD isn't running... Throw a fault for the user to correct
 1553                 LOGERROR("Service %s not running.\n", EUCANETD_SERVICE_NAME);
 1554                 nc_state.isEucanetdEnabled = FALSE;
 1555                 log_eucafault("1008", "daemon", EUCANETD_SERVICE_NAME, NULL);
 1556             }
 1557         }
 1558 
 1559         sem_p(inst_sem);
 1560 
 1561         snprintf(nfile, EUCA_MAX_PATH, EUCALYPTUS_LOG_DIR "/local-net.stage", nc_state.home);
 1562         snprintf(nfilefinal, EUCA_MAX_PATH, EUCALYPTUS_LOG_DIR "/local-net", nc_state.home);
 1563         if ((FP = fopen(nfile, "w")) == NULL) {
 1564             LOGWARN("could not open file %s for writing\n", nfile);
 1565         } else {
 1566             // print out latest CC and CLC IP addr to the local-net file
 1567             URL[0] = ccHost[0] = clcHost[0] = '\0';
 1568 
 1569             for (i = 0; i < nc_state.servicesLen; i++) {
 1570                 if (!strcmp(nc_state.services[i].type, "cluster")) {
 1571                     if (nc_state.services[i].urisLen > 0) {
 1572                         memcpy(URL, nc_state.services[i].uris[0], 512);
 1573                         if (strlen(URL)) {
 1574                             if (tokenize_uri(URL, tmpbuf, ccHost, &tmpint, tmpbuf)) {
 1575                                 snprintf(ccHost, EUCA_MAX_PATH, "0.0.0.0");
 1576                             }
 1577                         }
 1578                     }
 1579                 } else if (!strcmp(nc_state.services[i].type, "eucalyptus")) {
 1580                     if (nc_state.services[i].urisLen > 0) {
 1581                         memcpy(URL, nc_state.services[i].uris[0], 512);
 1582                         if (strlen(URL)) {
 1583                             if (tokenize_uri(URL, tmpbuf, clcHost, &tmpint, tmpbuf)) {
 1584                                 snprintf(clcHost, EUCA_MAX_PATH, "0.0.0.0");
 1585                             }
 1586                         }
 1587                     }
 1588                 }
 1589             }
 1590 
 1591             if (strlen(ccHost)) {
 1592                 fprintf(FP, "CCIP=%s\n", ccHost);
 1593             }
 1594 
 1595             if (strlen(clcHost)) {
 1596                 fprintf(FP, "CLCIP=%s\n", clcHost);
 1597             }
 1598             fflush(FP);
 1599         }
 1600 
 1601         cleaned_up = 0;
 1602         for (head = global_instances; head; head = head->next) {
 1603             instance = head->instance;
 1604 
 1605             // query for current state, if any
 1606             refresh_instance_info(nc, instance);
 1607 
 1608             // time out logic for migration-ready instances
 1609             if (!strcmp(instance->stateName, "Extant") && ((instance->migration_state == MIGRATION_READY) || (instance->migration_state == MIGRATION_PREPARING))
 1610                 && ((now - instance->migrationTime) > nc_state.migration_ready_threshold)) {
 1611                 if (instance->migrationTime) {
 1612                     if (outgoing_migrations_in_progress) {
 1613                         LOGINFO("[%s] has been in migration state '%s' on source for %d seconds (threshold is %d), but not rolling back due to %d ongoing outgoing migration[s]\n",
 1614                                 instance->instanceId, migration_state_names[instance->migration_state], (int)(now - instance->migrationTime), nc_state.migration_ready_threshold,
 1615                                 outgoing_migrations_in_progress);
 1616                         continue;
 1617                     }
 1618 
 1619                     LOGWARN("[%s] has been in migration state '%s' on source for %d seconds (threshold is %d), rolling back [%d].\n",
 1620                             instance->instanceId, migration_state_names[instance->migration_state], (int)(now - instance->migrationTime), nc_state.migration_ready_threshold,
 1621                             instance->migrationTime);
 1622                     migration_rollback(instance);
 1623                     continue;
 1624                 } else {
 1625                     if (instance->state == BOOTING) {
 1626                         // Assume destination node. (Is this a safe assumption?)
 1627                         LOGDEBUG("[%s] destination node ready: instance in booting state with no migrationTime.\n", instance->instanceId);
 1628                     } else {
 1629                         LOGWARN("[%s] in instance state '%s' is ready to migrate but has a zero instance migrationTime.\n",
 1630                                 instance->instanceId, instance_state_names[instance->state]);
 1631                         migration_rollback(instance);
 1632                     }
 1633                 }
 1634             }
 1635             // don't touch running or canceled threads
 1636             if (instance->state != STAGING && instance->state != BOOTING &&
 1637                 instance->state != SHUTOFF &&
 1638                 instance->state != SHUTDOWN &&
 1639                 instance->state != BUNDLING_SHUTDOWN &&
 1640                 instance->state != BUNDLING_SHUTOFF && instance->state != CREATEIMAGE_SHUTDOWN && instance->state != CREATEIMAGE_SHUTOFF && instance->state != TEARDOWN) {
 1641 
 1642                 if (FP && !strcmp(instance->stateName, "Extant")) {
 1643                     //! @TODO is this still being used?
 1644                     //! @TODO yes! for EDGE networking
 1645                     // have a running instance, write its information to local state file
 1646                     fprintf(FP, "%s %s %s %d %s %s %s\n",
 1647                             SP(instance->instanceId), SP(nc_state.pEucaNet->sPublicDevice), "NA", instance->ncnet.vlan, SP(instance->ncnet.privateMac),
 1648                             SP(instance->ncnet.publicIp), SP(instance->ncnet.privateIp));
 1649                     fflush(FP);
 1650                 }
 1651                 continue;
 1652             }
 1653 
 1654             if (instance->state == TEARDOWN) {
 1655                 // it's been long enough, we can forget the instance
 1656                 if ((now - instance->terminationTime) > nc_state.teardown_state_duration) {
 1657                     remove_instance(&global_instances, instance);
 1658                     LOGINFO("[%s] forgetting about instance\n", instance->instanceId);
 1659                     free_instance(&instance);
 1660                     break;             // need to get out since the list changed
 1661                 }
 1662                 continue;
 1663             }
 1664             // time out logic for STAGING or BOOTING or BUNDLING instances
 1665             if (instance->state == STAGING && (now - instance->launchTime) < nc_state.staging_cleanup_threshold)
 1666                 continue;              // hasn't been long enough, spare it
 1667 
 1668             if (instance->state == BOOTING && (now - instance->bootTime) < nc_state.booting_cleanup_threshold)
 1669                 continue;
 1670 
 1671             if ((instance->state == BUNDLING_SHUTDOWN || instance->state == BUNDLING_SHUTOFF)
 1672                 && (now - instance->bundlingTime) < nc_state.bundling_cleanup_threshold)
 1673                 continue;
 1674 
 1675             if ((instance->state == CREATEIMAGE_SHUTDOWN || instance->state == CREATEIMAGE_SHUTOFF)
 1676                 && (now - instance->createImageTime) < nc_state.createImage_cleanup_threshold)
 1677                 continue;
 1678 
 1679             // terminate a booting instance as a special case, though not if it's an incoming migration
 1680             if (instance->state == BOOTING) {
 1681                 if ((instance->migration_state == MIGRATION_PREPARING) || (instance->migration_state == MIGRATION_READY)) {
 1682                     LOGDEBUG("[%s] instance has exceeded BOOTING cleanup threshold of %d seconds, but has migration_state=%s, so not terminating\n", instance->instanceId,
 1683                              nc_state.booting_cleanup_threshold, migration_state_names[instance->migration_state]);
 1684                     continue;
 1685                 } else {
 1686                     LOGDEBUG("[%s] finding and terminating BOOTING instance, which has exceeded cleanup threshold of %d seconds\n", instance->instanceId,
 1687                              nc_state.booting_cleanup_threshold);
 1688 
 1689                     // do the shutdown in a thread
 1690                     pthread_attr_t tattr;
 1691                     pthread_t tid;
 1692                     pthread_attr_init(&tattr);
 1693                     pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_DETACHED);
 1694                     void *param = (void *)strdup(instance->instanceId);
 1695                     if (pthread_create(&tid, &tattr, terminating_thread, (void *)param) != 0) {
 1696                         LOGERROR("[%s] failed to start VM termination thread\n", instance->instanceId);
 1697                     }
 1698                 }
 1699             }
 1700 
 1701             if (cleaned_up < nc_state.concurrent_cleanup_ops) {
 1702                 // ok, it's been condemned => destroy the files
 1703                 cleaned_up++;
 1704                 destroy_files = !nc_state.save_instance_files;
 1705                 if (call_hooks(NC_EVENT_PRE_CLEAN, instance->instancePath)) {
 1706                     if (destroy_files) {
 1707                         LOGERROR("[%s] cancelled instance cleanup via hooks\n", instance->instanceId);
 1708                         destroy_files = 0;
 1709                     }
 1710                 }
 1711 
 1712                 LOGINFO("[%s] cleaning up state for instance%s\n", instance->instanceId, (destroy_files) ? ("") : (" (but keeping the files)"));
 1713                 if (destroy_instance_backing(instance, destroy_files)) {
 1714                     LOGWARN("[%s] failed to cleanup instance state\n", instance->instanceId);
 1715                 }
 1716                 // check to see if this is the last instance running on vlan, handle local networking information drop
 1717                 left = 0;
 1718                 for (vnhead = global_instances; vnhead; vnhead = vnhead->next) {
 1719                     vninstance = vnhead->instance;
 1720                     if (vninstance->ncnet.vlan == (instance->ncnet).vlan && strcmp(instance->instanceId, vninstance->instanceId)) {
 1721                         left++;
 1722                     }
 1723                 }
 1724 
 1725                 change_state(instance, TEARDOWN);   // TEARDOWN = no more resources
 1726                 instance->terminationTime = time(NULL);
 1727             }
 1728         }
 1729 
 1730         if (FP) {
 1731             fclose(FP);
 1732             rename(nfile, nfilefinal);
 1733         }
 1734 
 1735         copy_instances();              // copy global_instances to global_instances_copy
 1736         sem_v(inst_sem);
 1737 
 1738         if (head) {
 1739             // we got out because of modified list, no need to sleep now
 1740             continue;
 1741         }
 1742 
 1743         sleep(MONITORING_PERIOD);
 1744 
 1745         // do this on every iteration (every MONITORING_PERIOD seconds)
 1746         if ((iteration % 1) == 0) {
 1747             // see if config file has changed and react to those changes
 1748             if (isConfigModified(nc_state.configFiles, 2) > 0) {    // config modification time has changed
 1749                 if (readConfigFile(nc_state.configFiles, 2)) {
 1750                     // something has changed that can be read in
 1751                     LOGINFO("configuration file has been modified, ingressing new options\n");
 1752 
 1753                     // log-related options
 1754                     update_log_params();
 1755 
 1756                     // EBS-related options
 1757                     update_ebs_params();
 1758 
 1759                     //! @todo pick up other NC options dynamically?
 1760                 }
 1761             }
 1762         }
 1763         // do this every 10th iteration (every 10*MONITORING_PERIOD seconds)
 1764         if ((iteration % 10) == 0) {
 1765             //! @todo 3.2 change 1 to 10
 1766 
 1767             // check file system state and blobstore state
 1768             blobstore_meta work_meta, cache_meta;
 1769             if (stat_backing_store(NULL, &work_meta, &cache_meta) == EUCA_OK) {
 1770                 work_fs_size_mb = (long long)(work_meta.fs_bytes_size / MEGABYTE);
 1771                 work_fs_avail_mb = (long long)(work_meta.fs_bytes_available / MEGABYTE);
 1772                 cache_fs_size_mb = (long long)(cache_meta.fs_bytes_size / MEGABYTE);
 1773                 cache_fs_avail_mb = (long long)(cache_meta.fs_bytes_available / MEGABYTE);
 1774 
 1775                 if (work_fs_avail_mb < ((work_fs_size_mb * DISK_TOO_LOW_PERCENT) / 100)) {
 1776                     log_eucafault("1003", "component", euca_this_component_name, "file", work_meta.path, NULL);
 1777                 }
 1778 
 1779                 if (cache_fs_size_mb > 0 && cache_fs_avail_mb < ((cache_fs_size_mb * DISK_TOO_LOW_PERCENT) / 100)) {
 1780                     log_eucafault("1003", "component", euca_this_component_name, "file", cache_meta.path, NULL);
 1781                 }
 1782                 //! @todo add more faults (cache or work reserved exceeds available space on file system)
 1783             }
 1784         }
 1785     }
 1786 
 1787     return NULL;
 1788 
 1789 #undef EUCANETD_PID_FILE
 1790 #undef EUCANETD_SERVICE_NAME
 1791 }
 1792 
 1793 //!
 1794 //! Fills in some of the fields of instance struct
 1795 //!
 1796 //! @param[in] instance struct to fill in
 1797 //!
 1798 void set_instance_params(ncInstance * instance)
 1799 {
 1800     char *s = NULL;
 1801 
 1802     if (nc_state.config_use_virtio_net) {
 1803         instance->params.nicType = NIC_TYPE_VIRTIO;
 1804     } else {
 1805         if (strstr(instance->platform, "windows")) {
 1806             instance->params.nicType = NIC_TYPE_WINDOWS;
 1807         } else {
 1808             instance->params.nicType = NIC_TYPE_LINUX;
 1809         }
 1810     }
 1811 
 1812     euca_strncpy(instance->hypervisorType, nc_state.H->name, sizeof(instance->hypervisorType)); // set the hypervisor type
 1813 
 1814     instance->hypervisorCapability = nc_state.capability;   // set the cap (xen/hw/hw+xen)
 1815     if ((s = system_output("getconf LONG_BIT")) != NULL) {
 1816         int bitness = atoi(s);
 1817         if (bitness == 32 || bitness == 64) {
 1818             instance->hypervisorBitness = bitness;
 1819         } else {
 1820             LOGWARN("[%s] can't determine the host's bitness (%s, assuming 64)\n", instance->instanceId, s);
 1821             instance->hypervisorBitness = 64;
 1822         }
 1823         EUCA_FREE(s);
 1824     } else {
 1825         LOGWARN("[%s] can't determine the host's bitness (assuming 64)\n", instance->instanceId);
 1826         instance->hypervisorBitness = 64;
 1827     }
 1828     instance->combinePartitions = nc_state.convert_to_disk;
 1829     instance->do_inject_key = nc_state.do_inject_key;
 1830 }
 1831 
 1832 //!
 1833 //! Defines the instance startup thread
 1834 //!
 1835 //! @param[in] arg a transparent pointer to the instance structure to start
 1836 //!
 1837 //! @return Always return NULL
 1838 //!
 1839 void *startup_thread(void *arg)
 1840 {
 1841     int i = 0;
 1842     int error = EUCA_OK;
 1843     int status = 0;
 1844     int rc = 0;
 1845     int create_timedout = 0;
 1846     char *xml = NULL;
 1847     char brname[IF_NAME_LEN] = "";
 1848     pid_t cpid = 0;
 1849     boolean try_killing = FALSE;
 1850     boolean created = FALSE;
 1851     ncInstance *instance = ((ncInstance *) arg);
 1852     virDomainPtr dom = NULL;
 1853 
 1854     LOGDEBUG("[%s] spawning startup thread\n", instance->instanceId);
 1855     virConnectPtr conn = lock_hypervisor_conn();
 1856     if (conn == NULL) {
 1857         LOGERROR("[%s] could not contact the hypervisor, abandoning the instance\n", instance->instanceId);
 1858         hypervisor_conn_errors++;
 1859         goto shutoff;
 1860     }
 1861     unlock_hypervisor_conn();          // unlock right away, since we are just checking on it
 1862 
 1863     // set up networking
 1864     snprintf(brname, IF_NAME_LEN, "%s", nc_state.pEucaNet->sBridgeDevice);
 1865 
 1866     euca_strncpy(instance->params.guestNicDeviceName, brname, sizeof(instance->params.guestNicDeviceName));
 1867 
 1868     // set parameters like hypervisor type, bitness, NIC type, key injection, etc.
 1869     set_instance_params(instance);
 1870 
 1871     if ((error = create_instance_backing(instance, FALSE))  // do the heavy lifting on the disk
 1872         || (error = gen_instance_xml(instance)) // create euca-specific instance XML file
 1873         || (error = gen_libvirt_instance_xml(instance))) {  // transform euca-specific XML into libvirt XML
 1874         LOGERROR("[%s] failed to prepare images for instance (error=%d)\n", instance->instanceId, error);
 1875         goto shutoff;
 1876     }
 1877 
 1878     if (instance->state == TEARDOWN) { // timed out in STAGING
 1879         goto free;
 1880     }
 1881 
 1882     if (instance->state == CANCELED) {
 1883         LOGERROR("[%s] cancelled instance startup\n", instance->instanceId);
 1884         goto shutoff;
 1885     }
 1886 
 1887     if (call_hooks(NC_EVENT_PRE_BOOT, instance->instancePath)) {
 1888         LOGERROR("[%s] cancelled instance startup via hooks\n", instance->instanceId);
 1889         goto shutoff;
 1890     }
 1891 
 1892     if (instance_network_gate(instance, nc_state.booting_envwait_threshold)) {
 1893         LOGERROR("[%s] cancelled instance startup via network_gate\n", instance->instanceId);
 1894         goto shutoff;
 1895     }
 1896 
 1897     xml = file2str(instance->libvirtFilePath);
 1898 
 1899     save_instance_struct(instance);    // to enable NC recovery
 1900     sensor_add_resource(instance->instanceId, "instance", instance->uuid);
 1901     sensor_set_resource_alias(instance->instanceId, instance->ncnet.privateIp);
 1902     update_disk_aliases(instance);
 1903 
 1904     // serialize domain creation as hypervisors can get confused with
 1905     // too many simultaneous create requests
 1906     LOGTRACE("[%s] instance about to boot\n", instance->instanceId);
 1907 
 1908     for (i = 0; i < MAX_CREATE_TRYS; i++) { // retry loop
 1909         // TODO: CHUCK -----> Find better
 1910         if (i == 0) {
 1911             sleep(10);
 1912         }
 1913 
 1914         if (i > 0) {
 1915             LOGINFO("[%s] attempt %d of %d to create the instance\n", instance->instanceId, i + 1, MAX_CREATE_TRYS);
 1916         }
 1917 
 1918         {                              // all this is done while holding the hypervisor lock, with a valid connection
 1919             virConnectPtr conn = lock_hypervisor_conn();
 1920             if (conn == NULL) {        // get a new connection for each loop iteration
 1921                 LOGERROR("[%s] could not contact the hypervisor, abandoning the instance\n", instance->instanceId);
 1922                 hypervisor_conn_errors++;
 1923                 goto shutoff;
 1924             }
 1925 
 1926             sem_p(loop_sem);
 1927 
 1928             if (i > 0 && create_timedout == 1) {
 1929                 dom = virDomainLookupByName(conn, instance->instanceId);
 1930                 if (dom) {
 1931 
 1932                     // a forked process failed to return in a timely manner, yet the instance
 1933                     // launched. Since we can't verify the validity of the instance, terminate and
 1934                     // let the NC clean up.
 1935                     LOGERROR("[%s] failed to launch cleanly after %d seconds, destroying instance\n", instance->instanceId, CREATE_TIMEOUT_SEC);
 1936                     error = virDomainDestroy(dom);
 1937                     LOGINFO("[%s] instance destroyed - return: %d\n", instance->instanceId, error);
 1938 
 1939                     virDomainFree(dom);
 1940                     sem_v(loop_sem);
 1941                     unlock_hypervisor_conn();
 1942 
 1943                     goto shutoff;
 1944                 }
 1945             }
 1946 
 1947             // We have seen virDomainCreateLinux() on occasion block indefinitely,
 1948             // which freezes all activity on the NC since hyp_sem and loop_sem are
 1949             // being held by the thread. (This is on Lucid with AppArmor enabled.)
 1950             // To protect against that, we invoke the function in a process and
 1951             // terminate it after CREATE_TIMEOUT_SEC seconds.
 1952             //
 1953             // #0  0x00007f359f0b1f93 in poll () from /lib/libc.so.6
 1954             // #1  0x00007f359a9a44e2 in ?? () from /usr/lib/libvirt.so.0
 1955             // #2  0x00007f359a9a5060 in ?? () from /usr/lib/libvirt.so.0
 1956             // #3  0x00007f359a9ac159 in ?? () from /usr/lib/libvirt.so.0
 1957             // #4  0x00007f359a98d65b in virDomainCreateXML () from /usr/lib/libvirt.so.0
 1958             // #5  0x00007f359b053c8e in startup_thread (arg=0x7f358813bf40) at handlers.c:644
 1959             // #6  0x00007f359f3619ca in start_thread () from /lib/libpthread.so.0
 1960             // #7  0x00007f359f0be70d in clone () from /lib/libc.so.6
 1961             // #8  0x0000000000000000 in ?? ()
 1962 
 1963             if ((cpid = fork()) < 0) { // fork error
 1964                 LOGERROR("[%s] failed to fork to start instance\n", instance->instanceId);
 1965             } else if (cpid == 0) {    // child process - creates the domain
 1966                 if ((dom = virDomainCreateLinux(conn, xml, 0)) != NULL) {
 1967                     virDomainFree(dom); // To be safe. Docs are not clear on whether the handle exists outside the process.
 1968 
 1969                     if (!strcmp(nc_state.pEucaNet->sMode, NETMODE_VPCMIDO)) {
 1970                         bridge_instance_interfaces_remove(&nc_state, instance);
 1971                     }
 1972                     // Fix for EUCA-12608
 1973                     if (!strcmp(nc_state.pEucaNet->sMode, NETMODE_EDGE)) {
 1974                         char iface[16];
 1975                         snprintf(iface, 16, "vn_%s", instance->instanceId);
 1976                         bridge_interface_set_hairpin(&nc_state, instance, iface);
 1977                     } 
 1978 
 1979                     exit(0);
 1980                 } else {
 1981                     exit(1);
 1982                 }
 1983             } else {
 1984                 // parent process - waits for the child, kills it if necessary
 1985                 try_killing = FALSE;
 1986                 if ((rc = timewait(cpid, &status, CREATE_TIMEOUT_SEC)) < 0) {
 1987                     LOGERROR("[%s] failed to wait for forked process: %s\n", instance->instanceId, strerror(errno));
 1988                     try_killing = TRUE;
 1989                 } else if (rc == 0) {
 1990                     LOGERROR("[%s] timed out waiting for forked process pid=%d\n", instance->instanceId, cpid);
 1991                     create_timedout = 1; // Sometimes a timeout can occur but the instance is running...
 1992                     try_killing = TRUE;
 1993                 } else if (WEXITSTATUS(status) != 0) {
 1994                     LOGERROR("[%s] hypervisor failed to create the instance\n", instance->instanceId);
 1995                 } else {
 1996                     created = TRUE;
 1997                 }
 1998 
 1999                 if (try_killing) {
 2000                     killwait(cpid);
 2001                 }
 2002             }
 2003 
 2004             sem_v(loop_sem);
 2005             unlock_hypervisor_conn();  // guard against libvirtd connection badness
 2006         }
 2007 
 2008         if (created)
 2009             break;
 2010 
 2011         sleep(1);
 2012     }
 2013 
 2014     if (!created) {
 2015         goto shutoff;
 2016     }
 2017     //! @TODO bring back correlationId
 2018     eventlog("NC", instance->userId, "", "instanceBoot", "begin");
 2019 
 2020     {                                  // make instance state changes while under lock
 2021         sem_p(inst_sem);
 2022         // check one more time for cancellation
 2023         if (instance->state == TEARDOWN) {
 2024             // timed out in BOOTING
 2025         } else if (instance->state == CANCELED || instance->state == SHUTOFF) {
 2026             LOGERROR("[%s] startup of instance was cancelled\n", instance->instanceId);
 2027             change_state(instance, SHUTOFF);
 2028         } else {
 2029             LOGINFO("[%s] booting\n", instance->instanceId);
 2030             instance->bootTime = time(NULL);
 2031             change_state(instance, BOOTING);
 2032         }
 2033         copy_instances();
 2034         sem_v(inst_sem);
 2035     }
 2036     goto free;
 2037 
 2038 shutoff:                              // escape point for error conditions
 2039     change_state(instance, SHUTOFF);
 2040 
 2041 free:
 2042     EUCA_FREE(xml);
 2043     unset_corrid(get_corrid());
 2044     return NULL;
 2045 }
 2046 
 2047 //!
 2048 //! Defines the termination thread.
 2049 //!
 2050 //! @param[in] arg a transparent pointer to the argument passed to this thread handler
 2051 //!
 2052 //! @return Always return NULL
 2053 //!
 2054 void *terminating_thread(void *arg)
 2055 {
 2056     char *instanceId = (char *)arg;
 2057 
 2058     LOGDEBUG("[%s] spawning terminating thread\n", instanceId);
 2059 
 2060     int err = find_and_terminate_instance(instanceId);
 2061     if (err != EUCA_OK) {
 2062         goto free;
 2063     }
 2064 
 2065     {
 2066         sem_p(inst_sem);
 2067         ncInstance *instance = find_instance(&global_instances, instanceId);
 2068         if (instance == NULL) {
 2069             sem_v(inst_sem);
 2070             goto free;
 2071         }
 2072         // change the state and let the monitoring_thread clean up state
 2073         if (instance->state != TEARDOWN && instance->state != CANCELED) {
 2074             // do not leave TEARDOWN (cleaned up) or CANCELED (already trying to terminate)
 2075             if (instance->state == STAGING) {
 2076                 change_state(instance, CANCELED);
 2077             } else {
 2078                 change_state(instance, SHUTOFF);
 2079             }
 2080         }
 2081         copy_instances();
 2082         sem_v(inst_sem);
 2083     }
 2084 free:
 2085     EUCA_FREE(arg);
 2086     unset_corrid(get_corrid());
 2087     return NULL;
 2088 }
 2089 
 2090 //!
 2091 //! On startup, adopt instance found running on the hypervisor.
 2092 //!
 2093 void adopt_instances()
 2094 {
 2095     int dom_ids[MAXDOMS] = { 0 };
 2096     int num_doms = 0;
 2097     int i = 0;
 2098     int error = 0;
 2099     int err = 0;
 2100     virDomainInfo info = { 0 };
 2101     const char *dom_name = NULL;
 2102     ncInstance *instance = NULL;
 2103     virDomainPtr dom = NULL;
 2104     virConnectPtr conn = NULL;
 2105 
 2106     conn = lock_hypervisor_conn();
 2107     while (conn == NULL) {
 2108        LOGERROR("Can't get connection to libvirt. Restarting libvirtd service...\n");
 2109        euca_execlp(NULL, nc_state.rootwrap_cmd_path, "/sbin/service", "libvirtd", "restart", NULL);
 2110        sleep(LIBVIRT_TIMEOUT_SEC);
 2111        LOGINFO("Trying to re-connect");
 2112        conn = lock_hypervisor_conn();
 2113     }
 2114 
 2115     LOGINFO("looking for existing domains\n");
 2116     virSetErrorFunc(NULL, libvirt_err_handler);
 2117 
 2118     num_doms = virConnectListDomains(conn, dom_ids, MAXDOMS);
 2119     if (num_doms == 0) {
 2120         LOGINFO("no currently running domains to adopt\n");
 2121         unlock_hypervisor_conn();
 2122         return;
 2123     }
 2124     if (num_doms < 0) {
 2125         LOGWARN("failed to find out about running domains\n");
 2126         unlock_hypervisor_conn();
 2127         return;
 2128     }
 2129     // WARNING: be sure to call virDomainFree when necessary so as to avoid leaking the virDomainPtr
 2130     for (i = 0; i < num_doms; i++) {
 2131         dom = virDomainLookupByID(conn, dom_ids[i]);
 2132         if (!dom) {
 2133             LOGWARN("failed to lookup running domain #%d, ignoring it\n", dom_ids[i]);
 2134             continue;
 2135         }
 2136         error = virDomainGetInfo(dom, &info);
 2137         if ((error < 0) || (info.state == VIR_DOMAIN_NOSTATE)) {
 2138             LOGWARN("failed to get info on running domain #%d, ignoring it\n", dom_ids[i]);
 2139             virDomainFree(dom);
 2140             continue;
 2141         }
 2142 
 2143         if (info.state == VIR_DOMAIN_SHUTDOWN || info.state == VIR_DOMAIN_SHUTOFF || info.state == VIR_DOMAIN_CRASHED) {
 2144             LOGDEBUG("ignoring non-running domain #%d\n", dom_ids[i]);
 2145             virDomainFree(dom);
 2146             continue;
 2147         }
 2148 
 2149         if ((dom_name = virDomainGetName(dom)) == NULL) {
 2150             LOGWARN("failed to get name of running domain #%d, ignoring it\n", dom_ids[i]);
 2151             virDomainFree(dom);
 2152             continue;
 2153         }
 2154         if (!strcmp(dom_name, "Domain-0")) {
 2155             virDomainFree(dom);
 2156             continue;
 2157         }
 2158 
 2159         if ((instance = load_instance_struct(dom_name)) == NULL) {
 2160             LOGWARN("failed to recover Eucalyptus metadata of running domain %s, ignoring it\n", dom_name);
 2161             virDomainFree(dom);
 2162             continue;
 2163         }
 2164 
 2165         virDomainFree(dom);
 2166 
 2167         if (call_hooks(NC_EVENT_ADOPTING, instance->instancePath)) {
 2168             LOGINFO("[%s] ignoring running domain due to hooks\n", instance->instanceId);
 2169             free_instance(&instance);
 2170             continue;
 2171         }
 2172 
 2173         change_state(instance, info.state);
 2174         sem_p(inst_sem);
 2175         {
 2176             err = add_instance(&global_instances, instance);
 2177         }
 2178         sem_v(inst_sem);
 2179 
 2180         if (err) {
 2181             free_instance(&instance);
 2182             continue;
 2183         }
 2184 
 2185         sensor_add_resource(instance->instanceId, "instance", instance->uuid);  // ensure the sensor system monitors this instance
 2186         sensor_set_resource_alias(instance->instanceId, instance->ncnet.privateIp);
 2187         update_disk_aliases(instance);
 2188 
 2189         //! @TODO try to re-check IPs?
 2190         LOGINFO("[%s] - adopted running domain from user %s\n", instance->instanceId, instance->userId);
 2191     }
 2192     unlock_hypervisor_conn();
 2193 
 2194     sem_p(inst_sem);
 2195     {
 2196         copy_instances();              // copy global_instances to global_instances_copy
 2197     }
 2198     sem_v(inst_sem);
 2199 }
 2200 
 2201 //!
 2202 //!
 2203 //!
 2204 //! @param[in] sig
 2205 //!
 2206 static void nc_signal_handler(int sig)
 2207 {
 2208     LOGDEBUG("signal handler caught %d\n", sig);
 2209 }
 2210 
 2211 //!
 2212 //! Initialize the NC handlers
 2213 //!
 2214 //! @return EUCA_OK on success or proper error code. Known error code returned include EUCA_ERROR,
 2215 //!         EUCA_FATAL_ERROR
 2216 //!
 2217 static int init(void)
 2218 {
 2219 #define GET_VAR_INT(_var, _name, _def)                   \
 2220 {                                                        \
 2221     s = getConfString(nc_state.configFiles, 2, (_name)); \
 2222     if (s) {                                             \
 2223         (_var) = atoi(s);                                \
 2224         EUCA_FREE(s);                                    \
 2225     } else {                                             \
 2226         (_var) = (_def);                                 \
 2227     }                                                    \
 2228 }
 2229 
 2230     static int initialized = 0;
 2231     int do_warn = 0, i;
 2232     char logFile[EUCA_MAX_PATH] = "";
 2233     char logFileReqTrack[EUCA_MAX_PATH] = "";
 2234     char *bridge = NULL;
 2235     char *s = NULL;
 2236     char *tmp = NULL;
 2237     char *pubinterface = NULL;
 2238     struct stat mystat = { 0 };
 2239     struct handlers **h = NULL;
 2240     sigset_t mask = { {0} };
 2241     struct sigaction act = { {0} };
 2242 
 2243     // 0 => hasn't run, -1 => failed, 1 => ok
 2244     if (initialized > 0)
 2245         return EUCA_OK;
 2246     else if (initialized < 0)
 2247         return EUCA_ERROR;
 2248 
 2249     // ensure that MAXes are zeroed out
 2250     bzero(&nc_state, sizeof(struct nc_state_t));
 2251     strncpy(nc_state.version, EUCA_VERSION, sizeof(nc_state.version));  // set the version
 2252     nc_state.is_enabled = TRUE;        // NC is enabled unless disk state will say otherwise
 2253 
 2254     // configure signal handling for this thread and its children:
 2255     // - ignore SIGALRM, which may be used in libraries we depend on
 2256     // - deliver SIGUSR1 to a no-op signal handler, as a way to unblock 'stuck' system calls in libraries we depend on
 2257     {
 2258         // add SIGUSR1 & SIGALRM to the list of signals blocked by this thread and all of its children threads
 2259         sigemptyset(&mask);
 2260         sigaddset(&mask, SIGUSR1);
 2261         sigaddset(&mask, SIGALRM);
 2262         sigprocmask(SIG_BLOCK, &mask, NULL);
 2263 
 2264         // establish function nc_signal_handler() as the handler for delivery of SIGUSR1, in whatever thread
 2265         bzero(&act, sizeof(struct sigaction));
 2266         act.sa_handler = nc_signal_handler;
 2267         act.sa_flags = 0;
 2268         sigemptyset(&act.sa_mask);
 2269         sigaction(SIGUSR1, &act, NULL);
 2270     }
 2271 
 2272     // read in configuration - this should be first!
 2273 
 2274     // determine home ($EUCALYPTUS)
 2275     if ((tmp = getenv(EUCALYPTUS_ENV_VAR_NAME)) == NULL) {
 2276         nc_state.home[0] = '\0';       // empty string means '/'
 2277         do_warn = 1;
 2278     } else {
 2279         strncpy(nc_state.home, tmp, EUCA_MAX_PATH - 1);
 2280     }
 2281 
 2282     //Set the SC client policy file path
 2283     char policyFile[EUCA_MAX_PATH];
 2284     bzero(policyFile, EUCA_MAX_PATH);
 2285     snprintf(policyFile, EUCA_MAX_PATH, EUCALYPTUS_POLICIES_DIR "/sc-client-policy.xml", nc_state.home);
 2286     euca_strncpy(nc_state.config_sc_policy_file, policyFile, EUCA_MAX_PATH);
 2287 
 2288     // set the minimum log for now
 2289     snprintf(logFile, EUCA_MAX_PATH, EUCALYPTUS_LOG_DIR "/nc.log", nc_state.home);
 2290     snprintf(logFileReqTrack, EUCA_MAX_PATH, EUCALYPTUS_LOG_DIR "/nc-tracking.log", nc_state.home);
 2291     log_file_set(logFile, logFileReqTrack);
 2292     LOGINFO("spawning Eucalyptus node controller v%s %s\n", nc_state.version, compile_timestamp_str);
 2293     if (do_warn)
 2294         LOGWARN("env variable %s not set, using /\n", EUCALYPTUS_ENV_VAR_NAME);
 2295 
 2296     // search for the config file
 2297     snprintf(nc_state.configFiles[1], EUCA_MAX_PATH, EUCALYPTUS_CONF_LOCATION, nc_state.home);
 2298     if (stat(nc_state.configFiles[1], &mystat)) {
 2299         LOGFATAL("could not open configuration file %s\n", nc_state.configFiles[1]);
 2300         return (EUCA_ERROR);
 2301     }
 2302     snprintf(nc_state.configFiles[0], EUCA_MAX_PATH, EUCALYPTUS_CONF_OVERRIDE_LOCATION, nc_state.home);
 2303     LOGINFO("NC is looking for configuration in %s,%s\n", nc_state.configFiles[1], nc_state.configFiles[0]);
 2304 
 2305     configInitValues(configKeysRestartNC, configKeysNoRestartNC);   // initialize config subsystem
 2306     readConfigFile(nc_state.configFiles, 2);
 2307     update_log_params();
 2308     LOGINFO("running as user '%s'\n", get_username());
 2309 
 2310     // set default in the paths. the driver will override
 2311     nc_state.config_network_path[0] = '\0';
 2312     nc_state.xm_cmd_path[0] = '\0';
 2313     nc_state.virsh_cmd_path[0] = '\0';
 2314     nc_state.get_info_cmd_path[0] = '\0';
 2315     snprintf(nc_state.libvirt_xslt_path, EUCA_MAX_PATH, EUCALYPTUS_LIBVIRT_XSLT, nc_state.home);    // for now, this must be set before anything in xml.c is invoked
 2316     snprintf(nc_state.rootwrap_cmd_path, EUCA_MAX_PATH, EUCALYPTUS_ROOTWRAP, nc_state.home);
 2317 
 2318     {                                  // determine the hypervisor to use
 2319         char *hypervisor = getConfString(nc_state.configFiles, 2, CONFIG_HYPERVISOR);
 2320         if (!hypervisor) {
 2321             LOGFATAL("value %s is not set in the config file\n", CONFIG_HYPERVISOR);
 2322             return (EUCA_FATAL_ERROR);
 2323         }
 2324         // let's look for the right hypervisor driver
 2325         for (h = available_handlers; *h; h++) {
 2326             if (!strncmp((*h)->name, "default", CHAR_BUFFER_SIZE))
 2327                 nc_state.D = *h;
 2328 
 2329             if (!strncmp((*h)->name, hypervisor, CHAR_BUFFER_SIZE))
 2330                 nc_state.H = *h;
 2331 
 2332             if (!strncmp((*h)->name, "kvm", CHAR_BUFFER_SIZE) && !strcmp(hypervisor, "qemu")) {
 2333                 nc_state.H = *h;
 2334                 strcpy(nc_state.H->name, "qemu");   // TODO: kind of a hack, to make instance->hypervisorType right
 2335             }
 2336         }
 2337 
 2338         if (nc_state.H == NULL) {
 2339             LOGFATAL("requested hypervisor type (%s) is not available\n", hypervisor);
 2340             EUCA_FREE(hypervisor);
 2341             return (EUCA_FATAL_ERROR);
 2342         }
 2343         // only load virtio config for kvm
 2344         if (!strncmp("kvm", hypervisor, CHAR_BUFFER_SIZE) || !strncmp("qemu", hypervisor, CHAR_BUFFER_SIZE) || !strncmp("KVM", hypervisor, CHAR_BUFFER_SIZE)) {
 2345             GET_VAR_INT(nc_state.config_use_virtio_net, CONFIG_USE_VIRTIO_NET, 0);  // for now, these three Virtio settings must be set before anything in xml.c is invoked
 2346             GET_VAR_INT(nc_state.config_use_virtio_disk, CONFIG_USE_VIRTIO_DISK, 0);
 2347             GET_VAR_INT(nc_state.config_use_virtio_root, CONFIG_USE_VIRTIO_ROOT, 0);
 2348         }
 2349         EUCA_FREE(hypervisor);
 2350     }
 2351 
 2352     GET_VAR_INT(nc_state.config_cpu_passthrough, CONFIG_CPU_PASSTHROUGH, 0);
 2353     LOGINFO("CPU passthrough to instance: %s\n", (nc_state.config_cpu_passthrough) ? ("enabled") : ("disabled"));
 2354 
 2355     {
 2356         // load NC's state from disk, if any
 2357         struct nc_state_t nc_state_disk = { 0 };
 2358 
 2359         // allocate temporary network struct (we cannot put vnetConfig on the stack, it is large: 102MB)
 2360         if ((nc_state_disk.pEucaNet = EUCA_ZALLOC(1, sizeof(euca_network))) == NULL) {
 2361             LOGFATAL("Cannot allocate network configuration structure!\n");
 2362             return (EUCA_FATAL_ERROR);
 2363         }
 2364         // Allocate our network structure
 2365         if ((nc_state.pEucaNet = EUCA_ZALLOC(1, sizeof(euca_network))) == NULL) {
 2366             LOGFATAL("Cannot allocate network configuration structure!\n");
 2367             EUCA_FREE(nc_state_disk.pEucaNet);
 2368             return (EUCA_FATAL_ERROR);
 2369         }
 2370 
 2371         if (read_nc_xml(&nc_state_disk) == EUCA_OK) {
 2372             //! @TODO currently read_nc_xml() relies on nc_state.libvirt_xslt_path and virtio flags being set, which is brittle - fix init() in xml.c
 2373             LOGINFO("loaded NC state from previous invocation\n");
 2374 
 2375             // check on the version, in case it has changed
 2376             if (strcmp(nc_state_disk.version, nc_state.version) != 0 && nc_state_disk.version[0] != '\0') {
 2377                 LOGINFO("found state from NC v%s while starting NC v%s\n", nc_state_disk.version, nc_state.version);
 2378                 // any NC upgrade/downgrade-related code can go here
 2379             }
 2380             // check on the state
 2381             if (nc_state_disk.is_enabled == FALSE) {
 2382                 LOGINFO("NC will start up as DISABLED based on disk state\n");
 2383                 nc_state.is_enabled = FALSE;
 2384             }
 2385         } else {
 2386             // there is no disk state, so create it
 2387             if (gen_nc_xml(&nc_state) != EUCA_OK) {
 2388                 LOGERROR("failed to update NC state on disk\n");
 2389             } else {
 2390                 LOGINFO("wrote NC state to disk\n");
 2391             }
 2392         }
 2393     }
 2394 
 2395     {
 2396         /* Initialize libvirtd.conf, since some buggy versions of libvirt
 2397          * require it.  At least two versions of libvirt have had this issue,
 2398          * most recently the version in RHEL 6.1.  Note that this happens
 2399          * at each startup of the NC mainly because the location of the
 2400          * required file depends on the process owner's home directory, which
 2401          * may change after the initial installation.
 2402          */
 2403         int use_polkit = 0;
 2404         char libVirtConf[EUCA_MAX_PATH];
 2405         uid_t uid = geteuid();
 2406         struct passwd *pw;
 2407         FILE *fd;
 2408         struct stat lvcstat;
 2409         pw = getpwuid(uid);
 2410         errno = 0;
 2411         if (pw != NULL) {
 2412             snprintf(libVirtConf, EUCA_MAX_PATH, "%s/.libvirt/libvirtd.conf", pw->pw_dir);
 2413             if (access(libVirtConf, R_OK) == -1 && errno == ENOENT) {
 2414                 libVirtConf[strlen(libVirtConf) - strlen("/libvirtd.conf")] = '\0';
 2415                 errno = 0;
 2416                 if (stat(libVirtConf, &lvcstat) == -1 && errno == ENOENT) {
 2417                     mkdir(libVirtConf, 0755);
 2418                 } else if (errno) {
 2419                     LOGINFO("Failed to stat %s/.libvirt\n", pw->pw_dir);
 2420                 }
 2421                 libVirtConf[strlen(libVirtConf)] = '/';
 2422                 errno = 0;
 2423                 fd = fopen(libVirtConf, "a");
 2424                 if (fd == NULL) {
 2425                     LOGINFO("Failed to open %s, error code %d\n", libVirtConf, errno);
 2426                 } else {
 2427                     fclose(fd);
 2428                 }
 2429             } else if (errno) {
 2430                 LOGINFO("Failed to access libvirtd.conf, error code %d\n", errno);
 2431             }
 2432         } else {
 2433             LOGINFO("Cannot get EUID, not creating libvirtd.conf\n");
 2434         }
 2435 
 2436         //
 2437         // Configure libvirtd polkit authentication on the libvirt sockets
 2438         // by default we *disable* polkit authentication due to stability issues.
 2439         // If the configuration parameter is set to -1 we won't touch the configuration
 2440         //
 2441         GET_VAR_INT(use_polkit, CONFIG_LIBVIRT_USE_POLICY_KIT, 0);
 2442         if (use_polkit >= 0) {
 2443             if (config_polkit(use_polkit) != EUCA_OK) {
 2444                 LOGERROR("Unable to %s polkitd for libvirtd.\n", use_polkit ? "enable" : "disable");
 2445             } else {
 2446                 LOGINFO("libvirtd configured to %s polkitd.\n", use_polkit ? "use" : "not use");
 2447             }
 2448         } else {
 2449             LOGDEBUG("Skipping libvirt policy kit configuration\n");
 2450         }
 2451     }
 2452     {                                  // initialize hooks if their directory looks ok
 2453         char dir[EUCA_MAX_PATH];
 2454         snprintf(dir, sizeof(dir), EUCALYPTUS_NC_HOOKS_DIR, nc_state.home);
 2455         // if 'dir' does not exist, init_hooks() will silently fail,
 2456         // and all future call_hooks() will silently succeed
 2457         init_hooks(nc_state.home, dir);
 2458 
 2459         if (call_hooks(NC_EVENT_PRE_INIT, nc_state.home)) {
 2460             LOGFATAL("hooks prevented initialization\n");
 2461             return (EUCA_FATAL_ERROR);
 2462         }
 2463     }
 2464 
 2465     GET_VAR_INT(nc_state.config_max_mem, CONFIG_MAX_MEM, 0);
 2466     GET_VAR_INT(nc_state.config_max_cores, CONFIG_MAX_CORES, 0);
 2467     GET_VAR_INT(nc_state.save_instance_files, CONFIG_SAVE_INSTANCES, 0);
 2468     GET_VAR_INT(nc_state.concurrent_disk_ops, CONFIG_CONCURRENT_DISK_OPS, 4);
 2469     GET_VAR_INT(nc_state.sc_request_timeout_sec, CONFIG_SC_REQUEST_TIMEOUT, 45);
 2470     GET_VAR_INT(nc_state.concurrent_cleanup_ops, CONFIG_CONCURRENT_CLEANUP_OPS, 30);
 2471     GET_VAR_INT(nc_state.disable_snapshots, CONFIG_DISABLE_SNAPSHOTS, 0);
 2472     GET_VAR_INT(nc_state.shutdown_grace_period_sec, CONFIG_SHUTDOWN_GRACE_PERIOD_SEC, 60);
 2473 
 2474     strcpy(nc_state.admin_user_id, EUCALYPTUS_ADMIN);
 2475     GET_VAR_INT(nc_state.staging_cleanup_threshold, CONFIG_NC_STAGING_CLEANUP_THRESHOLD, default_staging_cleanup_threshold);
 2476     GET_VAR_INT(nc_state.booting_cleanup_threshold, CONFIG_NC_BOOTING_CLEANUP_THRESHOLD, default_booting_cleanup_threshold);
 2477     GET_VAR_INT(nc_state.booting_envwait_threshold, CONFIG_NC_BOOTING_ENVWAIT_THRESHOLD, default_booting_envwait_threshold);
 2478     GET_VAR_INT(nc_state.bundling_cleanup_threshold, CONFIG_NC_BUNDLING_CLEANUP_THRESHOLD, default_bundling_cleanup_threshold);
 2479     GET_VAR_INT(nc_state.createImage_cleanup_threshold, CONFIG_NC_CREATEIMAGE_CLEANUP_THRESHOLD, default_createImage_cleanup_threshold);
 2480     GET_VAR_INT(nc_state.teardown_state_duration, CONFIG_NC_TEARDOWN_STATE_DURATION, default_teardown_state_duration);
 2481     GET_VAR_INT(nc_state.migration_ready_threshold, CONFIG_NC_MIGRATION_READY_THRESHOLD, default_migration_ready_threshold);
 2482     // largest ephemeral volume that NC will cache; larger volumes will be created under 'work' blobstore
 2483     GET_VAR_INT(nc_state.ephemeral_cache_highwater_gb, CONFIG_NC_EPHEMERAL_CACHE_HIGHWATER_GB, 0);
 2484     int max_attempts;
 2485     GET_VAR_INT(max_attempts, CONFIG_WALRUS_DOWNLOAD_MAX_ATTEMPTS, -1);
 2486     if (max_attempts > 0 && max_attempts < 99)
 2487         objectstorage_set_max_download_attempts(max_attempts);
 2488 
 2489     // add three eucalyptus directories with executables to PATH of this process
 2490     add_euca_to_path(nc_state.home);
 2491 
 2492     // read in .pem files
 2493     if (euca_init_cert()) {
 2494         LOGWARN("no cryptographic certificates found: waiting for node to be registered...\n");
 2495         //        return (EUCA_FATAL_ERROR);
 2496     }
 2497     // check on dependencies (3rd-party programs that NC invokes)
 2498     if (diskutil_init(0)) {
 2499         LOGFATAL("failed to find required dependencies for disk operations\n");
 2500         return (EUCA_FATAL_ERROR);
 2501     }
 2502     // check on the Imaging Toolkit readiness
 2503     char node_pk_path[EUCA_MAX_PATH];
 2504     snprintf(node_pk_path, sizeof(node_pk_path), EUCALYPTUS_KEYS_DIR "/node-pk.pem", nc_state.home);
 2505     char cloud_cert_path[EUCA_MAX_PATH];
 2506     snprintf(cloud_cert_path, sizeof(cloud_cert_path), EUCALYPTUS_KEYS_DIR "/cloud-cert.pem", nc_state.home);
 2507     if (imaging_init(nc_state.home, cloud_cert_path, node_pk_path)) {
 2508         LOGFATAL("failed to find required dependencies for image work\n");
 2509         return (EUCA_FATAL_ERROR);
 2510     }
 2511 
 2512     //// from now on we have unrecoverable failure, so no point in retrying to re-init ////
 2513     initialized = -1;
 2514 
 2515     hyp_sem = sem_alloc(1, IPC_MUTEX_SEMAPHORE);
 2516     inst_sem = sem_alloc(1, IPC_MUTEX_SEMAPHORE);
 2517     inst_copy_sem = sem_alloc(1, IPC_MUTEX_SEMAPHORE);
 2518     addkey_sem = sem_alloc(1, IPC_MUTEX_SEMAPHORE);
 2519     log_sem = sem_alloc(1, IPC_MUTEX_SEMAPHORE);
 2520     service_state_sem = sem_alloc(1, IPC_MUTEX_SEMAPHORE);
 2521     stats_sem = sem_alloc(1, IPC_MUTEX_SEMAPHORE);
 2522 
 2523     if (!hyp_sem || !inst_sem || !inst_copy_sem || !addkey_sem || !log_sem || !service_state_sem) {
 2524         LOGFATAL("failed to create and initialize semaphores\n");
 2525         return (EUCA_FATAL_ERROR);
 2526     }
 2527     if (log_sem_set(log_sem) != 0) {
 2528         LOGFATAL("failed to set logging semaphore\n");
 2529         return (EUCA_FATAL_ERROR);
 2530     }
 2531 
 2532     if ((loop_sem = diskutil_get_loop_sem()) == NULL) { // NC does not need GRUB for now
 2533         LOGFATAL("failed to find all dependencies\n");
 2534         return (EUCA_FATAL_ERROR);
 2535     }
 2536 
 2537     if (init_eucafaults(euca_this_component_name) == 0) {
 2538         LOGFATAL("failed to initialize fault-logging subsystem\n");
 2539         return (EUCA_FATAL_ERROR);
 2540     }
 2541 
 2542     if (init_ebs_utils(nc_state.sc_request_timeout_sec) != 0) {
 2543         LOGFATAL("Failed to initialize ebs utils\n");
 2544         return (EUCA_FATAL_ERROR);
 2545     }
 2546     // initialize the EBS subsystem
 2547     update_ebs_params();
 2548 
 2549     deauthorize_migration_keys(TRUE);
 2550 
 2551     // NOTE: this is the only call which needs to be called on both
 2552     // the default and the specific handler! All the others will be
 2553     // either or
 2554     i = nc_state.D->doInitialize(&nc_state);
 2555     if (nc_state.H->doInitialize)
 2556         i += nc_state.H->doInitialize(&nc_state);
 2557 
 2558     if (i) {
 2559         LOGFATAL("failed to initialized hypervisor driver!\n");
 2560         return (EUCA_FATAL_ERROR);
 2561     }
 2562 
 2563     {
 2564         // check on hypervisor and pull out capabilities
 2565         virConnectPtr conn = lock_hypervisor_conn();
 2566         if (conn == NULL) {
 2567             // libvirt could be unresponsive for some time if there are a lot of instances after the previous restart via the deauthorize_migration_keys call
 2568             // let's wait a bit and ask for a connection again
 2569             sleep(LIBVIRT_TIMEOUT_SEC);
 2570             conn = lock_hypervisor_conn();
 2571             if (conn == NULL) {
 2572                LOGFATAL("unable to contact hypervisor\n");
 2573                return (EUCA_FATAL_ERROR);
 2574             }
 2575         }
 2576         char *caps_xml = virConnectGetCapabilities(conn);
 2577         if (caps_xml == NULL) {
 2578             LOGFATAL("unable to obtain hypervisor capabilities\n");
 2579             unlock_hypervisor_conn();
 2580             return (EUCA_FATAL_ERROR);
 2581         }
 2582         unlock_hypervisor_conn();
 2583         if (strstr(caps_xml, "<live/>") != NULL) {
 2584             nc_state.migration_capable = 1;
 2585         }
 2586         EUCA_FREE(caps_xml);
 2587     }
 2588     LOGINFO("hypervisor %scapable of live migration\n", nc_state.migration_capable ? "" : "not ");
 2589 
 2590     // now that hypervisor-specific initializers have discovered mem_max and cores_max,
 2591     // adjust the values based on configuration parameters, if any
 2592     if (nc_state.config_max_mem) {
 2593         if (nc_state.config_max_mem > nc_state.phy_max_mem)
 2594             LOGWARN("MAX_MEM value is set to %lldMB that is greater than the amount of physical memory: %lldMB\n", nc_state.config_max_mem, nc_state.phy_max_mem);
 2595         nc_state.mem_max = nc_state.config_max_mem;
 2596     } else {
 2597         nc_state.mem_max = nc_state.phy_max_mem;
 2598     }
 2599 
 2600     if (nc_state.config_max_cores) {
 2601         nc_state.cores_max = nc_state.config_max_cores;
 2602         if (nc_state.cores_max > nc_state.phy_max_cores)
 2603             LOGINFO("MAX_CORES value is set to %lld that is greater than the amount of physical cores: %lld\n", nc_state.cores_max, nc_state.phy_max_cores);
 2604     } else {
 2605         nc_state.cores_max = nc_state.phy_max_cores;
 2606     }
 2607 
 2608     LOGINFO("physical memory available for instances: %lldMB\n", nc_state.mem_max);
 2609     LOGINFO("virtual cpu cores available for instances: %lld\n", nc_state.cores_max);
 2610 
 2611     // sensor subsystem
 2612     if (sensor_init(NULL, NULL, nc_state.cores_max, FALSE, NULL) != EUCA_OK) {
 2613         LOGERROR("failed to initialize sensor subsystem in this process\n");
 2614         return (EUCA_FATAL_ERROR);
 2615     }
 2616 
 2617     if (sensor_set_hyp_sem(hyp_sem) != 0) {
 2618         LOGFATAL("failed to set hypervisor semaphore for the sensor subsystem\n");
 2619         return (EUCA_FATAL_ERROR);
 2620     }
 2621 
 2622     {
 2623         // backing store configuration
 2624         init_backing_errors(); // configure backingstore/blobstore errors to log using the backing::bs_errors() function
 2625 
 2626         char *instances_path = getConfString(nc_state.configFiles, 2, INSTANCE_PATH);
 2627 
 2628         if (instances_path == NULL) {
 2629             LOGERROR("%s is not set\n", INSTANCE_PATH);
 2630             return (EUCA_FATAL_ERROR);
 2631         }
 2632         // create work and cache sub-directories so that stat_backing_store() below succeeds
 2633         char cache_path[EUCA_MAX_PATH];
 2634         snprintf(cache_path, sizeof(cache_path), "%s/cache", instances_path);
 2635         if (ensure_directories_exist(cache_path, 0, NULL, NULL, BACKING_DIRECTORY_PERM) == -1) {
 2636             EUCA_FREE(instances_path);
 2637             return (EUCA_ERROR);
 2638         }
 2639 
 2640         char work_path[EUCA_MAX_PATH];
 2641         snprintf(work_path, sizeof(work_path), "%s/work", instances_path);
 2642         if (ensure_directories_exist(work_path, 0, NULL, NULL, BACKING_DIRECTORY_PERM) == -1) {
 2643             EUCA_FREE(instances_path);
 2644             return (EUCA_ERROR);
 2645         }
 2646         // determine how much is used/available in work and cache areas on the backing store
 2647         blobstore_meta work_meta, cache_meta;
 2648         stat_backing_store(instances_path, &work_meta, &cache_meta);    // will zero-out work_ and cache_meta
 2649         long long work_fs_size_mb = (long long)(work_meta.fs_bytes_size / MEGABYTE);
 2650         long long work_fs_avail_mb = (long long)(work_meta.fs_bytes_available / MEGABYTE);
 2651         long long cache_fs_size_mb = (long long)(cache_meta.fs_bytes_size / MEGABYTE);
 2652         long long cache_fs_avail_mb = (long long)(cache_meta.fs_bytes_available / MEGABYTE);
 2653         long long work_bs_size_mb = work_meta.blocks_limit ? (work_meta.blocks_limit / SEC_PER_MB) : (-1L); // convert sectors->MB
 2654         long long work_bs_allocated_mb = work_meta.blocks_limit ? (work_meta.blocks_allocated / SEC_PER_MB) : 0;
 2655         long long work_bs_reserved_mb = work_meta.blocks_limit ? ((work_meta.blocks_locked + work_meta.blocks_unlocked) / SEC_PER_MB) : 0;
 2656         long long cache_bs_size_mb = cache_meta.blocks_limit ? (cache_meta.blocks_limit / SEC_PER_MB) : (-1L);
 2657         long long cache_bs_allocated_mb = cache_meta.blocks_limit ? (cache_meta.blocks_allocated / SEC_PER_MB) : 0;
 2658         long long cache_bs_reserved_mb = cache_meta.blocks_limit ? ((cache_meta.blocks_locked + cache_meta.blocks_unlocked) / SEC_PER_MB) : 0;
 2659 
 2660         // sanity check
 2661         if (work_fs_avail_mb < MIN_BLOBSTORE_SIZE_MB) {
 2662             LOGERROR("insufficient available work space (%lld MB) under %s/work\n", work_fs_avail_mb, instances_path);
 2663             EUCA_FREE(instances_path);
 2664             return (EUCA_FATAL_ERROR);
 2665         }
 2666         // look up configuration file settings for work and cache size
 2667         long long conf_work_size_mb;
 2668         GET_VAR_INT(conf_work_size_mb, CONFIG_NC_WORK_SIZE, -1);
 2669 
 2670         long long conf_cache_size_mb;
 2671         GET_VAR_INT(conf_cache_size_mb, CONFIG_NC_CACHE_SIZE, -1);
 2672 
 2673         long long conf_work_overhead_mb;
 2674         GET_VAR_INT(conf_work_overhead_mb, CONFIG_NC_OVERHEAD_SIZE, PER_INSTANCE_BUFFER_MB);
 2675 
 2676         {                              // accommodate legacy MAX_DISK setting by converting it
 2677             int max_disk_gb;
 2678             GET_VAR_INT(max_disk_gb, CONFIG_MAX_DISK, -1);
 2679             if (max_disk_gb != -1) {
 2680                 if (conf_work_size_mb == -1) {
 2681                     LOGWARN("using deprecated setting %s for the new setting %s\n", CONFIG_MAX_DISK, CONFIG_NC_WORK_SIZE);
 2682                     if (max_disk_gb == 0) {
 2683                         conf_work_size_mb = -1; // change in semantics: 0 used to mean 'unlimited', now 'unset' or -1 means that
 2684                     } else {
 2685                         conf_work_size_mb = max_disk_gb * 1024;
 2686                     }
 2687                 } else {
 2688                     LOGWARN("ignoring deprecated setting %s in favor of the new setting %s\n", CONFIG_MAX_DISK, CONFIG_NC_WORK_SIZE);
 2689                 }
 2690             }
 2691         }
 2692 
 2693         // decide what work and cache sizes should be, based on all the inputs
 2694         long long work_size_mb = -1;
 2695         long long cache_size_mb = -1;
 2696 
 2697         // above all, try to respect user-specified limits for work and cache
 2698         if (conf_work_size_mb != -1) {
 2699             if (conf_work_size_mb < MIN_BLOBSTORE_SIZE_MB) {
 2700                 LOGWARN("ignoring specified work size (%s=%lld) that is below acceptable minimum (%d)\n", CONFIG_NC_WORK_SIZE, conf_work_size_mb, MIN_BLOBSTORE_SIZE_MB);
 2701             } else {
 2702                 if (work_bs_size_mb != -1 && work_bs_size_mb != conf_work_size_mb) {
 2703                     LOGWARN("specified work size (%s=%lld) differs from existing work size (%lld), will try resizing\n", CONFIG_NC_WORK_SIZE, conf_work_size_mb, work_bs_size_mb);
 2704                 }
 2705                 work_size_mb = conf_work_size_mb;
 2706             }
 2707         }
 2708 
 2709         if (conf_cache_size_mb != -1) { // respect user-specified limit
 2710             if (conf_cache_size_mb < MIN_BLOBSTORE_SIZE_MB) {
 2711                 cache_size_mb = 0;     // so it won't be used
 2712             } else {
 2713                 if (cache_bs_size_mb != -1 && cache_bs_size_mb != conf_cache_size_mb) {
 2714                     LOGWARN("specified cache size (%s=%lld) differs from existing cache size (%lld), will try resizing\n",
 2715                             CONFIG_NC_CACHE_SIZE, conf_cache_size_mb, cache_bs_size_mb);
 2716                 }
 2717                 cache_size_mb = conf_cache_size_mb;
 2718             }
 2719         }
 2720         // if the user did not specify sizes, try existing blobstores,
 2721         // if any, whose limits would have been chosen earlier
 2722         if (work_size_mb == -1 && work_bs_size_mb != -1)
 2723             work_size_mb = work_bs_size_mb;
 2724 
 2725         if (cache_size_mb == -1 && cache_bs_size_mb != -1)
 2726             cache_size_mb = cache_bs_size_mb;
 2727 
 2728         // if the user did not specify either or both of the sizes,
 2729         // and blobstores do not exist yet, make reasonable choices
 2730         if (memcmp(&work_meta.fs_id, &cache_meta.fs_id, sizeof(fsid_t)) == 0) { // cache and work are on the same file system
 2731             long long fs_usable_mb = (long long)((double)work_fs_avail_mb - (double)(work_fs_avail_mb) * FS_BUFFER_PERCENT);
 2732             if (work_size_mb == -1 && cache_size_mb == -1) {
 2733                 work_size_mb = (long long)((double)fs_usable_mb * WORK_BS_PERCENT);
 2734                 cache_size_mb = fs_usable_mb - work_size_mb;
 2735             } else if (work_size_mb == -1) {
 2736                 work_size_mb = fs_usable_mb - cache_size_mb + cache_bs_allocated_mb;
 2737             } else if (cache_size_mb == -1) {
 2738                 cache_size_mb = fs_usable_mb - work_size_mb + work_bs_allocated_mb;
 2739             }
 2740             // sanity check
 2741             if ((cache_size_mb + work_size_mb - cache_bs_allocated_mb - work_bs_allocated_mb) > work_fs_avail_mb) {
 2742                 LOGWARN("sum of work and cache sizes exceeds available disk space\n");
 2743             }
 2744         } else {                       // cache and work are on different file systems
 2745             if (work_size_mb == -1) {
 2746                 work_size_mb = (long long)((double)work_fs_avail_mb - (double)(work_fs_avail_mb) * FS_BUFFER_PERCENT);
 2747             }
 2748 
 2749             if (cache_size_mb == -1) {
 2750                 cache_size_mb = (long long)((double)cache_fs_avail_mb - (double)(cache_fs_avail_mb) * FS_BUFFER_PERCENT);
 2751             }
 2752         }
 2753 
 2754         // sanity-check final results
 2755         if (cache_size_mb < MIN_BLOBSTORE_SIZE_MB)
 2756             cache_size_mb = 0;
 2757 
 2758         if (work_size_mb < MIN_BLOBSTORE_SIZE_MB) {
 2759             LOGERROR("insufficient disk space for virtual machines\n");
 2760             EUCA_FREE(instances_path);
 2761             return (EUCA_FATAL_ERROR);
 2762         }
 2763 
 2764         if (init_backing_store(instances_path, work_size_mb, cache_size_mb)) {
 2765             LOGFATAL("failed to initialize backing store\n");
 2766             EUCA_FREE(instances_path);
 2767             return (EUCA_FATAL_ERROR);
 2768         }
 2769         // record the work-space limit for max_disk
 2770         long long work_size_gb = (long long)(work_size_mb / MB_PER_DISK_UNIT);
 2771         if (conf_work_overhead_mb < 0 || conf_work_overhead_mb > work_size_mb) {    // sanity check work overhead
 2772             conf_work_overhead_mb = PER_INSTANCE_BUFFER_MB;
 2773         }
 2774 
 2775         long long overhead_mb = work_size_gb * conf_work_overhead_mb;   // work_size_gb is the theoretical max number of instances
 2776         long long disk_max_mb = work_size_mb - overhead_mb;
 2777         nc_state.disk_max = disk_max_mb / MB_PER_DISK_UNIT;
 2778 
 2779         LOGINFO("disk space for instances: %s/work\n", instances_path);
 2780         LOGINFO("                          %06lldMB limit (%.1f%% of the file system) - %lldMB overhead = %lldMB = %lldGB\n",
 2781                 work_size_mb, ((double)work_size_mb / (double)work_fs_size_mb) * 100.0, overhead_mb, disk_max_mb, nc_state.disk_max);
 2782         LOGINFO("                          %06lldMB reserved for use (%.1f%% of limit)\n", work_bs_reserved_mb, ((double)work_bs_reserved_mb / (double)work_size_mb) * 100.0);
 2783         LOGINFO("                          %06lldMB allocated for use (%.1f%% of limit, %.1f%% of the file system)\n", work_bs_allocated_mb,
 2784                 ((double)work_bs_allocated_mb / (double)work_size_mb) * 100.0, ((double)work_bs_allocated_mb / (double)work_fs_size_mb) * 100.0);
 2785 
 2786         if (cache_size_mb) {
 2787             LOGINFO("    disk space for cache: %s/cache\n", instances_path);
 2788             LOGINFO("                          %06lldMB limit (%.1f%% of the file system)\n", cache_size_mb, ((double)cache_size_mb / (double)cache_fs_size_mb) * 100.0);
 2789             LOGINFO("                          %06lldMB reserved for use (%.1f%% of limit)\n", cache_bs_reserved_mb,
 2790                     ((double)cache_bs_reserved_mb / (double)cache_size_mb) * 100.0);
 2791             LOGINFO("                          %06lldMB allocated for use (%.1f%% of limit, %.1f%% of the file system)\n", cache_bs_allocated_mb,
 2792                     ((double)cache_bs_allocated_mb / (double)cache_size_mb) * 100.0, ((double)cache_bs_allocated_mb / (double)cache_fs_size_mb) * 100.0);
 2793         } else {
 2794             LOGWARN("disk cache will not be used\n");
 2795         }
 2796 
 2797         EUCA_FREE(instances_path);
 2798     }
 2799 
 2800     // adopt running instances -- do this before disk integrity check so we know what can be purged
 2801     adopt_instances();
 2802 
 2803     if (check_backing_store(&global_instances) != EUCA_OK) {    // integrity check, cleanup of unused instances and shrinking of cache
 2804         LOGFATAL("integrity check of the backing store failed");
 2805         return (EUCA_FATAL_ERROR);
 2806     }
 2807     // setup the network
 2808     snprintf(nc_state.config_network_path, EUCA_MAX_PATH, NC_NET_PATH_DEFAULT, nc_state.home);
 2809 
 2810     tmp = getConfString(nc_state.configFiles, 2, "VNET_MODE");
 2811     if (!tmp) {
 2812         LOGWARN("VNET_MODE is not defined, defaulting to '%s'\n", NETMODE_INVALID);
 2813         tmp = strdup(NETMODE_INVALID);
 2814         if (!tmp) {
 2815             LOGFATAL("Out of memory\n");
 2816             return (EUCA_FATAL_ERROR);
 2817         }
 2818     }
 2819 
 2820     int initFail = 0;
 2821 
 2822     if (tmp && !(!strcmp(tmp, NETMODE_EDGE) || !strcmp(tmp, NETMODE_VPCMIDO))) {
 2823         char errorm[256];
 2824         memset(errorm, 0, 256);
 2825         sprintf(errorm, "Invalid VNET_MODE setting: %s", tmp);
 2826         LOGFATAL("%s\n", errorm);
 2827         initFail = 1;
 2828     }
 2829 
 2830     if (tmp && (!strcmp(tmp, NETMODE_EDGE) || !strcmp(tmp, NETMODE_VPCMIDO))) {
 2831         bridge = getConfString(nc_state.configFiles, 2, "VNET_BRIDGE");
 2832         if (!bridge) {
 2833             LOGFATAL("in 'EDGE' or 'VPCMIDO' network mode, you must specify a value for VNET_BRIDGE\n");
 2834             initFail = 1;
 2835         }
 2836     }
 2837 
 2838     if (tmp && !strcmp(tmp, NETMODE_EDGE)) {
 2839         pubinterface = getConfString(nc_state.configFiles, 2, "VNET_PUBINTERFACE");
 2840         if (!pubinterface)
 2841             pubinterface = getConfString(nc_state.configFiles, 2, "VNET_INTERFACE");
 2842 
 2843         if (!pubinterface) {
 2844             LOGWARN("VNET_PUBINTERFACE is not defined, defaulting to 'eth0'\n");
 2845             pubinterface = strdup("eth0");
 2846             if (!pubinterface) {
 2847                 LOGFATAL("out of memory!\n");
 2848                 initFail = 1;
 2849             }
 2850         }
 2851     }
 2852 
 2853     snprintf(nc_state.pEucaNet->sMode, NETMODE_LEN, "%s", tmp);
 2854     if (pubinterface)
 2855         snprintf(nc_state.pEucaNet->sPublicDevice, IF_NAME_LEN, "%s", pubinterface);
 2856 
 2857     if (bridge)
 2858         snprintf(nc_state.pEucaNet->sBridgeDevice, IF_NAME_LEN, "%s", bridge);
 2859 
 2860     EUCA_FREE(pubinterface);
 2861     EUCA_FREE(bridge);
 2862     EUCA_FREE(tmp);
 2863 
 2864     if (initFail)
 2865         return (EUCA_FATAL_ERROR);
 2866 
 2867     // set NC helper path
 2868     tmp = getConfString(nc_state.configFiles, 2, CONFIG_NC_BUNDLE_UPLOAD);
 2869     if (tmp) {
 2870         snprintf(nc_state.ncBundleUploadCmd, EUCA_MAX_PATH, "%s", tmp);
 2871         EUCA_FREE(tmp);
 2872     } else {
 2873         snprintf(nc_state.ncBundleUploadCmd, EUCA_MAX_PATH, "%s", EUCALYPTUS_NC_BUNDLE_UPLOAD); // default value
 2874     }
 2875 
 2876     // set NC helper path
 2877     tmp = getConfString(nc_state.configFiles, 2, CONFIG_NC_CHECK_BUCKET);
 2878     if (tmp) {
 2879         snprintf(nc_state.ncCheckBucketCmd, EUCA_MAX_PATH, "%s", tmp);
 2880         EUCA_FREE(tmp);
 2881     } else {
 2882         snprintf(nc_state.ncCheckBucketCmd, EUCA_MAX_PATH, "%s", EUCALYPTUS_NC_CHECK_BUCKET);   // default value
 2883     }
 2884 
 2885     // set NC helper path
 2886     tmp = getConfString(nc_state.configFiles, 2, CONFIG_NC_DELETE_BUNDLE);
 2887     if (tmp) {
 2888         snprintf(nc_state.ncDeleteBundleCmd, EUCA_MAX_PATH, "%s", tmp);
 2889         EUCA_FREE(tmp);
 2890     } else {
 2891         snprintf(nc_state.ncDeleteBundleCmd, EUCA_MAX_PATH, "%s", EUCALYPTUS_NC_DELETE_BUNDLE); // default value
 2892     }
 2893 
 2894     {
 2895         // set enable ws-security
 2896         tmp = getConfString(nc_state.configFiles, 2, CONFIG_ENABLE_WS_SECURITY);
 2897         if (tmp && !strcmp(tmp, "N")) {
 2898             LOGDEBUG("Configuring no use of WS-SEC as specified in config file by explicit 'no' value\n");
 2899             nc_state.config_use_ws_sec = 0;
 2900             EUCA_FREE(tmp);
 2901         } else {
 2902             LOGDEBUG("Configured to use WS-SEC by default\n");
 2903             if (tmp)
 2904                 EUCA_FREE(tmp);
 2905             nc_state.config_use_ws_sec = 1;
 2906         }
 2907     }
 2908 
 2909     {                                  // find and set iqn
 2910         snprintf(nc_state.iqn, CHAR_BUFFER_SIZE, "UNSET");
 2911         char *ptr = NULL, *iqn = NULL, *tmp = NULL, cmd[EUCA_MAX_PATH];
 2912         snprintf(cmd, EUCA_MAX_PATH, "%s cat /etc/iscsi/initiatorname.iscsi", nc_state.rootwrap_cmd_path);
 2913         ptr = system_output(cmd);
 2914         if (ptr) {
 2915             iqn = strstr(ptr, "InitiatorName=");
 2916             if (iqn) {
 2917                 iqn += strlen("InitiatorName=");
 2918                 tmp = strstr(iqn, "\n");
 2919                 if (tmp)
 2920                     *tmp = '\0';
 2921                 snprintf(nc_state.iqn, CHAR_BUFFER_SIZE, "%s", iqn);
 2922             }
 2923             EUCA_FREE(ptr);
 2924         }
 2925     }
 2926 
 2927     {                                  // find and set IP
 2928         char hostname[HOSTNAME_SIZE];
 2929         if (gethostname(hostname, sizeof(hostname)) != 0) {
 2930             LOGFATAL("failed to find hostname\n");
 2931             return (EUCA_FATAL_ERROR);
 2932         }
 2933         LOGDEBUG("Searching for IP by hostname %s\n", hostname);
 2934 
 2935         struct addrinfo hints, *servinfo, *p;
 2936         struct sockaddr_in *h;
 2937         memset(&hints, 0, sizeof hints);
 2938         hints.ai_family = AF_INET;
 2939         hints.ai_socktype = SOCK_STREAM;
 2940         int rv;
 2941         if ((rv = getaddrinfo(hostname, "http", &hints, &servinfo)) != 0) {
 2942             LOGFATAL("getaddrinfo: %s\n", gai_strerror(rv));
 2943             return (EUCA_FATAL_ERROR);
 2944         }
 2945         int found = 0;
 2946         for(p = servinfo; !found && p != NULL; p = p->ai_next) {
 2947             if (!found) {
 2948                 h = (struct sockaddr_in *) p->ai_addr;
 2949                 euca_strncpy(nc_state.ip, inet_ntoa(h->sin_addr), sizeof(nc_state.ip));
 2950                 found = 1;
 2951             }
 2952         }
 2953         freeaddrinfo(servinfo);
 2954         if (!found) {
 2955             LOGFATAL("failed to obtain IP for %s\n", hostname);
 2956             return (EUCA_FATAL_ERROR);
 2957         }
 2958         LOGINFO("using IP %s\n", nc_state.ip);
 2959         LOGINFO("Initializing localhost info for vbr processing\n");
 2960         if (vbr_init_hostconfig
 2961             (nc_state.iqn, nc_state.ip, nc_state.config_sc_policy_file, nc_state.config_use_ws_sec, nc_state.config_use_virtio_root, nc_state.config_use_virtio_disk) != 0) {
 2962             LOGFATAL("Error initializing vbr localhost configuration\n");
 2963             return (EUCA_FATAL_ERROR);
 2964         }
 2965     }
 2966 
 2967     {
 2968         LOGINFO("Initializing service state and epoch\n");
 2969         //Initialize the service state info.
 2970         nc_state.ncStatus.localEpoch = 0;
 2971         snprintf(nc_state.ncStatus.details, 1024, "ERRORS=0");
 2972         snprintf(nc_state.ncStatus.serviceId.type, 32, "node");
 2973         snprintf(nc_state.ncStatus.serviceId.name, 32, "self");
 2974         snprintf(nc_state.ncStatus.serviceId.partition, 32, "unset");
 2975         nc_state.ncStatus.serviceId.urisLen = 0;
 2976         nc_state.servicesLen = 0;
 2977         nc_state.disabledServicesLen = 0;
 2978         nc_state.notreadyServicesLen = 0;
 2979 
 2980         for (i = 0; i < 32 && nc_state.ncStatus.serviceId.urisLen < 8; i++) {
 2981             if (nc_state.pEucaNet->aLocalIps[i]) {
 2982                 char *host;
 2983                 host = hex2dot(nc_state.pEucaNet->aLocalIps[i]);
 2984                 if (host) {
 2985                     snprintf(nc_state.ncStatus.serviceId.uris[nc_state.ncStatus.serviceId.urisLen], 512, "http://%s:8775/axis2/services/EucalyptusNC", host);
 2986                     nc_state.ncStatus.serviceId.urisLen++;
 2987                     EUCA_FREE(host);
 2988                 }
 2989             }
 2990         }
 2991 
 2992         LOGINFO("Done initializing services state\n");
 2993     }
 2994 
 2995     {                                  // start the monitoring thread
 2996         pthread_t tcb;
 2997         if (pthread_create(&tcb, NULL, monitoring_thread, &nc_state)) {
 2998             LOGFATAL("failed to spawn a monitoring thread\n");
 2999             return (EUCA_FATAL_ERROR);
 3000         }
 3001         if (pthread_detach(tcb)) {
 3002             LOGFATAL("failed to detach the monitoring thread\n");
 3003             return (EUCA_FATAL_ERROR);
 3004         }
 3005     }
 3006 
 3007     {
 3008 
 3009         if (initialize_stats_system(DEFAULT_SENSOR_INTERVAL_SEC) != EUCA_OK) {
 3010             //        if (init_stats(nc_state.home, euca_this_component_name, nc_stats_lock, nc_stats_unlock) != EUCA_OK) {
 3011             LOGERROR("Could not initialize NC stats system\n");
 3012             return EUCA_ERROR;
 3013         }
 3014         LOGDEBUG("Stats system initialized for NC\n");
 3015 
 3016         //Stats thread. Independent of the monitoring thread because the monitoring thread fires irregularly
 3017         pthread_t stats_thread;
 3018         if (pthread_create(&stats_thread, NULL, nc_run_stats, &nc_state)) {
 3019             LOGFATAL("Failed to spawn the internal stats thread\n");
 3020             return (EUCA_FATAL_ERROR);
 3021         }
 3022         if (pthread_detach(stats_thread)) {
 3023             LOGFATAL("Failed to detach the internal stats thread\n");
 3024             return (EUCA_FATAL_ERROR);
 3025         }
 3026 
 3027     }
 3028 
 3029     // post-init hook
 3030     if (call_hooks(NC_EVENT_POST_INIT, nc_state.home)) {
 3031         LOGFATAL("hooks prevented initialization\n");
 3032         return (EUCA_FATAL_ERROR);
 3033     }
 3034 
 3035     initialized = 1;
 3036     return (EUCA_OK);
 3037 
 3038 #undef GET_VAR_INT
 3039 }
 3040 
 3041 //!
 3042 //!
 3043 //!
 3044 //! @note this routine runs immediately when the process is started
 3045 //!
 3046 void doInitNC(void)
 3047 {
 3048     if (init()) {
 3049         LOGWARN("could not initialize\n");
 3050     }
 3051     LOGINFO("component started\n");
 3052 }
 3053 
 3054 //!
 3055 //! Handles the describe instance request
 3056 //!
 3057 //! @param[in]  pMeta a pointer to the node controller (NC) metadata structure
 3058 //! @param[in]  instIds a pointer the list of instance identifiers to retrieve data for
 3059 //! @param[in]  instIdsLen the number of instance identifiers in the instIds list
 3060 //! @param[out] outInsts a pointer the list of instances for which we have data
 3061 //! @param[out] outInstsLen the number of instances in the outInsts list.
 3062 //!
 3063 //! @return EUCA_OK on success or proper error code. Known error code returned include: EUCA_ERROR,
 3064 //!         EUCA_MEMORY_ERROR, EUCA_MEMORY_ERROR
 3065 //!
 3066 int doDescribeInstances(ncMetadata * pMeta, char **instIds, int instIdsLen, ncInstance *** outInsts, int *outInstsLen)
 3067 {
 3068 #define NC_MONIT_FILENAME                        EUCALYPTUS_RUN_DIR  "/nc-stats"
 3069 
 3070     int i = 0;
 3071     int j = 0;
 3072     int ret = EUCA_OK;
 3073     int len = 0;
 3074     char *s = "";
 3075     char *file_name = NULL;
 3076     char myName[CHAR_BUFFER_SIZE] = "";
 3077     FILE *f = NULL;
 3078     long long used_mem = 0;
 3079     long long used_disk = 0;
 3080     long long used_cores = 0;
 3081     u_int vols_count = 0;
 3082     u_int nics_count = 0;
 3083 
 3084     if (init())
 3085         return (EUCA_ERROR);
 3086 
 3087     LOGTRACE("invoked\n");             // response will be at INFO, so this is TRACE
 3088 
 3089     updateServiceStateInfo(pMeta, FALSE);
 3090     if (nc_state.H->doDescribeInstances)
 3091         ret = nc_state.H->doDescribeInstances(&nc_state, pMeta, instIds, instIdsLen, outInsts, outInstsLen);
 3092     else
 3093         ret = nc_state.D->doDescribeInstances(&nc_state, pMeta, instIds, instIdsLen, outInsts, outInstsLen);
 3094 
 3095     if (ret)
 3096         return ret;
 3097 
 3098     for (i = 0; i < (*outInstsLen); i++) {
 3099         char vols_str[128] = "";
 3100         char vol_str[16] = "";
 3101         char nics_str[128] = "";
 3102         char nic_str[16] = "";
 3103         char status_str[128] = "running";
 3104         ncInstance *instance = (*outInsts)[i];
 3105 
 3106         // construct a string summarizing the volumes attached to the instance
 3107         vols_count = 0;
 3108         for (j = 0; j < EUCA_MAX_VOLUMES; ++j) {
 3109             ncVolume *volume = &instance->volumes[j];
 3110             if (strlen(volume->volumeId) == 0)
 3111                 continue;
 3112             vols_count++;
 3113 
 3114             s = "";
 3115             if (!strcmp(volume->stateName, VOL_STATE_ATTACHING))
 3116                 s = "a";
 3117             else if (!strcmp(volume->stateName, VOL_STATE_ATTACHED))
 3118                 s = "A";
 3119             else if (!strcmp(volume->stateName, VOL_STATE_ATTACHING_FAILED))
 3120                 s = "af";
 3121             else if (!strcmp(volume->stateName, VOL_STATE_DETACHING))
 3122                 s = "d";
 3123             else if (!strcmp(volume->stateName, VOL_STATE_DETACHED))
 3124                 s = "D";
 3125             else if (!strcmp(volume->stateName, VOL_STATE_DETACHING_FAILED))
 3126                 s = "df";
 3127 
 3128             snprintf(vol_str, sizeof(vol_str), "%s%s:%s", (vols_count > 1) ? (",") : (""), volume->volumeId, s);
 3129             if ((strlen(vols_str) + strlen(vol_str)) < sizeof(vols_str)) {
 3130                 strcat(vols_str, vol_str);
 3131             }
 3132         }
 3133 
 3134         nics_count = 0;
 3135         for (j = 0; j < EUCA_MAX_NICS; ++j) {
 3136             netConfig *net = &instance->secNetCfgs[j];
 3137             if (strlen(net->interfaceId) == 0)
 3138                 continue;
 3139             nics_count++;
 3140 
 3141             s = "";
 3142             if (!strcmp(net->stateName, VOL_STATE_ATTACHING))
 3143                 s = "a";
 3144             else if (!strcmp(net->stateName, VOL_STATE_ATTACHED))
 3145                 s = "A";
 3146             else if (!strcmp(net->stateName, VOL_STATE_ATTACHING_FAILED))
 3147                 s = "af";
 3148             else if (!strcmp(net->stateName, VOL_STATE_DETACHING))
 3149                 s = "d";
 3150             else if (!strcmp(net->stateName, VOL_STATE_DETACHED))
 3151                 s = "D";
 3152             else if (!strcmp(net->stateName, VOL_STATE_DETACHING_FAILED))
 3153                 s = "df";
 3154             else
 3155                 s = "U"; //unknown state
 3156 
 3157             snprintf(nic_str, sizeof(nic_str), "%s%s:%s", (nics_count > 1) ? (",") : (""), net->interfaceId, s);
 3158             if ((strlen(nics_str) + strlen(nic_str)) < sizeof(nics_str)) {
 3159                 strcat(nics_str, nic_str);
 3160             }
 3161         }
 3162 
 3163         if (instance->migration_state != NOT_MIGRATING) {   // construct migration status string
 3164             char *peer = "?";
 3165             char dir = '?';
 3166             if (!strcmp(nc_state.ip, instance->migration_src)) {
 3167                 peer = instance->migration_dst;
 3168                 dir = '>';
 3169             } else {
 3170                 peer = instance->migration_src;
 3171                 dir = '<';
 3172             }
 3173             snprintf(status_str, sizeof(status_str), "%s %c%s", migration_state_names[instance->migration_state], dir, peer);
 3174         } else if (instance->terminationTime) {
 3175             strncpy(status_str, "terminated", sizeof(status_str));
 3176         } else if (instance->terminationRequestedTime) {
 3177             strncpy(status_str, "terminating", sizeof(status_str));
 3178         } else if (instance->state == BUNDLING_SHUTDOWN || instance->state == BUNDLING_SHUTOFF) {
 3179             strncpy(status_str, "bundling", sizeof(status_str));
 3180         } else if (instance->state == CREATEIMAGE_SHUTDOWN || instance->state == CREATEIMAGE_SHUTOFF) {
 3181             strncpy(status_str, "creating image", sizeof(status_str));
 3182         } else if (instance->bootTime == 0) {
 3183             strncpy(status_str, "staging", sizeof(status_str));
 3184         }                              // else it is "running"
 3185 
 3186         if (nics_count > 0) {
 3187             LOGDEBUG("[%s] %s (%s) pub=%s vols=%s nics=%s\n", instance->instanceId, instance->stateName, status_str, instance->ncnet.publicIp, vols_str, nics_str);
 3188         } else {
 3189             LOGDEBUG("[%s] %s (%s) pub=%s vols=%s\n", instance->instanceId, instance->stateName, status_str, instance->ncnet.publicIp, vols_str);
 3190         }
 3191     }
 3192 
 3193     // allocate enough memory
 3194     len = (strlen(EUCALYPTUS_CONF_LOCATION) > strlen(NC_MONIT_FILENAME)) ? strlen(EUCALYPTUS_CONF_LOCATION) : strlen(NC_MONIT_FILENAME);
 3195     len += 2 + strlen(nc_state.home);
 3196     if ((file_name = EUCA_ALLOC(1, sizeof(char) * len)) == NULL) {
 3197         LOGERROR("Out of memory!\n");
 3198         return (EUCA_MEMORY_ERROR);
 3199     }
 3200 
 3201     sprintf(file_name, NC_MONIT_FILENAME, nc_state.home);
 3202     if (!strcmp(pMeta->userId, EUCALYPTUS_ADMIN)) {
 3203         if ((f = fopen(file_name, "w")) == NULL) {
 3204             if ((f = fopen(file_name, "w+")) == NULL) {
 3205                 LOGWARN("Cannot create %s!\n", file_name);
 3206             } else {
 3207                 if ((len = fileno(f)) > 0)
 3208                     fchmod(len, S_IRUSR | S_IWUSR);
 3209             }
 3210         }
 3211 
 3212         if (f) {
 3213             fprintf(f, "version: %s\n", EUCA_VERSION);
 3214             fprintf(f, "timestamp: %ld\n", time(NULL));
 3215             if (gethostname(myName, CHAR_BUFFER_SIZE) == 0)
 3216                 fprintf(f, "node: %s\n", myName);
 3217             fprintf(f, "hypervisor: %s\n", nc_state.H->name);
 3218             fprintf(f, "network: %s\n", nc_state.pEucaNet->sMode);
 3219 
 3220             used_disk = used_mem = used_cores = 0;
 3221             for (i = 0; i < (*outInstsLen); i++) {
 3222                 ncInstance *instance = (*outInsts)[i];
 3223                 used_disk += instance->params.disk;
 3224                 used_mem += instance->params.mem;
 3225                 used_cores += instance->params.cores;
 3226             }
 3227 
 3228             fprintf(f, "memory (max/avail/used) MB: %lld/%lld/%lld\n", nc_state.mem_max, nc_state.mem_max - used_mem, used_mem);
 3229             fprintf(f, "disk (max/avail/used) GB: %lld/%lld/%lld\n", nc_state.disk_max, nc_state.disk_max - used_disk, used_disk);
 3230             fprintf(f, "cores (max/avail/used): %lld/%lld/%lld\n", nc_state.cores_max, nc_state.cores_max - used_cores, used_cores);
 3231 
 3232             for (i = 0; i < (*outInstsLen); i++) {
 3233                 ncInstance *instance = (*outInsts)[i];
 3234                 fprintf(f, "id: %s", instance->instanceId);
 3235                 fprintf(f, " userId: %s", instance->userId);
 3236                 fprintf(f, " state: %s", instance->stateName);
 3237                 fprintf(f, " mem: %d", instance->params.mem);
 3238                 fprintf(f, " disk: %d", instance->params.disk);
 3239                 fprintf(f, " cores: %d", instance->params.cores);
 3240                 fprintf(f, " private: %s", instance->ncnet.privateIp);
 3241                 fprintf(f, " public: %s\n", instance->ncnet.publicIp);
 3242             }
 3243             fclose(f);
 3244         }
 3245     }
 3246     EUCA_FREE(file_name);
 3247 
 3248     LOGTRACE("done\n");
 3249     return (EUCA_OK);
 3250 }
 3251 
 3252 //!
 3253 //! Handles the broadcast network info request
 3254 //!
 3255 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
 3256 //! @param[in] networkInfo is a string
 3257 //!
 3258 //! @return EUCA_ERROR on failure or the result of the proper doBroadcastNetworkInfo() handler call.
 3259 //!
 3260 int doBroadcastNetworkInfo(ncMetadata * pMeta, char *networkInfo)
 3261 {
 3262     int ret = EUCA_OK;
 3263 
 3264     if (init())
 3265         return (EUCA_ERROR);
 3266 
 3267     LOGDEBUG("invoked\n");
 3268     LOGTRACE("invoked with networkInfo='%s'\n", SP(networkInfo));
 3269 
 3270     if (nc_state.H->doBroadcastNetworkInfo)
 3271         ret = nc_state.H->doBroadcastNetworkInfo(&nc_state, pMeta, networkInfo);
 3272     else
 3273         ret = nc_state.D->doBroadcastNetworkInfo(&nc_state, pMeta, networkInfo);
 3274 
 3275     return ret;
 3276 }
 3277 
 3278 //!
 3279 //! Handles the assign address request
 3280 //!
 3281 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
 3282 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
 3283 //! @param[in] publicIp a string representation of the public IP to assign to the instance
 3284 //!
 3285 //! @return EUCA_ERROR on failure or the result of the proper doAssignAddress() handler call.
 3286 //!
 3287 int doAssignAddress(ncMetadata * pMeta, char *instanceId, char *publicIp)
 3288 {
 3289     int ret = EUCA_OK;
 3290 
 3291     if (init())
 3292         return (EUCA_ERROR);
 3293 
 3294     LOGINFO("[%s] assigning address: [%s]\n", SP(instanceId), SP(publicIp));
 3295     LOGDEBUG("[%s] invoked (publicIp=%s)\n", instanceId, publicIp);
 3296 
 3297     if (nc_state.H->doAssignAddress)
 3298         ret = nc_state.H->doAssignAddress(&nc_state, pMeta, instanceId, publicIp);
 3299     else
 3300         ret = nc_state.D->doAssignAddress(&nc_state, pMeta, instanceId, publicIp);
 3301 
 3302     return ret;
 3303 }
 3304 
 3305 //!
 3306 //! Handles the power down request.
 3307 //!
 3308 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
 3309 //!
 3310 //! @return EUCA_ERROR on failure or the result of the proper doPowerDown() handler call.
 3311 //!
 3312 int doPowerDown(ncMetadata * pMeta)
 3313 {
 3314     int ret = EUCA_OK;
 3315 
 3316     if (init())
 3317         return (EUCA_ERROR);
 3318 
 3319     LOGINFO("powering down\n");
 3320     LOGDEBUG("invoked\n");
 3321 
 3322     if (nc_state.H->doPowerDown)
 3323         ret = nc_state.H->doPowerDown(&nc_state, pMeta);
 3324     else
 3325         ret = nc_state.D->doPowerDown(&nc_state, pMeta);
 3326 
 3327     return ret;
 3328 }
 3329 
 3330 //!
 3331 //! Handles the run instance request.
 3332 //!
 3333 //! @param[in]  pMeta a pointer to the node controller (NC) metadata structure
 3334 //! @param[in]  uuid unique user identifier string
 3335 //! @param[in]  instanceId the instance identifier string (i-XXXXXXXX)
 3336 //! @param[in]  reservationId the reservation identifier string
 3337 //! @param[in]  params a pointer to the virtual machine parameters to use
 3338 //! @param[in]  imageId UNUSED
 3339 //! @param[in]  imageURL UNUSED
 3340 //! @param[in]  kernelId the kernel image identifier (eki-XXXXXXXX)
 3341 //! @param[in]  kernelURL the kernel image URL address
 3342 //! @param[in]  ramdiskId the ramdisk image identifier (eri-XXXXXXXX)
 3343 //! @param[in]  ramdiskURL the ramdisk image URL address
 3344 //! @param[in]  ownerId the owner identifier string
 3345 //! @param[in]  accountId the account identifier string
 3346 //! @param[in]  keyName the key name string
 3347 //! @param[in]  netparams a pointer to the network parameters string
 3348 //! @param[in]  userData the user data string
 3349 //! @param[in]  launchIndex the launch index string
 3350 //! @param[in]  platform the platform name string
 3351 //! @param[in]  expiryTime the reservation expiration time
 3352 //! @param[in]  groupNames a list of group name string
 3353 //! @param[in]  groupNamesSize the number of group name in the groupNames list
 3354 //! @param[out] outInst the list of instances created by this request
 3355 //!
 3356 //! @return EUCA_ERROR on failure or the result of the proper doRunInstance() handler call.
 3357 //!
 3358 int doRunInstance(ncMetadata * pMeta, char *uuid, char *instanceId, char *reservationId, virtualMachine * params, char *imageId, char *imageURL,
 3359                   char *kernelId, char *kernelURL, char *ramdiskId, char *ramdiskURL, char *ownerId, char *accountId, char *keyName,
 3360                   netConfig * netparams, char *userData, char *credential, char *launchIndex, char *platform, int expiryTime, char **groupNames, int groupNamesSize,
 3361                   char *rootDirective, char **groupIds, int groupIdsSize, netConfig * secNetCfgs, int secNetCfgsLen, ncInstance ** outInst)
 3362 {
 3363     int ret = EUCA_OK;
 3364 
 3365     if (init())
 3366         return (EUCA_ERROR);
 3367     DISABLED_CHECK;
 3368 
 3369     LOGINFO("[%s] running instance groupId=%s cores=%d disk=%d memory=%d vlan=%d net=%d priMAC=%s privIp=%s plat=%s kernel=%s ramdisk=%s\n",
 3370             instanceId, SP(groupIds[0]), params->cores, params->disk, params->mem, netparams->vlan, netparams->networkIndex, netparams->privateMac, netparams->privateIp, platform,
 3371             kernelId, ramdiskId);
 3372     if (vbr_legacy(instanceId, params, imageId, imageURL, kernelId, kernelURL, ramdiskId, ramdiskURL) != EUCA_OK)
 3373         return (EUCA_ERROR);
 3374     // spark: kernel and ramdisk id are required for linux bundle-instance, but are not in the runInstance request;
 3375     if (!kernelId || !ramdiskId) {
 3376         for (int i = 0; i < EUCA_MAX_VBRS && i < params->virtualBootRecordLen; i++) {
 3377             virtualBootRecord *vbr = &(params->virtualBootRecord[i]);
 3378             if (strlen(vbr->resourceLocation) > 0) {
 3379                 if (!strcmp(vbr->typeName, "kernel")) {
 3380                     // free our string if it was previously set
 3381                     EUCA_FREE(kernelId);
 3382                     kernelId = strdup(vbr->id);
 3383                 }
 3384 
 3385                 if (!strcmp(vbr->typeName, "ramdisk")) {
 3386                     // free our string if it was previously set
 3387                     EUCA_FREE(ramdiskId);
 3388                     ramdiskId = strdup(vbr->id);
 3389                 }
 3390             } else {
 3391                 break;
 3392             }
 3393         }
 3394     }
 3395     if (nc_state.H->doRunInstance) {
 3396         ret = nc_state.H->doRunInstance(&nc_state, pMeta, uuid, instanceId, reservationId, params, imageId, imageURL, kernelId, kernelURL, ramdiskId,
 3397                                         ramdiskURL, ownerId, accountId, keyName, netparams, userData, credential, launchIndex, platform, expiryTime, groupNames, groupNamesSize,
 3398                                         rootDirective, groupIds, groupIdsSize, secNetCfgs, secNetCfgsLen, outInst);
 3399     } else {
 3400         ret = nc_state.D->doRunInstance(&nc_state, pMeta, uuid, instanceId, reservationId, params, imageId, imageURL, kernelId, kernelURL, ramdiskId,
 3401                                         ramdiskURL, ownerId, accountId, keyName, netparams, userData, credential, launchIndex, platform, expiryTime, groupNames, groupNamesSize,
 3402                                         rootDirective, groupIds, groupIdsSize, secNetCfgs, secNetCfgsLen, outInst);
 3403     }
 3404     return ret;
 3405 }
 3406 
 3407 //!
 3408 //! Finds and terminate an instance.
 3409 //!
 3410 //! @param[in]  pMeta a pointer to the node controller (NC) metadata structure
 3411 //! @param[in]  instanceId the instance identifier string (i-XXXXXXXX)
 3412 //! @param[in]  force if set to 1 will force the termination of the instance
 3413 //! @param[out] shutdownState the instance state code after the call to find_and_terminate_instance() if successful
 3414 //! @param[out] previousState the instance state code after the call to find_and_terminate_instance() if successful
 3415 //!
 3416 //! @return EUCA_ERROR on failure or the result of the proper doTerminateInstance() handler call.
 3417 //!
 3418 int doTerminateInstance(ncMetadata * pMeta, char *instanceId, int force, int *shutdownState, int *previousState)
 3419 {
 3420     int ret = EUCA_OK;
 3421 
 3422     if (init())
 3423         return (EUCA_ERROR);
 3424     DISABLED_CHECK;
 3425 
 3426     LOGINFO("[%s] termination requested\n", instanceId);
 3427 
 3428     if (nc_state.H->doTerminateInstance)
 3429         ret = nc_state.H->doTerminateInstance(&nc_state, pMeta, instanceId, force, shutdownState, previousState);
 3430     else
 3431         ret = nc_state.D->doTerminateInstance(&nc_state, pMeta, instanceId, force, shutdownState, previousState);
 3432 
 3433     return ret;
 3434 }
 3435 
 3436 //!
 3437 //! Handles the reboot instance request
 3438 //!
 3439 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
 3440 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
 3441 //!
 3442 //! @return EUCA_ERROR on failure or the result of the proper doRebootInstance() handler call.
 3443 //!
 3444 int doRebootInstance(ncMetadata * pMeta, char *instanceId)
 3445 {
 3446     int ret = EUCA_OK;
 3447 
 3448     if (init())
 3449         return (EUCA_ERROR);
 3450     DISABLED_CHECK;
 3451 
 3452     LOGINFO("[%s] rebooting requested\n", SP(instanceId));
 3453     LOGDEBUG("[%s] invoked\n", instanceId);
 3454 
 3455     if (nc_state.H->doRebootInstance)
 3456         ret = nc_state.H->doRebootInstance(&nc_state, pMeta, instanceId);
 3457     else
 3458         ret = nc_state.D->doRebootInstance(&nc_state, pMeta, instanceId);
 3459 
 3460     return ret;
 3461 }
 3462 
 3463 //!
 3464 //! Handles the get console output request
 3465 //!
 3466 //! @param[in]  pMeta a pointer to the node controller (NC) metadata structure
 3467 //! @param[in]  instanceId the instance identifier string (i-XXXXXXXX)
 3468 //! @param[out] consoleOutput a pointer to the unallocated string that will contain the output
 3469 //!
 3470 //! @return EUCA_ERROR on failure or the result of the proper doGetConsoleOutput() handler call.
 3471 //!
 3472 int doGetConsoleOutput(ncMetadata * pMeta, char *instanceId, char **consoleOutput)
 3473 {
 3474     int ret = EUCA_OK;
 3475 
 3476     if (init())
 3477         return 1;
 3478 
 3479     LOGINFO("[%s] console output requested\n", instanceId);
 3480 
 3481     if (nc_state.H->doGetConsoleOutput)
 3482         ret = nc_state.H->doGetConsoleOutput(&nc_state, pMeta, instanceId, consoleOutput);
 3483     else
 3484         ret = nc_state.D->doGetConsoleOutput(&nc_state, pMeta, instanceId, consoleOutput);
 3485 
 3486     return ret;
 3487 }
 3488 
 3489 //!
 3490 //! Handles the describe resource request.
 3491 //!
 3492 //! @param[in]  pMeta a pointer to the node controller (NC) metadata structure
 3493 //! @param[in]  resourceType UNUSED
 3494 //! @param[out] outRes a list of resources we retrieved data for
 3495 //!
 3496 //! @return EUCA_ERROR on failure or the result of the proper doDescribeResource() handler call.
 3497 //!
 3498 int doDescribeResource(ncMetadata * pMeta, char *resourceType, ncResource ** outRes)
 3499 {
 3500     int ret = EUCA_OK;
 3501 
 3502     if (init())
 3503         return (EUCA_ERROR);
 3504 
 3505     updateServiceStateInfo(pMeta, TRUE);
 3506 
 3507     if (nc_state.H->doDescribeResource)
 3508         ret = nc_state.H->doDescribeResource(&nc_state, pMeta, resourceType, outRes);
 3509     else
 3510         ret = nc_state.D->doDescribeResource(&nc_state, pMeta, resourceType, outRes);
 3511 
 3512     return ret;
 3513 }
 3514 
 3515 //!
 3516 //! Starts the network process.
 3517 //!
 3518 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
 3519 //! @param[in] uuid a string containing the user unique identifier (UNUSED)
 3520 //! @param[in] remoteHosts the list of remote hosts (UNUSED)
 3521 //! @param[in] remoteHostsLen the number of hosts in the remoteHosts list (UNUSED)
 3522 //! @param[in] port the port number to use for the network (UNUSED)
 3523 //! @param[in] vlan the network vlan to use.
 3524 //!
 3525 //! @return EUCA_ERROR on failure or the result of the proper doStartNetwork() handler call.
 3526 //!
 3527 int doStartNetwork(ncMetadata * pMeta, char *uuid, char **remoteHosts, int remoteHostsLen, int port, int vlan)
 3528 {
 3529     int ret = EUCA_OK;
 3530 
 3531     if (init())
 3532         return (EUCA_ERROR);
 3533 
 3534     LOGINFO("starting network (remoteHostsLen=%d port=%d vlan=%d)\n", remoteHostsLen, port, vlan);
 3535     LOGDEBUG("invoked (remoteHostsLen=%d port=%d vlan=%d)\n", remoteHostsLen, port, vlan);
 3536 
 3537     if (nc_state.H->doStartNetwork)
 3538         ret = nc_state.H->doStartNetwork(&nc_state, pMeta, uuid, remoteHosts, remoteHostsLen, port, vlan);
 3539     else
 3540         ret = nc_state.D->doStartNetwork(&nc_state, pMeta, uuid, remoteHosts, remoteHostsLen, port, vlan);
 3541 
 3542     return ret;
 3543 }
 3544 
 3545 //!
 3546 //! Attach a given volume to an instance.
 3547 //!
 3548 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
 3549 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
 3550 //! @param[in] volumeId the volume identifier string (vol-XXXXXXXX)
 3551 //! @param[in] remoteDev the target device name
 3552 //! @param[in] localDev the local device name
 3553 //!
 3554 //! @return EUCA_ERROR on failure or the result of the proper doAttachVolume() handler call.
 3555 //!
 3556 int doAttachVolume(ncMetadata * pMeta, char *instanceId, char *volumeId, char *remoteDev, char *localDev)
 3557 {
 3558     int ret = EUCA_OK;
 3559 
 3560     if (init())
 3561         return (EUCA_ERROR);
 3562     DISABLED_CHECK;
 3563 
 3564     LOGINFO("[%s][%s] attaching volume\n", instanceId, volumeId);
 3565     LOGDEBUG("[%s][%s] volume attaching (remoteDev=%s localDev=%s)\n", instanceId, volumeId, remoteDev, localDev);
 3566 
 3567     if (nc_state.H->doAttachVolume)
 3568         ret = nc_state.H->doAttachVolume(&nc_state, pMeta, instanceId, volumeId, remoteDev, localDev);
 3569     else
 3570         ret = nc_state.D->doAttachVolume(&nc_state, pMeta, instanceId, volumeId, remoteDev, localDev);
 3571 
 3572     return ret;
 3573 }
 3574 
 3575 //!
 3576 //! Detach a given volume from an instance.
 3577 //!
 3578 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
 3579 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
 3580 //! @param[in] volumeId the volume identifier string (vol-XXXXXXXX)
 3581 //! @param[in] attachmentToken the target device name
 3582 //! @param[in] localDev the local device name
 3583 //! @param[in] force if set to 1, this will force the volume to detach
 3584 //! @param[in] grab_inst_sem if set to 1, will require the usage of the instance semaphore
 3585 //!
 3586 //! @return EUCA_ERROR on failure or the result of the proper doDetachVolume() handler call.
 3587 //!
 3588 int doDetachVolume(ncMetadata * pMeta, char *instanceId, char *volumeId, char *attachmentToken, char *localDev, int force)
 3589 {
 3590     int ret = EUCA_OK;
 3591 
 3592     if (init())
 3593         return (EUCA_ERROR);
 3594     DISABLED_CHECK;
 3595 
 3596     LOGINFO("[%s][%s] detaching volume\n", instanceId, volumeId);
 3597     LOGDEBUG("[%s][%s] volume detaching (localDev=%s force=%d)\n", instanceId, volumeId, localDev, force);
 3598 
 3599     if (nc_state.H->doDetachVolume)
 3600         ret = nc_state.H->doDetachVolume(&nc_state, pMeta, instanceId, volumeId, attachmentToken, localDev, force);
 3601     else
 3602         ret = nc_state.D->doDetachVolume(&nc_state, pMeta, instanceId, volumeId, attachmentToken, localDev, force);
 3603 
 3604     return ret;
 3605 }
 3606 
 3607 //!
 3608 //! Attach a given network interface to an instance (VPC mode only)
 3609 //!
 3610 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
 3611 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
 3612 //! @param[in] netConfig the pointer to netConfig structure
 3613 //!
 3614 //! @return EUCA_ERROR on failure or the result of the proper doAttachNetworkInterface() handler call.
 3615 //!
 3616 int doAttachNetworkInterface(ncMetadata * pMeta, char *instanceId, netConfig *netCfg)
 3617 {
 3618     int ret = EUCA_OK;
 3619 
 3620     if (init())
 3621         return (EUCA_ERROR);
 3622     DISABLED_CHECK;
 3623 
 3624     LOGINFO("[%s][%s] attaching network interface\n", instanceId, netCfg->interfaceId);
 3625     LOGDEBUG("[%s][%s] network interface attaching (vlan=%d networkIndex=%d privateMac=%s publicIp=%s privateIp=%s device=%d attachmentId=%s)\n",
 3626             instanceId, netCfg->interfaceId, netCfg->vlan, netCfg->networkIndex, netCfg->privateMac, netCfg->publicIp,
 3627             netCfg->privateIp, netCfg->device, netCfg->attachmentId);
 3628 
 3629     if (nc_state.H->doAttachNetworkInterface)
 3630         ret = nc_state.H->doAttachNetworkInterface(&nc_state, pMeta, instanceId, netCfg);
 3631     else
 3632         ret = nc_state.D->doAttachNetworkInterface(&nc_state, pMeta, instanceId, netCfg);
 3633 
 3634     return ret;
 3635 }
 3636 
 3637 //!
 3638 //! Detach a given network interface from an instance (VPC mode only)
 3639 //!
 3640 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
 3641 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
 3642 //! @param[in] attachmentId the attachment ID string (eni-attach-XXXXXXXX)
 3643 //! @param[in] force if set to 1, this will force the network interface to detach
 3644 //!
 3645 //! @return EUCA_ERROR on failure or the result of the proper doDetachNetworkInterface() handler call.
 3646 //!
 3647 int doDetachNetworkInterface(ncMetadata * pMeta, char *instanceId, char *attachmentId, int force)
 3648 {
 3649     int ret = EUCA_OK;
 3650 
 3651     if (init())
 3652         return (EUCA_ERROR);
 3653     DISABLED_CHECK;
 3654 
 3655     LOGINFO("[%s][%s] detaching network interface\n", instanceId, attachmentId);
 3656 
 3657     if (nc_state.H->doDetachNetworkInterface)
 3658         ret = nc_state.H->doDetachNetworkInterface(&nc_state, pMeta, instanceId, attachmentId, force);
 3659     else
 3660         ret = nc_state.D->doDetachNetworkInterface(&nc_state, pMeta, instanceId, attachmentId, force);
 3661 
 3662     return ret;
 3663 }
 3664 
 3665 //!
 3666 //! Handles the bundling instance request.
 3667 //!
 3668 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
 3669 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
 3670 //! @param[in] bucketName the bucket name string to which the bundle will be saved
 3671 //! @param[in] filePrefix the prefix name string of the bundle
 3672 //! @param[in] objectStorageURL the objectstorage URL address string
 3673 //! @param[in] userPublicKey the public key string
 3674 //! @param[in] S3Policy the S3 engine policy
 3675 //! @param[in] S3PolicySig the S3 engine policy signature
 3676 //! @param[in] architecture image/instance architecture
 3677 //!
 3678 //! @return EUCA_ERROR on failure or the result of the proper doBundleInstance() handler call.
 3679 //!
 3680 int doBundleInstance(ncMetadata * pMeta, char *instanceId, char *bucketName, char *filePrefix, char *objectStorageURL, char *userPublicKey, char *S3Policy, char *S3PolicySig,
 3681                      char *architecture)
 3682 {
 3683     int ret = EUCA_OK;
 3684 
 3685     if (init())
 3686         return (EUCA_ERROR);
 3687     DISABLED_CHECK;
 3688 
 3689     LOGINFO("[%s] starting instance bundling into bucket %s\n", instanceId, bucketName);
 3690     LOGDEBUG("[%s] bundling parameters: bucketName=%s filePrefix=%s objectStorageURL=%s userPublicKey=%s S3Policy=%s, S3PolicySig=%s, architecture=%s\n",
 3691              instanceId, bucketName, filePrefix, objectStorageURL, userPublicKey, S3Policy, S3PolicySig, architecture);
 3692 
 3693     if (nc_state.H->doBundleInstance)
 3694         ret = nc_state.H->doBundleInstance(&nc_state, pMeta, instanceId, bucketName, filePrefix, objectStorageURL, userPublicKey, S3Policy, S3PolicySig, architecture);
 3695     else
 3696         ret = nc_state.D->doBundleInstance(&nc_state, pMeta, instanceId, bucketName, filePrefix, objectStorageURL, userPublicKey, S3Policy, S3PolicySig, architecture);
 3697 
 3698     return ret;
 3699 }
 3700 
 3701 //!
 3702 //! Handles the bundle restart request.
 3703 //!
 3704 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
 3705 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
 3706 //!
 3707 //! @return EUCA_ERROR on failure or the result of the proper doBundleRestartInstance() handler call.
 3708 //!
 3709 int doBundleRestartInstance(ncMetadata * pMeta, char *instanceId)
 3710 {
 3711     if (init())
 3712         return (EUCA_ERROR);
 3713     DISABLED_CHECK;
 3714 
 3715     LOGINFO("[%s] restarting bundling instance\n", instanceId);
 3716     if (nc_state.H->doBundleRestartInstance)
 3717         return (nc_state.H->doBundleRestartInstance(&nc_state, pMeta, instanceId));
 3718     return (nc_state.D->doBundleRestartInstance(&nc_state, pMeta, instanceId));
 3719 }
 3720 
 3721 //!
 3722 //! Handles the cancel bundle task request.
 3723 //!
 3724 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
 3725 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
 3726 //!
 3727 //! @return EUCA_ERROR on failure or the result of the proper doCancelBundleTask() handler call.
 3728 //!
 3729 int doCancelBundleTask(ncMetadata * pMeta, char *instanceId)
 3730 {
 3731     int ret = EUCA_OK;
 3732 
 3733     if (init())
 3734         return (EUCA_ERROR);
 3735     DISABLED_CHECK;
 3736 
 3737     LOGINFO("[%s] canceling bundling instance\n", instanceId);
 3738 
 3739     if (nc_state.H->doCancelBundleTask)
 3740         ret = nc_state.H->doCancelBundleTask(&nc_state, pMeta, instanceId);
 3741     else
 3742         ret = nc_state.D->doCancelBundleTask(&nc_state, pMeta, instanceId);
 3743 
 3744     return ret;
 3745 }
 3746 
 3747 //!
 3748 //! Handles the describe bundle tasks request.
 3749 //!
 3750 //! @param[in]  pMeta a pointer to the node controller (NC) metadata structure
 3751 //! @param[in]  instIds a list of instance identifier string
 3752 //! @param[in]  instIdsLen the number of instance identifiers in the instIds list
 3753 //! @param[out] outBundleTasks a pointer to the created bundle tasks list
 3754 //! @param[out] outBundleTasksLen the number of bundle tasks in the outBundleTasks list
 3755 //!
 3756 //! @return EUCA_ERROR on failure or the result of the proper doDescribeBundleTasks() handler call.
 3757 //!
 3758 int doDescribeBundleTasks(ncMetadata * pMeta, char **instIds, int instIdsLen, bundleTask *** outBundleTasks, int *outBundleTasksLen)
 3759 {
 3760     int ret = EUCA_OK;
 3761 
 3762     if (init())
 3763         return (EUCA_ERROR);
 3764     DISABLED_CHECK;
 3765 
 3766     LOGINFO("describing bundle tasks (for %d instances)\n", instIdsLen);
 3767 
 3768     if (nc_state.H->doDescribeBundleTasks)
 3769         ret = nc_state.H->doDescribeBundleTasks(&nc_state, pMeta, instIds, instIdsLen, outBundleTasks, outBundleTasksLen);
 3770     else
 3771         ret = nc_state.D->doDescribeBundleTasks(&nc_state, pMeta, instIds, instIdsLen, outBundleTasks, outBundleTasksLen);
 3772 
 3773     return ret;
 3774 }
 3775 
 3776 //!
 3777 //! Handles the image creation request.
 3778 //!
 3779 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
 3780 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
 3781 //! @param[in] volumeId the volume identifier string (vol-XXXXXXXX)
 3782 //! @param[in] remoteDev the remote device name
 3783 //!
 3784 //! @return EUCA_ERROR on failure or the result of the proper doCreateImage() handler call.
 3785 //!
 3786 int doCreateImage(ncMetadata * pMeta, char *instanceId, char *volumeId, char *remoteDev)
 3787 {
 3788     int ret = EUCA_OK;
 3789 
 3790     if (init())
 3791         return (EUCA_ERROR);
 3792     DISABLED_CHECK;
 3793 
 3794     LOGINFO("[%s][%s] creating image\n", instanceId, volumeId);
 3795 
 3796     if (nc_state.H->doCreateImage)
 3797         ret = nc_state.H->doCreateImage(&nc_state, pMeta, instanceId, volumeId, remoteDev);
 3798     else
 3799         ret = nc_state.D->doCreateImage(&nc_state, pMeta, instanceId, volumeId, remoteDev);
 3800 
 3801     return ret;
 3802 }
 3803 
 3804 //!
 3805 //! Handles the describe sensors request.
 3806 //!
 3807 //! @param[in]  pMeta a pointer to the node controller (NC) metadata structure
 3808 //! @param[in]  historySize the size of the data history to retrieve
 3809 //! @param[in]  collectionIntervalTimeMs the data collection interval in milliseconds
 3810 //! @param[in]  instIds the list of instance identifiers string
 3811 //! @param[in]  instIdsLen the number of instance identifiers in the instIds list
 3812 //! @param[in]  sensorIds a list of sensor identifiers string
 3813 //! @param[in]  sensorIdsLen the number of sensor identifiers string in the sensorIds list
 3814 //! @param[out] outResources a list of sensor resources created by this request
 3815 //! @param[out] outResourcesLen the number of sensor resources contained in the outResources list
 3816 //!
 3817 //! @return EUCA_ERROR on failure or the result of the proper doDescribeSensors() handler call.
 3818 //!
 3819 int doDescribeSensors(ncMetadata * pMeta, int historySize, long long collectionIntervalTimeMs, char **instIds, int instIdsLen, char **sensorIds,
 3820                       int sensorIdsLen, sensorResource *** outResources, int *outResourcesLen)
 3821 {
 3822     int ret = EUCA_OK;
 3823 
 3824     if (init())
 3825         return (EUCA_ERROR);
 3826 
 3827     LOGDEBUG("invoked (instIdsLen=%d sensorIdsLen=%d)\n", instIdsLen, sensorIdsLen);
 3828 
 3829     if (nc_state.H->doDescribeSensors) {
 3830         ret = nc_state.H->doDescribeSensors(&nc_state, pMeta, historySize, collectionIntervalTimeMs, instIds, instIdsLen, sensorIds, sensorIdsLen, outResources, outResourcesLen);
 3831     } else {
 3832         ret = nc_state.D->doDescribeSensors(&nc_state, pMeta, historySize, collectionIntervalTimeMs, instIds, instIdsLen, sensorIds, sensorIdsLen, outResources, outResourcesLen);
 3833     }
 3834 
 3835     return ret;
 3836 }
 3837 
 3838 //!
 3839 //! Handles the modify node request.
 3840 //!
 3841 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
 3842 //! @param[in] stateName
 3843 //!
 3844 //! @return EUCA_OK on success or EUCA_ERROR on failure
 3845 //!
 3846 //! TODO: doxygen
 3847 int doModifyNode(ncMetadata * pMeta, char *stateName)
 3848 {
 3849     int ret = EUCA_OK;
 3850 
 3851     if (init())
 3852         return (EUCA_ERROR);
 3853 
 3854     LOGINFO("modifying node\n");
 3855     LOGDEBUG("invoked (stateName=%s)\n", stateName);
 3856 
 3857     if (nc_state.H->doModifyNode) {
 3858         ret = nc_state.H->doModifyNode(&nc_state, pMeta, stateName);
 3859     } else {
 3860         ret = nc_state.D->doModifyNode(&nc_state, pMeta, stateName);
 3861     }
 3862 
 3863     return ret;
 3864 }
 3865 
 3866 //!
 3867 //! Handles the instance migration request.
 3868 //!
 3869 //! @param[in]  pMeta a pointer to the node controller (NC) metadata structure
 3870 //! @param[in]  instances metadata for the instance to migrate to destination
 3871 //! @param[in]  instancesLen number of instances in the instance list
 3872 //! @param[in]  action IP of the destination Node Controller
 3873 //! @param[in]  credentials credentials that enable the migration
 3874 //! @param[in]  resourceLocations ID=URL list of self-signed URLs (only relevant for 'prepare' on source node)
 3875 //! @param[in]  resourceLocationsLen number of URLs in the list (only relevant for 'prepare' on source node)
 3876 //!
 3877 //! @return EUCA_OK on sucess or EUCA_ERROR on failure
 3878 //!
 3879 //! TODO: doxygen
 3880 //!
 3881 int doMigrateInstances(ncMetadata * pMeta, ncInstance ** instances, int instancesLen, char *action, char *credentials, char ** resourceLocations, int resourceLocationsLen)
 3882 {
 3883     int ret = EUCA_OK;
 3884 
 3885     if (init())
 3886         return (EUCA_ERROR);
 3887 
 3888     LOGINFO("migrating %d instances\n", instancesLen);
 3889     LOGTRACE("invoked\n");
 3890 
 3891     LOGDEBUG("verifying %d instance[s] for migration...\n", instancesLen);
 3892     for (int i = 0; i < instancesLen; i++) {
 3893         LOGDEBUG("verifying instance # %d...\n", i);
 3894         if (instances[i]) {
 3895             LOGDEBUG("invoked (action=%s instance[%d].{id=%s src=%s dst=%s) creds=%s\n",
 3896                      action, i, instances[i]->instanceId, instances[i]->migration_src, instances[i]->migration_dst, (credentials == NULL) ? "UNSET" : "present");
 3897             if (!strcmp(instances[i]->migration_src, instances[i]->migration_dst)) {
 3898                 if (strcmp(action, "rollback")) {
 3899                     // Anything but rollback.
 3900                     LOGERROR("[%s] rejecting proposed SAME-NODE migration from %s to %s\n", instances[i]->instanceId, instances[i]->migration_src, instances[i]->migration_dst);
 3901                     return (EUCA_UNSUPPORTED_ERROR);
 3902                 } else {
 3903                     // Ignore the fact src & dst are the same if a rollback--it doesn't matter.
 3904                     LOGDEBUG("[%s] ignoring apparent same-node migration hosts (%s > %s) for action '%s'\n", instances[i]->instanceId, instances[i]->migration_src,
 3905                              instances[i]->migration_dst, action);
 3906                 }
 3907             }
 3908         }
 3909     }
 3910 
 3911     if (nc_state.H->doMigrateInstances) {
 3912         ret = nc_state.H->doMigrateInstances(&nc_state, pMeta, instances, instancesLen, action, credentials, resourceLocations, resourceLocationsLen);
 3913     } else {
 3914         ret = nc_state.D->doMigrateInstances(&nc_state, pMeta, instances, instancesLen, action, credentials, resourceLocations, resourceLocationsLen);
 3915     }
 3916 
 3917     LOGTRACE("done\n");
 3918 
 3919     return ret;
 3920 }
 3921 
 3922 //!
 3923 //! Handles the instance start request
 3924 //!
 3925 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
 3926 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
 3927 //!
 3928 //! @return EUCA_ERROR on failure or the result of the actual doStartInstance() call
 3929 //!
 3930 int doStartInstance(ncMetadata * pMeta, char *instanceId)
 3931 {
 3932     int ret = EUCA_OK;
 3933 
 3934     if (init())
 3935         return (EUCA_ERROR);
 3936     DISABLED_CHECK;
 3937 
 3938     LOGINFO("[%s] instance start requested\n", instanceId);
 3939     if (nc_state.H->doStartInstance)
 3940         ret = nc_state.H->doStartInstance(&nc_state, pMeta, instanceId);
 3941     else
 3942         ret = nc_state.D->doStartInstance(&nc_state, pMeta, instanceId);
 3943 
 3944     return ret;
 3945 }
 3946 
 3947 //!
 3948 //! Handles the instance stop request
 3949 //!
 3950 //! @param[in] pMeta a pointer to the node controller (NC) metadata structure
 3951 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
 3952 //!
 3953 //! @return EUCA_ERROR on failure or the result of the actual doStopInstance() call
 3954 //!
 3955 int doStopInstance(ncMetadata * pMeta, char *instanceId)
 3956 {
 3957     int ret = EUCA_OK;
 3958 
 3959     if (init())
 3960         return (EUCA_ERROR);
 3961     DISABLED_CHECK;
 3962 
 3963     LOGINFO("[%s] instance shutdown requested\n", instanceId);
 3964     if (nc_state.H->doStopInstance)
 3965         ret = nc_state.H->doStopInstance(&nc_state, pMeta, instanceId);
 3966     else
 3967         ret = nc_state.D->doStopInstance(&nc_state, pMeta, instanceId);
 3968 
 3969     return ret;
 3970 }
 3971 
 3972 //!
 3973 //! Finds an instance in the global instance list
 3974 //!
 3975 //! @param[in] instanceId the instance identifier string (i-XXXXXXXX)
 3976 //!
 3977 //! @return a pointer to the instance structure if found. Otherwise NULL is returned.
 3978 //!
 3979 ncInstance *find_global_instance(const char *instanceId)
 3980 {
 3981     return NULL;
 3982 }
 3983 
 3984 //!
 3985 //! Predicate determining whether the instance is a migration destination
 3986 //!
 3987 //! @param[in] instance pointer to the instance struct
 3988 //!
 3989 //! @return true or false
 3990 //!
 3991 int is_migration_dst(const ncInstance * instance)
 3992 {
 3993     if (instance->migration_state != NOT_MIGRATING && !strcmp(instance->migration_dst, nc_state.ip))
 3994         return TRUE;
 3995     return FALSE;
 3996 }
 3997 
 3998 //!
 3999 //! Predicate determining whether the instance is a migration source
 4000 //!
 4001 //! @param[in] instance pointer to the instance struct
 4002 //!
 4003 //! @return true or false
 4004 //!
 4005 int is_migration_src(const ncInstance * instance)
 4006 {
 4007     if (instance->migration_state != NOT_MIGRATING && !strcmp(instance->migration_src, nc_state.ip))
 4008         return TRUE;
 4009     return FALSE;
 4010 }
 4011 
 4012 //!
 4013 //! Rollback a pending migration request on a source NC
 4014 //!
 4015 //! Currently only safe to call under the protection of inst_sem, such as from the migrating_thread().
 4016 //!
 4017 //! @param[in] instance pointer to the instance struct
 4018 //!
 4019 //! @return true or false
 4020 //!
 4021 int migration_rollback(ncInstance * instance)
 4022 {
 4023     // TO-DO: duplicated code in two parts of conditional. Refactor.
 4024     if (is_migration_src(instance)) {
 4025         LOGINFO("[%s] starting migration rollback of instance on source %s\n", instance->instanceId, instance->migration_src);
 4026         instance->migration_state = NOT_MIGRATING;
 4027         // Not zeroing out the src & dst for debugging purposes:
 4028         // There's a problem with refresh_instances_info() not finding domains
 4029         // and eventually shutting them down.
 4030         //bzero(instance->migration_src, HOSTNAME_SIZE);
 4031         //bzero(instance->migration_dst, HOSTNAME_SIZE);
 4032         bzero(instance->migration_credentials, CREDENTIAL_SIZE);
 4033         instance->migrationTime = 0;
 4034         save_instance_struct(instance);
 4035         copy_instances();
 4036         LOGINFO("[%s] migration source rolled back\n", instance->instanceId);
 4037         return TRUE;
 4038     } else if (is_migration_dst(instance)) {
 4039         // TO-DO: Do I want to protect this functionality by requiring something like a 'force' option be passed to this function?
 4040         LOGWARN("[%s] resetting migration state '%s' to 'none' for an already-migrated (%s < %s) instance. Something went wrong somewhere...\n",
 4041                 instance->instanceId, migration_state_names[instance->migration_state], instance->migration_dst, instance->migration_src);
 4042         instance->migration_state = NOT_MIGRATING;
 4043         bzero(instance->migration_src, HOSTNAME_SIZE);
 4044         bzero(instance->migration_dst, HOSTNAME_SIZE);
 4045         bzero(instance->migration_credentials, CREDENTIAL_SIZE);
 4046         instance->migrationTime = 0;
 4047         save_instance_struct(instance);
 4048         copy_instances();
 4049         LOGINFO("[%s] migration state reset.\n", instance->instanceId);
 4050         return TRUE;
 4051     }
 4052     // Neither source nor destination node?
 4053     LOGERROR("[%s] request to roll back migration of instance on non-source/destination node %s\n", instance->instanceId, nc_state.ip);
 4054     // We've seen this case caused by a bug in the migration code--one that left the migration_dst blank in the instance struct.
 4055     // So if this happens, we'll assume the rollback request was valid, and we'll reset its state and time so that it will get cleaned up--rather than stuck!
 4056     instance->migration_state = NOT_MIGRATING;
 4057     instance->migrationTime = 0;
 4058     save_instance_struct(instance);
 4059     copy_instances();
 4060     return FALSE;
 4061 }
 4062 
 4063 
 4064 // function that performs any local checks to determine that networking is in place enough to boot instance
 4065 int instance_network_gate(ncInstance *instance, time_t timeout_seconds) {
 4066     char *filebuf=NULL, path[EUCA_MAX_PATH], needle[EUCA_MAX_PATH];
 4067     time_t max_time=0;
 4068     int count = 1;
 4069     
 4070     if (timeout_seconds == 0) {
 4071         LOGDEBUG("skipping network gate (NC_BOOTING_ENVWAIT_THRESHOLD has been manually set to 0 seconds in eucalyptus.conf)\n");
 4072         return(0);
 4073     }
 4074 
 4075     if (!instance || timeout_seconds < 0 || timeout_seconds > 3600) {
 4076         LOGERROR("invalid input params\n");
 4077         return(0);
 4078     }
 4079 
 4080     max_time = time(NULL) + timeout_seconds;
 4081     
 4082     LOGDEBUG("[%s] waiting at most %d seconds for required instance networking to exist before booting instance\n", SP(instance->instanceId), (int)timeout_seconds);
 4083     while(time(NULL) < max_time) {
 4084         
 4085         LOGTRACE("[%s] instance state code %d\n", SP(instance->instanceId), instance->state);
 4086         
 4087         if (instance == NULL) {
 4088             LOGWARN("[%s] instance no longer valid - aborting instance gate\n", SP(instance->instanceId));
 4089             return(0);
 4090         }
 4091         
 4092         LOGTRACE("[%s] instance state code new=%d orig=%d\n", SP(instance->instanceId), instance->state, instance->state);
 4093 
 4094         if (instance->state != STAGING) {
 4095             LOGINFO("[%s] returning from gate since instance is no longer STAGING\n", SP(instance->instanceId));
 4096             return(0);
 4097         }
 4098         
 4099         if (!strcmp(nc_state.pEucaNet->sMode, NETMODE_EDGE)) {
 4100             // check to ensure that dhcpd config contains the mac for the instance
 4101             snprintf(path, EUCA_MAX_PATH, "%s/var/run/eucalyptus/net/euca-dhcp.conf", nc_state.home);
 4102             snprintf(needle, EUCA_MAX_PATH, "node-%s ", instance->ncnet.privateIp);
 4103             filebuf = file2str(path);
 4104             if (filebuf && strstr(filebuf, needle)) {
 4105                 LOGDEBUG("[%s] local dhcpd config contains required instance record, continuing\n", SP(instance->instanceId));
 4106                 EUCA_FREE(filebuf);
 4107                 return(0);
 4108             } else {
 4109                 LOGTRACE("[%s] local dhcpd config does not (yet) contain required instance record, waiting...(%d seconds remaining)\n", SP(instance->instanceId), (int)(max_time - time(NULL)));
 4110             }
 4111             EUCA_FREE(filebuf);
 4112         } else if (!strcmp(nc_state.pEucaNet->sMode, NETMODE_VPCMIDO)) {
 4113             char *fileBuf = NULL, *vers=NULL, *appvers=NULL, *startBuf=NULL;
 4114             char xmlfile[EUCA_MAX_PATH] = "";
 4115 
 4116             snprintf(xmlfile, EUCA_MAX_PATH, "%s/var/run/eucalyptus/global_network_info.xml", nc_state.home);
 4117 
 4118             fileBuf = file2str(xmlfile);
 4119             if (fileBuf) startBuf = strstr(fileBuf, "network-data");
 4120             
 4121             if (startBuf) {
 4122                 vers = euca_gettok(startBuf, "version=\"");
 4123                 appvers = euca_gettok(startBuf, "applied-version=\"");
 4124                 
 4125                 if (vers && appvers && !strcmp(vers, appvers)) {
 4126                     LOGDEBUG("[%s] version (%s) and applied version (%s) match\n", instance->instanceId, vers, appvers);
 4127                     
 4128                     if (strstr(fileBuf, instance->instanceId)) {
 4129                         LOGDEBUG("[%s] global network config contains required instance record\n", SP(instance->instanceId));
 4130                         EUCA_FREE(vers);
 4131                         EUCA_FREE(appvers);
 4132                         EUCA_FREE(fileBuf);
 4133                         return(0);
 4134                     } else {
 4135                         LOGTRACE("[%s] global network config does not (yet) contain required instance record, waiting...(%d seconds remaining)\n", SP(instance->instanceId), (int)(max_time - time(NULL)));
 4136                     }
 4137                 } else {
 4138                     LOGDEBUG("[%s] version (%s) and applied version (%s) do not match (yet), waiting\n", instance->instanceId, vers, appvers);
 4139                 }
 4140                 
 4141                 EUCA_FREE(vers);
 4142                 EUCA_FREE(appvers);
 4143             } else {
 4144                 LOGDEBUG("[%s] cannot read valid global network view file '%s' (yet), waiting\n", instance->instanceId, xmlfile);
 4145             }
 4146             EUCA_FREE(fileBuf);
 4147         } else {
 4148             return(0);
 4149         }
 4150         
 4151         count++;
 4152         sleep(1);
 4153     }
 4154     
 4155     LOGERROR("[%s] timed out waiting for instance network information to appear before booting instance\n", SP(instance->instanceId));
 4156     return(1);
 4157 }
 4158 
 4159 /**
 4160  * Removes instance NIC specified in the argument from bridge.
 4161  * @param nc [in] pointer to nc_state data structure.
 4162  * @param instance [in] pointer to ncInstance data structure of the instance of interest.
 4163  * @param iface [in] pointer to string with the interface name of interest.
 4164  * @return 0 on success. 1 otherwise.
 4165  */
 4166 int bridge_interface_remove(struct nc_state_t *nc, ncInstance *instance, char *iface) {
 4167     char cmd[EUCA_MAX_PATH], obuf[256], ebuf[256], sPath[EUCA_MAX_PATH];
 4168     int rc = 0;
 4169 
 4170     if (!nc || !instance || !iface) {
 4171         LOGWARN("Invalid argument: cannot remove NULL bridge interface.\n");
 4172         return (1);
 4173     }
 4174     LOGTRACE("checking if VM interface is attached to a bridge (%s/%s)\n", iface, instance->params.guestNicDeviceName);
 4175 
 4176     // If this device does not have a 'brport' path, this isn't a bridge device
 4177     snprintf(sPath, EUCA_MAX_PATH, "/sys/class/net/%s/brport/", iface);
 4178     if (!check_directory(sPath)) {
 4179         LOGTRACE("VM interface is attached to a bridge (%s/%s)\n", iface, instance->params.guestNicDeviceName);
 4180         snprintf(cmd, EUCA_MAX_PATH, "%s brctl delif %s %s", nc->rootwrap_cmd_path, instance->params.guestNicDeviceName, iface);
 4181         rc = timeshell(cmd, obuf, ebuf, 256, 10);
 4182         if (rc) {
 4183             LOGERROR("unable to remove instance interface from bridge: instance will not be able to connect to midonet (will not connect to network): check bridge/libvirt/kvm health\n");
 4184             LOGINFO("Failed to remove %s from %s\n", iface, instance->params.guestNicDeviceName);
 4185         } else {
 4186             LOGTRACE("VM interface removed from bridge (%s/%s)\n", iface, instance->params.guestNicDeviceName);
 4187         }
 4188     }
 4189     return (rc);
 4190 }
 4191 
 4192 /**
 4193  * Removes instance NIC(s) from bridge.
 4194  * @param nc [in] pointer to nc_state data structure.
 4195  * @param instance [in] pointer to ncInstance data structure of the instance of interest.
 4196  * @return 0 on success. Positive integer otherwise.
 4197  */
 4198 int bridge_instance_interfaces_remove(struct nc_state_t *nc, ncInstance *instance) {
 4199     char iface[16];
 4200     int rc = 0;
 4201     
 4202     if (!nc || !instance) {
 4203         LOGWARN("Invalid argument: cannot remove NULL bridge interface.\n");
 4204         return (1);
 4205     }
 4206     snprintf(iface, 16, "vn_%s", instance->instanceId);
 4207     rc += bridge_interface_remove(nc, instance, iface);
 4208 
 4209     // Repeat process for secondary interfaces as well
 4210     for (int i = 0; i < EUCA_MAX_NICS; i++) {
 4211         if (strlen(instance->secNetCfgs[i].interfaceId) == 0)
 4212             continue;
 4213 
 4214         snprintf(iface, 16, "vn_%s", instance->secNetCfgs[i].interfaceId);
 4215         rc += bridge_interface_remove(nc, instance, iface);
 4216     }
 4217     
 4218     return (rc);
 4219 }
 4220 
 4221 /**
 4222  * Enables hairpin mode of a linux bridge port (instance interface) - address EUCA-12608
 4223  * @param nc [in] pointer to nc_state data structure.
 4224  * @param instance [in] pointer to ncInstance data structure of the instance of interest.
 4225  * @param iface [in] pointer to string with the interface name of interest.
 4226  * @return 0 on success. 1 otherwise.
 4227  */
 4228 int bridge_interface_set_hairpin(struct nc_state_t *nc, ncInstance *instance, char *iface) {
 4229     char cmd[EUCA_MAX_PATH], obuf[256], ebuf[256], sPath[EUCA_MAX_PATH];
 4230     int rc = 0;
 4231 
 4232     if (!nc || !instance || !iface) {
 4233         LOGWARN("Invalid argument: cannot set hairpin on NULL bridge interface.\n");
 4234         return (1);
 4235     }
 4236 
 4237     // Make sure that this is a bridge port and that hairpin mode is supported
 4238     // RHEL7 bridge port has bpdu_guard parameter (RHEL6 does not)
 4239     snprintf(sPath, EUCA_MAX_PATH, "/sys/class/net/%s/brport/bpdu_guard", iface);
 4240     if (!check_file(sPath)) {
 4241         snprintf(cmd, EUCA_MAX_PATH, "%s brctl hairpin %s %s on", nc->rootwrap_cmd_path, instance->params.guestNicDeviceName, iface);
 4242         rc = timeshell(cmd, obuf, ebuf, 256, 10);
 4243         if (rc) {
 4244             LOGERROR("Unable to set hairpin mode for %s port on %s\n", iface, instance->params.guestNicDeviceName);
 4245             LOGINFO("%s may suffer limited connectivity (EUCA-12608)\n", instance->instanceId);
 4246         } else {
 4247             LOGTRACE("%s/%s hairpin mode is on\n", iface, instance->params.guestNicDeviceName);
 4248         }
 4249     }
 4250     return (rc);
 4251 }