corosync  2.3.4
wd.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2010-2012 Red Hat, Inc.
3  *
4  * All rights reserved.
5  *
6  * Author: Angus Salkeld <asalkeld@redhat.com>
7  *
8  * This software licensed under BSD license, the text of which follows:
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions are met:
12  *
13  * - Redistributions of source code must retain the above copyright notice,
14  * this list of conditions and the following disclaimer.
15  * - Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * - Neither the name of the MontaVista Software, Inc. nor the names of its
19  * contributors may be used to endorse or promote products derived from this
20  * software without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
32  * THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include <config.h>
36 
37 #include <unistd.h>
38 #include <fcntl.h>
39 #include <sys/ioctl.h>
40 #include <linux/types.h>
41 #include <linux/watchdog.h>
42 #include <sys/reboot.h>
43 
44 #include <corosync/corotypes.h>
45 #include <corosync/corodefs.h>
46 #include <corosync/coroapi.h>
47 #include <corosync/list.h>
48 #include <corosync/logsys.h>
49 #include <corosync/icmap.h>
50 #include "fsm.h"
51 
52 #include "service.h"
53 
54 typedef enum {
60 
61 struct resource {
63  char *recovery;
65  time_t last_updated;
66  struct cs_fsm fsm;
67 
69  uint64_t check_timeout;
71 };
72 
74 
75 /*
76  * Service Interfaces required by service_message_handler struct
77  */
78 static char *wd_exec_init_fn (struct corosync_api_v1 *corosync_api);
79 static int wd_exec_exit_fn (void);
80 static void wd_resource_check_fn (void* resource_ref);
81 
82 static struct corosync_api_v1 *api;
83 #define WD_DEFAULT_TIMEOUT_SEC 6
84 #define WD_DEFAULT_TIMEOUT_MS (WD_DEFAULT_TIMEOUT_SEC * CS_TIME_MS_IN_SEC)
85 #define WD_MIN_TIMEOUT_MS 500
86 #define WD_MAX_TIMEOUT_MS (120 * CS_TIME_MS_IN_SEC)
87 static uint32_t watchdog_timeout = WD_DEFAULT_TIMEOUT_SEC;
88 static uint64_t tickle_timeout = (WD_DEFAULT_TIMEOUT_MS / 2);
89 static int dog = -1;
90 static corosync_timer_handle_t wd_timer;
91 static int watchdog_ok = 1;
92 
94  .name = "corosync watchdog service",
95  .id = WD_SERVICE,
96  .priority = 1,
97  .private_data_size = 0,
98  .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED,
99  .lib_init_fn = NULL,
100  .lib_exit_fn = NULL,
101  .lib_engine = NULL,
102  .lib_engine_count = 0,
103  .exec_engine = NULL,
104  .exec_engine_count = 0,
105  .confchg_fn = NULL,
106  .exec_init_fn = wd_exec_init_fn,
107  .exec_exit_fn = wd_exec_exit_fn,
108  .exec_dump_fn = NULL
109 };
110 
111 static DECLARE_LIST_INIT (confchg_notify);
112 
113 /*
114  * F S M
115  */
116 static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data);
117 static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data);
118 
123 };
124 
128 };
129 
130 const char * wd_running_str = "running";
131 const char * wd_failed_str = "failed";
132 const char * wd_failure_str = "failure";
133 const char * wd_stopped_str = "stopped";
134 const char * wd_config_changed_str = "config_changed";
135 
137  { WD_S_STOPPED, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_STOPPED, WD_S_RUNNING, -1} },
138  { WD_S_STOPPED, WD_E_FAILURE, NULL, {-1} },
139  { WD_S_RUNNING, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_RUNNING, WD_S_STOPPED, -1} },
140  { WD_S_RUNNING, WD_E_FAILURE, wd_resource_failed, {WD_S_FAILED, -1} },
141  { WD_S_FAILED, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_RUNNING, WD_S_STOPPED, -1} },
142  { WD_S_FAILED, WD_E_FAILURE, NULL, {-1} },
143 };
144 
146 {
147  return (&wd_service_engine);
148 }
149 
150 static const char * wd_res_state_to_str(struct cs_fsm* fsm,
151  int32_t state)
152 {
153  switch (state) {
154  case WD_S_STOPPED:
155  return wd_stopped_str;
156  break;
157  case WD_S_RUNNING:
158  return wd_running_str;
159  break;
160  case WD_S_FAILED:
161  return wd_failed_str;
162  break;
163  }
164  return NULL;
165 }
166 
167 static const char * wd_res_event_to_str(struct cs_fsm* fsm,
168  int32_t event)
169 {
170  switch (event) {
171  case WD_E_CONFIG_CHANGED:
172  return wd_config_changed_str;
173  break;
174  case WD_E_FAILURE:
175  return wd_failure_str;
176  break;
177  }
178  return NULL;
179 }
180 
/*
 * Notification callback passed to cs_fsm_process() and cs_fsm_state_set()
 * by the resource code in this file.
 *
 * It switches on cb_event and writes one log message per kind of
 * notification, formatted with the event_to_str / state_to_str hooks
 * stored in the fsm.
 *
 * NOTE(review): the case labels of this switch, and whatever statement
 * sits between each log call and its break, are absent from this listing.
 * Which cb_event value selects which message therefore cannot be
 * confirmed from here.
 */
181 static void wd_fsm_cb (struct cs_fsm *fsm, int cb_event, int32_t curr_state,
182  int32_t next_state, int32_t fsm_event, void *data)
183 {
184  switch (cb_event) {
/* message: the event has no entry for the current state */
186  log_printf (LOGSYS_LEVEL_ERROR, "Fsm:%s could not find event \"%s\" in state \"%s\"",
187  fsm->name, fsm->event_to_str(fsm, fsm_event), fsm->state_to_str(fsm, curr_state));
189  break;
/* message: a state transition, old state taken from the current table entry */
191  log_printf (LOGSYS_LEVEL_INFO, "Fsm:%s event \"%s\", state \"%s\" --> \"%s\"",
192  fsm->name,
193  fsm->event_to_str(fsm, fsm_event),
194  fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
195  fsm->state_to_str(fsm, next_state));
196  break;
/* message: the requested state change was refused */
198  log_printf (LOGSYS_LEVEL_CRIT, "Fsm:%s Can't change state from \"%s\" to \"%s\" (event was \"%s\")",
199  fsm->name,
200  fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
201  fsm->state_to_str(fsm, next_state),
202  fsm->event_to_str(fsm, fsm_event));
204  break;
205  default:
206  log_printf (LOGSYS_LEVEL_CRIT, "Fsm: Unknown callback event!");
208  break;
209  }
210 }
211 
212 /*
213  * returns (CS_TRUE == OK, CS_FALSE == failed)
214  */
215 static int32_t wd_resource_state_is_ok (struct resource *ref)
216 {
/*
 * Health check for one resource, based on two icmap keys found under
 * ref->res_path: last_updated and state.
 *
 * CS_FALSE is returned when either key cannot be read, when state is
 * the string disabled or equals wd_failed_str, or when last_updated
 * plus 1.5 * check_timeout already lies before the current timestamp.
 * CS_TRUE is returned otherwise, including when last_updated is 0
 * (treated as the initial value).
 *
 * The string obtained for state is released with free() on every path.
 */
217  char* state = NULL;
218  uint64_t last_updated;
219  uint64_t my_time;
220  uint64_t allowed_period;
221  char key_name[ICMAP_KEYNAME_MAXLEN];
222 
223  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "last_updated");
224  if (icmap_get_uint64(key_name, &last_updated) != CS_OK) {
225  /* key does not exist.
226  */
227  return CS_FALSE;
228  }
229 
230  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "state");
231  if (icmap_get_string(key_name, &state) != CS_OK || strcmp(state, "disabled") == 0) {
232  /* key does not exist, or the resource is marked disabled.
233  */
/* state is still NULL when the lookup itself failed */
234  if (state != NULL)
235  free(state);
236 
237  return CS_FALSE;
238  }
239 
240  if (last_updated == 0) {
241  /* initial value */
242  free(state);
243  return CS_TRUE;
244  }
245 
246  my_time = cs_timestamp_get();
247 
248  /*
249  * Here we check that the monitor has written a timestamp within the poll_period
250  * plus a grace factor of (0.5 * poll_period).
251  */
/* check_timeout is scaled by MILLI_2_NANO_SECONDS before comparing with the timestamps */
252  allowed_period = (ref->check_timeout * MILLI_2_NANO_SECONDS * 3) / 2;
253  if ((last_updated + allowed_period) < my_time) {
/* NOTE(review): the line that opens this log statement is missing from the listing */
255  "last_updated %"PRIu64" ms too late, period:%"PRIu64".",
256  (uint64_t)(my_time/MILLI_2_NANO_SECONDS - ((last_updated + allowed_period) / MILLI_2_NANO_SECONDS)),
257  ref->check_timeout);
258  free(state);
259  return CS_FALSE;
260  }
261 
/* a timestamp that is fresh enough does not help if the state itself reads failed */
262  if (strcmp (state, wd_failed_str) == 0) {
263  free(state);
264  return CS_FALSE;
265  }
266 
267  free(state);
268  return CS_TRUE;
269 }
270 
/*
 * FSM action listed in wd_fsm_table for WD_E_CONFIG_CHANGED.
 *
 * data is the struct resource whose keys changed. The function re-reads
 * poll_period, recovery and state from icmap, cancels any pending check
 * timer and then either moves the resource to WD_S_STOPPED (state key
 * equals wd_stopped_str, or a key is missing) or arms a new check timer
 * and moves it to WD_S_RUNNING.
 *
 * NOTE(review): the lines that open four log statements are missing
 * from this listing; only their continuation lines are visible.
 */
271 static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data)
272 {
273  char *state;
274  uint64_t tmp_value;
275  uint64_t next_timeout;
276  struct resource *ref = (struct resource*)data;
277  char key_name[ICMAP_KEYNAME_MAXLEN];
278 
279  next_timeout = ref->check_timeout;
280 
281  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "poll_period");
/*
 * NOTE(review): key_name was just built for poll_period, but the lookup
 * below is given ref->res_path instead. This looks like a bug that keeps
 * a changed poll_period from ever being read here. Confirm and pass
 * key_name.
 */
282  if (icmap_get_uint64(ref->res_path, &tmp_value) == CS_OK) {
283  if (tmp_value >= WD_MIN_TIMEOUT_MS && tmp_value <= WD_MAX_TIMEOUT_MS) {
285  "poll_period changing from:%"PRIu64" to %"PRIu64".",
286  ref->check_timeout, tmp_value);
287  /*
288  * To ease the transition between poll_periods we are going
289  * to make the first timeout the bigger of the new and old value.
290  * This is to give the monitoring system time to adjust.
291  */
292  next_timeout = CS_MAX(tmp_value, ref->check_timeout);
293  ref->check_timeout = tmp_value;
294  } else {
296  "Could NOT use poll_period:%"PRIu64" ms for resource %s",
297  tmp_value, ref->name);
298  }
299  }
300 
301  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "recovery");
/*
 * NOTE(review): ref->recovery is overwritten without releasing the
 * previous string. Strings returned by icmap_get_string are freed by the
 * caller elsewhere in this file, so this presumably leaks the old value.
 * TODO confirm.
 */
302  if (icmap_get_string(key_name, &ref->recovery) != CS_OK) {
303  /* key does not exist.
304  */
306  "resource %s missing a recovery key.", ref->name);
307  cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb);
308  return;
309  }
310  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "state");
311  if (icmap_get_string(key_name, &state) != CS_OK) {
312  /* key does not exist.
313  */
315  "resource %s missing a state key.", ref->name);
316  cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb);
317  return;
318  }
/* drop the pending check so it can be re-armed with next_timeout below */
319  if (ref->check_timer) {
320  api->timer_delete(ref->check_timer);
321  ref->check_timer = 0;
322  }
323 
324  if (strcmp(wd_stopped_str, state) == 0) {
325  cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb);
326  } else {
327  api->timer_add_duration(next_timeout * MILLI_2_NANO_SECONDS,
328  ref, wd_resource_check_fn, &ref->check_timer);
329  cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref, wd_fsm_cb);
330  }
331  free(state);
332 }
333 
334 static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data)
335 {
336  struct resource* ref = (struct resource*)data;
337 
338  if (ref->check_timer) {
339  api->timer_delete(ref->check_timer);
340  ref->check_timer = 0;
341  }
342 
343  log_printf (LOGSYS_LEVEL_CRIT, "%s resource \"%s\" failed!",
344  ref->recovery, (char*)ref->name);
345  if (strcmp (ref->recovery, "watchdog") == 0 ||
346  strcmp (ref->recovery, "quit") == 0) {
347  watchdog_ok = 0;
348  }
349  else if (strcmp (ref->recovery, "reboot") == 0) {
350  reboot(RB_AUTOBOOT);
351  }
352  else if (strcmp (ref->recovery, "shutdown") == 0) {
353  reboot(RB_POWER_OFF);
354  }
355  cs_fsm_state_set(fsm, WD_S_FAILED, data, wd_fsm_cb);
356 }
357 
/*
 * icmap notification callback registered per resource by
 * wd_resource_create; user_data is that resource.
 *
 * Only the last dot-separated component of key_name is examined:
 *  - add/modify of last_updated or current is ignored, any other
 *    add/modify is forwarded to the FSM as WD_E_CONFIG_CHANGED;
 *  - deletion of the state key stops the check timer and frees the
 *    resource.
 *
 * NOTE(review): two lines are missing from the delete branch of this
 * listing: the opener of the log statement and one statement just
 * before free(ref).
 */
358 static void wd_key_changed(
359  int32_t event,
360  const char *key_name,
361  struct icmap_notify_value new_val,
362  struct icmap_notify_value old_val,
363  void *user_data)
364 {
365  struct resource* ref = (struct resource*)user_data;
366  char *last_key_part;
367 
368  if (ref == NULL) {
369  return ;
370  }
371 
372  last_key_part = strrchr(key_name, '.');
373  if (last_key_part == NULL) {
374  return ;
375  }
/* step past the dot itself */
376  last_key_part++;
377 
378  if (event == ICMAP_TRACK_ADD || event == ICMAP_TRACK_MODIFY) {
/* these two keys do not count as configuration changes */
379  if (strcmp(last_key_part, "last_updated") == 0 ||
380  strcmp(last_key_part, "current") == 0) {
381  return;
382  }
383 
384  cs_fsm_process(&ref->fsm, WD_E_CONFIG_CHANGED, ref, wd_fsm_cb);
385  }
386 
/* NOTE(review): the ref != NULL test is redundant, NULL already returned above */
387  if (event == ICMAP_TRACK_DELETE && ref != NULL) {
388  if (strcmp(last_key_part, "state") != 0) {
389  return ;
390  }
391 
393  "resource \"%s\" deleted from cmap!",
394  ref->name);
395 
396  api->timer_delete(ref->check_timer);
397  ref->check_timer = 0;
399 
/* ref must not be touched after this point */
400  free(ref);
401  }
402 }
403 
/*
 * Timer callback that performs the periodic health check of one
 * resource; resource_ref is the struct resource.
 *
 * A failed check is reported to the FSM as WD_E_FAILURE and the function
 * returns without doing anything else.
 */
404 static void wd_resource_check_fn (void* resource_ref)
405 {
406  struct resource* ref = (struct resource*)resource_ref;
407 
408  if (wd_resource_state_is_ok (ref) == CS_FALSE) {
409  cs_fsm_process(&ref->fsm, WD_E_FAILURE, ref, wd_fsm_cb);
410  return;
411  }
/*
 * NOTE(review): the first line of the statement below is missing from
 * this listing. Judging by the remaining arguments it presumably re-arms
 * ref->check_timer with this same function - TODO confirm against the
 * real source.
 */
413  ref, wd_resource_check_fn, &ref->check_timer);
414 }
415 
416 /*
417  * return 0 - fully configured
418  * return -1 - partially configured
419  */
420 static int32_t wd_resource_create (char *res_path, char *res_name)
421 {
/*
 * Allocates a struct resource for the icmap prefix res_path, initialises
 * its FSM in WD_S_STOPPED, registers wd_key_changed for the prefix and,
 * when both the recovery and state keys exist, arms the first check
 * timer and moves the FSM to WD_S_RUNNING.
 *
 * On the -1 paths the resource stays allocated and stays registered
 * with icmap_track_add.
 *
 * NOTE(review): several lines of this function are missing from the
 * listing (one statement after the first strcpy, the openers of three
 * log statements and one argument line of icmap_track_add).
 */
422  char *state;
423  uint64_t tmp_value;
/* NOTE(review): the calloc result is used below without a NULL check */
424  struct resource *ref = calloc (1, sizeof (struct resource));
425  char key_name[ICMAP_KEYNAME_MAXLEN];
426 
/* NOTE(review): unbounded copy - relies on the caller keeping res_path within the destination size */
427  strcpy(ref->res_path, res_path);
429  ref->check_timer = 0;
430 
/*
 * NOTE(review): unbounded copy. The callers in this file pass a buffer of
 * ICMAP_KEYNAME_MAXLEN bytes, so a name longer than the destination would
 * overflow it - verify the relative sizes.
 */
431  strcpy(ref->name, res_name);
432  ref->fsm.name = ref->name;
433  ref->fsm.table = wd_fsm_table;
434  ref->fsm.entries = sizeof(wd_fsm_table) / sizeof(struct cs_fsm_entry);
435  ref->fsm.curr_entry = 0;
436  ref->fsm.curr_state = WD_S_STOPPED;
437  ref->fsm.state_to_str = wd_res_state_to_str;
438  ref->fsm.event_to_str = wd_res_event_to_str;
439 
440  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "poll_period");
441  if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) {
/* no poll_period configured: publish the value currently held in check_timeout */
442  icmap_set_uint64(key_name, ref->check_timeout);
443  } else {
444  if (tmp_value >= WD_MIN_TIMEOUT_MS && tmp_value <= WD_MAX_TIMEOUT_MS) {
445  ref->check_timeout = tmp_value;
446  } else {
448  "Could NOT use poll_period:%"PRIu64" ms for resource %s",
449  tmp_value, ref->name);
450  }
451  }
452 
/* from here on wd_key_changed is told about changes under res_path */
453  icmap_track_add(res_path,
455  wd_key_changed,
456  ref, &ref->icmap_track);
457 
458  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "recovery");
459  if (icmap_get_string(key_name, &ref->recovery) != CS_OK) {
460  /* key does not exist.
461  */
463  "resource %s missing a recovery key.", ref->name);
464  return -1;
465  }
466  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "state");
/*
 * NOTE(review): state is only fetched to prove the key exists. It is
 * neither used nor freed in the lines visible here, which looks like a
 * small leak - TODO confirm.
 */
467  if (icmap_get_string(key_name, &state) != CS_OK) {
468  /* key does not exist.
469  */
471  "resource %s missing a state key.", ref->name);
472  return -1;
473  }
474 
475  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "last_updated");
476  if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) {
477  /* key does not exist.
478  */
479  ref->last_updated = 0;
480  } else {
481  ref->last_updated = tmp_value;
482  }
483 
484  /*
485  * delay the first check to give the monitor time to start working.
486  */
487  tmp_value = CS_MAX(ref->check_timeout * 2, WD_DEFAULT_TIMEOUT_MS);
488  api->timer_add_duration(tmp_value * MILLI_2_NANO_SECONDS,
489  ref,
490  wd_resource_check_fn, &ref->check_timer);
491 
492  cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref, wd_fsm_cb);
493  return 0;
494 }
495 
496 
497 static void wd_tickle_fn (void* arg)
498 {
499  ENTER();
500 
501  if (watchdog_ok) {
502  if (dog > 0) {
503  ioctl(dog, WDIOC_KEEPALIVE, &watchdog_ok);
504  }
505  api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL,
506  wd_tickle_fn, &wd_timer);
507  }
508  else {
509  log_printf (LOGSYS_LEVEL_ALERT, "NOT tickling the watchdog!");
510  }
511 
512 }
513 
514 static void wd_resource_created_cb(
515  int32_t event,
516  const char *key_name,
517  struct icmap_notify_value new_val,
518  struct icmap_notify_value old_val,
519  void *user_data)
520 {
521  char res_name[ICMAP_KEYNAME_MAXLEN];
522  char res_type[ICMAP_KEYNAME_MAXLEN];
523  char tmp_key[ICMAP_KEYNAME_MAXLEN];
524  int res;
525 
526  if (event != ICMAP_TRACK_ADD) {
527  return ;
528  }
529 
530  res = sscanf(key_name, "resources.%[^.].%[^.].%[^.]", res_type, res_name, tmp_key);
531  if (res != 3) {
532  return ;
533  }
534 
535  if (strcmp(tmp_key, "state") != 0) {
536  return ;
537  }
538 
539  snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "resources.%s.%s.", res_type, res_name);
540  wd_resource_create (tmp_key, res_name);
541 }
542 
543 static void wd_scan_resources (void)
544 {
545  int res_count = 0;
546  icmap_track_t icmap_track = NULL;
547  icmap_iter_t iter;
548  const char *key_name;
549  int res;
550  char res_name[ICMAP_KEYNAME_MAXLEN];
551  char res_type[ICMAP_KEYNAME_MAXLEN];
552  char tmp_key[ICMAP_KEYNAME_MAXLEN];
553 
554  ENTER();
555 
556  iter = icmap_iter_init("resources.");
557  while ((key_name = icmap_iter_next(iter, NULL, NULL)) != NULL) {
558  res = sscanf(key_name, "resources.%[^.].%[^.].%[^.]", res_type, res_name, tmp_key);
559  if (res != 3) {
560  continue ;
561  }
562 
563  if (strcmp(tmp_key, "state") != 0) {
564  continue ;
565  }
566 
567  snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "resources.%s.%s.", res_type, res_name);
568  if (wd_resource_create (tmp_key, res_name) == 0) {
569  res_count++;
570  }
571  }
572  icmap_iter_finalize(iter);
573 
574  icmap_track_add("resources.process.", ICMAP_TRACK_ADD | ICMAP_TRACK_PREFIX,
575  wd_resource_created_cb, NULL, &icmap_track);
576  icmap_track_add("resources.system.", ICMAP_TRACK_ADD | ICMAP_TRACK_PREFIX,
577  wd_resource_created_cb, NULL, &icmap_track);
578 
579  if (res_count == 0) {
580  log_printf (LOGSYS_LEVEL_INFO, "no resources configured.");
581  }
582 }
583 
584 
/*
 * Switch the watchdog timeout to new seconds.
 *
 * When a device is open the value is pushed with WDIOC_SETTIMEOUT (if the
 * driver reports WDIOF_SETTIMEOUT) and read back with WDIOC_GETTIMEOUT.
 * If the resulting watchdog_timeout equals new, tickle_timeout is set to
 * half of it (in ms) and the tickle timer is restarted. Otherwise the
 * failure is logged.
 *
 * NOTE(review): the call returns immediately when new equals the current
 * watchdog_timeout. setup_watchdog calls it with watchdog_timeout itself
 * right after opening the device, so that call can never reach the
 * ioctls below - presumably not intended, confirm.
 *
 * NOTE(review): the line that opens the log statement in the final else
 * branch is missing from this listing.
 */
585 static void watchdog_timeout_apply (uint32_t new)
586 {
587  struct watchdog_info ident;
588  uint32_t original_timeout = watchdog_timeout;
589 
590  if (new == original_timeout) {
591  return;
592  }
593 
594  watchdog_timeout = new;
595 
596  if (dog > 0) {
/* NOTE(review): ioctl results are not checked, ident is unset if this call fails */
597  ioctl(dog, WDIOC_GETSUPPORT, &ident);
598  if (ident.options & WDIOF_SETTIMEOUT) {
599  /* yay! the dog is trained.
600  */
601  ioctl(dog, WDIOC_SETTIMEOUT, &watchdog_timeout);
602  }
/* read back what the driver actually uses, it overwrites watchdog_timeout */
603  ioctl(dog, WDIOC_GETTIMEOUT, &watchdog_timeout);
604  }
605 
606  if (watchdog_timeout == new) {
607  tickle_timeout = (watchdog_timeout * CS_TIME_MS_IN_SEC)/ 2;
608 
609  /* reset the tickle timer in case it was reduced.
610  */
611  api->timer_delete (wd_timer);
612  api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL,
613  wd_tickle_fn, &wd_timer);
614 
615  log_printf (LOGSYS_LEVEL_DEBUG, "The Watchdog timeout is %d seconds", watchdog_timeout);
616  log_printf (LOGSYS_LEVEL_DEBUG, "The tickle timeout is %"PRIu64" ms", tickle_timeout);
617  } else {
619  "Could not change the Watchdog timeout from %d to %d seconds",
620  original_timeout, new);
621  }
622 
623 }
624 
625 static int setup_watchdog(void)
626 {
627  struct watchdog_info ident;
628 
629  ENTER();
630  if (access ("/dev/watchdog", W_OK) != 0) {
631  log_printf (LOGSYS_LEVEL_WARNING, "No Watchdog, try modprobe <a watchdog>");
632  dog = -1;
633  return -1;
634  }
635 
636  /* here goes, lets hope they have "Magic Close"
637  */
638  dog = open("/dev/watchdog", O_WRONLY);
639 
640  if (dog == -1) {
641  log_printf (LOGSYS_LEVEL_WARNING, "Watchdog exists but couldn't be opened.");
642  dog = -1;
643  return -1;
644  }
645 
646  /* Right we have the dog.
647  * Lets see what breed it is.
648  */
649 
650  ioctl(dog, WDIOC_GETSUPPORT, &ident);
651  log_printf (LOGSYS_LEVEL_INFO, "Watchdog is now been tickled by corosync.");
652  log_printf (LOGSYS_LEVEL_DEBUG, "%s", ident.identity);
653 
654  watchdog_timeout_apply (watchdog_timeout);
655 
656  ioctl(dog, WDIOC_SETOPTIONS, WDIOS_ENABLECARD);
657 
658  return 0;
659 }
660 
661 static void wd_top_level_key_changed(
662  int32_t event,
663  const char *key_name,
664  struct icmap_notify_value new_val,
665  struct icmap_notify_value old_val,
666  void *user_data)
667 {
668  uint32_t tmp_value_32;
669 
670  ENTER();
671 
672  if (icmap_get_uint32("resources.watchdog_timeout", &tmp_value_32) != CS_OK) {
673  if (tmp_value_32 >= 2 && tmp_value_32 <= 120) {
674  watchdog_timeout_apply (tmp_value_32);
675  }
676  }
677  else {
678  watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
679  }
680 }
681 
682 static void watchdog_timeout_get_initial (void)
683 {
684  uint32_t tmp_value_32;
685  icmap_track_t icmap_track = NULL;
686 
687  ENTER();
688 
689  if (icmap_get_uint32("resources.watchdog_timeout", &tmp_value_32) != CS_OK) {
690  watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
691 
692  icmap_set_uint32("resources.watchdog_timeout", watchdog_timeout);
693  }
694  else {
695  if (tmp_value_32 >= 2 && tmp_value_32 <= 120) {
696  watchdog_timeout_apply (tmp_value_32);
697  } else {
698  watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
699  }
700  }
701 
702  icmap_track_add("resources.watchdog_timeout", ICMAP_TRACK_MODIFY,
703  wd_top_level_key_changed, NULL, &icmap_track);
704 
705 }
706 
707 static char *wd_exec_init_fn (struct corosync_api_v1 *corosync_api)
708 {
709 
710  ENTER();
711 
712  api = corosync_api;
713 
714  watchdog_timeout_get_initial();
715 
716  setup_watchdog();
717 
718  wd_scan_resources();
719 
720  api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL,
721  wd_tickle_fn, &wd_timer);
722 
723  return NULL;
724 }
725 
726 static int wd_exec_exit_fn (void)
727 {
728  char magic = 'V';
729  ENTER();
730 
731  if (dog > 0) {
732  log_printf (LOGSYS_LEVEL_INFO, "magically closing the watchdog.");
733  write (dog, &magic, 1);
734  }
735  return 0;
736 }
737 
738 
#define CS_TRUE
Definition: corotypes.h:51
const char * name
Definition: coroapi.h:432
const char * wd_running_str
Definition: wd.c:130
const char * icmap_iter_next(icmap_iter_t iter, size_t *value_len, icmap_value_types_t *type)
Definition: icmap.c:1103
struct cs_fsm fsm
Definition: wd.c:66
#define LOGSYS_LEVEL_INFO
Definition: logsys.h:73
const char * wd_config_changed_str
Definition: wd.c:134
int32_t curr_state
Definition: fsm.h:67
#define CS_FALSE
Definition: corotypes.h:50
void(* timer_delete)(corosync_timer_handle_t timer_handle)
Definition: coroapi.h:193
const char * wd_failed_str
Definition: wd.c:131
void icmap_iter_finalize(icmap_iter_t iter)
Definition: icmap.c:1124
int(* timer_add_duration)(unsigned long long nanoseconds_in_future, void *data, void(*timer_nf)(void *data), corosync_timer_handle_t *handle)
Definition: coroapi.h:181
#define corosync_exit_error(err)
Definition: exec/util.h:70
Definition: fsm.h:65
#define WD_DEFAULT_TIMEOUT_SEC
Definition: wd.c:83
cs_fsm_event_to_str_fn event_to_str
Definition: fsm.h:72
time_t last_updated
Definition: wd.c:65
#define log_printf(level, format, args...)
Definition: logsys.h:217
corosync_timer_handle_t check_timer
Definition: wd.c:68
Definition: wd.c:61
#define ICMAP_TRACK_DELETE
Definition: icmap.h:77
#define ICMAP_KEYNAME_MAXLEN
Definition: icmap.h:48
const char * wd_failure_str
Definition: wd.c:132
char res_path[ICMAP_KEYNAME_MAXLEN]
Definition: wd.c:62
size_t entries
Definition: fsm.h:69
#define LOGSYS_LEVEL_WARNING
Definition: logsys.h:71
#define ICMAP_TRACK_MODIFY
Definition: icmap.h:78
struct corosync_service_engine * wd_get_service_engine_ver0(void)
Definition: wd.c:145
cs_error_t icmap_set_uint32(const char *key_name, uint32_t value)
Definition: icmap.c:611
void * user_data
Definition: sam.c:126
icmap_track_t icmap_track
Definition: wd.c:70
#define ICMAP_TRACK_ADD
Definition: icmap.h:76
struct corosync_service_engine wd_service_engine
Definition: wd.c:93
const char * name
Definition: fsm.h:66
#define LOGSYS_LEVEL_ERROR
Definition: logsys.h:70
Linked list API.
#define DECLARE_LIST_INIT(name)
Definition: list.h:51
wd_resource_event
Definition: wd.c:125
cs_error_t icmap_track_delete(icmap_track_t icmap_track)
Definition: icmap.c:1212
#define LOGSYS_LEVEL_DEBUG
Definition: logsys.h:74
char name[CS_MAX_NAME_LENGTH]
Definition: wd.c:64
char * recovery
Definition: wd.c:63
#define CS_FSM_CB_EVENT_STATE_SET
Definition: fsm.h:55
cs_error_t icmap_get_uint32(const char *key_name, uint32_t *u32)
Definition: icmap.c:866
struct cs_fsm_entry wd_fsm_table[]
Definition: wd.c:136
#define WD_MAX_TIMEOUT_MS
Definition: wd.c:86
cs_error_t icmap_set_uint64(const char *key_name, uint64_t value)
Definition: icmap.c:623
#define ENTER
Definition: logsys.h:218
cs_fsm_state_to_str_fn state_to_str
Definition: fsm.h:71
#define MILLI_2_NANO_SECONDS
Definition: coroapi.h:92
Definition: fsm.h:58
#define CS_FSM_CB_EVENT_PROCESS_NF
Definition: fsm.h:54
cs_error_t icmap_get_uint64(const char *key_name, uint64_t *u64)
Definition: icmap.c:878
#define WD_DEFAULT_TIMEOUT_MS
Definition: wd.c:84
#define CS_FSM_CB_EVENT_STATE_SET_NF
Definition: fsm.h:56
#define CS_MAX(x, y)
Definition: corotypes.h:54
uint64_t check_timeout
Definition: wd.c:69
#define CS_MAX_NAME_LENGTH
Definition: corotypes.h:52
#define WD_MIN_TIMEOUT_MS
Definition: wd.c:85
qb_loop_timer_handle corosync_timer_handle_t
Definition: coroapi.h:64
wd_resource_state_t
Definition: wd.c:54
cs_error_t icmap_get_string(const char *key_name, char **str)
Definition: icmap.c:896
#define LOGSYS_LEVEL_CRIT
Definition: logsys.h:69
int32_t curr_entry
Definition: fsm.h:68
#define LOGSYS_LEVEL_ALERT
Definition: logsys.h:68
#define CS_TIME_MS_IN_SEC
Definition: corotypes.h:113
int32_t curr_state
Definition: fsm.h:59
struct cs_fsm_entry * table
Definition: fsm.h:70
icmap_iter_t icmap_iter_init(const char *prefix)
Definition: icmap.c:1097
wd_resource_state
Definition: wd.c:119
qb_map_iter_t * icmap_iter_t
Definition: icmap.h:121
const char * wd_stopped_str
Definition: wd.c:133
cs_error_t icmap_track_add(const char *key_name, int32_t track_type, icmap_notify_fn_t notify_fn, void *user_data, icmap_track_t *icmap_track)
Definition: icmap.c:1167
LOGSYS_DECLARE_SUBSYS("WD")
#define ICMAP_TRACK_PREFIX
Definition: icmap.h:84