Add comprehensive thread exit monitoring and signal handling for debugging thread crashes
This commit is contained in:
BIN
event_miner
BIN
event_miner
Binary file not shown.
180
event_miner.c
180
event_miner.c
@@ -16,6 +16,8 @@
|
||||
#include <unistd.h>
|
||||
#include <getopt.h>
|
||||
#include <errno.h>
|
||||
#include <signal.h>
|
||||
#include <sys/types.h>
|
||||
#include "nostr_core_lib/nostr_core/nostr_common.h" // Common definitions and init/cleanup
|
||||
#include "nostr_core_lib/nostr_core/nip001.h" // Basic protocol functions
|
||||
#include "nostr_core_lib/nostr_core/nip013.h" // Proof-of-work functions
|
||||
@@ -27,6 +29,18 @@
|
||||
#define DEFAULT_THREADS 4
|
||||
#define DEFAULT_POW 2
|
||||
|
||||
// Thread exit codes
|
||||
#define THREAD_EXIT_SUCCESS 0 // Found solution or normal completion
|
||||
#define THREAD_EXIT_STOPPED 1 // Stopped by main thread
|
||||
#define THREAD_EXIT_ERROR 2 // Error occurred
|
||||
|
||||
// Global state shared between the signal handler, the main control loop and
// the worker threads. It exists so that a signal can stop all workers.
//
// Both flags are touched from signal_handler(); objects accessed from a
// signal handler must be `volatile sig_atomic_t`, so both use that type.

// Number of the most recently handled signal (0 while none has arrived).
static volatile sig_atomic_t g_signal_received = 0;

// Set to 1 by emergency_shutdown(); polled by the mining loops.
static volatile sig_atomic_t g_shutdown_requested = 0;

// Thread handle array of the mining run in progress (NULL when idle).
static pthread_t* g_thread_handles = NULL;

// Number of entries in g_thread_handles / g_worker_contexts.
static int g_thread_count = 0;

// Worker context array of the mining run in progress; kept as void* because
// mining_context_t is not yet declared here. Cast to mining_context_t* on use.
static void* g_worker_contexts = NULL;
|
||||
|
||||
// Forward declarations for callbacks
|
||||
typedef struct mining_context mining_context_t;
|
||||
|
||||
@@ -89,6 +103,13 @@ static void* miner_thread(void* arg);
|
||||
static int mine_event(mining_context_t* ctx);
|
||||
static void cleanup_context(mining_context_t* ctx);
|
||||
|
||||
// Signal handling and debugging functions
|
||||
static void signal_handler(int sig);
|
||||
static void install_signal_handlers(void);
|
||||
static void log_thread_exit(int thread_id, void* exit_status, const char* reason);
|
||||
static const char* get_signal_name(int sig);
|
||||
static void emergency_shutdown(void);
|
||||
|
||||
// Callback implementations
|
||||
static void solution_found_callback(cJSON* solution, void* user_data);
|
||||
static void verbose_pow_callback(int current_difficulty, uint64_t nonce, void* user_data);
|
||||
@@ -244,6 +265,90 @@ static char* read_stdin_json(void) {
|
||||
return buffer;
|
||||
}
|
||||
|
||||
// Signal handling and debugging functions
|
||||
/*
 * Map a signal number to a human-readable label.
 *
 * Returns a pointer to a string literal (never NULL, never to be freed).
 * Signals other than SIGSEGV, SIGABRT, SIGFPE, SIGBUS, SIGINT and SIGTERM
 * yield "Unknown signal".
 */
static const char* get_signal_name(int sig) {
    static const struct {
        int number;
        const char* label;
    } known_signals[] = {
        { SIGSEGV, "SIGSEGV (Segmentation fault)" },
        { SIGABRT, "SIGABRT (Abort)" },
        { SIGFPE,  "SIGFPE (Floating point exception)" },
        { SIGBUS,  "SIGBUS (Bus error)" },
        { SIGINT,  "SIGINT (Interrupt)" },
        { SIGTERM, "SIGTERM (Terminate)" },
    };
    const unsigned int entry_count =
        (unsigned int)(sizeof known_signals / sizeof known_signals[0]);

    // Plain linear scan over a constant table: no allocation, no locking.
    for (unsigned int slot = 0; slot < entry_count; slot++) {
        if (known_signals[slot].number == sig) {
            return known_signals[slot].label;
        }
    }

    return "Unknown signal";
}
|
||||
|
||||
/*
 * Print one timestamped line to stdout recording that a worker thread ended.
 *
 * thread_id   - index of the thread being reported.
 * exit_status - the thread's return value; it carries a small integer code
 *               packed into a pointer, and is printed as that integer.
 * reason      - short description of why the thread ended; a NULL pointer is
 *               reported as "(no reason given)".
 *
 * Output format: "[HH:MM:SS] Thread <id> exited: <reason> (status: <code>)".
 * If the local time cannot be determined the timestamp is 00:00:00.
 * stdout is flushed so the line is visible immediately.
 */
static void log_thread_exit(int thread_id, void* exit_status, const char* reason) {
    time_t current_time = time(NULL);
    struct tm time_parts = {0};

    // localtime() may return NULL; copy its result out at once because the
    // buffer it points to is shared and overwritten by later calls.
    // NOTE(review): localtime_r() would be the race-free choice if the build
    // always enables POSIX declarations - confirm and switch.
    const struct tm* local_time = localtime(&current_time);
    if (local_time != NULL) {
        time_parts = *local_time;
    }

    // Passing NULL for %s is undefined behaviour, so substitute a placeholder.
    const char* reason_text = (reason != NULL) ? reason : "(no reason given)";

    // Convert the pointer through intptr_t before narrowing to long.
    long status_code = (long)(intptr_t)exit_status;

    printf("[%02d:%02d:%02d] Thread %d exited: %s (status: %ld)\n",
           time_parts.tm_hour, time_parts.tm_min, time_parts.tm_sec,
           thread_id, reason_text, status_code);
    fflush(stdout);
}
|
||||
|
||||
static void emergency_shutdown(void) {
|
||||
// Set global shutdown flag
|
||||
g_shutdown_requested = 1;
|
||||
|
||||
// Signal all worker threads to stop if contexts are available
|
||||
if (g_worker_contexts) {
|
||||
mining_context_t* contexts = (mining_context_t*)g_worker_contexts;
|
||||
for (int i = 0; i < g_thread_count; i++) {
|
||||
contexts[i].should_stop = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void signal_handler(int sig) {
|
||||
static volatile sig_atomic_t in_signal_handler = 0;
|
||||
|
||||
// Prevent recursive signal handling
|
||||
if (in_signal_handler) {
|
||||
return;
|
||||
}
|
||||
in_signal_handler = 1;
|
||||
|
||||
g_signal_received = sig;
|
||||
|
||||
// Log the signal (async-signal-safe functions only)
|
||||
const char* sig_name = get_signal_name(sig);
|
||||
|
||||
// Write to stderr using async-signal-safe functions
|
||||
write(STDERR_FILENO, "\n[SIGNAL] Received ", 19);
|
||||
write(STDERR_FILENO, sig_name, strlen(sig_name));
|
||||
write(STDERR_FILENO, "\n", 1);
|
||||
|
||||
// For fatal signals, try emergency shutdown
|
||||
if (sig == SIGSEGV || sig == SIGABRT || sig == SIGFPE || sig == SIGBUS) {
|
||||
write(STDERR_FILENO, "[SIGNAL] Attempting emergency shutdown...\n", 42);
|
||||
emergency_shutdown();
|
||||
|
||||
// Reset signal handler to default and re-raise
|
||||
signal(sig, SIG_DFL);
|
||||
raise(sig);
|
||||
} else if (sig == SIGINT || sig == SIGTERM) {
|
||||
// Graceful shutdown for interrupt signals
|
||||
write(STDERR_FILENO, "[SIGNAL] Initiating graceful shutdown...\n", 41);
|
||||
emergency_shutdown();
|
||||
}
|
||||
|
||||
in_signal_handler = 0;
|
||||
}
|
||||
|
||||
static void install_signal_handlers(void) {
|
||||
// Install handlers for crash signals
|
||||
signal(SIGSEGV, signal_handler);
|
||||
signal(SIGABRT, signal_handler);
|
||||
signal(SIGFPE, signal_handler);
|
||||
signal(SIGBUS, signal_handler);
|
||||
|
||||
// Install handlers for graceful shutdown
|
||||
signal(SIGINT, signal_handler);
|
||||
signal(SIGTERM, signal_handler);
|
||||
}
|
||||
|
||||
// Callback implementations
|
||||
static void solution_found_callback(cJSON* solution, void* user_data) {
|
||||
main_context_t* main_ctx = (main_context_t*)user_data;
|
||||
@@ -293,9 +398,10 @@ static void verbose_pow_callback(int current_difficulty, uint64_t nonce, void* u
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
// Mining thread function - New callback-based approach
|
||||
// Mining thread function - Enhanced with exit status monitoring
|
||||
static void* miner_thread(void* arg) {
|
||||
mining_context_t* ctx = (mining_context_t*)arg;
|
||||
void* exit_status = (void*)(intptr_t)THREAD_EXIT_ERROR; // Default to error
|
||||
|
||||
// Initialize thread-specific timing for verbose mode
|
||||
ctx->thread_start_time = time(NULL);
|
||||
@@ -304,20 +410,22 @@ static void* miner_thread(void* arg) {
|
||||
// Create a copy of the event for this thread
|
||||
char* event_str = cJSON_Print(ctx->event);
|
||||
if (!event_str) {
|
||||
return NULL;
|
||||
log_thread_exit(ctx->thread_id, exit_status, "JSON serialization failed");
|
||||
return exit_status;
|
||||
}
|
||||
|
||||
cJSON* local_event = cJSON_Parse(event_str);
|
||||
free(event_str);
|
||||
|
||||
if (!local_event) {
|
||||
return NULL;
|
||||
log_thread_exit(ctx->thread_id, exit_status, "Event parsing failed");
|
||||
return exit_status;
|
||||
}
|
||||
|
||||
uint64_t attempts = 0;
|
||||
|
||||
// Mine until solution found or signaled to stop by main thread
|
||||
while (!ctx->should_stop) {
|
||||
while (!ctx->should_stop && !g_shutdown_requested) {
|
||||
// Attempt mining with verbose callback if enabled
|
||||
void (*progress_cb)(int, uint64_t, void*) = ctx->verbose_enabled ? verbose_pow_callback : NULL;
|
||||
|
||||
@@ -334,20 +442,37 @@ static void* miner_thread(void* arg) {
|
||||
if (ctx->solution_callback) {
|
||||
ctx->solution_callback(local_event, ctx->user_data);
|
||||
}
|
||||
exit_status = (void*)(intptr_t)THREAD_EXIT_SUCCESS;
|
||||
log_thread_exit(ctx->thread_id, exit_status, "Solution found");
|
||||
break; // Exit after reporting solution
|
||||
}
|
||||
|
||||
// Check for emergency shutdown
|
||||
if (g_shutdown_requested) {
|
||||
exit_status = (void*)(intptr_t)THREAD_EXIT_STOPPED;
|
||||
log_thread_exit(ctx->thread_id, exit_status, "Emergency shutdown");
|
||||
break;
|
||||
}
|
||||
|
||||
// Small delay to prevent CPU overuse and allow responsive stopping
|
||||
usleep(100); // 0.1ms - more responsive to should_stop signal
|
||||
}
|
||||
|
||||
// Normal stop by main thread
|
||||
if (ctx->should_stop && !g_shutdown_requested) {
|
||||
exit_status = (void*)(intptr_t)THREAD_EXIT_STOPPED;
|
||||
log_thread_exit(ctx->thread_id, exit_status, "Stopped by main thread");
|
||||
}
|
||||
|
||||
cJSON_Delete(local_event);
|
||||
return NULL;
|
||||
return exit_status;
|
||||
}
|
||||
|
||||
// Main mining function - New hub-and-spoke model
|
||||
// Main mining function - Enhanced with debugging and monitoring
|
||||
static int mine_event(mining_context_t* ctx) {
|
||||
// Install signal handlers for crash detection
|
||||
install_signal_handlers();
|
||||
|
||||
// Set up main context for centralized control
|
||||
main_context_t main_ctx;
|
||||
memset(&main_ctx, 0, sizeof(main_context_t));
|
||||
@@ -386,7 +511,13 @@ static int mine_event(mining_context_t* ctx) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Set up global debugging variables
|
||||
g_thread_handles = threads;
|
||||
g_thread_count = ctx->thread_count;
|
||||
g_worker_contexts = worker_contexts;
|
||||
|
||||
time_t start_time = time(NULL);
|
||||
printf("[DEBUG] Starting %d mining threads...\n", ctx->thread_count);
|
||||
|
||||
// Start threads
|
||||
for (int i = 0; i < ctx->thread_count; i++) {
|
||||
@@ -398,18 +529,21 @@ static int mine_event(mining_context_t* ctx) {
|
||||
}
|
||||
// Wait for threads that were created
|
||||
for (int j = 0; j < i; j++) {
|
||||
pthread_join(threads[j], NULL);
|
||||
void* exit_status;
|
||||
pthread_join(threads[j], &exit_status);
|
||||
log_thread_exit(j, exit_status, "Cleanup after creation failure");
|
||||
}
|
||||
free(threads);
|
||||
free(worker_contexts);
|
||||
pthread_mutex_destroy(&main_ctx.result_mutex);
|
||||
return -1;
|
||||
}
|
||||
printf("[DEBUG] Thread %d started successfully\n", i);
|
||||
}
|
||||
|
||||
// Main thread control loop - centralized monitoring
|
||||
int result = 0;
|
||||
while (!main_ctx.solution_found && !main_ctx.timeout_reached) {
|
||||
while (!main_ctx.solution_found && !main_ctx.timeout_reached && !g_shutdown_requested) {
|
||||
// Check for timeout
|
||||
if (ctx->timeout_seconds > 0) {
|
||||
time_t current_time = time(NULL);
|
||||
@@ -420,30 +554,56 @@ static int mine_event(mining_context_t* ctx) {
|
||||
}
|
||||
}
|
||||
|
||||
// Check for signals
|
||||
if (g_signal_received) {
|
||||
printf("[DEBUG] Signal received, shutting down...\n");
|
||||
break;
|
||||
}
|
||||
|
||||
// Small sleep to avoid busy waiting
|
||||
usleep(10000); // 10ms
|
||||
}
|
||||
|
||||
// Signal all workers to stop
|
||||
printf("[DEBUG] Signaling threads to stop...\n");
|
||||
for (int i = 0; i < ctx->thread_count; i++) {
|
||||
worker_contexts[i].should_stop = 1;
|
||||
}
|
||||
|
||||
// Wait for all threads to finish
|
||||
// Wait for all threads to finish and capture exit statuses
|
||||
printf("[DEBUG] Waiting for threads to finish...\n");
|
||||
for (int i = 0; i < ctx->thread_count; i++) {
|
||||
pthread_join(threads[i], NULL);
|
||||
void* exit_status;
|
||||
pthread_join(threads[i], &exit_status);
|
||||
|
||||
// Log exit status if it wasn't already logged by the thread
|
||||
long status_code = (long)exit_status;
|
||||
if (status_code == THREAD_EXIT_ERROR) {
|
||||
log_thread_exit(i, exit_status, "Thread error (not previously logged)");
|
||||
}
|
||||
}
|
||||
|
||||
// Handle results
|
||||
if (main_ctx.solution_found && main_ctx.result_event) {
|
||||
ctx->result_event = main_ctx.result_event; // Transfer ownership
|
||||
result = 1; // Success
|
||||
printf("[DEBUG] Solution found successfully\n");
|
||||
} else if (main_ctx.timeout_reached) {
|
||||
result = -1; // Timeout
|
||||
printf("[DEBUG] Mining timed out\n");
|
||||
} else if (g_shutdown_requested || g_signal_received) {
|
||||
result = -3; // Signal/emergency shutdown
|
||||
printf("[DEBUG] Emergency shutdown completed\n");
|
||||
} else {
|
||||
result = -2; // Error
|
||||
printf("[DEBUG] Mining failed with error\n");
|
||||
}
|
||||
|
||||
// Clear global debugging variables
|
||||
g_thread_handles = NULL;
|
||||
g_thread_count = 0;
|
||||
g_worker_contexts = NULL;
|
||||
|
||||
// Cleanup
|
||||
free(threads);
|
||||
free(worker_contexts);
|
||||
|
||||
Reference in New Issue
Block a user