Add comprehensive thread exit monitoring and signal handling for debugging thread crashes
This commit is contained in:
BIN
event_miner
BIN
event_miner
Binary file not shown.
180
event_miner.c
180
event_miner.c
@@ -16,6 +16,8 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
#include <signal.h>
|
||||||
|
#include <sys/types.h>
|
||||||
#include "nostr_core_lib/nostr_core/nostr_common.h" // Common definitions and init/cleanup
|
#include "nostr_core_lib/nostr_core/nostr_common.h" // Common definitions and init/cleanup
|
||||||
#include "nostr_core_lib/nostr_core/nip001.h" // Basic protocol functions
|
#include "nostr_core_lib/nostr_core/nip001.h" // Basic protocol functions
|
||||||
#include "nostr_core_lib/nostr_core/nip013.h" // Proof-of-work functions
|
#include "nostr_core_lib/nostr_core/nip013.h" // Proof-of-work functions
|
||||||
@@ -27,6 +29,18 @@
|
|||||||
#define DEFAULT_THREADS 4
|
#define DEFAULT_THREADS 4
|
||||||
#define DEFAULT_POW 2
|
#define DEFAULT_POW 2
|
||||||
|
|
||||||
|
// Thread exit codes
|
||||||
|
#define THREAD_EXIT_SUCCESS 0 // Found solution or normal completion
|
||||||
|
#define THREAD_EXIT_STOPPED 1 // Stopped by main thread
|
||||||
|
#define THREAD_EXIT_ERROR 2 // Error occurred
|
||||||
|
|
||||||
|
// Global variables for debugging
//
// These are file-scope so that signal_handler()/emergency_shutdown() can
// reach the worker state without being passed any arguments.

// Number of the most recent signal seen by signal_handler(); 0 means none.
static volatile sig_atomic_t g_signal_received = 0;

// Set to 1 by emergency_shutdown(). It is written from inside a signal
// handler, so it uses sig_atomic_t, the only object type the C standard
// guarantees can be safely assigned there.
static volatile sig_atomic_t g_shutdown_requested = 0;

// Thread handle array published by mine_event() while workers are running.
static pthread_t* g_thread_handles = NULL;

// Number of entries behind g_thread_handles / g_worker_contexts.
static int g_thread_count = 0;

// Worker context array published by mine_event().
static void* g_worker_contexts = NULL;  // Will be cast to mining_context_t*
|
||||||
|
|
||||||
// Forward declarations for callbacks
|
// Forward declarations for callbacks
|
||||||
typedef struct mining_context mining_context_t;
|
typedef struct mining_context mining_context_t;
|
||||||
|
|
||||||
@@ -89,6 +103,13 @@ static void* miner_thread(void* arg);
|
|||||||
static int mine_event(mining_context_t* ctx);
|
static int mine_event(mining_context_t* ctx);
|
||||||
static void cleanup_context(mining_context_t* ctx);
|
static void cleanup_context(mining_context_t* ctx);
|
||||||
|
|
||||||
|
// Signal handling and debugging functions
|
||||||
|
static void signal_handler(int sig);
|
||||||
|
static void install_signal_handlers(void);
|
||||||
|
static void log_thread_exit(int thread_id, void* exit_status, const char* reason);
|
||||||
|
static const char* get_signal_name(int sig);
|
||||||
|
static void emergency_shutdown(void);
|
||||||
|
|
||||||
// Callback implementations
|
// Callback implementations
|
||||||
static void solution_found_callback(cJSON* solution, void* user_data);
|
static void solution_found_callback(cJSON* solution, void* user_data);
|
||||||
static void verbose_pow_callback(int current_difficulty, uint64_t nonce, void* user_data);
|
static void verbose_pow_callback(int current_difficulty, uint64_t nonce, void* user_data);
|
||||||
@@ -244,6 +265,90 @@ static char* read_stdin_json(void) {
|
|||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Signal handling and debugging functions
|
||||||
|
// Translate a signal number into a human-readable description.
//
// sig - signal number as delivered to a handler.
//
// Returns a pointer to a string literal (static storage; the caller must not
// free or modify it). Signals other than the six listed below yield
// "Unknown signal". The function reads no global state and performs no
// allocation or I/O.
static const char* get_signal_name(int sig) {
    switch (sig) {
        case SIGSEGV: return "SIGSEGV (Segmentation fault)";
        case SIGABRT: return "SIGABRT (Abort)";
        case SIGFPE:  return "SIGFPE (Floating point exception)";
        case SIGBUS:  return "SIGBUS (Bus error)";
        case SIGINT:  return "SIGINT (Interrupt)";
        case SIGTERM: return "SIGTERM (Terminate)";
        default:      return "Unknown signal";
    }
}
|
||||||
|
|
||||||
|
// Print a timestamped line recording that a worker thread has finished.
//
// thread_id   - worker index, printed as-is.
// exit_status - value returned by the thread; printed as an integer code.
// reason      - short description of why the thread ended; NULL is printed
//               as "(no reason given)" instead of being handed to %s.
//
// Output goes to stdout and is flushed immediately.
static void log_thread_exit(int thread_id, void* exit_status, const char* reason) {
    time_t current_time = time(NULL);

    // localtime() may return NULL on failure; fall back to 00:00:00 rather
    // than dereferencing a null pointer.
    // NOTE(review): localtime() returns shared static storage and this
    // function is called from worker threads - consider localtime_r().
    const struct tm* local_time = localtime(&current_time);
    int hour = 0;
    int minute = 0;
    int second = 0;
    if (local_time != NULL) {
        hour = local_time->tm_hour;
        minute = local_time->tm_min;
        second = local_time->tm_sec;
    }

    printf("[%02d:%02d:%02d] Thread %d exited: %s (status: %ld)\n",
           hour, minute, second,
           thread_id,
           reason != NULL ? reason : "(no reason given)",
           (long)(intptr_t)exit_status);
    fflush(stdout);
}
|
||||||
|
|
||||||
|
static void emergency_shutdown(void) {
|
||||||
|
// Set global shutdown flag
|
||||||
|
g_shutdown_requested = 1;
|
||||||
|
|
||||||
|
// Signal all worker threads to stop if contexts are available
|
||||||
|
if (g_worker_contexts) {
|
||||||
|
mining_context_t* contexts = (mining_context_t*)g_worker_contexts;
|
||||||
|
for (int i = 0; i < g_thread_count; i++) {
|
||||||
|
contexts[i].should_stop = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void signal_handler(int sig) {
|
||||||
|
static volatile sig_atomic_t in_signal_handler = 0;
|
||||||
|
|
||||||
|
// Prevent recursive signal handling
|
||||||
|
if (in_signal_handler) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
in_signal_handler = 1;
|
||||||
|
|
||||||
|
g_signal_received = sig;
|
||||||
|
|
||||||
|
// Log the signal (async-signal-safe functions only)
|
||||||
|
const char* sig_name = get_signal_name(sig);
|
||||||
|
|
||||||
|
// Write to stderr using async-signal-safe functions
|
||||||
|
write(STDERR_FILENO, "\n[SIGNAL] Received ", 19);
|
||||||
|
write(STDERR_FILENO, sig_name, strlen(sig_name));
|
||||||
|
write(STDERR_FILENO, "\n", 1);
|
||||||
|
|
||||||
|
// For fatal signals, try emergency shutdown
|
||||||
|
if (sig == SIGSEGV || sig == SIGABRT || sig == SIGFPE || sig == SIGBUS) {
|
||||||
|
write(STDERR_FILENO, "[SIGNAL] Attempting emergency shutdown...\n", 42);
|
||||||
|
emergency_shutdown();
|
||||||
|
|
||||||
|
// Reset signal handler to default and re-raise
|
||||||
|
signal(sig, SIG_DFL);
|
||||||
|
raise(sig);
|
||||||
|
} else if (sig == SIGINT || sig == SIGTERM) {
|
||||||
|
// Graceful shutdown for interrupt signals
|
||||||
|
write(STDERR_FILENO, "[SIGNAL] Initiating graceful shutdown...\n", 41);
|
||||||
|
emergency_shutdown();
|
||||||
|
}
|
||||||
|
|
||||||
|
in_signal_handler = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void install_signal_handlers(void) {
|
||||||
|
// Install handlers for crash signals
|
||||||
|
signal(SIGSEGV, signal_handler);
|
||||||
|
signal(SIGABRT, signal_handler);
|
||||||
|
signal(SIGFPE, signal_handler);
|
||||||
|
signal(SIGBUS, signal_handler);
|
||||||
|
|
||||||
|
// Install handlers for graceful shutdown
|
||||||
|
signal(SIGINT, signal_handler);
|
||||||
|
signal(SIGTERM, signal_handler);
|
||||||
|
}
|
||||||
|
|
||||||
// Callback implementations
|
// Callback implementations
|
||||||
static void solution_found_callback(cJSON* solution, void* user_data) {
|
static void solution_found_callback(cJSON* solution, void* user_data) {
|
||||||
main_context_t* main_ctx = (main_context_t*)user_data;
|
main_context_t* main_ctx = (main_context_t*)user_data;
|
||||||
@@ -293,9 +398,10 @@ static void verbose_pow_callback(int current_difficulty, uint64_t nonce, void* u
|
|||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mining thread function - New callback-based approach
|
// Mining thread function - Enhanced with exit status monitoring
|
||||||
static void* miner_thread(void* arg) {
|
static void* miner_thread(void* arg) {
|
||||||
mining_context_t* ctx = (mining_context_t*)arg;
|
mining_context_t* ctx = (mining_context_t*)arg;
|
||||||
|
void* exit_status = (void*)(intptr_t)THREAD_EXIT_ERROR; // Default to error
|
||||||
|
|
||||||
// Initialize thread-specific timing for verbose mode
|
// Initialize thread-specific timing for verbose mode
|
||||||
ctx->thread_start_time = time(NULL);
|
ctx->thread_start_time = time(NULL);
|
||||||
@@ -304,20 +410,22 @@ static void* miner_thread(void* arg) {
|
|||||||
// Create a copy of the event for this thread
|
// Create a copy of the event for this thread
|
||||||
char* event_str = cJSON_Print(ctx->event);
|
char* event_str = cJSON_Print(ctx->event);
|
||||||
if (!event_str) {
|
if (!event_str) {
|
||||||
return NULL;
|
log_thread_exit(ctx->thread_id, exit_status, "JSON serialization failed");
|
||||||
|
return exit_status;
|
||||||
}
|
}
|
||||||
|
|
||||||
cJSON* local_event = cJSON_Parse(event_str);
|
cJSON* local_event = cJSON_Parse(event_str);
|
||||||
free(event_str);
|
free(event_str);
|
||||||
|
|
||||||
if (!local_event) {
|
if (!local_event) {
|
||||||
return NULL;
|
log_thread_exit(ctx->thread_id, exit_status, "Event parsing failed");
|
||||||
|
return exit_status;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t attempts = 0;
|
uint64_t attempts = 0;
|
||||||
|
|
||||||
// Mine until solution found or signaled to stop by main thread
|
// Mine until solution found or signaled to stop by main thread
|
||||||
while (!ctx->should_stop) {
|
while (!ctx->should_stop && !g_shutdown_requested) {
|
||||||
// Attempt mining with verbose callback if enabled
|
// Attempt mining with verbose callback if enabled
|
||||||
void (*progress_cb)(int, uint64_t, void*) = ctx->verbose_enabled ? verbose_pow_callback : NULL;
|
void (*progress_cb)(int, uint64_t, void*) = ctx->verbose_enabled ? verbose_pow_callback : NULL;
|
||||||
|
|
||||||
@@ -334,20 +442,37 @@ static void* miner_thread(void* arg) {
|
|||||||
if (ctx->solution_callback) {
|
if (ctx->solution_callback) {
|
||||||
ctx->solution_callback(local_event, ctx->user_data);
|
ctx->solution_callback(local_event, ctx->user_data);
|
||||||
}
|
}
|
||||||
|
exit_status = (void*)(intptr_t)THREAD_EXIT_SUCCESS;
|
||||||
|
log_thread_exit(ctx->thread_id, exit_status, "Solution found");
|
||||||
break; // Exit after reporting solution
|
break; // Exit after reporting solution
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check for emergency shutdown
|
||||||
|
if (g_shutdown_requested) {
|
||||||
|
exit_status = (void*)(intptr_t)THREAD_EXIT_STOPPED;
|
||||||
|
log_thread_exit(ctx->thread_id, exit_status, "Emergency shutdown");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// Small delay to prevent CPU overuse and allow responsive stopping
|
// Small delay to prevent CPU overuse and allow responsive stopping
|
||||||
usleep(100); // 0.1ms - more responsive to should_stop signal
|
usleep(100); // 0.1ms - more responsive to should_stop signal
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Normal stop by main thread
|
||||||
|
if (ctx->should_stop && !g_shutdown_requested) {
|
||||||
|
exit_status = (void*)(intptr_t)THREAD_EXIT_STOPPED;
|
||||||
|
log_thread_exit(ctx->thread_id, exit_status, "Stopped by main thread");
|
||||||
|
}
|
||||||
|
|
||||||
cJSON_Delete(local_event);
|
cJSON_Delete(local_event);
|
||||||
return NULL;
|
return exit_status;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Main mining function - New hub-and-spoke model
|
// Main mining function - Enhanced with debugging and monitoring
|
||||||
static int mine_event(mining_context_t* ctx) {
|
static int mine_event(mining_context_t* ctx) {
|
||||||
|
// Install signal handlers for crash detection
|
||||||
|
install_signal_handlers();
|
||||||
|
|
||||||
// Set up main context for centralized control
|
// Set up main context for centralized control
|
||||||
main_context_t main_ctx;
|
main_context_t main_ctx;
|
||||||
memset(&main_ctx, 0, sizeof(main_context_t));
|
memset(&main_ctx, 0, sizeof(main_context_t));
|
||||||
@@ -386,7 +511,13 @@ static int mine_event(mining_context_t* ctx) {
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set up global debugging variables
|
||||||
|
g_thread_handles = threads;
|
||||||
|
g_thread_count = ctx->thread_count;
|
||||||
|
g_worker_contexts = worker_contexts;
|
||||||
|
|
||||||
time_t start_time = time(NULL);
|
time_t start_time = time(NULL);
|
||||||
|
printf("[DEBUG] Starting %d mining threads...\n", ctx->thread_count);
|
||||||
|
|
||||||
// Start threads
|
// Start threads
|
||||||
for (int i = 0; i < ctx->thread_count; i++) {
|
for (int i = 0; i < ctx->thread_count; i++) {
|
||||||
@@ -398,18 +529,21 @@ static int mine_event(mining_context_t* ctx) {
|
|||||||
}
|
}
|
||||||
// Wait for threads that were created
|
// Wait for threads that were created
|
||||||
for (int j = 0; j < i; j++) {
|
for (int j = 0; j < i; j++) {
|
||||||
pthread_join(threads[j], NULL);
|
void* exit_status;
|
||||||
|
pthread_join(threads[j], &exit_status);
|
||||||
|
log_thread_exit(j, exit_status, "Cleanup after creation failure");
|
||||||
}
|
}
|
||||||
free(threads);
|
free(threads);
|
||||||
free(worker_contexts);
|
free(worker_contexts);
|
||||||
pthread_mutex_destroy(&main_ctx.result_mutex);
|
pthread_mutex_destroy(&main_ctx.result_mutex);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
printf("[DEBUG] Thread %d started successfully\n", i);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Main thread control loop - centralized monitoring
|
// Main thread control loop - centralized monitoring
|
||||||
int result = 0;
|
int result = 0;
|
||||||
while (!main_ctx.solution_found && !main_ctx.timeout_reached) {
|
while (!main_ctx.solution_found && !main_ctx.timeout_reached && !g_shutdown_requested) {
|
||||||
// Check for timeout
|
// Check for timeout
|
||||||
if (ctx->timeout_seconds > 0) {
|
if (ctx->timeout_seconds > 0) {
|
||||||
time_t current_time = time(NULL);
|
time_t current_time = time(NULL);
|
||||||
@@ -420,30 +554,56 @@ static int mine_event(mining_context_t* ctx) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check for signals
|
||||||
|
if (g_signal_received) {
|
||||||
|
printf("[DEBUG] Signal received, shutting down...\n");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// Small sleep to avoid busy waiting
|
// Small sleep to avoid busy waiting
|
||||||
usleep(10000); // 10ms
|
usleep(10000); // 10ms
|
||||||
}
|
}
|
||||||
|
|
||||||
// Signal all workers to stop
|
// Signal all workers to stop
|
||||||
|
printf("[DEBUG] Signaling threads to stop...\n");
|
||||||
for (int i = 0; i < ctx->thread_count; i++) {
|
for (int i = 0; i < ctx->thread_count; i++) {
|
||||||
worker_contexts[i].should_stop = 1;
|
worker_contexts[i].should_stop = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait for all threads to finish
|
// Wait for all threads to finish and capture exit statuses
|
||||||
|
printf("[DEBUG] Waiting for threads to finish...\n");
|
||||||
for (int i = 0; i < ctx->thread_count; i++) {
|
for (int i = 0; i < ctx->thread_count; i++) {
|
||||||
pthread_join(threads[i], NULL);
|
void* exit_status;
|
||||||
|
pthread_join(threads[i], &exit_status);
|
||||||
|
|
||||||
|
// Log exit status if it wasn't already logged by the thread
|
||||||
|
long status_code = (long)exit_status;
|
||||||
|
if (status_code == THREAD_EXIT_ERROR) {
|
||||||
|
log_thread_exit(i, exit_status, "Thread error (not previously logged)");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle results
|
// Handle results
|
||||||
if (main_ctx.solution_found && main_ctx.result_event) {
|
if (main_ctx.solution_found && main_ctx.result_event) {
|
||||||
ctx->result_event = main_ctx.result_event; // Transfer ownership
|
ctx->result_event = main_ctx.result_event; // Transfer ownership
|
||||||
result = 1; // Success
|
result = 1; // Success
|
||||||
|
printf("[DEBUG] Solution found successfully\n");
|
||||||
} else if (main_ctx.timeout_reached) {
|
} else if (main_ctx.timeout_reached) {
|
||||||
result = -1; // Timeout
|
result = -1; // Timeout
|
||||||
|
printf("[DEBUG] Mining timed out\n");
|
||||||
|
} else if (g_shutdown_requested || g_signal_received) {
|
||||||
|
result = -3; // Signal/emergency shutdown
|
||||||
|
printf("[DEBUG] Emergency shutdown completed\n");
|
||||||
} else {
|
} else {
|
||||||
result = -2; // Error
|
result = -2; // Error
|
||||||
|
printf("[DEBUG] Mining failed with error\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Clear global debugging variables
|
||||||
|
g_thread_handles = NULL;
|
||||||
|
g_thread_count = 0;
|
||||||
|
g_worker_contexts = NULL;
|
||||||
|
|
||||||
// Cleanup
|
// Cleanup
|
||||||
free(threads);
|
free(threads);
|
||||||
free(worker_contexts);
|
free(worker_contexts);
|
||||||
|
|||||||
Reference in New Issue
Block a user