v0.7.34 - We seem to have maybe finally fixed the monitoring error?

This commit is contained in:
Your Name
2025-10-22 10:19:43 -04:00
parent 9cb9b746d8
commit 9179d57cc9
21 changed files with 2877 additions and 503 deletions

View File

@@ -108,6 +108,136 @@ struct subscription_manager g_subscription_manager;
// Message queue functions for proper libwebsockets pattern
/**
 * Queue a message for WebSocket writing following the libwebsockets write pattern.
 *
 * The message is copied into a freshly allocated buffer that reserves LWS_PRE
 * bytes in front of the payload, appended to the per-session queue, and a
 * writeable callback is requested if one is not already marked as pending.
 * The caller keeps ownership of `message`; the queue owns its private copy.
 *
 * All queue state (head, tail, count, writeable_requested) is read and
 * modified only while holding pss->session_lock.
 *
 * @param wsi     WebSocket instance
 * @param pss     Per-session data containing the message queue
 * @param message Message bytes to write (copied; need not be NUL-terminated)
 * @param length  Number of bytes in message (must be non-zero)
 * @param type    LWS_WRITE_* type (LWS_WRITE_TEXT, etc.)
 * @return 0 on success, -1 on invalid parameters or allocation failure
 */
int queue_message(struct lws* wsi, struct per_session_data* pss, const char* message, size_t length, enum lws_write_protocol type) {
    if (!wsi || !pss || !message || length == 0) {
        DEBUG_ERROR("queue_message: invalid parameters");
        return -1;
    }

    // Allocate message queue node
    struct message_queue_node* node = malloc(sizeof *node);
    if (!node) {
        DEBUG_ERROR("queue_message: failed to allocate queue node");
        return -1;
    }

    // Allocate buffer with LWS_PRE headroom in front of the payload
    size_t buffer_size = LWS_PRE + length;
    unsigned char* buffer = malloc(buffer_size);
    if (!buffer) {
        DEBUG_ERROR("queue_message: failed to allocate message buffer");
        free(node);
        return -1;
    }

    // Copy the payload after the LWS_PRE headroom
    memcpy(buffer + LWS_PRE, message, length);

    node->data = buffer;
    node->length = length;
    node->type = type;
    node->next = NULL;

    // Append to the queue and update the callback flag in one critical section
    pthread_mutex_lock(&pss->session_lock);
    if (!pss->message_queue_head) {
        // Queue was empty
        pss->message_queue_head = node;
        pss->message_queue_tail = node;
    } else {
        // Add to end of queue
        pss->message_queue_tail->next = node;
        pss->message_queue_tail = node;
    }
    pss->message_queue_count++;

    // Test-and-set the flag under the lock: process_message_queue() clears it
    // under the same lock, so an unlocked check here would be a data race.
    int need_callback = !pss->writeable_requested;
    if (need_callback) {
        pss->writeable_requested = 1;
    }
    // Snapshot for logging so the trace does not read shared state unlocked
    // (assumes message_queue_count is an int, matching the %d below).
    int queued_count = pss->message_queue_count;
    pthread_mutex_unlock(&pss->session_lock);

    // Request writeable callback (only if not already requested)
    if (need_callback) {
        lws_callback_on_writable(wsi);
    }

    DEBUG_TRACE("Queued message: len=%zu, queue_count=%d", length, queued_count);
    return 0;
}
/**
 * Process the message queue when the socket becomes writeable.
 * This function is called from LWS_CALLBACK_SERVER_WRITEABLE.
 *
 * At most one queued message is written per call. The node is detached from
 * the queue under pss->session_lock, written with lws_write(), and then freed
 * regardless of the write result. If more messages remain, another writeable
 * callback is requested; otherwise writeable_requested is cleared so the next
 * queue_message() call re-arms the callback.
 *
 * @param wsi WebSocket instance
 * @param pss Per-session data containing the message queue
 * @return 0 on success (including an empty queue), -1 on invalid parameters
 *         or when lws_write() reports an error
 */
int process_message_queue(struct lws* wsi, struct per_session_data* pss) {
    if (!wsi || !pss) {
        DEBUG_ERROR("process_message_queue: invalid parameters");
        return -1;
    }

    // Detach the next message from the queue (thread-safe)
    pthread_mutex_lock(&pss->session_lock);
    struct message_queue_node* node = pss->message_queue_head;
    if (!node) {
        // Queue is empty: nothing pending, so allow a future request
        pss->writeable_requested = 0;
        pthread_mutex_unlock(&pss->session_lock);
        return 0;
    }
    pss->message_queue_head = node->next;
    if (!pss->message_queue_head) {
        pss->message_queue_tail = NULL;
    }
    pss->message_queue_count--;
    pthread_mutex_unlock(&pss->session_lock);

    // Write message (libwebsockets handles partial writes internally)
    int write_result = lws_write(wsi, node->data + LWS_PRE, node->length, node->type);

    // The node is owned by this function once detached; release it either way
    free(node->data);
    free(node);

    // Post-write bookkeeping happens in a single critical section so the flag
    // and the logged count are never read or written without the lock.
    pthread_mutex_lock(&pss->session_lock);
    // Assumes message_queue_count is an int, matching the %d below.
    int remaining = pss->message_queue_count;
    int request_again = 0;
    if (write_result < 0) {
        // Do not leave the flag stuck at 1 after a failed write: no callback
        // is requested here, so a later queue_message() must be able to
        // request one instead of silently stalling the queue.
        pss->writeable_requested = 0;
    } else if (pss->message_queue_head) {
        // More messages pending: keep the flag set and ask for another callback
        request_again = 1;
    } else {
        pss->writeable_requested = 0;
    }
    pthread_mutex_unlock(&pss->session_lock);

    if (write_result < 0) {
        DEBUG_ERROR("process_message_queue: write failed, result=%d", write_result);
        return -1;
    }

    DEBUG_TRACE("Processed message: wrote %d bytes, remaining in queue: %d", write_result, remaining);

    // If queue not empty, request another callback
    if (request_again) {
        lws_callback_on_writable(wsi);
    }
    return 0;
}
/////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////
// WEBSOCKET PROTOCOL
@@ -719,16 +849,22 @@ static int nostr_relay_callback(struct lws *wsi, enum lws_callback_reasons reaso
cJSON_AddItemToArray(response, cJSON_CreateString(cJSON_GetStringValue(event_id)));
cJSON_AddItemToArray(response, cJSON_CreateBool(result == 0));
cJSON_AddItemToArray(response, cJSON_CreateString(strlen(error_message) > 0 ? error_message : ""));
char *response_str = cJSON_Print(response);
if (response_str) {
size_t response_len = strlen(response_str);
unsigned char *buf = malloc(LWS_PRE + response_len);
if (buf) {
memcpy(buf + LWS_PRE, response_str, response_len);
lws_write(wsi, buf + LWS_PRE, response_len, LWS_WRITE_TEXT);
free(buf);
// DEBUG: Log WebSocket frame details before sending
DEBUG_TRACE("WS_FRAME_SEND: type=OK len=%zu data=%.100s%s",
response_len,
response_str,
response_len > 100 ? "..." : "");
// Queue message for proper libwebsockets pattern
if (queue_message(wsi, pss, response_str, response_len, LWS_WRITE_TEXT) != 0) {
DEBUG_ERROR("Failed to queue OK response message");
}
free(response_str);
}
cJSON_Delete(response);
@@ -823,12 +959,18 @@ static int nostr_relay_callback(struct lws *wsi, enum lws_callback_reasons reaso
char *eose_str = cJSON_Print(eose_response);
if (eose_str) {
size_t eose_len = strlen(eose_str);
unsigned char *buf = malloc(LWS_PRE + eose_len);
if (buf) {
memcpy(buf + LWS_PRE, eose_str, eose_len);
lws_write(wsi, buf + LWS_PRE, eose_len, LWS_WRITE_TEXT);
free(buf);
// DEBUG: Log WebSocket frame details before sending
DEBUG_TRACE("WS_FRAME_SEND: type=EOSE len=%zu data=%.100s%s",
eose_len,
eose_str,
eose_len > 100 ? "..." : "");
// Queue message for proper libwebsockets pattern
if (queue_message(wsi, pss, eose_str, eose_len, LWS_WRITE_TEXT) != 0) {
DEBUG_ERROR("Failed to queue EOSE message");
}
free(eose_str);
}
cJSON_Delete(eose_response);
@@ -908,9 +1050,22 @@ static int nostr_relay_callback(struct lws *wsi, enum lws_callback_reasons reaso
return 0;
}
// CRITICAL FIX: Remove from session list FIRST (while holding lock)
// to prevent race condition where global manager frees the subscription
// while we're still iterating through the session list
// CRITICAL FIX: Mark subscription as inactive in global manager FIRST
// This prevents other threads from accessing it during removal
pthread_mutex_lock(&g_subscription_manager.subscriptions_lock);
subscription_t* target_sub = g_subscription_manager.active_subscriptions;
while (target_sub) {
if (strcmp(target_sub->id, subscription_id) == 0 && target_sub->wsi == wsi) {
target_sub->active = 0; // Mark as inactive immediately
break;
}
target_sub = target_sub->next;
}
pthread_mutex_unlock(&g_subscription_manager.subscriptions_lock);
// Now safe to remove from session list
if (pss) {
pthread_mutex_lock(&pss->session_lock);
@@ -928,8 +1083,7 @@ static int nostr_relay_callback(struct lws *wsi, enum lws_callback_reasons reaso
pthread_mutex_unlock(&pss->session_lock);
}
// Remove from global manager AFTER removing from session list
// This prevents use-after-free when iterating session subscriptions
// Finally remove from global manager (which will free it)
remove_subscription_from_manager(subscription_id, wsi);
// Subscription closed
@@ -972,6 +1126,13 @@ static int nostr_relay_callback(struct lws *wsi, enum lws_callback_reasons reaso
}
break;
case LWS_CALLBACK_SERVER_WRITEABLE:
// Handle message queue when socket becomes writeable
if (pss) {
process_message_queue(wsi, pss);
}
break;
case LWS_CALLBACK_CLOSED:
DEBUG_TRACE("WebSocket connection closed");
@@ -1005,20 +1166,66 @@ static int nostr_relay_callback(struct lws *wsi, enum lws_callback_reasons reaso
auth_status,
reason);
// Clean up session subscriptions
// Clean up message queue to prevent memory leaks
while (pss->message_queue_head) {
struct message_queue_node* node = pss->message_queue_head;
pss->message_queue_head = node->next;
free(node->data);
free(node);
}
pss->message_queue_tail = NULL;
pss->message_queue_count = 0;
pss->writeable_requested = 0;
// Clean up session subscriptions - copy IDs first to avoid use-after-free
pthread_mutex_lock(&pss->session_lock);
// First pass: collect subscription IDs safely
typedef struct temp_sub_id {
char id[SUBSCRIPTION_ID_MAX_LENGTH];
struct temp_sub_id* next;
} temp_sub_id_t;
temp_sub_id_t* temp_ids = NULL;
temp_sub_id_t* temp_tail = NULL;
int temp_count = 0;
struct subscription* sub = pss->subscriptions;
while (sub) {
struct subscription* next = sub->session_next;
remove_subscription_from_manager(sub->id, wsi);
sub = next;
if (sub->active) { // Only process active subscriptions
temp_sub_id_t* temp = malloc(sizeof(temp_sub_id_t));
if (temp) {
memcpy(temp->id, sub->id, SUBSCRIPTION_ID_MAX_LENGTH);
temp->id[SUBSCRIPTION_ID_MAX_LENGTH - 1] = '\0';
temp->next = NULL;
if (!temp_ids) {
temp_ids = temp;
temp_tail = temp;
} else {
temp_tail->next = temp;
temp_tail = temp;
}
temp_count++;
}
}
sub = sub->session_next;
}
// Clear session list immediately
pss->subscriptions = NULL;
pss->subscription_count = 0;
pthread_mutex_unlock(&pss->session_lock);
// Second pass: remove from global manager using copied IDs
temp_sub_id_t* current_temp = temp_ids;
while (current_temp) {
temp_sub_id_t* next_temp = current_temp->next;
remove_subscription_from_manager(current_temp->id, wsi);
free(current_temp);
current_temp = next_temp;
}
pthread_mutex_destroy(&pss->session_lock);
} else {
DEBUG_LOG("WebSocket CLOSED: ip=unknown duration=0s subscriptions=0 authenticated=no reason=unknown");
@@ -1685,12 +1892,18 @@ int handle_count_message(const char* sub_id, cJSON* filters, struct lws *wsi, st
char *count_str = cJSON_Print(count_response);
if (count_str) {
size_t count_len = strlen(count_str);
unsigned char *buf = malloc(LWS_PRE + count_len);
if (buf) {
memcpy(buf + LWS_PRE, count_str, count_len);
lws_write(wsi, buf + LWS_PRE, count_len, LWS_WRITE_TEXT);
free(buf);
// DEBUG: Log WebSocket frame details before sending
DEBUG_TRACE("WS_FRAME_SEND: type=COUNT len=%zu data=%.100s%s",
count_len,
count_str,
count_len > 100 ? "..." : "");
// Queue message for proper libwebsockets pattern
if (queue_message(wsi, pss, count_str, count_len, LWS_WRITE_TEXT) != 0) {
DEBUG_ERROR("Failed to queue COUNT message");
}
free(count_str);
}
cJSON_Delete(count_response);