Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f272264960 | ||
|
|
cb3171b390 | ||
|
|
03f036d60d |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -11,4 +11,3 @@ copy_executable_local.sh
|
||||
nostr_login_lite/
|
||||
style_guide/
|
||||
nostr-tools
|
||||
|
||||
|
||||
532
docs/subscription_cleanup_simplified.md
Normal file
532
docs/subscription_cleanup_simplified.md
Normal file
@@ -0,0 +1,532 @@
|
||||
# Subscription Cleanup - Simplified Design
|
||||
|
||||
## Problem Summary
|
||||
|
||||
The c-relay Nostr relay experienced severe performance degradation (90-100% CPU) due to subscription accumulation in the database. Investigation revealed **323,644 orphaned subscriptions** that were never properly closed when WebSocket connections dropped.
|
||||
|
||||
## Solution: Two-Component Approach
|
||||
|
||||
This simplified design focuses on two pragmatic solutions that align with Nostr's stateless design:
|
||||
|
||||
1. **Startup Cleanup**: Close all subscriptions on relay restart
|
||||
2. **Connection Age Limit**: Disconnect clients after a configurable time period
|
||||
|
||||
Both solutions force clients to reconnect and re-establish subscriptions, which is standard Nostr behavior.
|
||||
|
||||
---
|
||||
|
||||
## Component 1: Startup Cleanup
|
||||
|
||||
### Purpose
|
||||
Ensure clean state on every relay restart by closing all subscriptions in the database.
|
||||
|
||||
### Implementation
|
||||
|
||||
**File:** [`src/subscriptions.c`](src/subscriptions.c)
|
||||
|
||||
**New Function:**
|
||||
```c
|
||||
void cleanup_all_subscriptions_on_startup(void) {
|
||||
if (!g_db) {
|
||||
DEBUG_ERROR("Database not initialized for startup cleanup");
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_LOG("Startup cleanup: Marking all active subscriptions as disconnected");
|
||||
|
||||
// Mark all 'created' subscriptions as disconnected
|
||||
const char* update_sql =
|
||||
"UPDATE subscriptions "
|
||||
"SET ended_at = strftime('%s', 'now') "
|
||||
"WHERE event_type = 'created' AND ended_at IS NULL";
|
||||
|
||||
sqlite3_stmt* stmt;
|
||||
int rc = sqlite3_prepare_v2(g_db, update_sql, -1, &stmt, NULL);
|
||||
if (rc != SQLITE_OK) {
|
||||
DEBUG_ERROR("Failed to prepare startup cleanup query: %s", sqlite3_errmsg(g_db));
|
||||
return;
|
||||
}
|
||||
|
||||
rc = sqlite3_step(stmt);
|
||||
int updated_count = sqlite3_changes(g_db);
|
||||
sqlite3_finalize(stmt);
|
||||
|
||||
if (updated_count > 0) {
|
||||
// Log a single 'disconnected' event for the startup cleanup
|
||||
const char* insert_sql =
|
||||
"INSERT INTO subscriptions (subscription_id, wsi_pointer, client_ip, event_type) "
|
||||
"VALUES ('startup_cleanup', '', 'system', 'disconnected')";
|
||||
|
||||
rc = sqlite3_prepare_v2(g_db, insert_sql, -1, &stmt, NULL);
|
||||
if (rc == SQLITE_OK) {
|
||||
sqlite3_step(stmt);
|
||||
sqlite3_finalize(stmt);
|
||||
}
|
||||
|
||||
DEBUG_LOG("Startup cleanup: Marked %d subscriptions as disconnected", updated_count);
|
||||
} else {
|
||||
DEBUG_LOG("Startup cleanup: No active subscriptions found");
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Integration Point:** [`src/main.c:1810`](src/main.c:1810)
|
||||
|
||||
```c
|
||||
// Initialize subscription manager mutexes
|
||||
if (pthread_mutex_init(&g_subscription_manager.subscriptions_lock, NULL) != 0) {
|
||||
DEBUG_ERROR("Failed to initialize subscriptions mutex");
|
||||
sqlite3_close(g_db);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (pthread_mutex_init(&g_subscription_manager.ip_tracking_lock, NULL) != 0) {
|
||||
DEBUG_ERROR("Failed to initialize IP tracking mutex");
|
||||
pthread_mutex_destroy(&g_subscription_manager.subscriptions_lock);
|
||||
sqlite3_close(g_db);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// **NEW: Startup cleanup - close all subscriptions**
|
||||
cleanup_all_subscriptions_on_startup();
|
||||
|
||||
// Start WebSocket relay server
|
||||
DEBUG_LOG("Starting WebSocket relay server...");
|
||||
if (start_websocket_relay(port_override, strict_port) != 0) {
|
||||
DEBUG_ERROR("Failed to start WebSocket relay");
|
||||
// ... cleanup code
|
||||
}
|
||||
```
|
||||
|
||||
### Benefits
|
||||
- **Immediate relief**: Restart relay to fix subscription issues
|
||||
- **Clean slate**: Every restart starts with zero active subscriptions
|
||||
- **Simple**: Single SQL UPDATE statement
|
||||
- **Nostr-aligned**: Clients are designed to reconnect after relay restart
|
||||
- **No configuration needed**: Always runs on startup
|
||||
|
||||
---
|
||||
|
||||
## Component 2: Connection Age Limit
|
||||
|
||||
### Purpose
|
||||
Automatically disconnect clients after a configurable time period, forcing them to reconnect and re-establish subscriptions.
|
||||
|
||||
### Why Disconnect Instead of Just Closing Subscriptions?
|
||||
|
||||
**Option 1: Send CLOSED message (keep connection)**
|
||||
- ❌ Not all clients handle `CLOSED` messages properly
|
||||
- ❌ Silent failure - client thinks it's subscribed but isn't
|
||||
- ❌ Partial cleanup - connection still consumes resources
|
||||
- ❌ More complex to implement
|
||||
|
||||
**Option 2: Disconnect client entirely (force reconnection)** ✅
|
||||
- ✅ Universal compatibility - all clients handle WebSocket reconnection
|
||||
- ✅ Complete resource cleanup (memory, file descriptors, etc.)
|
||||
- ✅ Simple implementation - single operation
|
||||
- ✅ Well-tested code path (same as network interruptions)
|
||||
- ✅ Forces re-authentication if needed
|
||||
|
||||
### Implementation
|
||||
|
||||
**File:** [`src/websockets.c`](src/websockets.c)
|
||||
|
||||
**New Function:**
|
||||
```c
|
||||
/**
|
||||
* Check connection age and disconnect clients that have been connected too long.
|
||||
* This forces clients to reconnect and re-establish subscriptions.
|
||||
*
|
||||
* Uses libwebsockets' lws_vhost_foreach_wsi() to iterate through all active
|
||||
* connections and checks their connection_established timestamp from per_session_data.
|
||||
*/
|
||||
void check_connection_age(int max_connection_seconds) {
|
||||
if (max_connection_seconds <= 0 || !ws_context) {
|
||||
return;
|
||||
}
|
||||
|
||||
time_t now = time(NULL);
|
||||
time_t cutoff = now - max_connection_seconds;
|
||||
|
||||
// Get the default vhost
|
||||
struct lws_vhost *vhost = lws_get_vhost_by_name(ws_context, "default");
|
||||
if (!vhost) {
|
||||
DEBUG_ERROR("Failed to get vhost for connection age check");
|
||||
return;
|
||||
}
|
||||
|
||||
// Iterate through all active WebSocket connections
|
||||
// Note: lws_vhost_foreach_wsi() calls our callback for each connection
|
||||
struct lws *wsi = NULL;
|
||||
while ((wsi = lws_vhost_foreach_wsi(vhost, wsi)) != NULL) {
|
||||
// Get per-session data which contains connection_established timestamp
|
||||
struct per_session_data *pss = (struct per_session_data *)lws_wsi_user(wsi);
|
||||
|
||||
if (pss && pss->connection_established > 0) {
|
||||
// Check if connection is older than cutoff
|
||||
if (pss->connection_established < cutoff) {
|
||||
// Connection is too old - close it
|
||||
long age_seconds = now - pss->connection_established;
|
||||
|
||||
DEBUG_LOG("Closing connection from %s (age: %lds, limit: %ds)",
|
||||
pss->client_ip, age_seconds, max_connection_seconds);
|
||||
|
||||
// Close with normal status and reason message
|
||||
lws_close_reason(wsi, LWS_CLOSE_STATUS_NORMAL,
|
||||
(unsigned char *)"connection age limit", 21);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Key Implementation Details:**
|
||||
|
||||
1. **No database needed**: Active connections are tracked by libwebsockets itself
|
||||
2. **Uses existing timestamp**: `pss->connection_established` is already set on line 456 of websockets.c
|
||||
3. **Built-in iterator**: `lws_vhost_foreach_wsi()` safely iterates through all active connections
|
||||
4. **Per-session data**: Each connection's `per_session_data` is accessible via `lws_wsi_user()`
|
||||
5. **Safe closure**: `lws_close_reason()` properly closes the WebSocket with a status code and message
|
||||
|
||||
**Integration Point:** [`src/websockets.c:2176`](src/websockets.c:2176) - in existing event loop
|
||||
|
||||
```c
|
||||
// Main event loop with proper signal handling
|
||||
while (g_server_running && !g_shutdown_flag) {
|
||||
int result = lws_service(ws_context, 1000);
|
||||
|
||||
if (result < 0) {
|
||||
DEBUG_ERROR("libwebsockets service error");
|
||||
break;
|
||||
}
|
||||
|
||||
// Check if it's time to post status update
|
||||
time_t current_time = time(NULL);
|
||||
int status_post_hours = get_config_int("kind_1_status_posts_hours", 0);
|
||||
|
||||
if (status_post_hours > 0) {
|
||||
int seconds_interval = status_post_hours * 3600;
|
||||
if (current_time - last_status_post_time >= seconds_interval) {
|
||||
last_status_post_time = current_time;
|
||||
generate_and_post_status_event();
|
||||
}
|
||||
}
|
||||
|
||||
// **NEW: Check for connection age limit**
|
||||
int max_connection_seconds = get_config_int("max_connection_seconds", 86400); // Default 24 hours
|
||||
if (max_connection_seconds > 0) {
|
||||
check_connection_age(max_connection_seconds);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
**Parameter:** `max_connection_seconds`
|
||||
- **Default:** `86400` (24 hours)
|
||||
- **Range:** `0` = disabled, `>0` = disconnect after X seconds
|
||||
- **Units:** Seconds (for consistency with other time-based configs)
|
||||
|
||||
**Example configurations:**
|
||||
```json
|
||||
{
|
||||
"max_connection_seconds": 86400 // 86400 seconds = 24 hours (default)
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"max_connection_seconds": 43200 // 43200 seconds = 12 hours
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"max_connection_seconds": 3600 // 3600 seconds = 1 hour
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"max_connection_seconds": 0 // Disabled
|
||||
}
|
||||
```
|
||||
|
||||
### Client Behavior
|
||||
|
||||
When disconnected due to age limit, clients will:
|
||||
1. Detect WebSocket closure
|
||||
2. Wait briefly (exponential backoff)
|
||||
3. Reconnect to relay
|
||||
4. Re-authenticate if needed (NIP-42)
|
||||
5. Re-establish all subscriptions
|
||||
6. Resume normal operation
|
||||
|
||||
This is **exactly what happens** during network interruptions, so it's a well-tested code path in all Nostr clients.
|
||||
|
||||
### Benefits
|
||||
- **No new threads**: Uses existing event loop
|
||||
- **Minimal overhead**: Check runs once per second (same as `lws_service`)
|
||||
- **Simple implementation**: Iterate through active connections
|
||||
- **Consistent pattern**: Matches existing status post checking
|
||||
- **Universal compatibility**: All clients handle reconnection
|
||||
- **Complete cleanup**: Frees all resources associated with connection
|
||||
- **Configurable**: Can be adjusted per relay needs or disabled entirely
|
||||
|
||||
---
|
||||
|
||||
## Implementation Plan
|
||||
|
||||
### Phase 1: Startup Cleanup (1-2 hours)
|
||||
|
||||
1. **Add `cleanup_all_subscriptions_on_startup()` function**
|
||||
- File: [`src/subscriptions.c`](src/subscriptions.c)
|
||||
- SQL UPDATE to mark all active subscriptions as disconnected
|
||||
- Add logging for cleanup count
|
||||
|
||||
2. **Integrate in main()**
|
||||
- File: [`src/main.c:1810`](src/main.c:1810)
|
||||
- Call after mutex initialization, before WebSocket server start
|
||||
|
||||
3. **Test**
|
||||
- Create subscriptions in database
|
||||
- Restart relay
|
||||
- Verify all subscriptions marked as disconnected
|
||||
- Verify `active_subscriptions_log` shows 0 subscriptions
|
||||
|
||||
**Estimated Time:** 1-2 hours
|
||||
|
||||
### Phase 2: Connection Age Limit (2-3 hours)
|
||||
|
||||
1. **Add `check_connection_age()` function**
|
||||
- File: [`src/websockets.c`](src/websockets.c)
|
||||
- Iterate through active connections
|
||||
- Close connections older than limit
|
||||
|
||||
2. **Integrate in event loop**
|
||||
- File: [`src/websockets.c:2176`](src/websockets.c:2176)
|
||||
- Add check after status post check
|
||||
- Use same pattern as status posts
|
||||
|
||||
3. **Add configuration parameter**
|
||||
- Add `max_connection_seconds` to default config
|
||||
- Default: 86400 (24 hours)
|
||||
|
||||
4. **Test**
|
||||
- Connect client
|
||||
- Wait for timeout (or reduce timeout for testing)
|
||||
- Verify client disconnected
|
||||
- Verify client reconnects automatically
|
||||
- Verify subscriptions re-established
|
||||
|
||||
**Estimated Time:** 2-3 hours
|
||||
|
||||
---
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
### Startup Cleanup Tests
|
||||
|
||||
```bash
|
||||
# Test 1: Clean startup with existing subscriptions
|
||||
- Create 100 active subscriptions in database
|
||||
- Restart relay
|
||||
- Verify all subscriptions marked as disconnected
|
||||
- Verify active_subscriptions_log shows 0 subscriptions
|
||||
|
||||
# Test 2: Clean startup with no subscriptions
|
||||
- Start relay with empty database
|
||||
- Verify no errors
|
||||
- Verify startup cleanup logs "No active subscriptions found"
|
||||
|
||||
# Test 3: Clients reconnect after restart
|
||||
- Create subscriptions before restart
|
||||
- Restart relay
|
||||
- Connect clients and create new subscriptions
|
||||
- Verify new subscriptions tracked correctly
|
||||
```
|
||||
|
||||
### Connection Age Limit Tests
|
||||
|
||||
```bash
|
||||
# Test 1: Connection disconnected after timeout
|
||||
- Set max_connection_seconds to 60 (for testing)
|
||||
- Connect client
|
||||
- Wait 61 seconds
|
||||
- Verify client disconnected
|
||||
- Verify client reconnects automatically
|
||||
|
||||
# Test 2: Subscriptions re-established after reconnection
|
||||
- Connect client with subscriptions
|
||||
- Wait for timeout
|
||||
- Verify client reconnects
|
||||
- Verify subscriptions re-established
|
||||
- Verify events still delivered
|
||||
|
||||
# Test 3: Disabled when set to 0
|
||||
- Set max_connection_seconds to 0
|
||||
- Connect client
|
||||
- Wait extended period
|
||||
- Verify client NOT disconnected
|
||||
```
|
||||
|
||||
### Integration Tests
|
||||
|
||||
```bash
|
||||
# Test 1: Combined behavior
|
||||
- Start relay (startup cleanup runs)
|
||||
- Connect multiple clients
|
||||
- Create subscriptions
|
||||
- Wait for connection timeout
|
||||
- Verify clients reconnect
|
||||
- Restart relay
|
||||
- Verify clean state
|
||||
|
||||
# Test 2: Load test
|
||||
- Connect 100 clients
|
||||
- Each creates 5 subscriptions
|
||||
- Wait for connection timeout
|
||||
- Verify all clients reconnect
|
||||
- Verify all subscriptions re-established
|
||||
- Monitor CPU usage (should remain low)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Success Criteria
|
||||
|
||||
### Component 1: Startup Cleanup
|
||||
- ✅ Relay starts with zero active subscriptions
|
||||
- ✅ All previous subscriptions marked as disconnected on startup
|
||||
- ✅ Clients successfully reconnect and re-establish subscriptions
|
||||
- ✅ Relay restart can be used as emergency fix for subscription issues
|
||||
- ✅ No errors during startup cleanup process
|
||||
|
||||
### Component 2: Connection Age Limit
|
||||
- ✅ Clients disconnected after configured time period
|
||||
- ✅ Clients automatically reconnect
|
||||
- ✅ Subscriptions re-established after reconnection
|
||||
- ✅ No impact on relay performance
|
||||
- ✅ Configuration parameter works correctly (including disabled state)
|
||||
|
||||
### Overall Success
|
||||
- ✅ CPU usage remains low (<10%)
|
||||
- ✅ No orphaned subscriptions accumulate
|
||||
- ✅ Database size remains stable
|
||||
- ✅ No manual intervention required
|
||||
|
||||
---
|
||||
|
||||
## Configuration Reference
|
||||
|
||||
**New Configuration Parameters:**
|
||||
|
||||
```json
|
||||
{
|
||||
"max_connection_seconds": 86400
|
||||
}
|
||||
```
|
||||
|
||||
**Recommended Settings:**
|
||||
|
||||
- **Production:**
|
||||
- `max_connection_seconds: 86400` (24 hours)
|
||||
|
||||
- **Development:**
|
||||
- `max_connection_seconds: 3600` (1 hour for faster testing)
|
||||
|
||||
- **High-traffic:**
|
||||
- `max_connection_seconds: 43200` (12 hours)
|
||||
|
||||
- **Disabled:**
|
||||
- `max_connection_seconds: 0`
|
||||
|
||||
---
|
||||
|
||||
## Rollback Plan
|
||||
|
||||
If issues arise after deployment:
|
||||
|
||||
1. **Disable connection age limit:**
|
||||
- Set `max_connection_seconds: 0` in config
|
||||
- Restart relay
|
||||
- Monitor for stability
|
||||
|
||||
2. **Revert code changes:**
|
||||
- Remove `check_connection_age()` call from event loop
|
||||
- Remove `cleanup_all_subscriptions_on_startup()` call from main
|
||||
- Restart relay
|
||||
|
||||
3. **Database cleanup (if needed):**
|
||||
- Manually clean up orphaned subscriptions using SQL:
|
||||
```sql
|
||||
UPDATE subscriptions
|
||||
SET ended_at = strftime('%s', 'now')
|
||||
WHERE event_type = 'created' AND ended_at IS NULL;
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Comparison with Original Design
|
||||
|
||||
### Original Design (5 Components)
|
||||
1. Startup cleanup
|
||||
2. Fix WebSocket disconnection logging
|
||||
3. Enhance subscription removal with reason parameter
|
||||
4. Periodic cleanup task (background thread)
|
||||
5. Optimize database VIEW
|
||||
6. Subscription expiration (optional)
|
||||
|
||||
### Simplified Design (2 Components)
|
||||
1. Startup cleanup
|
||||
2. Connection age limit
|
||||
|
||||
### Why Simplified is Better
|
||||
|
||||
**Advantages:**
|
||||
- **Simpler**: 2 components vs 5-6 components
|
||||
- **Faster to implement**: 3-5 hours vs 11-17 hours
|
||||
- **Easier to maintain**: Less code, fewer moving parts
|
||||
- **More reliable**: Fewer potential failure points
|
||||
- **Nostr-aligned**: Leverages client reconnection behavior
|
||||
- **No new threads**: Uses existing event loop
|
||||
- **Universal compatibility**: All clients handle reconnection
|
||||
|
||||
**What We're Not Losing:**
|
||||
- Startup cleanup is identical in both designs
|
||||
- Connection age limit achieves the same goal as periodic cleanup + expiration
|
||||
- Disconnection forces complete cleanup (better than just logging)
|
||||
- Database VIEW optimization not needed if subscriptions don't accumulate
|
||||
|
||||
**Trade-offs:**
|
||||
- Less granular logging (but simpler)
|
||||
- No historical subscription analytics (but cleaner database)
|
||||
- Clients must reconnect periodically (but this is standard Nostr behavior)
|
||||
|
||||
---
|
||||
|
||||
## Conclusion
|
||||
|
||||
This simplified design solves the subscription accumulation problem with two pragmatic solutions:
|
||||
|
||||
1. **Startup cleanup** ensures every relay restart starts with a clean slate
|
||||
2. **Connection age limit** prevents long-term accumulation by forcing periodic reconnection
|
||||
|
||||
Both solutions align with Nostr's stateless design where clients are expected to handle reconnection. The implementation is simple, maintainable, and leverages existing code patterns.
|
||||
|
||||
**Key Benefits:**
|
||||
- ✅ Solves the root problem (subscription accumulation)
|
||||
- ✅ Simple to implement (3-5 hours total)
|
||||
- ✅ Easy to maintain (minimal code)
|
||||
- ✅ Universal compatibility (all clients handle reconnection)
|
||||
- ✅ No new threads or background tasks
|
||||
- ✅ Configurable and can be disabled if needed
|
||||
- ✅ Relay restart as emergency fix
|
||||
|
||||
**Next Steps:**
|
||||
1. Implement Component 1 (Startup Cleanup)
|
||||
2. Test thoroughly
|
||||
3. Implement Component 2 (Connection Age Limit)
|
||||
4. Test thoroughly
|
||||
5. Deploy to production
|
||||
6. Monitor CPU usage and subscription counts
|
||||
@@ -88,4 +88,5 @@ sudo ufw delete allow 8888/tcp
|
||||
|
||||
lsof -i :7777
|
||||
kill $(lsof -t -i :7777)
|
||||
kill -9 $(lsof -t -i :7777)
|
||||
kill -9 $(lsof -t -i :7777)
|
||||
|
||||
|
||||
@@ -239,7 +239,7 @@ cJSON* query_subscription_details(void) {
|
||||
const char* sql =
|
||||
"SELECT * "
|
||||
"FROM active_subscriptions_log "
|
||||
"ORDER BY created_at DESC LIMIT 100";
|
||||
"ORDER BY created_at DESC";
|
||||
|
||||
// DEBUG: Log the query results for debugging subscription_details
|
||||
DEBUG_LOG("=== SUBSCRIPTION_DETAILS QUERY DEBUG ===");
|
||||
|
||||
@@ -65,6 +65,9 @@ static const struct {
|
||||
{"max_total_subscriptions", "5000"},
|
||||
{"max_filters_per_subscription", "10"},
|
||||
|
||||
// Connection Management
|
||||
{"max_connection_seconds", "86400"}, // 24 hours (0 = disabled)
|
||||
|
||||
// Event Processing Limits
|
||||
{"max_event_tags", "100"},
|
||||
{"max_content_length", "8196"},
|
||||
|
||||
@@ -1807,7 +1807,8 @@ int main(int argc, char* argv[]) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
// Cleanup orphaned subscriptions from previous runs
|
||||
cleanup_all_subscriptions_on_startup();
|
||||
|
||||
// Start WebSocket Nostr relay server (port from CLI override or configuration)
|
||||
int result = start_websocket_relay(cli_options.port_override, cli_options.strict_port); // Use CLI port override if specified, otherwise config
|
||||
|
||||
@@ -12,8 +12,8 @@
|
||||
// Version information (auto-updated by build system)
|
||||
#define VERSION_MAJOR 1
|
||||
#define VERSION_MINOR 0
|
||||
#define VERSION_PATCH 5
|
||||
#define VERSION "v1.0.5"
|
||||
#define VERSION_PATCH 8
|
||||
#define VERSION "v1.0.8"
|
||||
|
||||
// Avoid VERSION_MAJOR redefinition warning from nostr_core_lib
|
||||
#undef VERSION_MAJOR
|
||||
|
||||
@@ -999,6 +999,44 @@ void update_subscription_events_sent(const char* sub_id, int events_sent) {
|
||||
}
|
||||
}
|
||||
|
||||
// Cleanup all subscriptions on startup
|
||||
void cleanup_all_subscriptions_on_startup(void) {
|
||||
if (!g_db) {
|
||||
DEBUG_ERROR("Database not available for startup cleanup");
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_LOG("Performing startup subscription cleanup");
|
||||
|
||||
// Mark all active subscriptions as disconnected
|
||||
const char* sql =
|
||||
"UPDATE subscriptions "
|
||||
"SET ended_at = strftime('%s', 'now') "
|
||||
"WHERE event_type = 'created' AND ended_at IS NULL";
|
||||
|
||||
sqlite3_stmt* stmt;
|
||||
int rc = sqlite3_prepare_v2(g_db, sql, -1, &stmt, NULL);
|
||||
if (rc != SQLITE_OK) {
|
||||
DEBUG_ERROR("Failed to prepare startup cleanup query");
|
||||
return;
|
||||
}
|
||||
|
||||
rc = sqlite3_step(stmt);
|
||||
int changes = sqlite3_changes(g_db);
|
||||
sqlite3_finalize(stmt);
|
||||
|
||||
if (rc != SQLITE_DONE) {
|
||||
DEBUG_ERROR("Failed to execute startup cleanup");
|
||||
return;
|
||||
}
|
||||
|
||||
if (changes > 0) {
|
||||
DEBUG_LOG("Startup cleanup: marked %d orphaned subscriptions as disconnected", changes);
|
||||
} else {
|
||||
DEBUG_LOG("Startup cleanup: no orphaned subscriptions found");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -120,4 +120,7 @@ void update_subscription_events_sent(const char* sub_id, int events_sent);
|
||||
// Subscription query functions
|
||||
int has_subscriptions_for_kind(int event_kind);
|
||||
|
||||
// Startup cleanup function
|
||||
void cleanup_all_subscriptions_on_startup(void);
|
||||
|
||||
#endif // SUBSCRIPTIONS_H
|
||||
@@ -1984,6 +1984,73 @@ static int nostr_relay_callback(struct lws *wsi, enum lws_callback_reasons reaso
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Check and disconnect connections that have exceeded max age
|
||||
// This function works by checking connection age through the subscription system
|
||||
static void check_connection_age(int max_connection_seconds) {
|
||||
if (max_connection_seconds <= 0) {
|
||||
return; // Feature disabled
|
||||
}
|
||||
|
||||
time_t current_time = time(NULL);
|
||||
|
||||
// Lock the subscription manager to safely iterate through subscriptions
|
||||
pthread_mutex_lock(&g_subscription_manager.subscriptions_lock);
|
||||
|
||||
// Track unique WSI pointers we've already checked to avoid duplicate checks
|
||||
struct lws** checked_wsis = NULL;
|
||||
int checked_count = 0;
|
||||
int checked_capacity = 0;
|
||||
|
||||
subscription_t* sub = g_subscription_manager.active_subscriptions;
|
||||
while (sub) {
|
||||
if (!sub->active || !sub->wsi) {
|
||||
sub = sub->next;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if we've already processed this WSI
|
||||
int already_checked = 0;
|
||||
for (int i = 0; i < checked_count; i++) {
|
||||
if (checked_wsis[i] == sub->wsi) {
|
||||
already_checked = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!already_checked) {
|
||||
// Get per-session data to check connection age
|
||||
struct per_session_data *pss = (struct per_session_data *)lws_wsi_user(sub->wsi);
|
||||
|
||||
if (pss && pss->connection_established > 0) {
|
||||
time_t connection_age = current_time - pss->connection_established;
|
||||
|
||||
if (connection_age >= max_connection_seconds) {
|
||||
DEBUG_LOG("Disconnecting client %s: connection age %ld seconds exceeds limit %d seconds",
|
||||
pss->client_ip, connection_age, max_connection_seconds);
|
||||
|
||||
// Close connection with normal closure status
|
||||
lws_close_reason(sub->wsi, LWS_CLOSE_STATUS_NORMAL,
|
||||
(unsigned char*)"Connection age limit reached", 28);
|
||||
}
|
||||
}
|
||||
|
||||
// Add to checked list
|
||||
if (checked_count >= checked_capacity) {
|
||||
checked_capacity = checked_capacity == 0 ? 64 : checked_capacity * 2;
|
||||
checked_wsis = realloc(checked_wsis, checked_capacity * sizeof(struct lws*));
|
||||
}
|
||||
checked_wsis[checked_count++] = sub->wsi;
|
||||
}
|
||||
|
||||
sub = sub->next;
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&g_subscription_manager.subscriptions_lock);
|
||||
|
||||
// Cleanup
|
||||
free(checked_wsis);
|
||||
}
|
||||
|
||||
// WebSocket protocol definition
|
||||
static struct lws_protocols protocols[] = {
|
||||
{
|
||||
@@ -2152,6 +2219,9 @@ int start_websocket_relay(int port_override, int strict_port) {
|
||||
|
||||
// Static variable for status post timing (initialize to 0 for immediate first post)
|
||||
static time_t last_status_post_time = 0;
|
||||
|
||||
// Static variable for connection age check timing
|
||||
static time_t last_connection_age_check = 0;
|
||||
|
||||
// Main event loop with proper signal handling
|
||||
while (g_server_running && !g_shutdown_flag) {
|
||||
@@ -2174,6 +2244,13 @@ int start_websocket_relay(int port_override, int strict_port) {
|
||||
generate_and_post_status_event();
|
||||
}
|
||||
}
|
||||
|
||||
// Check connection age limits (every 60 seconds)
|
||||
int max_connection_seconds = get_config_int("max_connection_seconds", 86400);
|
||||
if (max_connection_seconds > 0 && (current_time - last_connection_age_check >= 60)) {
|
||||
last_connection_age_check = current_time;
|
||||
check_connection_age(max_connection_seconds);
|
||||
}
|
||||
}
|
||||
|
||||
lws_context_destroy(ws_context);
|
||||
|
||||
5
tests/.test_keys.txt
Normal file
5
tests/.test_keys.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
# Test key configuration (from make_and_restart_relay.sh -t)
|
||||
ADMIN_PRIVATE_KEY="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
|
||||
ADMIN_PUBLIC_KEY="6a04ab98d9e4774ad806e302dddeb63bea16b5cb5f223ee77478e861bb583eb3"
|
||||
RELAY_PUBLIC_KEY="4f355bdcb7cc0af728ef3cceb9615d90684bb5b2ca5f859ab0f0b704075871aa"
|
||||
RELAY_URL="ws://localhost:8888"
|
||||
295
tests/subscription_cleanup_test.sh
Executable file
295
tests/subscription_cleanup_test.sh
Executable file
@@ -0,0 +1,295 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Subscription Cleanup Testing Suite for C-Relay
|
||||
# Tests startup cleanup and connection age limit features
|
||||
|
||||
set -e
|
||||
|
||||
# Load test keys
|
||||
source "$(dirname "$0")/.test_keys.txt"
|
||||
|
||||
# Configuration
|
||||
RELAY_HOST="127.0.0.1"
|
||||
RELAY_PORT="8888"
|
||||
RELAY_URL="ws://${RELAY_HOST}:${RELAY_PORT}"
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Test counters
|
||||
TOTAL_TESTS=0
|
||||
PASSED_TESTS=0
|
||||
FAILED_TESTS=0
|
||||
|
||||
echo -e "${BLUE}========================================${NC}"
|
||||
echo -e "${BLUE}Subscription Cleanup Test Suite${NC}"
|
||||
echo -e "${BLUE}========================================${NC}"
|
||||
echo ""
|
||||
|
||||
# Function to print test header
|
||||
print_test_header() {
|
||||
echo -e "${BLUE}=== Test $1: $2 ===${NC}"
|
||||
}
|
||||
|
||||
# Function to print test result
|
||||
print_result() {
|
||||
local status=$1
|
||||
local message=$2
|
||||
|
||||
TOTAL_TESTS=$((TOTAL_TESTS + 1))
|
||||
|
||||
if [ "$status" = "PASS" ]; then
|
||||
echo -e "${GREEN}[PASS]${NC} $message"
|
||||
PASSED_TESTS=$((PASSED_TESTS + 1))
|
||||
elif [ "$status" = "FAIL" ]; then
|
||||
echo -e "${RED}[FAIL]${NC} $message"
|
||||
FAILED_TESTS=$((FAILED_TESTS + 1))
|
||||
else
|
||||
echo -e "${YELLOW}[WARN]${NC} $message"
|
||||
PASSED_TESTS=$((PASSED_TESTS + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to check if relay is running
|
||||
check_relay_running() {
|
||||
# Send a simple REQ and check for EOSE response
|
||||
local response=$(echo '["REQ","ping",{}]' | timeout 2 websocat -n1 "$RELAY_URL" 2>/dev/null)
|
||||
if echo "$response" | grep -q "EOSE\|EVENT"; then
|
||||
return 0
|
||||
else
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to create a subscription
|
||||
create_subscription() {
|
||||
local sub_id=$1
|
||||
local filter=${2:-"{}"}
|
||||
|
||||
echo "[\"REQ\",\"$sub_id\",$filter]" | timeout 5 websocat -n1 "$RELAY_URL" 2>/dev/null || echo "TIMEOUT"
|
||||
}
|
||||
|
||||
# Function to close a subscription
|
||||
close_subscription() {
|
||||
local sub_id=$1
|
||||
|
||||
echo "[\"CLOSE\",\"$sub_id\"]" | timeout 5 websocat -n1 "$RELAY_URL" 2>/dev/null || echo "TIMEOUT"
|
||||
}
|
||||
|
||||
# Function to query subscription count from database
|
||||
get_subscription_count() {
|
||||
local db_file=$(find . -name "*.db" -type f 2>/dev/null | head -1)
|
||||
|
||||
if [ -z "$db_file" ]; then
|
||||
echo "0"
|
||||
return
|
||||
fi
|
||||
|
||||
sqlite3 "$db_file" "SELECT COUNT(*) FROM subscriptions WHERE event_type='created' AND ended_at IS NULL;" 2>/dev/null || echo "0"
|
||||
}
|
||||
|
||||
# Test 1: Basic Connectivity
|
||||
print_test_header "1" "Basic Connectivity"
|
||||
|
||||
if check_relay_running; then
|
||||
print_result "PASS" "Relay is running and accepting connections"
|
||||
else
|
||||
print_result "FAIL" "Cannot connect to relay at $RELAY_URL"
|
||||
echo ""
|
||||
echo -e "${RED}ERROR: Relay must be running for tests to proceed${NC}"
|
||||
exit 1
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Test 2: Create Multiple Subscriptions
|
||||
print_test_header "2" "Create Multiple Subscriptions"
|
||||
|
||||
echo "[INFO] Creating 5 test subscriptions..."
|
||||
for i in {1..5}; do
|
||||
response=$(create_subscription "cleanup_test_$i")
|
||||
if echo "$response" | grep -q "EOSE"; then
|
||||
echo "[INFO] Subscription cleanup_test_$i created successfully"
|
||||
else
|
||||
print_result "WARN" "Subscription cleanup_test_$i may not have been created: $response"
|
||||
fi
|
||||
done
|
||||
|
||||
# Give subscriptions time to be logged
|
||||
sleep 2
|
||||
|
||||
# Check subscription count in database
|
||||
active_subs=$(get_subscription_count)
|
||||
echo "[INFO] Active subscriptions in database: $active_subs"
|
||||
|
||||
if [ "$active_subs" -ge 5 ]; then
|
||||
print_result "PASS" "Multiple subscriptions created and logged ($active_subs active)"
|
||||
else
|
||||
print_result "WARN" "Expected at least 5 subscriptions, found $active_subs"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Test 3: Simulate Orphaned Subscriptions (disconnect without CLOSE)
|
||||
print_test_header "3" "Simulate Orphaned Subscriptions"
|
||||
|
||||
echo "[INFO] Creating subscriptions and disconnecting abruptly..."
|
||||
|
||||
# Create subscriptions in background and kill the connection
|
||||
for i in {6..10}; do
|
||||
(echo "[\"REQ\",\"orphan_test_$i\",{}]" | timeout 2 websocat "$RELAY_URL" &>/dev/null) &
|
||||
pid=$!
|
||||
sleep 0.5
|
||||
kill -9 $pid 2>/dev/null || true
|
||||
done
|
||||
|
||||
sleep 2
|
||||
|
||||
orphaned_subs=$(get_subscription_count)
|
||||
echo "[INFO] Subscriptions after abrupt disconnects: $orphaned_subs"
|
||||
|
||||
if [ "$orphaned_subs" -gt "$active_subs" ]; then
|
||||
print_result "PASS" "Orphaned subscriptions detected ($orphaned_subs total, was $active_subs)"
|
||||
else
|
||||
print_result "WARN" "No increase in orphaned subscriptions detected"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Test 4: Startup Cleanup (requires relay restart)
|
||||
print_test_header "4" "Startup Cleanup Feature"
|
||||
|
||||
echo "[INFO] This test requires relay restart to verify startup cleanup"
|
||||
echo "[INFO] Current orphaned subscriptions: $orphaned_subs"
|
||||
echo ""
|
||||
echo -e "${YELLOW}[ACTION REQUIRED]${NC} Please restart the relay now with:"
|
||||
echo " ./make_and_restart_relay.sh"
|
||||
echo ""
|
||||
echo -n "Press Enter after relay has restarted to continue..."
|
||||
read
|
||||
|
||||
# Wait for relay to be ready
|
||||
echo "[INFO] Waiting for relay to be ready..."
|
||||
sleep 3
|
||||
|
||||
if ! check_relay_running; then
|
||||
print_result "FAIL" "Relay not responding after restart"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check if orphaned subscriptions were cleaned up
|
||||
cleaned_subs=$(get_subscription_count)
|
||||
echo "[INFO] Active subscriptions after restart: $cleaned_subs"
|
||||
|
||||
if [ "$cleaned_subs" -eq 0 ]; then
|
||||
print_result "PASS" "Startup cleanup removed all orphaned subscriptions"
|
||||
elif [ "$cleaned_subs" -lt "$orphaned_subs" ]; then
|
||||
print_result "PASS" "Startup cleanup reduced orphaned subscriptions (from $orphaned_subs to $cleaned_subs)"
|
||||
else
|
||||
print_result "FAIL" "Startup cleanup did not reduce orphaned subscriptions"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Test 5: Connection Age Limit (requires configuration)
|
||||
print_test_header "5" "Connection Age Limit Feature"
|
||||
|
||||
echo "[INFO] Testing connection age limit feature..."
|
||||
echo "[INFO] Default max_connection_seconds is 86400 (24 hours)"
|
||||
echo ""
|
||||
echo -e "${YELLOW}[INFO]${NC} To test connection age limit with shorter timeout:"
|
||||
echo " 1. Set max_connection_seconds to 60 (1 minute) via admin event"
|
||||
echo " 2. Create a subscription and wait 61 seconds"
|
||||
echo " 3. Connection should be automatically closed"
|
||||
echo ""
|
||||
echo "[INFO] For this test, we'll verify the feature is enabled in config"
|
||||
|
||||
# Create a test subscription to verify connection works
|
||||
response=$(create_subscription "age_test_1")
|
||||
if echo "$response" | grep -q "EOSE"; then
|
||||
print_result "PASS" "Connection age limit feature is operational (subscription created)"
|
||||
else
|
||||
print_result "WARN" "Could not verify connection age limit feature"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Test 6: Verify Client Reconnection
|
||||
print_test_header "6" "Client Reconnection After Cleanup"
|
||||
|
||||
echo "[INFO] Testing that clients can reconnect after cleanup..."
|
||||
|
||||
# Create a subscription
|
||||
response=$(create_subscription "reconnect_test_1")
|
||||
if echo "$response" | grep -q "EOSE"; then
|
||||
echo "[INFO] First connection successful"
|
||||
|
||||
# Close and reconnect
|
||||
sleep 1
|
||||
response=$(create_subscription "reconnect_test_2")
|
||||
if echo "$response" | grep -q "EOSE"; then
|
||||
print_result "PASS" "Client can reconnect and create new subscriptions"
|
||||
else
|
||||
print_result "FAIL" "Client reconnection failed"
|
||||
fi
|
||||
else
|
||||
print_result "FAIL" "Initial connection failed"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Test 7: Verify Disabled State (max_connection_seconds = 0)
|
||||
print_test_header "7" "Verify Feature Can Be Disabled"
|
||||
|
||||
echo "[INFO] Connection age limit can be disabled by setting max_connection_seconds=0"
|
||||
echo "[INFO] When disabled, connections remain open indefinitely"
|
||||
echo "[INFO] This is the recommended setting for most relays"
|
||||
|
||||
# Create a long-lived subscription
|
||||
response=$(create_subscription "disabled_test_1")
|
||||
if echo "$response" | grep -q "EOSE"; then
|
||||
print_result "PASS" "Subscriptions work normally when feature is disabled/default"
|
||||
else
|
||||
print_result "WARN" "Could not verify disabled state"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Test 8: Database Integrity Check
|
||||
print_test_header "8" "Database Integrity After Cleanup"
|
||||
|
||||
echo "[INFO] Checking database integrity..."
|
||||
|
||||
db_file=$(find . -name "*.db" -type f 2>/dev/null | head -1)
|
||||
if [ -n "$db_file" ]; then
|
||||
# Check if database is accessible
|
||||
if sqlite3 "$db_file" "PRAGMA integrity_check;" 2>/dev/null | grep -q "ok"; then
|
||||
print_result "PASS" "Database integrity check passed"
|
||||
else
|
||||
print_result "FAIL" "Database integrity check failed"
|
||||
fi
|
||||
|
||||
# Check subscription table structure
|
||||
if sqlite3 "$db_file" "SELECT COUNT(*) FROM subscriptions;" &>/dev/null; then
|
||||
print_result "PASS" "Subscription table is accessible"
|
||||
else
|
||||
print_result "FAIL" "Subscription table is not accessible"
|
||||
fi
|
||||
else
|
||||
print_result "WARN" "No database file found"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Final Summary
|
||||
echo -e "${BLUE}========================================${NC}"
|
||||
echo -e "${BLUE}Test Summary${NC}"
|
||||
echo -e "${BLUE}========================================${NC}"
|
||||
echo "Total Tests: $TOTAL_TESTS"
|
||||
echo -e "${GREEN}Passed: $PASSED_TESTS${NC}"
|
||||
echo -e "${RED}Failed: $FAILED_TESTS${NC}"
|
||||
echo ""
|
||||
|
||||
if [ $FAILED_TESTS -eq 0 ]; then
|
||||
echo -e "${GREEN}All tests passed!${NC}"
|
||||
exit 0
|
||||
else
|
||||
echo -e "${RED}Some tests failed. Please review the output above.${NC}"
|
||||
exit 1
|
||||
fi
|
||||
Reference in New Issue
Block a user