Compare commits

...

3 Commits

13 changed files with 961 additions and 7 deletions

1
.gitignore vendored
View File

@@ -11,4 +11,3 @@ copy_executable_local.sh
nostr_login_lite/
style_guide/
nostr-tools

View File

@@ -0,0 +1,532 @@
# Subscription Cleanup - Simplified Design
## Problem Summary
The c-relay Nostr relay experienced severe performance degradation (90-100% CPU) due to subscription accumulation in the database. Investigation revealed **323,644 orphaned subscriptions** that were never properly closed when WebSocket connections dropped.
## Solution: Two-Component Approach
This simplified design focuses on two pragmatic solutions that align with Nostr's stateless design:
1. **Startup Cleanup**: Close all subscriptions on relay restart
2. **Connection Age Limit**: Disconnect clients after a configurable time period
Both solutions force clients to reconnect and re-establish subscriptions, which is standard Nostr behavior.
---
## Component 1: Startup Cleanup
### Purpose
Ensure clean state on every relay restart by closing all subscriptions in the database.
### Implementation
**File:** [`src/subscriptions.c`](src/subscriptions.c)
**New Function:**
```c
void cleanup_all_subscriptions_on_startup(void) {
if (!g_db) {
DEBUG_ERROR("Database not initialized for startup cleanup");
return;
}
DEBUG_LOG("Startup cleanup: Marking all active subscriptions as disconnected");
// Mark all 'created' subscriptions as disconnected
const char* update_sql =
"UPDATE subscriptions "
"SET ended_at = strftime('%s', 'now') "
"WHERE event_type = 'created' AND ended_at IS NULL";
sqlite3_stmt* stmt;
int rc = sqlite3_prepare_v2(g_db, update_sql, -1, &stmt, NULL);
if (rc != SQLITE_OK) {
DEBUG_ERROR("Failed to prepare startup cleanup query: %s", sqlite3_errmsg(g_db));
return;
}
rc = sqlite3_step(stmt);
int updated_count = sqlite3_changes(g_db);
sqlite3_finalize(stmt);
if (updated_count > 0) {
// Log a single 'disconnected' event for the startup cleanup
const char* insert_sql =
"INSERT INTO subscriptions (subscription_id, wsi_pointer, client_ip, event_type) "
"VALUES ('startup_cleanup', '', 'system', 'disconnected')";
rc = sqlite3_prepare_v2(g_db, insert_sql, -1, &stmt, NULL);
if (rc == SQLITE_OK) {
sqlite3_step(stmt);
sqlite3_finalize(stmt);
}
DEBUG_LOG("Startup cleanup: Marked %d subscriptions as disconnected", updated_count);
} else {
DEBUG_LOG("Startup cleanup: No active subscriptions found");
}
}
```
**Integration Point:** [`src/main.c:1810`](src/main.c:1810)
```c
// Initialize subscription manager mutexes
if (pthread_mutex_init(&g_subscription_manager.subscriptions_lock, NULL) != 0) {
DEBUG_ERROR("Failed to initialize subscriptions mutex");
sqlite3_close(g_db);
return 1;
}
if (pthread_mutex_init(&g_subscription_manager.ip_tracking_lock, NULL) != 0) {
DEBUG_ERROR("Failed to initialize IP tracking mutex");
pthread_mutex_destroy(&g_subscription_manager.subscriptions_lock);
sqlite3_close(g_db);
return 1;
}
// **NEW: Startup cleanup - close all subscriptions**
cleanup_all_subscriptions_on_startup();
// Start WebSocket relay server
DEBUG_LOG("Starting WebSocket relay server...");
if (start_websocket_relay(port_override, strict_port) != 0) {
DEBUG_ERROR("Failed to start WebSocket relay");
// ... cleanup code
}
```
### Benefits
- **Immediate relief**: Restart relay to fix subscription issues
- **Clean slate**: Every restart starts with zero active subscriptions
- **Simple**: Single SQL UPDATE statement
- **Nostr-aligned**: Clients are designed to reconnect after relay restart
- **No configuration needed**: Always runs on startup
---
## Component 2: Connection Age Limit
### Purpose
Automatically disconnect clients after a configurable time period, forcing them to reconnect and re-establish subscriptions.
### Why Disconnect Instead of Just Closing Subscriptions?
**Option 1: Send CLOSED message (keep connection)**
- ❌ Not all clients handle `CLOSED` messages properly
- ❌ Silent failure - client thinks it's subscribed but isn't
- ❌ Partial cleanup - connection still consumes resources
- ❌ More complex to implement
**Option 2: Disconnect client entirely (force reconnection)**
- ✅ Universal compatibility - all clients handle WebSocket reconnection
- ✅ Complete resource cleanup (memory, file descriptors, etc.)
- ✅ Simple implementation - single operation
- ✅ Well-tested code path (same as network interruptions)
- ✅ Forces re-authentication if needed
### Implementation
**File:** [`src/websockets.c`](src/websockets.c)
**New Function:**
```c
/**
* Check connection age and disconnect clients that have been connected too long.
* This forces clients to reconnect and re-establish subscriptions.
*
* Uses libwebsockets' lws_vhost_foreach_wsi() to iterate through all active
* connections and checks their connection_established timestamp from per_session_data.
*/
void check_connection_age(int max_connection_seconds) {
if (max_connection_seconds <= 0 || !ws_context) {
return;
}
time_t now = time(NULL);
time_t cutoff = now - max_connection_seconds;
// Get the default vhost
struct lws_vhost *vhost = lws_get_vhost_by_name(ws_context, "default");
if (!vhost) {
DEBUG_ERROR("Failed to get vhost for connection age check");
return;
}
// Iterate through all active WebSocket connections
// Note: lws_vhost_foreach_wsi() calls our callback for each connection
struct lws *wsi = NULL;
while ((wsi = lws_vhost_foreach_wsi(vhost, wsi)) != NULL) {
// Get per-session data which contains connection_established timestamp
struct per_session_data *pss = (struct per_session_data *)lws_wsi_user(wsi);
if (pss && pss->connection_established > 0) {
// Check if connection is older than cutoff
if (pss->connection_established < cutoff) {
// Connection is too old - close it
long age_seconds = now - pss->connection_established;
DEBUG_LOG("Closing connection from %s (age: %lds, limit: %ds)",
pss->client_ip, age_seconds, max_connection_seconds);
// Close with normal status and reason message
lws_close_reason(wsi, LWS_CLOSE_STATUS_NORMAL,
(unsigned char *)"connection age limit", 21);
}
}
}
}
```
**Key Implementation Details:**
1. **No database needed**: Active connections are tracked by libwebsockets itself
2. **Uses existing timestamp**: `pss->connection_established` is already set on line 456 of websockets.c
3. **Built-in iterator**: `lws_vhost_foreach_wsi()` safely iterates through all active connections
4. **Per-session data**: Each connection's `per_session_data` is accessible via `lws_wsi_user()`
5. **Safe closure**: `lws_close_reason()` properly closes the WebSocket with a status code and message
**Integration Point:** [`src/websockets.c:2176`](src/websockets.c:2176) - in existing event loop
```c
// Main event loop with proper signal handling
while (g_server_running && !g_shutdown_flag) {
int result = lws_service(ws_context, 1000);
if (result < 0) {
DEBUG_ERROR("libwebsockets service error");
break;
}
// Check if it's time to post status update
time_t current_time = time(NULL);
int status_post_hours = get_config_int("kind_1_status_posts_hours", 0);
if (status_post_hours > 0) {
int seconds_interval = status_post_hours * 3600;
if (current_time - last_status_post_time >= seconds_interval) {
last_status_post_time = current_time;
generate_and_post_status_event();
}
}
// **NEW: Check for connection age limit**
int max_connection_seconds = get_config_int("max_connection_seconds", 86400); // Default 24 hours
if (max_connection_seconds > 0) {
check_connection_age(max_connection_seconds);
}
}
```
### Configuration
**Parameter:** `max_connection_seconds`
- **Default:** `86400` (24 hours)
- **Range:** `0` = disabled, `>0` = disconnect after X seconds
- **Units:** Seconds (for consistency with other time-based configs)
**Example configurations:**
```json
{
"max_connection_seconds": 86400 // 86400 seconds = 24 hours (default)
}
```
```json
{
"max_connection_seconds": 43200 // 43200 seconds = 12 hours
}
```
```json
{
"max_connection_seconds": 3600 // 3600 seconds = 1 hour
}
```
```json
{
"max_connection_seconds": 0 // Disabled
}
```
### Client Behavior
When disconnected due to age limit, clients will:
1. Detect WebSocket closure
2. Wait briefly (exponential backoff)
3. Reconnect to relay
4. Re-authenticate if needed (NIP-42)
5. Re-establish all subscriptions
6. Resume normal operation
This is **exactly what happens** during network interruptions, so it's a well-tested code path in all Nostr clients.
### Benefits
- **No new threads**: Uses existing event loop
- **Minimal overhead**: Check runs once per second (same as `lws_service`)
- **Simple implementation**: Iterate through active connections
- **Consistent pattern**: Matches existing status post checking
- **Universal compatibility**: All clients handle reconnection
- **Complete cleanup**: Frees all resources associated with connection
- **Configurable**: Can be adjusted per relay needs or disabled entirely
---
## Implementation Plan
### Phase 1: Startup Cleanup (1-2 hours)
1. **Add `cleanup_all_subscriptions_on_startup()` function**
- File: [`src/subscriptions.c`](src/subscriptions.c)
- SQL UPDATE to mark all active subscriptions as disconnected
- Add logging for cleanup count
2. **Integrate in main()**
- File: [`src/main.c:1810`](src/main.c:1810)
- Call after mutex initialization, before WebSocket server start
3. **Test**
- Create subscriptions in database
- Restart relay
- Verify all subscriptions marked as disconnected
- Verify `active_subscriptions_log` shows 0 subscriptions
**Estimated Time:** 1-2 hours
### Phase 2: Connection Age Limit (2-3 hours)
1. **Add `check_connection_age()` function**
- File: [`src/websockets.c`](src/websockets.c)
- Iterate through active connections
- Close connections older than limit
2. **Integrate in event loop**
- File: [`src/websockets.c:2176`](src/websockets.c:2176)
- Add check after status post check
- Use same pattern as status posts
3. **Add configuration parameter**
- Add `max_connection_seconds` to default config
- Default: 86400 (24 hours)
4. **Test**
- Connect client
- Wait for timeout (or reduce timeout for testing)
- Verify client disconnected
- Verify client reconnects automatically
- Verify subscriptions re-established
**Estimated Time:** 2-3 hours
---
## Testing Strategy
### Startup Cleanup Tests
```bash
# Test 1: Clean startup with existing subscriptions
- Create 100 active subscriptions in database
- Restart relay
- Verify all subscriptions marked as disconnected
- Verify active_subscriptions_log shows 0 subscriptions
# Test 2: Clean startup with no subscriptions
- Start relay with empty database
- Verify no errors
- Verify startup cleanup logs "No active subscriptions found"
# Test 3: Clients reconnect after restart
- Create subscriptions before restart
- Restart relay
- Connect clients and create new subscriptions
- Verify new subscriptions tracked correctly
```
### Connection Age Limit Tests
```bash
# Test 1: Connection disconnected after timeout
- Set max_connection_seconds to 60 (for testing)
- Connect client
- Wait 61 seconds
- Verify client disconnected
- Verify client reconnects automatically
# Test 2: Subscriptions re-established after reconnection
- Connect client with subscriptions
- Wait for timeout
- Verify client reconnects
- Verify subscriptions re-established
- Verify events still delivered
# Test 3: Disabled when set to 0
- Set max_connection_seconds to 0
- Connect client
- Wait extended period
- Verify client NOT disconnected
```
### Integration Tests
```bash
# Test 1: Combined behavior
- Start relay (startup cleanup runs)
- Connect multiple clients
- Create subscriptions
- Wait for connection timeout
- Verify clients reconnect
- Restart relay
- Verify clean state
# Test 2: Load test
- Connect 100 clients
- Each creates 5 subscriptions
- Wait for connection timeout
- Verify all clients reconnect
- Verify all subscriptions re-established
- Monitor CPU usage (should remain low)
```
---
## Success Criteria
### Component 1: Startup Cleanup
- ✅ Relay starts with zero active subscriptions
- ✅ All previous subscriptions marked as disconnected on startup
- ✅ Clients successfully reconnect and re-establish subscriptions
- ✅ Relay restart can be used as emergency fix for subscription issues
- ✅ No errors during startup cleanup process
### Component 2: Connection Age Limit
- ✅ Clients disconnected after configured time period
- ✅ Clients automatically reconnect
- ✅ Subscriptions re-established after reconnection
- ✅ No impact on relay performance
- ✅ Configuration parameter works correctly (including disabled state)
### Overall Success
- ✅ CPU usage remains low (<10%)
- No orphaned subscriptions accumulate
- Database size remains stable
- No manual intervention required
---
## Configuration Reference
**New Configuration Parameters:**
```json
{
"max_connection_seconds": 86400
}
```
**Recommended Settings:**
- **Production:**
- `max_connection_seconds: 86400` (24 hours)
- **Development:**
- `max_connection_seconds: 3600` (1 hour for faster testing)
- **High-traffic:**
- `max_connection_seconds: 43200` (12 hours)
- **Disabled:**
- `max_connection_seconds: 0`
---
## Rollback Plan
If issues arise after deployment:
1. **Disable connection age limit:**
- Set `max_connection_seconds: 0` in config
- Restart relay
- Monitor for stability
2. **Revert code changes:**
- Remove `check_connection_age()` call from event loop
- Remove `cleanup_all_subscriptions_on_startup()` call from main
- Restart relay
3. **Database cleanup (if needed):**
- Manually clean up orphaned subscriptions using SQL:
```sql
UPDATE subscriptions
SET ended_at = strftime('%s', 'now')
WHERE event_type = 'created' AND ended_at IS NULL;
```
---
## Comparison with Original Design
### Original Design (5 Components)
1. Startup cleanup
2. Fix WebSocket disconnection logging
3. Enhance subscription removal with reason parameter
4. Periodic cleanup task (background thread)
5. Optimize database VIEW
6. Subscription expiration (optional)
### Simplified Design (2 Components)
1. Startup cleanup
2. Connection age limit
### Why Simplified is Better
**Advantages:**
- **Simpler**: 2 components vs 5-6 components
- **Faster to implement**: 3-5 hours vs 11-17 hours
- **Easier to maintain**: Less code, fewer moving parts
- **More reliable**: Fewer potential failure points
- **Nostr-aligned**: Leverages client reconnection behavior
- **No new threads**: Uses existing event loop
- **Universal compatibility**: All clients handle reconnection
**What We're Not Losing:**
- Startup cleanup is identical in both designs
- Connection age limit achieves the same goal as periodic cleanup + expiration
- Disconnection forces complete cleanup (better than just logging)
- Database VIEW optimization not needed if subscriptions don't accumulate
**Trade-offs:**
- Less granular logging (but simpler)
- No historical subscription analytics (but cleaner database)
- Clients must reconnect periodically (but this is standard Nostr behavior)
---
## Conclusion
This simplified design solves the subscription accumulation problem with two pragmatic solutions:
1. **Startup cleanup** ensures every relay restart starts with a clean slate
2. **Connection age limit** prevents long-term accumulation by forcing periodic reconnection
Both solutions align with Nostr's stateless design where clients are expected to handle reconnection. The implementation is simple, maintainable, and leverages existing code patterns.
**Key Benefits:**
- Solves the root problem (subscription accumulation)
- Simple to implement (3-5 hours total)
- Easy to maintain (minimal code)
- Universal compatibility (all clients handle reconnection)
- No new threads or background tasks
- Configurable and can be disabled if needed
- Relay restart as emergency fix
**Next Steps:**
1. Implement Component 1 (Startup Cleanup)
2. Test thoroughly
3. Implement Component 2 (Connection Age Limit)
4. Test thoroughly
5. Deploy to production
6. Monitor CPU usage and subscription counts

View File

@@ -88,4 +88,5 @@ sudo ufw delete allow 8888/tcp
lsof -i :7777
kill $(lsof -t -i :7777)
kill -9 $(lsof -t -i :7777)
kill -9 $(lsof -t -i :7777)

View File

@@ -1 +1 @@
198023
1508392

View File

@@ -239,7 +239,7 @@ cJSON* query_subscription_details(void) {
const char* sql =
"SELECT * "
"FROM active_subscriptions_log "
"ORDER BY created_at DESC LIMIT 100";
"ORDER BY created_at DESC";
// DEBUG: Log the query results for debugging subscription_details
DEBUG_LOG("=== SUBSCRIPTION_DETAILS QUERY DEBUG ===");

View File

@@ -65,6 +65,9 @@ static const struct {
{"max_total_subscriptions", "5000"},
{"max_filters_per_subscription", "10"},
// Connection Management
{"max_connection_seconds", "86400"}, // 24 hours (0 = disabled)
// Event Processing Limits
{"max_event_tags", "100"},
{"max_content_length", "8196"},

View File

@@ -1807,7 +1807,8 @@ int main(int argc, char* argv[]) {
return 1;
}
// Cleanup orphaned subscriptions from previous runs
cleanup_all_subscriptions_on_startup();
// Start WebSocket Nostr relay server (port from CLI override or configuration)
int result = start_websocket_relay(cli_options.port_override, cli_options.strict_port); // Use CLI port override if specified, otherwise config

View File

@@ -12,8 +12,8 @@
// Version information (auto-updated by build system)
#define VERSION_MAJOR 1
#define VERSION_MINOR 0
#define VERSION_PATCH 5
#define VERSION "v1.0.5"
#define VERSION_PATCH 8
#define VERSION "v1.0.8"
// Avoid VERSION_MAJOR redefinition warning from nostr_core_lib
#undef VERSION_MAJOR

View File

@@ -999,6 +999,44 @@ void update_subscription_events_sent(const char* sub_id, int events_sent) {
}
}
// Cleanup all subscriptions on startup
void cleanup_all_subscriptions_on_startup(void) {
if (!g_db) {
DEBUG_ERROR("Database not available for startup cleanup");
return;
}
DEBUG_LOG("Performing startup subscription cleanup");
// Mark all active subscriptions as disconnected
const char* sql =
"UPDATE subscriptions "
"SET ended_at = strftime('%s', 'now') "
"WHERE event_type = 'created' AND ended_at IS NULL";
sqlite3_stmt* stmt;
int rc = sqlite3_prepare_v2(g_db, sql, -1, &stmt, NULL);
if (rc != SQLITE_OK) {
DEBUG_ERROR("Failed to prepare startup cleanup query");
return;
}
rc = sqlite3_step(stmt);
int changes = sqlite3_changes(g_db);
sqlite3_finalize(stmt);
if (rc != SQLITE_DONE) {
DEBUG_ERROR("Failed to execute startup cleanup");
return;
}
if (changes > 0) {
DEBUG_LOG("Startup cleanup: marked %d orphaned subscriptions as disconnected", changes);
} else {
DEBUG_LOG("Startup cleanup: no orphaned subscriptions found");
}
}
///////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////

View File

@@ -120,4 +120,7 @@ void update_subscription_events_sent(const char* sub_id, int events_sent);
// Subscription query functions
int has_subscriptions_for_kind(int event_kind);
// Startup cleanup function
void cleanup_all_subscriptions_on_startup(void);
#endif // SUBSCRIPTIONS_H

View File

@@ -1984,6 +1984,73 @@ static int nostr_relay_callback(struct lws *wsi, enum lws_callback_reasons reaso
return 0;
}
// Check and disconnect connections that have exceeded max age
// This function works by checking connection age through the subscription system
static void check_connection_age(int max_connection_seconds) {
if (max_connection_seconds <= 0) {
return; // Feature disabled
}
time_t current_time = time(NULL);
// Lock the subscription manager to safely iterate through subscriptions
pthread_mutex_lock(&g_subscription_manager.subscriptions_lock);
// Track unique WSI pointers we've already checked to avoid duplicate checks
struct lws** checked_wsis = NULL;
int checked_count = 0;
int checked_capacity = 0;
subscription_t* sub = g_subscription_manager.active_subscriptions;
while (sub) {
if (!sub->active || !sub->wsi) {
sub = sub->next;
continue;
}
// Check if we've already processed this WSI
int already_checked = 0;
for (int i = 0; i < checked_count; i++) {
if (checked_wsis[i] == sub->wsi) {
already_checked = 1;
break;
}
}
if (!already_checked) {
// Get per-session data to check connection age
struct per_session_data *pss = (struct per_session_data *)lws_wsi_user(sub->wsi);
if (pss && pss->connection_established > 0) {
time_t connection_age = current_time - pss->connection_established;
if (connection_age >= max_connection_seconds) {
DEBUG_LOG("Disconnecting client %s: connection age %ld seconds exceeds limit %d seconds",
pss->client_ip, connection_age, max_connection_seconds);
// Close connection with normal closure status
lws_close_reason(sub->wsi, LWS_CLOSE_STATUS_NORMAL,
(unsigned char*)"Connection age limit reached", 28);
}
}
// Add to checked list
if (checked_count >= checked_capacity) {
checked_capacity = checked_capacity == 0 ? 64 : checked_capacity * 2;
checked_wsis = realloc(checked_wsis, checked_capacity * sizeof(struct lws*));
}
checked_wsis[checked_count++] = sub->wsi;
}
sub = sub->next;
}
pthread_mutex_unlock(&g_subscription_manager.subscriptions_lock);
// Cleanup
free(checked_wsis);
}
// WebSocket protocol definition
static struct lws_protocols protocols[] = {
{
@@ -2152,6 +2219,9 @@ int start_websocket_relay(int port_override, int strict_port) {
// Static variable for status post timing (initialize to 0 for immediate first post)
static time_t last_status_post_time = 0;
// Static variable for connection age check timing
static time_t last_connection_age_check = 0;
// Main event loop with proper signal handling
while (g_server_running && !g_shutdown_flag) {
@@ -2174,6 +2244,13 @@ int start_websocket_relay(int port_override, int strict_port) {
generate_and_post_status_event();
}
}
// Check connection age limits (every 60 seconds)
int max_connection_seconds = get_config_int("max_connection_seconds", 86400);
if (max_connection_seconds > 0 && (current_time - last_connection_age_check >= 60)) {
last_connection_age_check = current_time;
check_connection_age(max_connection_seconds);
}
}
lws_context_destroy(ws_context);

5
tests/.test_keys.txt Normal file
View File

@@ -0,0 +1,5 @@
# Test key configuration (from make_and_restart_relay.sh -t)
ADMIN_PRIVATE_KEY="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
ADMIN_PUBLIC_KEY="6a04ab98d9e4774ad806e302dddeb63bea16b5cb5f223ee77478e861bb583eb3"
RELAY_PUBLIC_KEY="4f355bdcb7cc0af728ef3cceb9615d90684bb5b2ca5f859ab0f0b704075871aa"
RELAY_URL="ws://localhost:8888"

View File

@@ -0,0 +1,295 @@
#!/bin/bash
# Subscription Cleanup Testing Suite for C-Relay
# Tests startup cleanup and connection age limit features
set -e
# Load test keys
source "$(dirname "$0")/.test_keys.txt"
# Configuration
RELAY_HOST="127.0.0.1"
RELAY_PORT="8888"
RELAY_URL="ws://${RELAY_HOST}:${RELAY_PORT}"
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Test counters
TOTAL_TESTS=0
PASSED_TESTS=0
FAILED_TESTS=0
echo -e "${BLUE}========================================${NC}"
echo -e "${BLUE}Subscription Cleanup Test Suite${NC}"
echo -e "${BLUE}========================================${NC}"
echo ""
# Function to print test header
print_test_header() {
echo -e "${BLUE}=== Test $1: $2 ===${NC}"
}
# Function to print test result
print_result() {
local status=$1
local message=$2
TOTAL_TESTS=$((TOTAL_TESTS + 1))
if [ "$status" = "PASS" ]; then
echo -e "${GREEN}[PASS]${NC} $message"
PASSED_TESTS=$((PASSED_TESTS + 1))
elif [ "$status" = "FAIL" ]; then
echo -e "${RED}[FAIL]${NC} $message"
FAILED_TESTS=$((FAILED_TESTS + 1))
else
echo -e "${YELLOW}[WARN]${NC} $message"
PASSED_TESTS=$((PASSED_TESTS + 1))
fi
}
# Function to check if relay is running
check_relay_running() {
# Send a simple REQ and check for EOSE response
local response=$(echo '["REQ","ping",{}]' | timeout 2 websocat -n1 "$RELAY_URL" 2>/dev/null)
if echo "$response" | grep -q "EOSE\|EVENT"; then
return 0
else
return 1
fi
}
# Function to create a subscription
create_subscription() {
local sub_id=$1
local filter=${2:-"{}"}
echo "[\"REQ\",\"$sub_id\",$filter]" | timeout 5 websocat -n1 "$RELAY_URL" 2>/dev/null || echo "TIMEOUT"
}
# Function to close a subscription
close_subscription() {
local sub_id=$1
echo "[\"CLOSE\",\"$sub_id\"]" | timeout 5 websocat -n1 "$RELAY_URL" 2>/dev/null || echo "TIMEOUT"
}
# Function to query subscription count from database
get_subscription_count() {
local db_file=$(find . -name "*.db" -type f 2>/dev/null | head -1)
if [ -z "$db_file" ]; then
echo "0"
return
fi
sqlite3 "$db_file" "SELECT COUNT(*) FROM subscriptions WHERE event_type='created' AND ended_at IS NULL;" 2>/dev/null || echo "0"
}
# Test 1: Basic Connectivity
print_test_header "1" "Basic Connectivity"
if check_relay_running; then
print_result "PASS" "Relay is running and accepting connections"
else
print_result "FAIL" "Cannot connect to relay at $RELAY_URL"
echo ""
echo -e "${RED}ERROR: Relay must be running for tests to proceed${NC}"
exit 1
fi
echo ""
# Test 2: Create Multiple Subscriptions
print_test_header "2" "Create Multiple Subscriptions"
echo "[INFO] Creating 5 test subscriptions..."
for i in {1..5}; do
response=$(create_subscription "cleanup_test_$i")
if echo "$response" | grep -q "EOSE"; then
echo "[INFO] Subscription cleanup_test_$i created successfully"
else
print_result "WARN" "Subscription cleanup_test_$i may not have been created: $response"
fi
done
# Give subscriptions time to be logged
sleep 2
# Check subscription count in database
active_subs=$(get_subscription_count)
echo "[INFO] Active subscriptions in database: $active_subs"
if [ "$active_subs" -ge 5 ]; then
print_result "PASS" "Multiple subscriptions created and logged ($active_subs active)"
else
print_result "WARN" "Expected at least 5 subscriptions, found $active_subs"
fi
echo ""
# Test 3: Simulate Orphaned Subscriptions (disconnect without CLOSE)
print_test_header "3" "Simulate Orphaned Subscriptions"
echo "[INFO] Creating subscriptions and disconnecting abruptly..."
# Create subscriptions in background and kill the connection
for i in {6..10}; do
(echo "[\"REQ\",\"orphan_test_$i\",{}]" | timeout 2 websocat "$RELAY_URL" &>/dev/null) &
pid=$!
sleep 0.5
kill -9 $pid 2>/dev/null || true
done
sleep 2
orphaned_subs=$(get_subscription_count)
echo "[INFO] Subscriptions after abrupt disconnects: $orphaned_subs"
if [ "$orphaned_subs" -gt "$active_subs" ]; then
print_result "PASS" "Orphaned subscriptions detected ($orphaned_subs total, was $active_subs)"
else
print_result "WARN" "No increase in orphaned subscriptions detected"
fi
echo ""
# Test 4: Startup Cleanup (requires relay restart)
print_test_header "4" "Startup Cleanup Feature"
echo "[INFO] This test requires relay restart to verify startup cleanup"
echo "[INFO] Current orphaned subscriptions: $orphaned_subs"
echo ""
echo -e "${YELLOW}[ACTION REQUIRED]${NC} Please restart the relay now with:"
echo " ./make_and_restart_relay.sh"
echo ""
echo -n "Press Enter after relay has restarted to continue..."
read
# Wait for relay to be ready
echo "[INFO] Waiting for relay to be ready..."
sleep 3
if ! check_relay_running; then
print_result "FAIL" "Relay not responding after restart"
exit 1
fi
# Check if orphaned subscriptions were cleaned up
cleaned_subs=$(get_subscription_count)
echo "[INFO] Active subscriptions after restart: $cleaned_subs"
if [ "$cleaned_subs" -eq 0 ]; then
print_result "PASS" "Startup cleanup removed all orphaned subscriptions"
elif [ "$cleaned_subs" -lt "$orphaned_subs" ]; then
print_result "PASS" "Startup cleanup reduced orphaned subscriptions (from $orphaned_subs to $cleaned_subs)"
else
print_result "FAIL" "Startup cleanup did not reduce orphaned subscriptions"
fi
echo ""
# Test 5: Connection Age Limit (requires configuration)
print_test_header "5" "Connection Age Limit Feature"
echo "[INFO] Testing connection age limit feature..."
echo "[INFO] Default max_connection_seconds is 86400 (24 hours)"
echo ""
echo -e "${YELLOW}[INFO]${NC} To test connection age limit with shorter timeout:"
echo " 1. Set max_connection_seconds to 60 (1 minute) via admin event"
echo " 2. Create a subscription and wait 61 seconds"
echo " 3. Connection should be automatically closed"
echo ""
echo "[INFO] For this test, we'll verify the feature is enabled in config"
# Create a test subscription to verify connection works
response=$(create_subscription "age_test_1")
if echo "$response" | grep -q "EOSE"; then
print_result "PASS" "Connection age limit feature is operational (subscription created)"
else
print_result "WARN" "Could not verify connection age limit feature"
fi
echo ""
# Test 6: Verify Client Reconnection
print_test_header "6" "Client Reconnection After Cleanup"
echo "[INFO] Testing that clients can reconnect after cleanup..."
# Create a subscription
response=$(create_subscription "reconnect_test_1")
if echo "$response" | grep -q "EOSE"; then
echo "[INFO] First connection successful"
# Close and reconnect
sleep 1
response=$(create_subscription "reconnect_test_2")
if echo "$response" | grep -q "EOSE"; then
print_result "PASS" "Client can reconnect and create new subscriptions"
else
print_result "FAIL" "Client reconnection failed"
fi
else
print_result "FAIL" "Initial connection failed"
fi
echo ""
# Test 7: Verify Disabled State (max_connection_seconds = 0)
print_test_header "7" "Verify Feature Can Be Disabled"
echo "[INFO] Connection age limit can be disabled by setting max_connection_seconds=0"
echo "[INFO] When disabled, connections remain open indefinitely"
echo "[INFO] This is the recommended setting for most relays"
# Create a long-lived subscription
response=$(create_subscription "disabled_test_1")
if echo "$response" | grep -q "EOSE"; then
print_result "PASS" "Subscriptions work normally when feature is disabled/default"
else
print_result "WARN" "Could not verify disabled state"
fi
echo ""
# Test 8: Database Integrity Check
print_test_header "8" "Database Integrity After Cleanup"
echo "[INFO] Checking database integrity..."
db_file=$(find . -name "*.db" -type f 2>/dev/null | head -1)
if [ -n "$db_file" ]; then
# Check if database is accessible
if sqlite3 "$db_file" "PRAGMA integrity_check;" 2>/dev/null | grep -q "ok"; then
print_result "PASS" "Database integrity check passed"
else
print_result "FAIL" "Database integrity check failed"
fi
# Check subscription table structure
if sqlite3 "$db_file" "SELECT COUNT(*) FROM subscriptions;" &>/dev/null; then
print_result "PASS" "Subscription table is accessible"
else
print_result "FAIL" "Subscription table is not accessible"
fi
else
print_result "WARN" "No database file found"
fi
echo ""
# Final Summary
echo -e "${BLUE}========================================${NC}"
echo -e "${BLUE}Test Summary${NC}"
echo -e "${BLUE}========================================${NC}"
echo "Total Tests: $TOTAL_TESTS"
echo -e "${GREEN}Passed: $PASSED_TESTS${NC}"
echo -e "${RED}Failed: $FAILED_TESTS${NC}"
echo ""
if [ $FAILED_TESTS -eq 0 ]; then
echo -e "${GREEN}All tests passed!${NC}"
exit 0
else
echo -e "${RED}Some tests failed. Please review the output above.${NC}"
exit 1
fi