Files
ginxsom/src/bud04.c
2025-11-11 17:02:14 -04:00

512 lines
18 KiB
C

/*
* BUD-04 Mirroring Support
* Handles PUT /mirror requests for remote blob downloading
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <curl/curl.h>
#include <sys/stat.h>
#include <time.h>
#include <unistd.h>
#include "ginxsom.h"
// HTTP download response structure.
// Populated by write_callback()/header_callback() while a transfer runs and
// returned to the caller by download_blob_from_url(); release it with
// free_mirror_download().
typedef struct {
unsigned char* data; // heap buffer holding the response body; grown with realloc() by write_callback()
size_t size; // number of body bytes currently stored in data
char content_type[128]; // Content-Type header value copied by header_callback(), NUL-terminated, truncated to fit
long http_code; // HTTP status code read from CURLINFO_RESPONSE_CODE after the transfer
size_t capacity; // allocated size of data in bytes
} mirror_download_t;
// CURL write callback for collecting response data
static size_t write_callback(void* contents, size_t size, size_t nmemb, mirror_download_t* response) {
size_t realsize = size * nmemb;
if (!response) return 0;
// Check if we need to expand buffer
if (response->size + realsize >= response->capacity) {
size_t new_capacity = response->capacity == 0 ? 8192 : response->capacity * 2;
while (new_capacity < response->size + realsize + 1) {
new_capacity *= 2;
}
unsigned char* new_data = realloc(response->data, new_capacity);
if (!new_data) {
return 0; // Out of memory
}
response->data = new_data;
response->capacity = new_capacity;
}
memcpy(response->data + response->size, contents, realsize);
response->size += realsize;
response->data[response->size] = '\0'; // Null terminate for safety
return realsize;
}
// CURL header callback for collecting Content-Type
static size_t header_callback(char* buffer, size_t size, size_t nitems, mirror_download_t* response) {
size_t realsize = size * nitems;
if (!response) return realsize;
// Look for Content-Type header (case-insensitive)
if (realsize > 14 && strncasecmp(buffer, "Content-Type:", 13) == 0) {
// Skip "Content-Type:" and whitespace
char* value = buffer + 13;
while (*value == ' ' || *value == '\t') value++;
// Find end of value (before \r\n)
char* end = value;
while (*end && *end != '\r' && *end != '\n') end++;
// Copy content type, limiting to buffer size
size_t copy_len = end - value;
if (copy_len >= sizeof(response->content_type)) {
copy_len = sizeof(response->content_type) - 1;
}
strncpy(response->content_type, value, copy_len);
response->content_type[copy_len] = '\0';
}
return realsize;
}
// Parse an IPv4 literal using the permissive inet_aton()-style grammar:
// one to four dot-separated parts, each decimal, 0-prefixed octal or
// 0x-prefixed hex, where the last part fills all remaining bytes
// (so "127.1", "2130706433" and "0x7f.0.0.1" all mean 127.0.0.1).
// Returns 1 and stores the address in host byte order on success, 0 if the
// text is not a numeric IPv4 address.
static int mirror_parse_ipv4(const char* host, size_t len, unsigned long* addr_out) {
    unsigned long long parts[4];
    int count = 0;
    size_t pos = 0;

    if (len == 0) return 0;

    for (;;) {
        unsigned int base = 10;
        unsigned long long value = 0;
        size_t digits = 0;

        if (count == 4) return 0; // more than four parts

        // A leading "0x" selects hex, a leading "0" followed by more digits octal.
        if (pos + 1 < len && host[pos] == '0' && host[pos + 1] != '.') {
            if (host[pos + 1] == 'x' || host[pos + 1] == 'X') {
                base = 16;
                pos += 2;
            } else {
                base = 8;
                pos += 1;
            }
        }

        while (pos < len && host[pos] != '.') {
            char c = host[pos];
            unsigned int digit;
            if (c >= '0' && c <= '9') {
                digit = (unsigned int)(c - '0');
            } else if (c >= 'a' && c <= 'f') {
                digit = (unsigned int)(c - 'a') + 10;
            } else if (c >= 'A' && c <= 'F') {
                digit = (unsigned int)(c - 'A') + 10;
            } else {
                return 0;
            }
            if (digit >= base) return 0;
            value = value * base + digit;
            if (value > 0xFFFFFFFFULL) return 0;
            digits++;
            pos++;
        }
        if (digits == 0) return 0; // empty part, e.g. "1..2" or a bare "0x"

        parts[count++] = value;
        if (pos >= len) break;
        pos++; // skip the '.'
    }

    // The last part covers every byte not claimed by the leading parts.
    unsigned long long addr = parts[count - 1];
    if (addr > (0xFFFFFFFFULL >> (8 * (count - 1)))) return 0;
    for (int i = 0; i < count - 1; i++) {
        if (parts[i] > 0xFF) return 0;
        addr |= parts[i] << (24 - 8 * i);
    }

    *addr_out = (unsigned long)addr;
    return 1;
}

// Returns 1 when a host-order IPv4 address lies in a range that must never be
// fetched on behalf of a client: 0.0.0.0/8, 10.0.0.0/8, 127.0.0.0/8,
// 169.254.0.0/16 (link-local), 172.16.0.0/12 and 192.168.0.0/16.
static int mirror_ipv4_is_blocked(unsigned long addr) {
    unsigned long top8 = (addr >> 24) & 0xFFUL;
    unsigned long top12 = (addr >> 20) & 0xFFFUL;
    unsigned long top16 = (addr >> 16) & 0xFFFFUL;

    return top8 == 0 || top8 == 10 || top8 == 127 ||
           top16 == 0xA9FE || top12 == 0xAC1 || top16 == 0xC0A8;
}

// Validate URL for security (prevent SSRF attacks).
//
// Accepts only non-empty https:// URLs of at most 2048 bytes whose host part
// passes the checks below. Returns 1 if the URL may be fetched, 0 otherwise.
//
// Rejected hosts:
//   - URLs carrying userinfo ("user@host"), which can disguise the real host
//   - bracketed IPv6 literals (all of them; no range analysis is attempted)
//   - names starting with "localhost" or ending in ".localhost"
//   - IPv4 literals, in any inet_aton()-style spelling, inside loopback,
//     private, link-local or "this network" ranges
//   - hosts whose last label starts with a digit but that do not parse as an
//     IPv4 literal (malformed numeric addresses)
//
// This is a syntactic check only: a DNS name that resolves to a private
// address is not detected here.
int validate_mirror_url(const char* url) {
    if (!url) {
        return 0; // Invalid URL
    }
    size_t url_len = strlen(url);
    if (url_len == 0 || url_len > 2048) {
        return 0; // Empty or too long
    }
    // Must start with https:// (security requirement)
    if (strncmp(url, "https://", 8) != 0) {
        return 0; // Only HTTPS allowed
    }

    // The authority runs up to the first path, query or fragment delimiter.
    const char* authority = url + 8;
    size_t authority_len = strcspn(authority, "/?#");
    if (authority_len == 0) {
        return 0; // No host at all
    }
    if (memchr(authority, '@', authority_len) != NULL) {
        return 0; // Userinfo is not allowed
    }
    if (authority[0] == '[') {
        return 0; // IPv6 literal
    }

    // Strip an optional ":port" and a single trailing root dot.
    const char* colon = memchr(authority, ':', authority_len);
    size_t host_len = colon ? (size_t)(colon - authority) : authority_len;
    if (host_len > 0 && authority[host_len - 1] == '.') {
        host_len--;
    }
    if (host_len == 0) {
        return 0;
    }

    // Loopback names. The prefix test is deliberately broad so that aliases
    // such as "localhost.localdomain" are rejected as well.
    if (host_len >= 9 && strncasecmp(authority, "localhost", 9) == 0) {
        return 0;
    }
    if (host_len > 10 && strncasecmp(authority + host_len - 10, ".localhost", 10) == 0) {
        return 0;
    }

    unsigned long addr = 0;
    if (mirror_parse_ipv4(authority, host_len, &addr)) {
        if (mirror_ipv4_is_blocked(addr)) {
            return 0; // Private network blocked
        }
    } else {
        // Not a well-formed IPv4 literal. If the last label still looks
        // numeric, the host is a malformed address rather than a DNS name.
        size_t label = host_len;
        while (label > 0 && authority[label - 1] != '.') label--;
        if (authority[label] >= '0' && authority[label] <= '9') {
            return 0;
        }
    }
    return 1; // URL appears valid
}
// Returns 1 if c is allowed in a media type or subtype name
// (letters, digits and the characters ! # $ & ^ _ . + -).
static int mirror_is_mime_name_char(unsigned char c) {
    if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) {
        return 1;
    }
    return c != '\0' && strchr("!#$&^_.+-", c) != NULL;
}

// Returns 1 if s[0..len) has the shape "type/subtype": exactly one slash,
// both sides non-empty, only media-type name characters.
static int mirror_is_valid_mime(const char* s, size_t len) {
    size_t slash = 0;
    int seen_slash = 0;

    for (size_t i = 0; i < len; i++) {
        unsigned char c = (unsigned char)s[i];
        if (c == '/') {
            if (seen_slash || i == 0) return 0;
            seen_slash = 1;
            slash = i;
        } else if (!mirror_is_mime_name_char(c)) {
            return 0;
        }
    }
    return seen_slash && slash + 1 < len;
}

// Detect/validate Content-Type.
//
// Chooses a MIME type for a downloaded blob, in priority order:
//   1. header_content_type, stripped of parameters (";...") and surrounding
//      blanks, but only if it is a syntactically valid "type/subtype". The
//      header comes from the remote server, so anything else (quotes, control
//      characters, empty value) is ignored instead of being passed on.
//   2. the file extension of the URL path (query string and fragment are
//      ignored, the match is case-insensitive and must be exact).
//   3. well-known magic bytes at the start of data.
//   4. "application/octet-stream".
//
// url, header_content_type and data may each be NULL.
//
// The returned pointer is either a string literal or a function-local static
// buffer that is overwritten by the next call; the caller must not free it,
// and the function is not reentrant.
const char* determine_blob_content_type(const char* url, const char* header_content_type,
                                        const unsigned char* data, size_t size) {
    static const struct {
        const char* ext;
        const char* mime;
    } ext_map[] = {
        {"png", "image/png"},   {"jpg", "image/jpeg"},      {"jpeg", "image/jpeg"},
        {"gif", "image/gif"},   {"webp", "image/webp"},     {"pdf", "application/pdf"},
        {"mp4", "video/mp4"},   {"mp3", "audio/mpeg"},      {"txt", "text/plain"},
    };
    static const struct {
        const char* magic;
        size_t len;
        const char* mime;
    } magic_map[] = {
        {"\x89PNG\r\n\x1a\n", 8, "image/png"},
        {"\xff\xd8\xff", 3, "image/jpeg"},
        {"GIF87a", 6, "image/gif"},
        {"GIF89a", 6, "image/gif"},
        {"%PDF-", 5, "application/pdf"},
    };

    // Priority 1: Use Content-Type header if present and valid
    if (header_content_type && header_content_type[0] != '\0') {
        static char clean_type[128];
        const char* start = header_content_type;
        while (*start == ' ' || *start == '\t') start++;

        // Keep only the main MIME type (before any semicolon), without
        // trailing blanks.
        size_t len = strcspn(start, ";");
        while (len > 0 && (start[len - 1] == ' ' || start[len - 1] == '\t')) len--;

        if (len < sizeof(clean_type) && mirror_is_valid_mime(start, len)) {
            memcpy(clean_type, start, len);
            clean_type[len] = '\0';
            return clean_type;
        }
    }

    // Priority 2: Detect from URL extension
    if (url) {
        // Ignore the query string and fragment, then look for the last '.'
        // inside the final path segment only.
        size_t path_len = strcspn(url, "?#");
        size_t i = path_len;
        const char* ext = NULL;
        while (i > 0 && url[i - 1] != '/') {
            if (url[i - 1] == '.') {
                ext = url + i;
                break;
            }
            i--;
        }
        if (ext) {
            size_t ext_len = path_len - i;
            for (size_t k = 0; k < sizeof(ext_map) / sizeof(ext_map[0]); k++) {
                // Lengths must agree so that a prefix such as "p" does not
                // match "png".
                if (strlen(ext_map[k].ext) == ext_len &&
                    strncasecmp(ext, ext_map[k].ext, ext_len) == 0) {
                    return ext_map[k].mime;
                }
            }
        }
    }

    // Priority 3: Basic content detection from data
    if (data) {
        for (size_t k = 0; k < sizeof(magic_map) / sizeof(magic_map[0]); k++) {
            if (size >= magic_map[k].len &&
                memcmp(data, magic_map[k].magic, magic_map[k].len) == 0) {
                return magic_map[k].mime;
            }
        }
    }

    // Default fallback
    return "application/octet-stream";
}
// Download blob from remote URL
mirror_download_t* download_blob_from_url(const char* url, size_t max_size) {
if (!url || !validate_mirror_url(url)) {
return NULL;
}
CURL* curl = curl_easy_init();
if (!curl) {
return NULL;
}
mirror_download_t* download = calloc(1, sizeof(mirror_download_t));
if (!download) {
curl_easy_cleanup(curl);
return NULL;
}
// Initialize download structure
download->data = malloc(8192);
if (!download->data) {
free(download);
curl_easy_cleanup(curl);
return NULL;
}
download->capacity = 8192;
download->size = 0;
download->content_type[0] = '\0';
// Configure CURL
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, download);
curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback);
curl_easy_setopt(curl, CURLOPT_HEADERDATA, download);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5L);
curl_easy_setopt(curl, CURLOPT_TIMEOUT, 30L);
curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10L);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "Ginxsom-Blossom/1.0");
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 1L);
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 2L);
// Set maximum file size
curl_easy_setopt(curl, CURLOPT_MAXFILESIZE, (long)max_size);
// Perform the request
CURLcode res = curl_easy_perform(curl);
// Get HTTP response code
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &download->http_code);
curl_easy_cleanup(curl);
// Check for errors
if (res != CURLE_OK || download->http_code != 200) {
free(download->data);
free(download);
return NULL;
}
return download;
}
// Release a download result: frees the body buffer and then the structure
// itself. Passing NULL is allowed and does nothing.
void free_mirror_download(mirror_download_t* download) {
    if (download == NULL) {
        return;
    }
    free(download->data); // free(NULL) is a no-op, so no separate check is needed
    free(download);
}
// Extract the "url" string from a JSON request body.
//
//   json_body       - NUL-terminated JSON text
//   url_buffer      - destination for the URL, written only on success
//   url_buffer_size - capacity of url_buffer in bytes, terminator included
//
// Returns 1 when the body parses as JSON, contains a string member "url" and
// that string (plus terminator) fits into url_buffer; returns 0 otherwise.
int parse_mirror_request_body(const char* json_body, char* url_buffer, size_t url_buffer_size) {
    int ok = 0;

    if (json_body == NULL || url_buffer == NULL || url_buffer_size == 0) {
        return 0;
    }

    cJSON* root = cJSON_Parse(json_body);
    if (root == NULL) {
        return 0; // Invalid JSON
    }

    cJSON* field = cJSON_GetObjectItem(root, "url");
    if (field != NULL && cJSON_IsString(field)) {
        const char* value = cJSON_GetStringValue(field);
        if (value != NULL) {
            size_t value_len = strlen(value);
            if (value_len < url_buffer_size) {
                // Copy the string together with its terminator.
                memcpy(url_buffer, value, value_len + 1);
                ok = 1;
            }
        }
    }

    // Single exit: the parsed tree is released on every path past the parse.
    cJSON_Delete(root);
    return ok;
}
// Handle PUT /mirror requests (BUD-04).
//
// Reads the request from the CGI-style environment (CONTENT_TYPE,
// CONTENT_LENGTH, HTTP_AUTHORIZATION) and stdin, then:
//   1. validates the headers and reads a JSON body of at most 4 KB,
//   2. extracts and validates the "url" member,
//   3. downloads the blob (100 MB limit) and computes its SHA-256,
//   4. writes it to g_storage_dir as "<sha256><extension>",
//   5. records the metadata via insert_blob_metadata(),
//   6. prints a JSON blob descriptor on stdout.
// Every failure path sends an error response, logs the request and returns;
// resources acquired so far are released first.
void handle_mirror_request(void) {
    // Log the incoming request
    log_request("PUT", "/mirror", "pending", 0);

    // Get HTTP headers
    const char* content_type = getenv("CONTENT_TYPE");
    const char* content_length_str = getenv("CONTENT_LENGTH");

    // Validate Content-Type
    if (!content_type || strstr(content_type, "application/json") == NULL) {
        send_error_response(400, "invalid_content_type",
                            "Content-Type must be application/json",
                            "The mirror endpoint requires JSON request body");
        log_request("PUT", "/mirror", "none", 400);
        return;
    }

    // Validate Content-Length
    if (!content_length_str) {
        send_error_response(400, "missing_header",
                            "Content-Length header required",
                            "The Content-Length header must be specified");
        log_request("PUT", "/mirror", "none", 400);
        return;
    }

    // strtol with an end-pointer check rejects values with trailing garbage
    // ("12abc"), which atol would silently accept. Out-of-range input
    // saturates and is caught by the bounds test.
    char* length_end = NULL;
    long content_length = strtol(content_length_str, &length_end, 10);
    if (length_end == content_length_str || *length_end != '\0' ||
        content_length <= 0 || content_length > 4096) { // 4KB max for JSON
        send_error_response(400, "invalid_content_length",
                            "Invalid content length",
                            "JSON request body must be between 1 byte and 4KB");
        log_request("PUT", "/mirror", "none", 400);
        return;
    }

    // Read JSON request body
    char* json_body = malloc((size_t)content_length + 1);
    if (!json_body) {
        send_error_response(500, "memory_error",
                            "Failed to allocate memory",
                            "Internal server error");
        log_request("PUT", "/mirror", "none", 500);
        return;
    }
    size_t bytes_read = fread(json_body, 1, (size_t)content_length, stdin);
    if (bytes_read != (size_t)content_length) {
        free(json_body);
        send_error_response(400, "incomplete_body",
                            "Failed to read complete request body",
                            "The request body was incomplete");
        log_request("PUT", "/mirror", "none", 400);
        return;
    }
    json_body[content_length] = '\0';

    // Parse JSON to extract URL
    char url[2048];
    if (!parse_mirror_request_body(json_body, url, sizeof(url))) {
        free(json_body);
        send_error_response(400, "invalid_json",
                            "Invalid JSON or missing URL field",
                            "Request body must be valid JSON with 'url' field");
        log_request("PUT", "/mirror", "none", 400);
        return;
    }
    free(json_body);

    // Validate URL
    if (!validate_mirror_url(url)) {
        send_error_response(400, "invalid_url",
                            "Invalid or prohibited URL",
                            "URL must be HTTPS and not point to private networks");
        log_request("PUT", "/mirror", "none", 400);
        return;
    }

    // Check for authorization
    const char* auth_header = getenv("HTTP_AUTHORIZATION");
    // NOTE(review): expected_hash is never assigned, so the hash comparison
    // further down cannot trigger until the authorization integration sets it.
    const char* expected_hash = NULL;
    const char* uploader_pubkey = NULL;
    if (auth_header) {
        // NOTE: Authorization validation now handled by centralized validation system in main.c
        // This handler receives pre-validated requests, so if we reach here with auth_header,
        // the authentication was already successful
        // TODO: Extract uploader pubkey from centralized validation results
        // For now, set a placeholder until integration is complete
        uploader_pubkey = "authenticated_user";
    }

    // Download the blob
    mirror_download_t* download = download_blob_from_url(url, 100 * 1024 * 1024); // 100MB limit
    if (!download) {
        send_error_response(400, "download_failed",
                            "Failed to download blob from URL",
                            "Could not fetch the specified URL or file too large");
        log_request("PUT", "/mirror", uploader_pubkey ? "authenticated" : "anonymous", 400);
        return;
    }

    // Calculate hash of downloaded content
    unsigned char hash[32];
    if (nostr_sha256(download->data, download->size, hash) != NOSTR_SUCCESS) {
        free_mirror_download(download);
        send_error_response(500, "hash_error",
                            "Failed to calculate hash",
                            "Internal server error during hash calculation");
        log_request("PUT", "/mirror", uploader_pubkey ? "authenticated" : "anonymous", 500);
        return;
    }

    // Convert hash to hex string
    char sha256_hex[65];
    nostr_bytes_to_hex(hash, 32, sha256_hex);

    // If authorization provided, verify hash matches
    if (expected_hash && strcmp(sha256_hex, expected_hash) != 0) {
        free_mirror_download(download);
        send_error_response(400, "hash_mismatch",
                            "Downloaded content hash does not match authorization",
                            "The file hash does not match the expected hash in the authorization event");
        log_request("PUT", "/mirror", "auth_mismatch", 400);
        return;
    }

    // Determine content type
    const char* content_type_final = determine_blob_content_type(url, download->content_type,
                                                                 download->data, download->size);
    // Determine file extension from Content-Type using centralized mapping
    const char* extension = mime_to_extension(content_type_final);

    // Save file to storage directory using global g_storage_dir variable
    char filepath[4096];
    int filepath_len = snprintf(filepath, sizeof(filepath), "%s/%s%s", g_storage_dir, sha256_hex, extension);
    // A negative result is an encoding error; a result >= the buffer size
    // means the path was truncated.
    if (filepath_len < 0 || filepath_len >= (int)sizeof(filepath)) {
        free_mirror_download(download);
        send_error_response(500, "file_error",
                            "File path too long",
                            "Internal server error during file path construction");
        log_request("PUT", "/mirror", uploader_pubkey ? "authenticated" : "anonymous", 500);
        return;
    }
    FILE* outfile = fopen(filepath, "wb");
    if (!outfile) {
        free_mirror_download(download);
        send_error_response(500, "file_error",
                            "Failed to create file",
                            "Internal server error during file creation");
        log_request("PUT", "/mirror", uploader_pubkey ? "authenticated" : "anonymous", 500);
        return;
    }
    size_t bytes_written = fwrite(download->data, 1, download->size, outfile);
    // fclose flushes buffered data, so a failure here means the file on disk
    // may be incomplete even though fwrite accepted every byte.
    int close_failed = (fclose(outfile) != 0);
    if (bytes_written != download->size || close_failed) {
        unlink(filepath); // Clean up partial file
        free_mirror_download(download);
        send_error_response(500, "write_error",
                            "Failed to write complete file",
                            "Internal server error during file write");
        log_request("PUT", "/mirror", uploader_pubkey ? "authenticated" : "anonymous", 500);
        return;
    }

    // Set file permissions
    chmod(filepath, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);

    // Store metadata in database
    time_t uploaded_time = time(NULL);
    if (!insert_blob_metadata(sha256_hex, download->size, content_type_final,
                              uploaded_time, uploader_pubkey, NULL)) {
        // NOTE(review): if the insert can fail because this hash is already
        // stored, this unlink removes a file that an existing record still
        // refers to; confirm insert_blob_metadata() semantics.
        unlink(filepath); // Clean up file
        free_mirror_download(download);
        send_error_response(500, "database_error",
                            "Failed to store blob metadata",
                            "Internal server error during database operation");
        log_request("PUT", "/mirror", uploader_pubkey ? "authenticated" : "anonymous", 500);
        return;
    }

    // Get origin from config
    char origin[256];
    nip94_get_origin(origin, sizeof(origin));

    // Build canonical blob URL
    char blob_url[512];
    nip94_build_blob_url(origin, sha256_hex, content_type_final, blob_url, sizeof(blob_url));

    // Get dimensions for NIP-94 metadata
    int width = 0, height = 0;
    nip94_get_dimensions(download->data, download->size, content_type_final, &width, &height);

    // Return success response with blob descriptor
    printf("Status: 200 OK\r\n");
    printf("Content-Type: application/json\r\n\r\n");
    printf("{\n");
    printf(" \"sha256\": \"%s\",\n", sha256_hex);
    printf(" \"size\": %zu,\n", download->size);
    printf(" \"type\": \"%s\",\n", content_type_final);
    // time_t is not guaranteed to be long; cast so the argument matches %ld.
    printf(" \"uploaded\": %ld,\n", (long)uploaded_time);
    printf(" \"url\": \"%s\"", blob_url);

    // Add NIP-94 metadata if enabled
    if (nip94_is_enabled()) {
        printf(",\n");
        nip94_emit_field(blob_url, content_type_final, sha256_hex, download->size, width, height);
    }
    printf("\n}\n");

    free_mirror_download(download);
    log_request("PUT", "/mirror", uploader_pubkey ? "authenticated" : "anonymous", 200);
}