diff --git a/crates/common/src/http_util.rs b/crates/common/src/http_util.rs
index 132d6bd..c830aab 100644
--- a/crates/common/src/http_util.rs
+++ b/crates/common/src/http_util.rs
@@ -6,6 +6,177 @@ use sha2::{Digest, Sha256};
 
 use crate::settings::Settings;
 
+/// Effective host and scheme of an incoming request, used for host rewriting.
+///
+/// Accounts for proxy headers such as `Forwarded`, `X-Forwarded-Host` and
+/// `X-Forwarded-Proto`. `host` is empty when no usable host header is present.
+///
+/// # Security
+///
+/// Except for TLS detection, every signal read here is a request header that a client
+/// can set unless an upstream hop strips or overwrites it. Do not base access-control
+/// decisions on these values.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct RequestInfo {
+    /// The effective host for URL rewriting (from `Forwarded`, `X-Forwarded-Host`, or `Host`).
+    pub host: String,
+    /// The effective scheme, always `"https"` or `"http"` (from TLS detection, `Forwarded`,
+    /// `X-Forwarded-Proto`, `Fastly-SSL`, or the `http` default).
+    pub scheme: String,
+}
+
+impl RequestInfo {
+    /// Extract request info from a Fastly request.
+    ///
+    /// Host priority:
+    /// 1. `Forwarded` header (RFC 7239, `host=...`)
+    /// 2. `X-Forwarded-Host` header (for chained proxy setups)
+    /// 3. `Host` header
+    ///
+    /// Scheme priority:
+    /// 1. Fastly SDK TLS detection (most reliable)
+    /// 2. `Forwarded` header (RFC 7239, `proto=https`)
+    /// 3. `X-Forwarded-Proto` header
+    /// 4. `Fastly-SSL` header
+    /// 5. Default to `http`
+    pub fn from_request(req: &Request) -> Self {
+        Self {
+            host: extract_request_host(req),
+            scheme: detect_request_scheme(req),
+        }
+    }
+}
+
+/// Resolves the effective request host (see [`RequestInfo::from_request`]).
+///
+/// Hosts taken from `Forwarded` / `X-Forwarded-Host` are skipped when they contain
+/// characters that cannot appear in a `host[:port]` value, because the result is
+/// spliced into rewritten URLs. Returns an empty string when no header is usable.
+fn extract_request_host(req: &Request) -> String {
+    req.get_header("forwarded")
+        .and_then(|h| h.to_str().ok())
+        .and_then(|value| parse_forwarded_param(value, "host"))
+        .filter(|host| is_plausible_host(host))
+        .or_else(|| {
+            req.get_header("x-forwarded-host")
+                .and_then(|h| h.to_str().ok())
+                .and_then(parse_list_header_value)
+                .filter(|host| is_plausible_host(host))
+        })
+        .or_else(|| req.get_header(header::HOST).and_then(|h| h.to_str().ok()))
+        .unwrap_or_default()
+        .to_string()
+}
+
+/// Returns `true` when `host` consists only of characters valid in `host[:port]`:
+/// ASCII alphanumerics, `.`, `-`, `_`, `:` and the `[` `]` of IPv6 literals.
+fn is_plausible_host(host: &str) -> bool {
+    !host.is_empty()
+        && host.bytes().all(|b| {
+            b.is_ascii_alphanumeric() || matches!(b, b'.' | b'-' | b'_' | b':' | b'[' | b']')
+        })
+}
+
+/// Returns the first non-empty value of `param` in an RFC 7239 `Forwarded` header.
+///
+/// Elements are separated by `,` and their `key=value` pairs by `;`. Keys are matched
+/// ASCII case-insensitively and surrounding double quotes are removed from the value.
+fn parse_forwarded_param<'a>(forwarded: &'a str, param: &str) -> Option<&'a str> {
+    forwarded
+        .split(',')
+        .flat_map(|element| element.split(';'))
+        .filter_map(|pair| pair.split_once('='))
+        .map(|(key, value)| (key.trim(), strip_quotes(value)))
+        .find(|(key, value)| {
+            !key.is_empty() && !value.is_empty() && key.eq_ignore_ascii_case(param)
+        })
+        .map(|(_, value)| value)
+}
+
+/// Returns the first non-blank entry of a comma-separated header value, unquoted.
+///
+/// Yields `None` when every entry is blank or the first non-blank entry is `""`.
+fn parse_list_header_value(value: &str) -> Option<&str> {
+    value
+        .split(',')
+        .map(str::trim)
+        .find(|part| !part.is_empty())
+        .map(strip_quotes)
+        .filter(|part| !part.is_empty())
+}
+
+/// Trims whitespace and removes one pair of surrounding double quotes, if present.
+fn strip_quotes(value: &str) -> &str {
+    let trimmed = value.trim();
+    trimmed
+        .strip_prefix('"')
+        .and_then(|rest| rest.strip_suffix('"'))
+        .unwrap_or(trimmed)
+}
+
+/// Lowercases `value` and returns it only when it is `http` or `https`.
+fn normalize_scheme(value: &str) -> Option<String> {
+    let scheme = value.trim().to_ascii_lowercase();
+    matches!(scheme.as_str(), "http" | "https").then_some(scheme)
+}
+
+/// Detects the request scheme (HTTP or HTTPS) using Fastly SDK methods and headers.
+///
+/// Tries multiple methods in order of reliability:
+/// 1. Fastly SDK TLS detection methods (most reliable)
+/// 2. Forwarded header (RFC 7239)
+/// 3. X-Forwarded-Proto header
+/// 4. Fastly-SSL header (least reliable, can be spoofed)
+/// 5. Default to HTTP
+///
+/// Always returns either `"https"` or `"http"`; header values naming any other scheme
+/// are ignored.
+fn detect_request_scheme(req: &Request) -> String {
+    // 1. First try Fastly SDK's built-in TLS detection methods
+    if let Some(tls_protocol) = req.get_tls_protocol() {
+        log::debug!("TLS protocol detected: {}", tls_protocol);
+        return "https".to_string();
+    }
+
+    // Also check TLS cipher - if present, connection is HTTPS
+    if req.get_tls_cipher_openssl_name().is_some() {
+        log::debug!("TLS cipher detected, using HTTPS");
+        return "https".to_string();
+    }
+
+    // 2. Try the Forwarded header (RFC 7239)
+    let forwarded_proto = req
+        .get_header("forwarded")
+        .and_then(|h| h.to_str().ok())
+        .and_then(|value| parse_forwarded_param(value, "proto"))
+        .and_then(normalize_scheme);
+    if let Some(scheme) = forwarded_proto {
+        return scheme;
+    }
+
+    // 3. Try X-Forwarded-Proto header (first list entry)
+    let x_forwarded_proto = req
+        .get_header("x-forwarded-proto")
+        .and_then(|h| h.to_str().ok())
+        .and_then(parse_list_header_value)
+        .and_then(normalize_scheme);
+    if let Some(scheme) = x_forwarded_proto {
+        return scheme;
+    }
+
+    // 4. Check Fastly-SSL header (can be spoofed by clients, use as last resort)
+    let fastly_ssl = req
+        .get_header("fastly-ssl")
+        .and_then(|h| h.to_str().ok())
+        .is_some_and(|value| value == "1" || value.eq_ignore_ascii_case("true"));
+    if fastly_ssl {
+        return "https".to_string();
+    }
+
+    // Default to HTTP
+    "http".to_string()
+}
+
 /// Build a static text response with strong ETag and standard caching headers.
 /// Handles If-None-Match to return 304 when appropriate.
pub fn serve_static_with_etag(body: &str, req: &Request, content_type: &str) -> Response {
@@ -166,4 +317,118 @@ mod tests {
             &t1
         ));
     }
+
+    // RequestInfo tests
+
+    #[test]
+    fn test_request_info_from_host_header() {
+        let mut req = Request::new(fastly::http::Method::GET, "https://test.example.com/page");
+        req.set_header("host", "test.example.com");
+
+        let info = RequestInfo::from_request(&req);
+        assert_eq!(
+            info.host, "test.example.com",
+            "Host should use Host header when forwarded headers are missing"
+        );
+        // The URL's scheme is not consulted; no TLS or forwarded headers means http.
+        assert_eq!(
+            info.scheme, "http",
+            "Scheme should default to http without TLS or forwarded headers"
+        );
+    }
+
+    #[test]
+    fn test_request_info_x_forwarded_host_precedence() {
+        let mut req = Request::new(fastly::http::Method::GET, "https://test.example.com/page");
+        req.set_header("host", "internal-proxy.local");
+        req.set_header("x-forwarded-host", "public.example.com, proxy.local");
+
+        let info = RequestInfo::from_request(&req);
+        assert_eq!(
+            info.host, "public.example.com",
+            "Host should prefer X-Forwarded-Host over Host"
+        );
+    }
+
+    #[test]
+    fn test_request_info_scheme_from_x_forwarded_proto() {
+        let mut req = Request::new(fastly::http::Method::GET, "https://test.example.com/page");
+        req.set_header("host", "test.example.com");
+        req.set_header("x-forwarded-proto", "https, http");
+
+        let info = RequestInfo::from_request(&req);
+        assert_eq!(
+            info.scheme, "https",
+            "Scheme should prefer the first X-Forwarded-Proto value"
+        );
+
+        // Test HTTP
+        let mut req = Request::new(fastly::http::Method::GET, "http://test.example.com/page");
+        req.set_header("host", "test.example.com");
+        req.set_header("x-forwarded-proto", "http");
+
+        let info = RequestInfo::from_request(&req);
+        assert_eq!(
+            info.scheme, "http",
+            "Scheme should use the X-Forwarded-Proto value when present"
+        );
+    }
+
+    #[test]
+    fn test_request_info_forwarded_header_precedence() {
+        // Forwarded takes precedence over both X-Forwarded-Host and X-Forwarded-Proto
+        let mut req = Request::new(fastly::http::Method::GET, "https://test.example.com/page");
+        req.set_header(
+            "forwarded",
+            "for=192.0.2.60;proto=\"HTTPS\";host=\"public.example.com:443\"",
+        );
+        req.set_header("host", "internal-proxy.local");
+        req.set_header("x-forwarded-host", "proxy.local");
+        req.set_header("x-forwarded-proto", "http");
+
+        let info = RequestInfo::from_request(&req);
+        assert_eq!(
+            info.host, "public.example.com:443",
+            "Host should prefer Forwarded host over X-Forwarded-Host"
+        );
+        assert_eq!(
+            info.scheme, "https",
+            "Scheme should prefer Forwarded proto over X-Forwarded-Proto"
+        );
+    }
+
+    #[test]
+    fn test_request_info_scheme_from_fastly_ssl() {
+        let mut req = Request::new(fastly::http::Method::GET, "https://test.example.com/page");
+        req.set_header("fastly-ssl", "1");
+
+        let info = RequestInfo::from_request(&req);
+        assert_eq!(
+            info.scheme, "https",
+            "Scheme should fall back to Fastly-SSL when other signals are missing"
+        );
+    }
+
+    #[test]
+    fn test_request_info_chained_proxy_scenario() {
+        // Simulate: Client (HTTPS) -> Proxy A -> Trusted Server (HTTP internally)
+        // Proxy A sets X-Forwarded-Host and X-Forwarded-Proto
+        let mut req = Request::new(
+            fastly::http::Method::GET,
+            "http://trusted-server.internal/page",
+        );
+        req.set_header("host", "trusted-server.internal");
+        req.set_header("x-forwarded-host", "public.example.com");
+        req.set_header("x-forwarded-proto", "https");
+
+        let info = RequestInfo::from_request(&req);
+        assert_eq!(
+            info.host, "public.example.com",
+            "Host should use X-Forwarded-Host in chained proxy scenarios"
+        );
+        assert_eq!(
+            info.scheme, "https",
+            "Scheme should use X-Forwarded-Proto in chained proxy scenarios"
+        );
+    }
 }
diff --git a/crates/common/src/integrations/prebid.rs b/crates/common/src/integrations/prebid.rs
index 51b1b9f..822990c 100644
--- a/crates/common/src/integrations/prebid.rs
+++ b/crates/common/src/integrations/prebid.rs
@@ -16,6 +16,7 @@ use 
crate::constants::{HEADER_SYNTHETIC_FRESH, HEADER_SYNTHETIC_TRUSTED_SERVER};
 use crate::creative;
 use crate::error::TrustedServerError;
 use crate::geo::GeoInfo;
+use crate::http_util::RequestInfo;
 use crate::integrations::{
     AttributeRewriteAction, IntegrationAttributeContext, IntegrationAttributeRewriter,
     IntegrationEndpoint, IntegrationProxy, IntegrationRegistration,
@@ -41,12 +42,16 @@ pub struct PrebidIntegrationConfig {
         deserialize_with = "crate::settings::vec_from_seq_or_map"
     )]
     pub bidders: Vec<String>,
-    #[serde(default = "default_auto_configure")]
-    pub auto_configure: bool,
     #[serde(default)]
     pub debug: bool,
-    #[serde(default)]
-    pub script_handler: Option<String>,
+    /// Patterns to match Prebid script URLs for serving empty JS.
+    /// Supports suffix matching (e.g., "/prebid.min.js" matches any path ending with that)
+    /// and wildcard patterns (e.g., "/static/prebid/*" matches Prebid scripts under that prefix).
+    #[serde(
+        default = "default_script_patterns",
+        deserialize_with = "crate::settings::vec_from_seq_or_map"
+    )]
+    pub script_patterns: Vec<String>,
 }
 
 impl IntegrationConfig for PrebidIntegrationConfig {
@@ -63,12 +68,30 @@ fn default_bidders() -> Vec<String> {
     vec!["mocktioneer".to_string()]
 }
 
-fn default_auto_configure() -> bool {
+fn default_enabled() -> bool {
     true
 }
 
-fn default_enabled() -> bool {
-    true
+/// File-name suffixes that identify a Prebid script.
+///
+/// They double as the default `script_patterns` and restrict what a wildcard
+/// pattern may match.
+const PREBID_SCRIPT_SUFFIXES: &[&str] = &[
+    "/prebid.js",
+    "/prebid.min.js",
+    "/prebidjs.js",
+    "/prebidjs.min.js",
+];
+
+/// Default `script_patterns`: one suffix pattern per known Prebid script name.
+///
+/// Non-wildcard patterns are matched as path suffixes, so `/prebid.min.js` also
+/// matches `/static/prebid/v8.53.0/prebid.min.js`.
+fn default_script_patterns() -> Vec<String> {
+    PREBID_SCRIPT_SUFFIXES
+        .iter()
+        .map(|suffix| (*suffix).to_string())
+        .collect()
 }
 
 #[derive(Debug, Deserialize)]
@@ -118,6 +141,73 @@ impl PrebidIntegration {
         Arc::new(Self { config })
     }
 
+    /// Returns `true` when a `src`/`href` attribute value points at a Prebid script.
+    ///
+    /// The query string and fragment are ignored. The value is tried as-is, then as a
+    /// relative path with a leading `/` added, and finally as an absolute or
+    /// protocol-relative URL whose path component is matched.
+    fn matches_script_url(&self, attr_value: &str) -> bool {
+        let trimmed = attr_value.trim();
+        let without_query = trimmed.split(['?', '#']).next().unwrap_or(trimmed);
+
+        if self.matches_script_pattern(without_query) {
+            return true;
+        }
+
+        // Relative reference without a leading slash, e.g. "static/prebid/prebid.js".
+        // `starts_with('/')` also excludes protocol-relative ("//host/...") values.
+        if !without_query.starts_with('/') && !without_query.contains("://") {
+            let with_slash = format!("/{without_query}");
+            if self.matches_script_pattern(&with_slash) {
+                return true;
+            }
+        }
+
+        let parsed = if without_query.starts_with("//") {
+            Url::parse(&format!("https:{without_query}"))
+        } else {
+            Url::parse(without_query)
+        };
+
+        parsed
+            .ok()
+            .is_some_and(|url| self.matches_script_pattern(url.path()))
+    }
+
+    /// Returns `true` when `path` matches one of the configured `script_patterns`.
+    ///
+    /// Matching is ASCII case-insensitive:
+    /// - wildcard patterns (`/prefix/*` or matchit-style `/prefix/{*rest}`) match paths
+    ///   that start with the prefix and end with one of [`PREBID_SCRIPT_SUFFIXES`];
+    /// - any other pattern matches paths that end with it (suffix match).
+    ///
+    /// Empty patterns are ignored: as a suffix, `""` would match every path and cause
+    /// every `src`/`href` element to be removed.
+    fn matches_script_pattern(&self, path: &str) -> bool {
+        let path_lower = path.to_ascii_lowercase();
+        let has_prebid_suffix = PREBID_SCRIPT_SUFFIXES
+            .iter()
+            .any(|suffix| path_lower.ends_with(suffix));
+
+        self.config.script_patterns.iter().any(|pattern| {
+            let pattern_lower = pattern.to_ascii_lowercase();
+            if pattern_lower.is_empty() {
+                return false;
+            }
+
+            // Everything before the wildcard marker is the literal prefix.
+            let wildcard_prefix = match pattern_lower.strip_suffix('*') {
+                Some(prefix) if prefix.ends_with('/') => Some(prefix),
+                _ => pattern_lower.split_once("{*").map(|(prefix, _)| prefix),
+            };
+
+            match wildcard_prefix {
+                Some(prefix) => has_prebid_suffix && path_lower.starts_with(prefix),
+                None => path_lower.ends_with(&pattern_lower),
+            }
+        })
+    }
+
     fn error(message: impl Into<String>) -> TrustedServerError {
         TrustedServerError::Integration {
             integration: PREBID_INTEGRATION_ID.to_string(),
@@ -274,10 +364,11 @@ impl IntegrationProxy for PrebidIntegration {
IntegrationEndpoint::post(ROUTE_THIRD_PARTY_AD), ]; - if let Some(script_path) = &self.config.script_handler { - // We need to leak the string to get a 'static str for IntegrationEndpoint - // This is safe because the config lives for the lifetime of the application - let static_path: &'static str = Box::leak(script_path.clone().into_boxed_str()); + // Register routes for script removal patterns + // Patterns can be exact paths (e.g., "/prebid.min.js") or use matchit wildcards + // (e.g., "/static/prebid/{*rest}") + for pattern in &self.config.script_patterns { + let static_path: &'static str = Box::leak(pattern.clone().into_boxed_str()); routes.push(IntegrationEndpoint::get(static_path)); } @@ -293,15 +382,14 @@ impl IntegrationProxy for PrebidIntegration { let method = req.get_method().clone(); match method { - Method::GET if self.config.script_handler.as_ref() == Some(&path) => { - self.handle_script_handler() - } Method::GET if path == ROUTE_FIRST_PARTY_AD => { self.handle_first_party_ad(settings, req).await } Method::POST if path == ROUTE_THIRD_PARTY_AD => { self.handle_third_party_ad(settings, req).await } + // Serve empty JS for matching script patterns + Method::GET if self.matches_script_pattern(&path) => self.handle_script_handler(), _ => Err(Report::new(Self::error(format!( "Unsupported Prebid route: {path}" )))), @@ -315,7 +403,7 @@ impl IntegrationAttributeRewriter for PrebidIntegration { } fn handles_attribute(&self, attribute: &str) -> bool { - self.config.auto_configure && matches!(attribute, "src" | "href") + matches!(attribute, "src" | "href") } fn rewrite( @@ -324,7 +412,7 @@ impl IntegrationAttributeRewriter for PrebidIntegration { attr_value: &str, _ctx: &IntegrationAttributeContext<'_>, ) -> AttributeRewriteAction { - if self.config.auto_configure && is_prebid_script_url(attr_value) { + if self.matches_script_url(attr_value) { AttributeRewriteAction::remove_element() } else { AttributeRewriteAction::keep() @@ -388,16 +476,6 @@ fn 
build_openrtb_from_ts( } } -fn is_prebid_script_url(url: &str) -> bool { - let lower = url.to_ascii_lowercase(); - let without_query = lower.split('?').next().unwrap_or(""); - let filename = without_query.rsplit('/').next().unwrap_or(""); - matches!( - filename, - "prebid.js" | "prebid.min.js" | "prebidjs.js" | "prebidjs.min.js" - ) -} - async fn pbs_auction_for_get( settings: &Settings, req: Request, @@ -485,9 +563,12 @@ async fn handle_prebid_auction( let response_body = pbs_response.take_body_bytes(); match serde_json::from_slice::(&response_body) { Ok(mut response_json) => { - let request_host = get_request_host(&req); - let request_scheme = get_request_scheme(&req); - transform_prebid_response(&mut response_json, &request_host, &request_scheme)?; + let request_info = RequestInfo::from_request(&req); + transform_prebid_response( + &mut response_json, + &request_info.host, + &request_info.scheme, + )?; let transformed_body = serde_json::to_vec(&response_json).change_context( TrustedServerError::Prebid { @@ -678,38 +759,16 @@ fn copy_request_headers(from: &Request, to: &mut Request) { } } -fn get_request_host(req: &Request) -> String { - req.get_header(header::HOST) - .and_then(|h| h.to_str().ok()) - .unwrap_or("") - .to_string() -} - -fn get_request_scheme(req: &Request) -> String { - if req.get_tls_protocol().is_some() || req.get_tls_cipher_openssl_name().is_some() { - return "https".to_string(); - } - - if let Some(proto) = req.get_header("X-Forwarded-Proto") { - if let Ok(proto_str) = proto.to_str() { - return proto_str.to_lowercase(); - } - } - - "https".to_string() -} +// Request host/scheme extraction is now centralized in http_util::RequestInfo #[cfg(test)] mod tests { use super::*; - use crate::html_processor::{create_html_processor, HtmlProcessorConfig}; - use crate::integrations::{AttributeRewriteAction, IntegrationRegistry}; + use crate::integrations::{AttributeRewriteAction, IntegrationAttributeContext}; use crate::settings::Settings; - use 
crate::streaming_processor::{Compression, PipelineConfig, StreamingPipeline}; use crate::test_support::tests::crate_test_settings_str; use fastly::http::Method; use serde_json::json; - use std::io::Cursor; fn make_settings() -> Settings { Settings::from_toml(&crate_test_settings_str()).expect("should parse settings") @@ -721,30 +780,14 @@ mod tests { server_url: "https://prebid.example".to_string(), timeout_ms: 1000, bidders: vec!["exampleBidder".to_string()], - auto_configure: true, debug: false, - script_handler: None, + script_patterns: default_script_patterns(), } } - fn config_from_settings( - settings: &Settings, - registry: &IntegrationRegistry, - ) -> HtmlProcessorConfig { - HtmlProcessorConfig::from_settings( - settings, - registry, - "origin.example.com", - "test.example.com", - "https", - ) - } - #[test] fn attribute_rewriter_removes_prebid_scripts() { - let integration = PrebidIntegration { - config: base_config(), - }; + let integration = PrebidIntegration::new(base_config()); let ctx = IntegrationAttributeContext { attribute_name: "src", request_host: "pub.example", @@ -753,17 +796,21 @@ mod tests { }; let rewritten = integration.rewrite("src", "https://cdn.prebid.org/prebid.min.js", &ctx); - assert!(matches!(rewritten, AttributeRewriteAction::RemoveElement)); + assert!( + matches!(rewritten, AttributeRewriteAction::RemoveElement), + "Prebid script tags should be removed" + ); let untouched = integration.rewrite("src", "https://cdn.example.com/app.js", &ctx); - assert!(matches!(untouched, AttributeRewriteAction::Keep)); + assert!( + matches!(untouched, AttributeRewriteAction::Keep), + "Non-Prebid scripts should remain" + ); } #[test] - fn attribute_rewriter_handles_query_strings_and_links() { - let integration = PrebidIntegration { - config: base_config(), - }; + fn attribute_rewriter_handles_query_strings() { + let integration = PrebidIntegration::new(base_config()); let ctx = IntegrationAttributeContext { attribute_name: "href", request_host: 
"pub.example", @@ -773,107 +820,125 @@ mod tests { let rewritten = integration.rewrite("href", "https://cdn.prebid.org/prebid.js?v=1.2.3", &ctx); - assert!(matches!(rewritten, AttributeRewriteAction::RemoveElement)); + assert!( + matches!(rewritten, AttributeRewriteAction::RemoveElement), + "Prebid links with query strings should be removed" + ); } #[test] - fn html_processor_keeps_prebid_scripts_when_auto_config_disabled() { - let html = r#" - - - "#; - - let mut settings = make_settings(); - settings - .integrations - .insert_config( - "prebid", - &json!({ - "enabled": true, - "server_url": "https://test-prebid.com/openrtb2/auction", - "timeout_ms": 1000, - "bidders": ["mocktioneer"], - "auto_configure": false, - "debug": false - }), - ) - .expect("should update prebid config"); - let registry = IntegrationRegistry::new(&settings); - let config = config_from_settings(&settings, ®istry); - let processor = create_html_processor(config); - let pipeline_config = PipelineConfig { - input_compression: Compression::None, - output_compression: Compression::None, - chunk_size: 8192, + fn attribute_rewriter_matches_wildcard_patterns() { + let mut config = base_config(); + config.script_patterns = vec!["/static/prebid/*".to_string()]; + let integration = PrebidIntegration::new(config); + let ctx = IntegrationAttributeContext { + attribute_name: "src", + request_host: "pub.example", + request_scheme: "https", + origin_host: "origin.example", }; - let mut pipeline = StreamingPipeline::new(pipeline_config, processor); - let mut output = Vec::new(); - let result = pipeline.process(Cursor::new(html.as_bytes()), &mut output); - assert!(result.is_ok()); - let processed = String::from_utf8_lossy(&output); - assert!( - processed.contains("tsjs-unified"), - "Unified bundle should be injected" + let rewritten = integration.rewrite( + "src", + "https://cdn.example.com/static/prebid/v1/prebid.min.js", + &ctx, ); assert!( - processed.contains("prebid.min.js"), - "Prebid script should 
remain when auto-config is disabled" + matches!(rewritten, AttributeRewriteAction::RemoveElement), + "Wildcard patterns should match prebid assets on full URLs" ); + + let rewritten_relative = integration.rewrite("src", "static/prebid/prebid.min.js", &ctx); assert!( - processed.contains("cdn.prebid.org/prebid.js"), - "Prebid preload should remain when auto-config is disabled" + matches!(rewritten_relative, AttributeRewriteAction::RemoveElement), + "Wildcard patterns should match relative paths without a leading slash" ); } #[test] - fn html_processor_removes_prebid_scripts_when_auto_config_enabled() { - let html = r#" - - - "#; - - let mut settings = make_settings(); - settings - .integrations - .insert_config( - "prebid", - &json!({ - "enabled": true, - "server_url": "https://test-prebid.com/openrtb2/auction", - "timeout_ms": 1000, - "bidders": ["mocktioneer"], - "auto_configure": true, - "debug": false - }), - ) - .expect("should update prebid config"); - let registry = IntegrationRegistry::new(&settings); - let config = config_from_settings(&settings, ®istry); - let processor = create_html_processor(config); - let pipeline_config = PipelineConfig { - input_compression: Compression::None, - output_compression: Compression::None, - chunk_size: 8192, - }; - let mut pipeline = StreamingPipeline::new(pipeline_config, processor); + fn script_pattern_matching_exact_paths() { + let integration = PrebidIntegration::new(base_config()); + + // Should match default exact patterns (suffix matching) + assert!(integration.matches_script_pattern("/prebid.js")); + assert!(integration.matches_script_pattern("/prebid.min.js")); + assert!(integration.matches_script_pattern("/prebidjs.js")); + assert!(integration.matches_script_pattern("/prebidjs.min.js")); + + // Suffix matching means nested paths also match + assert!(integration.matches_script_pattern("/static/prebid.min.js")); + assert!(integration.matches_script_pattern("/static/prebid/v8.53.0/prebid.min.js")); + + // Should not 
match other scripts + assert!(!integration.matches_script_pattern("/app.js")); + assert!(!integration.matches_script_pattern("/static/bundle.min.js")); + } - let mut output = Vec::new(); - let result = pipeline.process(Cursor::new(html.as_bytes()), &mut output); - assert!(result.is_ok()); - let processed = String::from_utf8_lossy(&output); - assert!( - processed.contains("tsjs-unified"), - "Unified bundle should be injected" - ); - assert!( - !processed.contains("prebid.min.js"), - "Prebid script should be removed when auto-config is enabled" - ); + #[test] + fn script_pattern_matching_wildcard_slash_star() { + // Test /* wildcard pattern matching + let mut config = base_config(); + config.script_patterns = vec!["/static/prebid/*".to_string()]; + let integration = PrebidIntegration::new(config); + + // Should match paths under the prefix with known suffixes + assert!(integration.matches_script_pattern("/static/prebid/prebid.min.js")); + assert!(integration.matches_script_pattern("/static/prebid/v8.53.0/prebid.min.js")); + assert!(integration.matches_script_pattern("/static/prebid/prebidjs.js")); + + // Should not match paths outside prefix + assert!(!integration.matches_script_pattern("/prebid.min.js")); + assert!(!integration.matches_script_pattern("/other/prebid.min.js")); + + // Should not match non-prebid scripts even under prefix + assert!(!integration.matches_script_pattern("/static/prebid/app.js")); + } + + #[test] + fn script_pattern_matching_wildcard_matchit_syntax() { + // Test {*rest} matchit-style wildcard pattern matching + let mut config = base_config(); + config.script_patterns = vec!["/wp-content/plugins/prebidjs/{*rest}".to_string()]; + let integration = PrebidIntegration::new(config); + + // Should match paths under the prefix with known suffixes assert!( - !processed.contains("cdn.prebid.org/prebid.js"), - "Prebid preload should be removed when auto-config is enabled" + 
integration.matches_script_pattern("/wp-content/plugins/prebidjs/js/prebidjs.min.js") ); + assert!(integration.matches_script_pattern("/wp-content/plugins/prebidjs/prebid.min.js")); + assert!(integration.matches_script_pattern("/wp-content/plugins/prebidjs/v1/v2/prebid.js")); + + // Should not match paths outside prefix + assert!(!integration.matches_script_pattern("/prebid.min.js")); + assert!(!integration.matches_script_pattern("/wp-content/other/prebid.min.js")); + + // Should not match non-prebid scripts even under prefix + assert!(!integration.matches_script_pattern("/wp-content/plugins/prebidjs/app.js")); + } + + #[test] + fn script_pattern_matching_case_insensitive() { + let integration = PrebidIntegration::new(base_config()); + + assert!(integration.matches_script_pattern("/Prebid.JS")); + assert!(integration.matches_script_pattern("/PREBID.MIN.JS")); + assert!(integration.matches_script_pattern("/Static/Prebid.min.js")); + } + + #[test] + fn routes_include_script_patterns() { + let integration = PrebidIntegration::new(base_config()); + let routes = integration.routes(); + + // Should include the default ad routes + assert!(routes.iter().any(|r| r.path == "/first-party/ad")); + assert!(routes.iter().any(|r| r.path == "/third-party/ad")); + + // Should include default script removal patterns + assert!(routes.iter().any(|r| r.path == "/prebid.js")); + assert!(routes.iter().any(|r| r.path == "/prebid.min.js")); + assert!(routes.iter().any(|r| r.path == "/prebidjs.js")); + assert!(routes.iter().any(|r| r.path == "/prebidjs.min.js")); } #[test] @@ -983,16 +1048,7 @@ mod tests { } #[test] - fn is_prebid_script_url_matches_common_variants() { - assert!(is_prebid_script_url("https://cdn.com/prebid.js")); - assert!(is_prebid_script_url( - "https://cdn.com/prebid.min.js?version=1" - )); - assert!(!is_prebid_script_url("https://cdn.com/app.js")); - } - - #[test] - fn test_script_handler_config_parsing() { + fn test_script_patterns_config_parsing() { let toml_str = r#" 
[publisher] domain = "test-publisher.com" @@ -1009,7 +1065,7 @@ template = "{{client_ip}}:{{user_agent}}" [integrations.prebid] enabled = true server_url = "https://prebid.example" -script_handler = "/prebid.js" +script_patterns = ["/static/prebid/*"] "#; let settings = Settings::from_toml(toml_str).expect("should parse TOML"); @@ -1018,11 +1074,11 @@ script_handler = "/prebid.js" .expect("should get config") .expect("should be enabled"); - assert_eq!(config.script_handler, Some("/prebid.js".to_string())); + assert_eq!(config.script_patterns, vec!["/static/prebid/*"]); } #[test] - fn test_script_handler_none_by_default() { + fn test_script_patterns_default() { let toml_str = r#" [publisher] domain = "test-publisher.com" @@ -1047,21 +1103,13 @@ server_url = "https://prebid.example" .expect("should get config") .expect("should be enabled"); - assert_eq!(config.script_handler, None); + // Should have default patterns + assert_eq!(config.script_patterns, default_script_patterns()); } #[test] fn test_script_handler_returns_empty_js() { - let config = PrebidIntegrationConfig { - enabled: true, - server_url: "https://prebid.example".to_string(), - timeout_ms: 1000, - bidders: vec![], - auto_configure: false, - debug: false, - script_handler: Some("/prebid.js".to_string()), - }; - let integration = PrebidIntegration::new(config); + let integration = PrebidIntegration::new(base_config()); let response = integration .handle_script_handler() @@ -1085,37 +1133,23 @@ server_url = "https://prebid.example" } #[test] - fn test_routes_includes_script_handler() { - let config = PrebidIntegrationConfig { - enabled: true, - server_url: "https://prebid.example".to_string(), - timeout_ms: 1000, - bidders: vec![], - auto_configure: false, - debug: false, - script_handler: Some("/prebid.js".to_string()), - }; + fn test_routes_with_default_patterns() { + let config = base_config(); // Has default script_patterns let integration = PrebidIntegration::new(config); let routes = 
integration.routes(); - // Should have 3 routes: first-party ad, third-party ad, and script handler - assert_eq!(routes.len(), 3); + // Should have 2 ad routes + 4 default script patterns + assert_eq!(routes.len(), 6); - let has_script_route = routes - .iter() - .any(|r| r.path == "/prebid.js" && r.method == Method::GET); - assert!(has_script_route, "should register script handler route"); - } - - #[test] - fn test_routes_without_script_handler() { - let config = base_config(); // Has script_handler: None - let integration = PrebidIntegration::new(config); - - let routes = integration.routes(); + // Verify ad routes + assert!(routes.iter().any(|r| r.path == "/first-party/ad")); + assert!(routes.iter().any(|r| r.path == "/third-party/ad")); - // Should only have 2 routes: first-party ad and third-party ad - assert_eq!(routes.len(), 2); + // Verify script pattern routes + assert!(routes.iter().any(|r| r.path == "/prebid.js")); + assert!(routes.iter().any(|r| r.path == "/prebid.min.js")); + assert!(routes.iter().any(|r| r.path == "/prebidjs.js")); + assert!(routes.iter().any(|r| r.path == "/prebidjs.min.js")); } } diff --git a/crates/common/src/publisher.rs b/crates/common/src/publisher.rs index 6041536..796728f 100644 --- a/crates/common/src/publisher.rs +++ b/crates/common/src/publisher.rs @@ -3,7 +3,7 @@ use fastly::http::{header, StatusCode}; use fastly::{Body, Request, Response}; use crate::backend::ensure_backend_from_url; -use crate::http_util::serve_static_with_etag; +use crate::http_util::{serve_static_with_etag, RequestInfo}; use crate::constants::{HEADER_SYNTHETIC_TRUSTED_SERVER, HEADER_X_COMPRESS_HINT}; use crate::cookies::create_synthetic_cookie; @@ -15,65 +15,6 @@ use crate::streaming_processor::{Compression, PipelineConfig, StreamProcessor, S use crate::streaming_replacer::create_url_replacer; use crate::synthetic::get_or_generate_synthetic_id; -/// Detects the request scheme (HTTP or HTTPS) using Fastly SDK methods and headers. 
-/// -/// Tries multiple methods in order of reliability: -/// 1. Fastly SDK TLS detection methods (most reliable) -/// 2. Forwarded header (RFC 7239) -/// 3. X-Forwarded-Proto header -/// 4. Fastly-SSL header (least reliable, can be spoofed) -/// 5. Default to HTTP -fn detect_request_scheme(req: &Request) -> String { - // 1. First try Fastly SDK's built-in TLS detection methods - // These are the most reliable as they check the actual connection - if let Some(tls_protocol) = req.get_tls_protocol() { - // If we have a TLS protocol, the connection is definitely HTTPS - log::debug!("TLS protocol detected: {}", tls_protocol); - return "https".to_string(); - } - - // Also check TLS cipher - if present, connection is HTTPS - if req.get_tls_cipher_openssl_name().is_some() { - log::debug!("TLS cipher detected, using HTTPS"); - return "https".to_string(); - } - - // 2. Try the Forwarded header (RFC 7239) - if let Some(forwarded) = req.get_header("forwarded") { - if let Ok(forwarded_str) = forwarded.to_str() { - // Parse the Forwarded header - // Format: Forwarded: for=192.0.2.60;proto=https;by=203.0.113.43 - if forwarded_str.contains("proto=https") { - return "https".to_string(); - } else if forwarded_str.contains("proto=http") { - return "http".to_string(); - } - } - } - - // 3. Try X-Forwarded-Proto header - if let Some(proto) = req.get_header("x-forwarded-proto") { - if let Ok(proto_str) = proto.to_str() { - let proto_lower = proto_str.to_lowercase(); - if proto_lower == "https" || proto_lower == "http" { - return proto_lower; - } - } - } - - // 4. 
Check Fastly-SSL header (can be spoofed by clients, use as last resort) - if let Some(ssl) = req.get_header("fastly-ssl") { - if let Ok(ssl_str) = ssl.to_str() { - if ssl_str == "1" || ssl_str.to_lowercase() == "true" { - return "https".to_string(); - } - } - } - - // Default to HTTP (changed from HTTPS based on your settings file) - "http".to_string() -} - /// Unified tsjs static serving: `/static/tsjs=` /// Accepts: `tsjs-core(.min).js`, `tsjs-ext(.min).js`, `tsjs-creative(.min).js` pub fn handle_tsjs_dynamic( @@ -238,29 +179,20 @@ pub fn handle_publisher_request( // Prebid.js requests are not intercepted here anymore. The HTML processor rewrites // any Prebid script references to `/static/tsjs-ext.min.js` when auto-configure is enabled. - // Extract the request host from the incoming request - let request_host = req - .get_header(header::HOST) - .map(|h| h.to_str().unwrap_or_default()) - .unwrap_or_default() - .to_string(); + // Extract request host and scheme from headers (supports X-Forwarded-Host/Proto for chained proxies) + let request_info = RequestInfo::from_request(&req); + let request_host = &request_info.host; + let request_scheme = &request_info.scheme; - // Detect the request scheme using multiple methods - let request_scheme = detect_request_scheme(&req); - - // Log detection details for debugging log::debug!( - "Scheme detection - TLS Protocol: {:?}, TLS Cipher: {:?}, Forwarded: {:?}, X-Forwarded-Proto: {:?}, Fastly-SSL: {:?}, Result: {}", - req.get_tls_protocol(), - req.get_tls_cipher_openssl_name(), - req.get_header("forwarded"), + "Request info: host={}, scheme={} (X-Forwarded-Host: {:?}, Host: {:?}, X-Forwarded-Proto: {:?})", + request_host, + request_scheme, + req.get_header("x-forwarded-host"), + req.get_header(header::HOST), req.get_header("x-forwarded-proto"), - req.get_header("fastly-ssl"), - request_scheme ); - log::debug!("Request host: {}, scheme: {}", request_host, request_scheme); - // Generate synthetic identifiers before the request 
body is consumed. let synthetic_id = get_or_generate_synthetic_id(settings, &req)?; let has_synthetic_cookie = req @@ -334,8 +266,8 @@ pub fn handle_publisher_request( content_encoding: &content_encoding, origin_host: &origin_host, origin_url: &settings.publisher.origin_url, - request_host: &request_host, - request_scheme: &request_scheme, + request_host, + request_scheme, settings, content_type: &content_type, integration_registry, @@ -387,73 +319,6 @@ mod tests { use crate::test_support::tests::create_test_settings; use fastly::http::Method; - #[test] - fn test_detect_request_scheme() { - // Note: In tests, we can't mock the TLS methods on Request, so we test header fallbacks - - // Test Forwarded header with HTTPS - let mut req = Request::new(Method::GET, "https://test.example.com/page"); - req.set_header("forwarded", "for=192.0.2.60;proto=https;by=203.0.113.43"); - assert_eq!(detect_request_scheme(&req), "https"); - - // Test Forwarded header with HTTP - let mut req = Request::new(Method::GET, "http://test.example.com/page"); - req.set_header("forwarded", "for=192.0.2.60;proto=http;by=203.0.113.43"); - assert_eq!(detect_request_scheme(&req), "http"); - - // Test X-Forwarded-Proto with HTTPS - let mut req = Request::new(Method::GET, "https://test.example.com/page"); - req.set_header("x-forwarded-proto", "https"); - assert_eq!(detect_request_scheme(&req), "https"); - - // Test X-Forwarded-Proto with HTTP - let mut req = Request::new(Method::GET, "http://test.example.com/page"); - req.set_header("x-forwarded-proto", "http"); - assert_eq!(detect_request_scheme(&req), "http"); - - // Test Fastly-SSL header - let mut req = Request::new(Method::GET, "https://test.example.com/page"); - req.set_header("fastly-ssl", "1"); - assert_eq!(detect_request_scheme(&req), "https"); - - // Test default to HTTP when no headers present - let req = Request::new(Method::GET, "https://test.example.com/page"); - assert_eq!(detect_request_scheme(&req), "http"); - - // Test priority: 
Forwarded takes precedence over X-Forwarded-Proto - let mut req = Request::new(Method::GET, "https://test.example.com/page"); - req.set_header("forwarded", "proto=https"); - req.set_header("x-forwarded-proto", "http"); - assert_eq!(detect_request_scheme(&req), "https"); - } - - #[test] - fn test_handle_publisher_request_extracts_headers() { - // Test that the function correctly extracts host and scheme from request headers - let mut req = Request::new(Method::GET, "https://test.example.com/page"); - req.set_header("host", "test.example.com"); - req.set_header("x-forwarded-proto", "https"); - - // Extract headers like the function does - let request_host = req - .get_header("host") - .map(|h| h.to_str().unwrap_or_default()) - .unwrap_or_default() - .to_string(); - - let request_scheme = req - .get_header("x-forwarded-proto") - .and_then(|h| h.to_str().ok()) - .unwrap_or("https") - .to_string(); - - assert_eq!(request_host, "test.example.com"); - assert_eq!(request_scheme, "https"); - } - - // Note: test_handle_publisher_request_default_https_scheme and test_handle_publisher_request_http_scheme - // were removed as they're redundant with test_detect_request_scheme which covers all scheme detection cases - #[test] fn test_content_type_detection() { // Test which content types should be processed diff --git a/crates/common/src/synthetic.rs b/crates/common/src/synthetic.rs index d84a95e..b60736c 100644 --- a/crates/common/src/synthetic.rs +++ b/crates/common/src/synthetic.rs @@ -14,6 +14,7 @@ use sha2::Sha256; use crate::constants::{HEADER_SYNTHETIC_PUB_USER_ID, HEADER_SYNTHETIC_TRUSTED_SERVER}; use crate::cookies::handle_request_cookies; use crate::error::TrustedServerError; +use crate::http_util::RequestInfo; use crate::settings::Settings; type HmacSha256 = Hmac; @@ -41,9 +42,13 @@ pub fn generate_synthetic_id( let auth_user_id = req .get_header(HEADER_SYNTHETIC_PUB_USER_ID) .map(|h| h.to_str().unwrap_or("anonymous")); - let publisher_domain = req - 
.get_header(header::HOST) - .map(|h| h.to_str().unwrap_or("unknown")); + // Use RequestInfo for consistent host extraction (respects X-Forwarded-Host) + let request_info = RequestInfo::from_request(req); + let publisher_domain = if request_info.host.is_empty() { + None + } else { + Some(request_info.host.as_str()) + }; let client_ip = req.get_client_ip_addr().map(|ip| ip.to_string()); let accept_language = req .get_header(header::ACCEPT_LANGUAGE) diff --git a/docs/guide/api-reference.md b/docs/guide/api-reference.md index b10d345..eabd1fa 100644 --- a/docs/guide/api-reference.md +++ b/docs/guide/api-reference.md @@ -421,18 +421,22 @@ See [First-Party Endpoints](#get-first-party-ad) above. #### POST /third-party/ad See [First-Party Endpoints](#post-third-party-ad) above. -#### GET /prebid.js (Optional) -Returns empty JavaScript to override Prebid.js when `script_handler` is configured. +#### GET /prebid.js, /prebid.min.js, etc. (Script Override) +Returns empty JavaScript to override Prebid.js scripts when the Prebid integration is enabled. By default, exact requests to `/prebid.js`, `/prebid.min.js`, `/prebidjs.js`, or `/prebidjs.min.js` will be intercepted and served an empty script. 
**Configuration:** ```toml [integrations.prebid] -script_handler = "/prebid.js" +# Default patterns (exact paths) +script_patterns = ["/prebid.js", "/prebid.min.js", "/prebidjs.js", "/prebidjs.min.js"] + +# Use wildcard patterns to match paths under a prefix +# script_patterns = ["/static/prebid/*"] ``` **Response:** - **Content-Type:** `application/javascript; charset=utf-8` -- **Body:** `// Prebid.js override by Trusted Server` +- **Body:** `// Script overridden by Trusted Server` - **Cache:** `immutable, max-age=31536000` --- diff --git a/docs/guide/configuration-reference.md b/docs/guide/configuration-reference.md index c820ca7..db4d7ec 100644 --- a/docs/guide/configuration-reference.md +++ b/docs/guide/configuration-reference.md @@ -597,9 +597,15 @@ All integrations support: | `server_url` | String | Required | Prebid Server endpoint URL | | `timeout_ms` | Integer | `1000` | Request timeout in milliseconds | | `bidders` | Array[String] | `[]` | List of enabled bidders | -| `auto_configure` | Boolean | `false` | Auto-inject Prebid.js shim | | `debug` | Boolean | `false` | Enable debug logging | -| `script_handler` | String | Optional | Custom script endpoint path | +| `script_patterns` | Array[String] | See below | Patterns for removing Prebid script tags and intercepting requests | + +**Default `script_patterns`**: +```toml +["/prebid.js", "/prebid.min.js", "/prebidjs.js", "/prebidjs.min.js"] +``` + +These patterns use suffix matching when stripping HTML, so `/static/prebid/v8/prebid.min.js` matches because it ends with `/prebid.min.js`. For request interception, exact paths are registered unless you use wildcard patterns (e.g., `/static/prebid/*`), which match paths under that prefix. 
**Example**: ```toml @@ -608,8 +614,8 @@ enabled = true server_url = "https://prebid-server.example/openrtb2/auction" timeout_ms = 1200 bidders = ["kargo", "rubicon", "appnexus", "openx"] -auto_configure = true debug = false +# script_patterns = ["/static/prebid/*"] # Optional: restrict to specific path ``` **Environment Override**: @@ -618,8 +624,9 @@ TRUSTED_SERVER__INTEGRATIONS__PREBID__ENABLED=true TRUSTED_SERVER__INTEGRATIONS__PREBID__SERVER_URL=https://prebid.example/auction TRUSTED_SERVER__INTEGRATIONS__PREBID__TIMEOUT_MS=1200 TRUSTED_SERVER__INTEGRATIONS__PREBID__BIDDERS=kargo,rubicon,appnexus -TRUSTED_SERVER__INTEGRATIONS__PREBID__AUTO_CONFIGURE=true TRUSTED_SERVER__INTEGRATIONS__PREBID__DEBUG=false +TRUSTED_SERVER__INTEGRATIONS__PREBID__SCRIPT_PATTERNS__0=/prebid.js +TRUSTED_SERVER__INTEGRATIONS__PREBID__SCRIPT_PATTERNS__1=/prebid.min.js ``` ### Next.js Integration diff --git a/docs/guide/configuration.md b/docs/guide/configuration.md index 962e440..d929a1c 100644 --- a/docs/guide/configuration.md +++ b/docs/guide/configuration.md @@ -95,7 +95,7 @@ enabled = true server_url = "https://prebid-server.com/openrtb2/auction" timeout_ms = 1200 bidders = ["kargo", "rubicon", "appnexus"] -auto_configure = false +# script_patterns = ["/static/prebid/*"] ``` ### `fastly.toml` @@ -264,7 +264,7 @@ enabled = true server_url = "https://prebid-server.com/openrtb2/auction" timeout_ms = 1200 bidders = ["kargo", "rubicon", "appnexus"] -auto_configure = false +# script_patterns = ["/static/prebid/*"] ``` **Next.js**: diff --git a/docs/guide/environment-variables.md b/docs/guide/environment-variables.md index 82a11f9..ce4b5a6 100644 --- a/docs/guide/environment-variables.md +++ b/docs/guide/environment-variables.md @@ -201,14 +201,15 @@ TRUSTED_SERVER__INTEGRATIONS__PREBID__TIMEOUT_MS=1000 # Bidders (comma-separated) TRUSTED_SERVER__INTEGRATIONS__PREBID__BIDDERS="appnexus,rubicon,openx" -# Auto-remove Prebid.js scripts 
-TRUSTED_SERVER__INTEGRATIONS__PREBID__AUTO_CONFIGURE=true - # Enable debug logging TRUSTED_SERVER__INTEGRATIONS__PREBID__DEBUG=false -# Optional: Script handler path -TRUSTED_SERVER__INTEGRATIONS__PREBID__SCRIPT_HANDLER="/prebid.js" +# Script patterns to remove Prebid tags and serve empty JS (indexed format) +# Default patterns match common Prebid filenames at exact paths +TRUSTED_SERVER__INTEGRATIONS__PREBID__SCRIPT_PATTERNS__0="/prebid.js" +TRUSTED_SERVER__INTEGRATIONS__PREBID__SCRIPT_PATTERNS__1="/prebid.min.js" +# For versioned paths, use wildcards: +# TRUSTED_SERVER__INTEGRATIONS__PREBID__SCRIPT_PATTERNS__0="/static/prebid/*" ``` **TOML Equivalent:** @@ -218,9 +219,8 @@ enabled = true server_url = "https://prebid-server.example.com" timeout_ms = 1000 bidders = ["appnexus", "rubicon", "openx"] -auto_configure = true debug = false -script_handler = "/prebid.js" +script_patterns = ["/prebid.js", "/prebid.min.js", "/prebidjs.js", "/prebidjs.min.js"] ``` --- @@ -642,7 +642,6 @@ export TRUSTED_SERVER__INTEGRATIONS__PREBID__ENABLED=true export TRUSTED_SERVER__INTEGRATIONS__PREBID__SERVER_URL="https://prebid-server.com" export TRUSTED_SERVER__INTEGRATIONS__PREBID__TIMEOUT_MS=2000 export TRUSTED_SERVER__INTEGRATIONS__PREBID__BIDDERS="appnexus,rubicon,openx" -export TRUSTED_SERVER__INTEGRATIONS__PREBID__AUTO_CONFIGURE=true # Optional: Security Headers export TRUSTED_SERVER__RESPONSE_HEADERS__STRICT_TRANSPORT_SECURITY="max-age=31536000" diff --git a/docs/guide/integration-guide.md b/docs/guide/integration-guide.md index 74f46cf..f96a9a3 100644 --- a/docs/guide/integration-guide.md +++ b/docs/guide/integration-guide.md @@ -276,7 +276,7 @@ enabled = true server_url = "https://prebid.example/openrtb2/auction" timeout_ms = 1200 bidders = ["equativ", "sampleBidder"] -auto_configure = true +# script_patterns = ["/static/prebid/*"] ``` Tests or scaffolding can inject configs by calling `settings.integrations.insert_config("prebid", &serde_json::json!({...}))`, the same
helper that other integrations use. @@ -287,7 +287,7 @@ Tests or scaffolding can inject configs by calling `settings.integrations.insert **3. HTML Rewrites Through the Registry** -When `auto_configure` is enabled, the integration's `IntegrationAttributeRewriter` removes any `