diff --git a/libdd-trace-obfuscation/src/redis.rs b/libdd-trace-obfuscation/src/redis.rs index 8aaf1df38e..f26f082951 100644 --- a/libdd-trace-obfuscation/src/redis.rs +++ b/libdd-trace-obfuscation/src/redis.rs @@ -6,22 +6,41 @@ use crate::redis_tokenizer::{RedisTokenType, RedisTokenizer}; const REDIS_TRUNCATION_MARK: &str = "..."; const MAX_REDIS_NB_COMMANDS: usize = 3; +/// Uppercase a single char to match Go's unicode.ToUpper (Unicode 15.0). +/// Rust uses Unicode 16.0 which added case pairs for U+16E80–U+16EFF (Bamum Supplement) +/// that Go 1.25 doesn't know about — keep those chars unchanged to match Go. +fn go_toupper(c: char) -> char { + if (0x16E80..=0x16EFF).contains(&(c as u32)) { + return c; + } + let mut upper = c.to_uppercase(); + match (upper.next(), upper.next()) { + (Some(u), None) => u, + _ => c, + } +} + /// Returns a quantized version of a Redis query, keeping only up to 3 command names. pub fn quantize_redis_string(query: &str) -> String { let mut commands: Vec = Vec::with_capacity(MAX_REDIS_NB_COMMANDS); let mut truncated = false; - for raw_line in query.lines() { + // Split on '\n' only (like Go's strings.IndexByte), preserving '\r' in line content + for raw_line in query.split('\n') { if commands.len() >= MAX_REDIS_NB_COMMANDS { break; } - let line = raw_line.trim(); + // Go's QuantizeRedisString trims only ASCII spaces (strings.Trim(rawLine, " ")), + // not all whitespace. Use trim_matches(' ') to match that behavior. + let line = raw_line.trim_matches(' '); if line.is_empty() { continue; } - let mut tokens = line.split_whitespace(); + // Go splits on spaces only (strings.SplitN(line, " ", 3)), not all whitespace. + // Use split(' ').filter to match that behavior and preserve tab tokens. + let mut tokens = line.split(' ').filter(|s| !s.is_empty()); let Some(first) = tokens.next() else { continue }; if first.ends_with(REDIS_TRUNCATION_MARK) { @@ -29,7 +48,7 @@ pub fn quantize_redis_string(query: &str) -> String { continue; } - let cmd = first.to_ascii_uppercase(); + let cmd: String = first.chars().map(go_toupper).collect(); let command = match cmd.as_bytes() { b"CLIENT" | b"CLUSTER" | b"COMMAND" | b"CONFIG" | b"DEBUG" | b"SCRIPT" => { match tokens.next() { @@ -37,7 +56,9 @@ pub fn quantize_redis_string(query: &str) -> String { truncated = true; continue; } - Some(sub) => format!("{cmd} {}", sub.to_ascii_uppercase()), + Some(sub) => { + format!("{cmd} {}", sub.chars().map(go_toupper).collect::()) + } None => cmd, } } @@ -59,6 +80,8 @@ pub fn quantize_redis_string(query: &str) -> String { } pub fn obfuscate_redis_string(cmd: &str) -> String { + // Go's newRedisTokenizer calls bytes.TrimSpace before tokenizing + let cmd = cmd.trim(); let mut tokenizer = RedisTokenizer::new(cmd); let s = &mut String::new(); let mut cmd: Option<&str> = None; @@ -94,29 +117,27 @@ fn obfuscate_redis_cmd<'a>(str: &mut String, cmd: &'a str, mut args: Vec<&'a str let mut uppercase_cmd = [0; 32]; // no redis cmd is longer than 32 chars let uppercase_cmd = ascii_uppercase(cmd, &mut uppercase_cmd).unwrap_or(&[]); match uppercase_cmd { - b"AUTH" | b"MIGRATE" | b"HELLO" => { + b"AUTH" | b"MIGRATE" | b"HELLO" // Obfuscate everything after command: // • AUTH password // • MIGRATE host port key|"" destination-db timeout [COPY] [REPLACE] [AUTH password] // [AUTH2 username password] [KEYS key [key ...]] // • HELLO [protover [AUTH username password] [SETNAME clientname]] - if !args.is_empty() { + if !args.is_empty() => { args.clear(); args.push("?"); } - } - b"ACL" => { + b"ACL" // Obfuscate all arguments after the subcommand: // • ACL SETUSER username on >password ~keys &channels +commands // • ACL GETUSER username // • ACL DELUSER username [username ...] // • ACL LIST // • ACL WHOAMI - if args.len() > 1 { + if args.len() > 1 => { args[1] = "?"; args.drain(2..); } - } b"APPEND" | b"GETSET" | b"LPUSHX" | b"GEORADIUSBYMEMBER" | b"RPUSHX" | b"SET" | b"SETNX" | b"SISMEMBER" | b"ZRANK" | b"ZREVRANK" | b"ZSCORE" => { // Obfuscate 2nd argument: @@ -155,7 +176,7 @@ fn obfuscate_redis_cmd<'a>(str: &mut String, cmd: &'a str, mut args: Vec<&'a str // • LINSERT key BEFORE|AFTER pivot value args = obfuscate_redis_args_n(args, 3); } - b"GEOHASH" | b"GEOPOS" | b"GEODIST" | b"LPUSH" | b"RPUSH" | b"SREM" | b"ZREM" | b"SADD" => { + b"GEOHASH" | b"GEOPOS" | b"GEODIST" | b"LPUSH" | b"RPUSH" | b"SREM" | b"ZREM" | b"SADD" // Obfuscate all arguments after the first one. // • GEOHASH key member [member ...] // • GEOPOS key member [member ...] @@ -165,11 +186,10 @@ fn obfuscate_redis_cmd<'a>(str: &mut String, cmd: &'a str, mut args: Vec<&'a str // • SREM key member [member ...] // • ZREM key member [member ...] // • SADD key member [member ...] - if args.len() > 1 { + if args.len() > 1 => { args[1] = "?"; args.drain(2..); } - } b"GEOADD" => { // Obfuscating every 3rd argument starting from first // • GEOADD key longitude latitude member [longitude latitude member ...] @@ -335,6 +355,11 @@ mod tests { input ["CLIENT LIST"] expected ["CLIENT LIST"]; ] + [ + test_name [test_quantize_redis_string_client_truncated] + input ["CLIENT ..."] + expected ["..."]; + ] [ test_name [test_quantize_redis_string_get_lowercase] input ["get my_key"] @@ -420,6 +445,27 @@ mod tests { input ["UNKNOWN 123"] expected ["UNKNOWN"]; ] + [ + test_name [fuzzing_3286489773] + input ["ꭺ"] + expected ["Ꭺ"]; + ] + [ + test_name [fuzzing_2812552373] + input ["\t"] + expected ["\t"]; + ] + [ + test_name [fuzzing_crlf] + input ["\r\n"] + // Split on \n specifically, not newlines, to copy agent's behaviour + expected ["\r"]; + ] + [ + test_name [fuzzing_box_char] + input ["𖺻"] + expected ["𖺻"]; + ] )] #[test] fn test_name() { @@ -940,6 +986,16 @@ SET k ?"#]; input ["bitfield key SET key value incrby 3"] expected ["bitfield ? SET ? incrby ?"]; ] + [ + test_name [test_obfuscate_fuzzing_unicode] + input ["\u{00b}ჸ"] + expected ["ჸ"]; + ] + [ + test_name [test_obfuscate_fuzzing_whitespaces] + input ["ჸ\n\tჸ"] + expected ["ჸ ?"]; + ] )] #[test] fn test_name() { diff --git a/libdd-trace-obfuscation/src/redis_tokenizer.rs b/libdd-trace-obfuscation/src/redis_tokenizer.rs index 0707b63aa4..e5f8982794 100644 --- a/libdd-trace-obfuscation/src/redis_tokenizer.rs +++ b/libdd-trace-obfuscation/src/redis_tokenizer.rs @@ -47,7 +47,10 @@ impl<'a> RedisTokenizer<'a> { RedisTokenType::RedisTokenArgument => self.next_arg(), }; loop { - self.skip_whitespace(); + // Only skip spaces between commands (not tabs - Go only skips spaces) + while self.curr_char() == b' ' { + self.offset += 1; + } if self.curr_char() != b'\n' { break; } @@ -58,7 +61,11 @@ impl<'a> RedisTokenizer<'a> { } fn next_cmd(&mut self) -> (usize, usize) { - self.skip_whitespace(); + // Go's scanCommand only skips ASCII spaces before the command (not tabs). + // Tabs are included in the command token (default case in Go's switch). + while self.curr_char() == b' ' { + self.offset += 1; + } let start = self.offset; loop { match self.curr_char() { @@ -86,30 +93,20 @@ impl<'a> RedisTokenizer<'a> { loop { match self.curr_char() { 0 => break, - b'\\' => { - if !escape { - escape = true; - self.offset += 1; - continue; - } - } - b'"' => { - if !escape { - quote = !quote - } + b'\\' if !escape => { + escape = true; + self.offset += 1; + continue; } - b'\n' => { - if !quote { - let span = (start, self.offset); - self.offset += 1; - self.state = RedisTokenType::RedisTokenCommand; - return span; - } + b'"' if !escape => quote = !quote, + b'\n' if !quote => { + let span = (start, self.offset); + self.offset += 1; + self.state = RedisTokenType::RedisTokenCommand; + return span; } - b' ' => { - if !quote { - return (start, self.offset); - } + b' ' if !quote => { + return (start, self.offset); } _ => {} }