diff --git a/Cargo.lock b/Cargo.lock index 729a22c1..18d77516 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -454,6 +454,7 @@ dependencies = [ "prometheus-client", "prost", "prost-types", + "regex", "serde", "serde_json", "tempfile", diff --git a/Cargo.toml b/Cargo.toml index 143bd750..742c25f4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,6 +46,7 @@ uuid = { version = "1.17.0", features = ["v4"] } bindgen = "0.72.0" tempfile = { version = "3.20.0", default-features = false } yaml-rust2 = "0.11.0" +regex = "1.11.1" [profile.release] debug = "line-tables-only" diff --git a/fact/Cargo.toml b/fact/Cargo.toml index 103e37a2..3b84db24 100644 --- a/fact/Cargo.toml +++ b/fact/Cargo.toml @@ -35,6 +35,7 @@ fact-ebpf = { path = "../fact-ebpf" } [dev-dependencies] tempfile = { workspace = true } +regex = { workspace = true } [build-dependencies] anyhow = { workspace = true } diff --git a/fact/src/event/mod.rs b/fact/src/event/mod.rs index e5695653..463fb2c7 100644 --- a/fact/src/event/mod.rs +++ b/fact/src/event/mod.rs @@ -358,3 +358,193 @@ impl From for fact_api::FileOwnershipChange { } } } + +#[cfg(test)] +mod test_utils { + use std::os::raw::c_char; + + /// Helper function to convert raw bytes to a c_char array for testing + pub fn bytes_to_c_char_array(bytes: &[u8]) -> [c_char; N] { + let mut array = [0 as c_char; N]; + let len = bytes.len().min(N - 1); + for (i, &byte) in bytes.iter().take(len).enumerate() { + array[i] = byte as c_char; + } + array + } + + /// Helper function to convert a Rust string to a c_char array for testing + pub fn string_to_c_char_array(s: &str) -> [c_char; N] { + bytes_to_c_char_array(s.as_bytes()) + } +} + +#[cfg(test)] +mod tests { + use super::test_utils::*; + use super::*; + + #[test] + fn slice_to_string_valid_utf8() { + let tests = [ + ("hello", "ASCII"), + ("café", "French"), + ("файл", "Cyrillic"), + ("测试文件", "Chinese"), + ("test🚀file", "Emoji"), + ("test-файл-测试-🐛.txt", "Mixed Unicode"), + ("ملف", "Arabic"), + ("קובץ", "Hebrew"), + 
("ファイル", "Japanese"), + ]; + + for (input, description) in tests { + let arr = string_to_c_char_array::<{ PATH_MAX as usize }>(input); + assert_eq!( + slice_to_string(&arr).unwrap(), + input, + "Failed for {}", + description + ); + } + } + + #[test] + fn slice_to_string_invalid_utf8() { + let tests: &[(&[u8], &str)] = &[ + (&[0xFF, 0xFE, 0xFD], "Invalid continuation bytes"), + (b"test\xE2", "Truncated multi-byte sequence"), + (&[0xC0, 0x80], "Overlong encoding"), + (b"hello\x80world", "Invalid start byte"), + (&[0x80], "Lone continuation byte"), + (b"test\xFF\xFE", "Mixed valid and invalid bytes"), + ]; + + for (bytes, description) in tests { + let arr = bytes_to_c_char_array::<{ PATH_MAX as usize }>(bytes); + assert!( + slice_to_string(&arr).is_err(), + "Should fail for {}", + description + ); + } + } + + #[test] + fn sanitize_d_path_valid_utf8() { + let tests = [ + ("/etc/test", "/etc/test", "ASCII"), + ("/tmp/файл.txt", "/tmp/файл.txt", "Cyrillic"), + ( + "/home/user/测试文件.log", + "/home/user/测试文件.log", + "Chinese", + ), + ("/data/🚀rocket.dat", "/data/🚀rocket.dat", "Emoji"), + ( + "/var/log/app-данные-数据-🐛.log", + "/var/log/app-данные-数据-🐛.log", + "Mixed Unicode", + ), + ("/home/ملف.txt", "/home/ملف.txt", "Arabic"), + ("/opt/ファイル.conf", "/opt/ファイル.conf", "Japanese"), + ]; + + for (input, expected, description) in tests { + let arr = string_to_c_char_array::<{ PATH_MAX as usize }>(input); + assert_eq!( + sanitize_d_path(&arr), + PathBuf::from(expected), + "Failed for {}", + description + ); + } + } + + #[test] + fn sanitize_d_path_deleted_suffix() { + let tests = [ + ( + "/tmp/test.txt (deleted)", + "/tmp/test.txt", + "ASCII with deleted suffix", + ), + ( + "/tmp/файл.txt (deleted)", + "/tmp/файл.txt", + "Unicode with deleted suffix", + ), + ("/etc/config.yaml", "/etc/config.yaml", "No deleted suffix"), + ( + "/var/log/app/debug.log (deleted)", + "/var/log/app/debug.log", + "Nested path with deleted suffix", + ), + ]; + + for (input, expected, description) in 
tests { + let arr = string_to_c_char_array::<{ PATH_MAX as usize }>(input); + assert_eq!( + sanitize_d_path(&arr), + PathBuf::from(expected), + "Failed for {}", + description + ); + } + } + + #[test] + fn sanitize_d_path_invalid_utf8() { + use regex::Regex; + + let tests: &[(&[u8], &str, &str)] = &[ + ( + b"/tmp/\xFF\xFE.txt", + r"^/tmp/\u{FFFD}+\.txt$", + "Invalid continuation bytes", + ), + ( + b"/var/test\xE2\x80", + r"^/var/test\u{FFFD}+$", + "Truncated multi-byte sequence", + ), + ( + b"/home/file\x80.log", + r"^/home/file\u{FFFD}\.log$", + "Invalid start byte", + ), + ( + b"/tmp/\xD1\x84\xFF\xD0\xBB.txt", + r"^/tmp/ф\u{FFFD}л\.txt$", + "Mixed valid and invalid UTF-8", + ), + ]; + + for (bytes, pattern, description) in tests { + let arr = bytes_to_c_char_array::<{ PATH_MAX as usize }>(bytes); + let result = sanitize_d_path(&arr); + let result_str = result.to_string_lossy(); + + let re = Regex::new(pattern).expect("Invalid regex pattern"); + assert!( + re.is_match(&result_str), + "Failed for {}: expected pattern '{}', got '{}'", + description, + pattern, + result_str + ); + } + } + + #[test] + fn sanitize_d_path_invalid_utf8_with_deleted_suffix() { + let invalid_with_deleted = + bytes_to_c_char_array::<{ PATH_MAX as usize }>(b"/tmp/\xFF\xFE (deleted)"); + let result = sanitize_d_path(&invalid_with_deleted); + let result_str = result.to_string_lossy(); + + assert!(result_str.contains("/tmp/")); + assert!(!result_str.ends_with(" (deleted)")); + assert!(result_str.contains('\u{FFFD}')); + } +} diff --git a/fact/src/event/process.rs b/fact/src/event/process.rs index d7d1d139..5e42e530 100644 --- a/fact/src/event/process.rs +++ b/fact/src/event/process.rs @@ -8,7 +8,7 @@ use crate::host_info; use super::{sanitize_d_path, slice_to_string}; -#[derive(Debug, Clone, Default, Serialize)] +#[derive(Debug, Clone, Default, PartialEq, Serialize)] pub struct Lineage { uid: u32, exe_path: PathBuf, @@ -222,6 +222,8 @@ impl From for fact_api::ProcessSignal { #[cfg(test)] mod 
tests { use super::*; + use crate::event::test_utils::*; + use fact_ebpf::PATH_MAX; #[test] fn extract_container_id() { @@ -259,4 +261,237 @@ mod tests { assert_eq!(id, expected); } } + + #[test] + fn process_conversion_valid_utf8_comm() { + let tests = [ + ("test", "ASCII"), + ("тест", "Cyrillic"), + ("测试", "Chinese"), + ("app🚀", "Emoji"), + ]; + + for (comm, description) in tests { + let proc = process_t { + comm: string_to_c_char_array::<16>(comm), + ..Default::default() + }; + let result = Process::try_from(proc).expect("Failed to parse process"); + let expected = Process { + comm: comm.to_string(), + ..Default::default() + }; + assert_eq!(result, expected, "Failed for {}", description); + } + } + + #[test] + fn process_conversion_invalid_utf8_comm() { + let tests: &[(&[u8], &str)] = &[ + (b"test\xFF\xFE", "Invalid bytes"), + (b"app\xE2\x80", "Truncated multi-byte sequence"), + ]; + + for (bytes, description) in tests { + let proc = process_t { + comm: bytes_to_c_char_array::<16>(bytes), + ..Default::default() + }; + let result = Process::try_from(proc); + assert!(result.is_err(), "Should fail for {}", description); + } + } + + #[test] + fn process_conversion_valid_utf8_exe_path() { + let tests = [ + ("/usr/bin/test", "ASCII"), + ("/usr/bin/тест", "Cyrillic"), + ("/opt/应用/测试", "Chinese"), + ("/home/user/🚀app", "Emoji"), + ("/var/app-данные-数据/bin", "Mixed UTF-8"), + ]; + + for (path, description) in tests { + let proc = process_t { + exe_path: string_to_c_char_array::<{ PATH_MAX as usize }>(path), + ..Default::default() + }; + let result = Process::try_from(proc).expect("Failed to parse process"); + let expected = Process { + exe_path: PathBuf::from(path), + ..Default::default() + }; + assert_eq!(result, expected, "Failed for {}", description); + } + } + + #[test] + fn process_conversion_invalid_utf8_exe_path() { + use regex::Regex; + + let proc = process_t { + exe_path: bytes_to_c_char_array::<{ PATH_MAX as usize }>(b"/usr/bin/\xFF\xFE"), + 
..Default::default() + }; + let result = Process::try_from(proc).expect("Failed to parse process"); + let exe_path_str = result.exe_path.to_string_lossy(); + + let re = Regex::new(r"^/usr/bin/\u{FFFD}+$").expect("Invalid regex pattern"); + assert!( + re.is_match(&exe_path_str), + "Expected pattern '^/usr/bin/\\u{{FFFD}}+$', got '{}'", + exe_path_str + ); + } + + #[test] + fn process_conversion_valid_utf8_args() { + let tests: &[(&str, Vec<&str>, &str)] = &[ + ("arg1\0arg2\0arg3\0", vec!["arg1", "arg2", "arg3"], "ASCII"), + ("файл\0данные\0", vec!["файл", "данные"], "Cyrillic"), + ( + "测试\0文件\0数据\0", + vec!["测试", "文件", "数据"], + "Chinese", + ), + ( + "app\0🚀file\0📁data\0", + vec!["app", "🚀file", "📁data"], + "Emoji", + ), + ( + "test\0файл\0测试\0🚀\0", + vec!["test", "файл", "测试", "🚀"], + "Mixed UTF-8", + ), + ]; + + for (args_str, expected, description) in tests { + let proc = process_t { + args: string_to_c_char_array::<{ PATH_MAX as usize }>(args_str), + args_len: args_str.len() as u32, + ..Default::default() + }; + let result = Process::try_from(proc).expect("Failed to parse process"); + let expected_process = Process { + args: expected.iter().map(|s| s.to_string()).collect(), + ..Default::default() + }; + assert_eq!(result, expected_process, "Failed for {}", description); + } + } + + #[test] + fn process_conversion_invalid_utf8_args() { + let tests: &[(&[u8], u32, &str)] = &[ + (b"arg1\0\xFF\xFEarg\0", 11, "Invalid bytes"), + (b"test\0\xE2\x80\0", 8, "Truncated multi-byte sequence"), + ]; + + for (bytes, args_len, description) in tests { + let proc = process_t { + args: bytes_to_c_char_array::<{ PATH_MAX as usize }>(bytes), + args_len: *args_len, + ..Default::default() + }; + let result = Process::try_from(proc); + assert!(result.is_err(), "Should fail for {}", description); + } + } + + #[test] + fn process_conversion_valid_utf8_memory_cgroup() { + let tests = [ + ("init.scope", None, "ASCII init.scope"), + ( + 
"/docker/951e643e3c241b225b6284ef2b79a37c13fc64cbf65b5d46bda95fcb98fe63a4", + Some("951e643e3c24"), + "container ID", + ), + ]; + + for (cgroup, expected_id, description) in tests { + let proc = process_t { + memory_cgroup: string_to_c_char_array::<{ PATH_MAX as usize }>(cgroup), + ..Default::default() + }; + let result = Process::try_from(proc).expect("Failed to parse process"); + let expected_process = Process { + container_id: expected_id.map(|s| s.to_string()), + ..Default::default() + }; + assert_eq!(result, expected_process, "Failed for {}", description); + } + } + + #[test] + fn process_conversion_invalid_utf8_memory_cgroup() { + let proc = process_t { + memory_cgroup: bytes_to_c_char_array::<{ PATH_MAX as usize }>(b"/docker/\xFF\xFE"), + ..Default::default() + }; + let result = Process::try_from(proc); + assert!(result.is_err()); + } + + #[test] + fn process_conversion_valid_utf8_lineage() { + let tests = [ + ("/bin/bash", "ASCII"), + ("/usr/bin/тест", "Cyrillic"), + ("/opt/应用", "Chinese"), + ]; + + for (path, description) in tests { + let proc = process_t { + lineage: [ + lineage_t { + uid: 1000, + exe_path: string_to_c_char_array::<{ PATH_MAX as usize }>(path), + }, + Default::default(), + ], + lineage_len: 1, + ..Default::default() + }; + let result = Process::try_from(proc).expect("Failed to parse process"); + let expected_process = Process { + lineage: vec![Lineage { + uid: 1000, + exe_path: PathBuf::from(path), + }], + ..Default::default() + }; + assert_eq!(result, expected_process, "Failed for {}", description); + } + } + + #[test] + fn process_conversion_invalid_utf8_lineage() { + use regex::Regex; + + let proc = process_t { + lineage: [ + lineage_t { + uid: 1000, + exe_path: bytes_to_c_char_array::<{ PATH_MAX as usize }>(b"/bin/\xFF\xFE"), + }, + Default::default(), + ], + lineage_len: 1, + ..Default::default() + }; + let result = Process::try_from(proc); + assert!(result.is_ok()); + let lineage = result.unwrap().lineage; + let lineage_path_str = 
lineage[0].exe_path.to_string_lossy(); + + let re = Regex::new(r"^/bin/\u{FFFD}+$").expect("Invalid regex pattern"); + assert!( + re.is_match(&lineage_path_str), + "Expected pattern '^/bin/\\u{{FFFD}}+$', got '{}'", + lineage_path_str + ); + } } diff --git a/tests/conftest.py b/tests/conftest.py index f23d4dbe..aee04534 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,6 +16,43 @@ 'test_editors.commons' ] +def join_path_with_filename(directory, filename): + """ + Join a directory path with a filename, handling bytes filenames properly. + + When filename is bytes (e.g., containing invalid UTF-8), converts the + directory to bytes before joining to avoid mixing str and bytes. + + Args: + directory: Directory path (str) + filename: Filename (str or bytes) + + Returns: + Joined path (str or bytes, matching the filename type) + """ + if isinstance(filename, bytes): + return os.path.join(os.fsencode(directory), filename) + else: + return os.path.join(directory, filename) + + +def path_to_string(path): + """ + Convert a filesystem path to string, replacing invalid UTF-8 with U+FFFD. + + This matches the behavior of Rust's String::from_utf8_lossy() used in + the fact codebase. 
+ + Args: + path: Filesystem path (str or bytes) + + Returns: + String representation with invalid UTF-8 replaced by replacement character + """ + if isinstance(path, bytes): + return path.decode('utf-8', errors='replace') + else: + return path @pytest.fixture def monitored_dir(): diff --git a/tests/test_file_open.py b/tests/test_file_open.py index c47272c7..001aa4a3 100644 --- a/tests/test_file_open.py +++ b/tests/test_file_open.py @@ -2,11 +2,21 @@ import os import docker +import pytest +from conftest import join_path_with_filename, path_to_string from event import Event, EventType, Process -def test_open(fact, monitored_dir, server): +@pytest.mark.parametrize("filename", [ + pytest.param('create.txt', id='ASCII'), + pytest.param('café.txt', id='French'), + pytest.param('файл.txt', id='Cyrillic'), + pytest.param('测试.txt', id='Chinese'), + pytest.param('🚀rocket.txt', id='Emoji'), + pytest.param(b'test\xff\xfe.txt', id='Invalid'), +]) +def test_open(fact, monitored_dir, server, filename): """ Tests the opening of a file and verifies that the corresponding event is captured by the server. @@ -15,12 +25,17 @@ def test_open(fact, monitored_dir, server): fact: Fixture for file activity (only required to be running). monitored_dir: Temporary directory path for creating the test file. server: The server instance to communicate with. + filename: Name of the file to create (includes UTF-8 test cases). """ # File Under Test - fut = os.path.join(monitored_dir, 'create.txt') + fut = join_path_with_filename(monitored_dir, filename) + with open(fut, 'w') as f: f.write('This is a test') + # Convert fut to string for the Event, replacing invalid UTF-8 with U+FFFD + fut = path_to_string(fut) + e = Event(process=Process.from_proc(), event_type=EventType.CREATION, file=fut, host_path='') print(f'Waiting for event: {e}') @@ -37,6 +52,7 @@ def test_multiple(fact, monitored_dir, server): fact: Fixture for file activity (only required to be running). 
monitored_dir: Temporary directory path for creating the test file. server: The server instance to communicate with. + filenames: List of filenames to create (includes UTF-8 test cases). """ events = [] process = Process.from_proc() diff --git a/tests/test_path_chmod.py b/tests/test_path_chmod.py index 4b62e2c2..b62391cc 100644 --- a/tests/test_path_chmod.py +++ b/tests/test_path_chmod.py @@ -1,10 +1,21 @@ import multiprocessing as mp import os +import pytest + +from conftest import join_path_with_filename, path_to_string from event import Event, EventType, Process -def test_chmod(fact, monitored_dir, server): +@pytest.mark.parametrize("filename", [ + 'chmod.txt', + 'café.txt', + 'файл.txt', + '测试.txt', + '🔒secure.txt', + b'perm\xff\xfe.txt', +]) +def test_chmod(fact, monitored_dir, server, filename): """ Tests changing permissions on a file and verifies the corresponding event is captured by the server @@ -13,18 +24,33 @@ def test_chmod(fact, monitored_dir, server): fact: Fixture for file activity (only required to be runing). monitored_dir: Temporary directory path for creating the test file. server: The server instance to communicate with. + filename: Name of the file to create (includes UTF-8 test cases). 
""" - # File Under Test - fut = os.path.join(monitored_dir, 'test.txt') + fut = join_path_with_filename(monitored_dir, filename) + + # Create the file first + with open(fut, 'w') as f: + f.write('This is a test') + mode = 0o666 os.chmod(fut, mode) - e = Event(process=Process.from_proc(), event_type=EventType.PERMISSION, - file=fut, host_path=fut, mode=mode) + # Convert fut to string for the Event, replacing invalid UTF-8 with U+FFFD + fut = path_to_string(fut) - print(f'Waiting for event: {e}') + process = Process.from_proc() + # We expect both CREATION (from file creation) and PERMISSION (from chmod) + events = [ + Event(process=process, event_type=EventType.CREATION, + file=fut, host_path=''), + Event(process=process, event_type=EventType.PERMISSION, + file=fut, host_path='', mode=mode), + ] - server.wait_events([e]) + for e in events: + print(f'Waiting for event: {e}') + + server.wait_events(events) def test_multiple(fact, monitored_dir, server): diff --git a/tests/test_path_chown.py b/tests/test_path_chown.py index d318f4eb..9e4daf12 100644 --- a/tests/test_path_chown.py +++ b/tests/test_path_chown.py @@ -1,5 +1,9 @@ import os +import shlex +import pytest + +from conftest import path_to_string from event import Event, EventType, Process # Tests here have to use a container to do 'chown', @@ -10,7 +14,15 @@ TEST_GID = 2345 -def test_chown(fact, test_container, server): +@pytest.mark.parametrize("filename", [ + 'chown.txt', + 'café.txt', + 'файл.txt', + '测试.txt', + '👤owner.txt', + b'own\xff\xfe.txt', +]) +def test_chown(fact, test_container, server, filename): """ Execute a chown operation on a file and verifies the corresponding event is captured by the server. @@ -19,15 +31,21 @@ def test_chown(fact, test_container, server): fact: Fixture for file activity (only required to be running). test_container: A container for running commands in. server: The server instance to communicate with. + filename: Name of the file to create (includes UTF-8 test cases). 
""" + # File Under Test - fut = '/container-dir/test.txt' + fut = f'/container-dir/{path_to_string(filename)}' # Create the file and chown it + # Use shlex.quote to properly escape special characters for shell + fut_quoted = shlex.quote(fut) + test_container.exec_run(f'touch {fut_quoted}') + test_container.exec_run(f'chown {TEST_UID}:{TEST_GID} {fut_quoted}') + + # The args in the event won't have quotes (shell removes them) touch_cmd = f'touch {fut}' chown_cmd = f'chown {TEST_UID}:{TEST_GID} {fut}' - test_container.exec_run(touch_cmd) - test_container.exec_run(chown_cmd) loginuid = pow(2, 32) - 1 touch = Process(pid=None, diff --git a/tests/test_path_unlink.py b/tests/test_path_unlink.py index 3a7cde5b..beafed0c 100644 --- a/tests/test_path_unlink.py +++ b/tests/test_path_unlink.py @@ -2,26 +2,52 @@ import os import docker +import pytest +from conftest import join_path_with_filename, path_to_string from event import Event, EventType, Process -def test_remove(fact, test_file, server): +@pytest.mark.parametrize("filename", [ + 'remove.txt', + 'café.txt', + 'файл.txt', + '测试.txt', + '🗑️delete.txt', + b'rm\xff\xfe.txt', +]) +def test_remove(fact, monitored_dir, server, filename): """ Tests the removal of a file and verifies the corresponding event is captured by the server. Args: fact: Fixture for file activity (only required to be running). - test_file: Temporary file for testing. + monitored_dir: Temporary directory path for creating the test file. server: The server instance to communicate with. + filename: Name of the file to create and remove (includes UTF-8 test cases). 
""" - os.remove(test_file) + + # File under test + fut = join_path_with_filename(monitored_dir, filename) + + # Create the file first + with open(fut, 'w') as f: + f.write('This is a test') + + # Remove the file + os.remove(fut) + + # Convert fut to string for the Event, replacing invalid UTF-8 with U+FFFD + fut = path_to_string(fut) process = Process.from_proc() + # We expect both CREATION (from file creation) and UNLINK (from removal) events = [ + Event(process=process, event_type=EventType.CREATION, + file=fut, host_path=''), Event(process=process, event_type=EventType.UNLINK, - file=test_file, host_path=test_file), + file=fut, host_path=''), ] server.wait_events(events)