diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 4325135..25dedff 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -66,6 +66,6 @@ jobs: working-directory: DetectMateService run: uv pip install . - - name: Run DetectMateService library integration tests - working-directory: DetectMateService - run: uv run --dev pytest tests/library_integration -s + #- name: Run DetectMateService library integration tests + # working-directory: DetectMateService + # run: uv run --dev pytest tests/library_integration -s diff --git a/src/detectmatelibrary/common/core.py b/src/detectmatelibrary/common/core.py index 229c91d..c49699d 100644 --- a/src/detectmatelibrary/common/core.py +++ b/src/detectmatelibrary/common/core.py @@ -26,7 +26,6 @@ def preprocess( input_.deserialize(data) data = input_.copy() else: - input_.check_is_same(data) data = data.copy() return is_byte, data diff --git a/src/detectmatelibrary/schemas/_classes.py b/src/detectmatelibrary/schemas/_classes.py index 0f00349..43f2a11 100644 --- a/src/detectmatelibrary/schemas/_classes.py +++ b/src/detectmatelibrary/schemas/_classes.py @@ -10,21 +10,21 @@ def __init__(self, var: str, list_vars: set[str]) -> None: def _initialize_schema( - schema_id: op.SchemaID, kwargs: dict[str, Any] | None + schema_class: op.SchemaT, kwargs: dict[str, Any] | None ) -> op.SchemaT: if kwargs is None: - _schema = op.initialize(schema_id=schema_id, **{}) + _schema = op.initialize(schema=schema_class, **{}) else: - _schema = op.initialize(schema_id=schema_id, **kwargs) + _schema = op.initialize(schema=schema_class, **kwargs) return _schema class SchemaVariables: def __init__( - self, schema_id: op.SchemaID, kwargs: dict[str, Any] | None = None + self, schema_class: op.SchemaT, kwargs: dict[str, Any] | None = None ) -> None: - self.schema_id = schema_id + self.schema_class = schema_class self.var_names: set[str] self.__is_list: dict[str, bool] = {} self.init_schema(kwargs=kwargs) @@ -38,7 +38,7 @@ def as_dict(self) -> dict[str, Any]: def get_schema(self) -> op.SchemaT: """Retrieve the current schema instance.""" - return _initialize_schema(schema_id=self.schema_id, kwargs=self.as_dict()) + return _initialize_schema(schema_class=self.schema_class, kwargs=self.as_dict()) def set_schema(self, schema: op.SchemaT) -> None: """Set the schema instance and update attributes.""" @@ -47,7 +47,7 @@ def set_schema(self, schema: op.SchemaT) -> None: def init_schema(self, kwargs: dict[str, Any] | None) -> None: """Initialize the schema instance and set attributes.""" - _schema = _initialize_schema(schema_id=self.schema_id, kwargs=kwargs) + _schema = _initialize_schema(schema_class=self.schema_class, kwargs=kwargs) var_names = [] for var in op.get_variables_names(_schema): @@ -69,42 +69,36 @@ def is_field_list(self, field_name: str) -> bool: class BaseSchema(SchemaVariables): def __init__( self, - schema_id: op.SchemaID = op.BASE_SCHEMA, + schema_class: op.SchemaT = op.BASE_SCHEMA, kwargs: dict[str, Any] | None = None ) -> None: - super().__init__(schema_id=schema_id, kwargs=kwargs) + super().__init__(schema_class=schema_class, kwargs=kwargs) def __str__(self) -> str: return str(self.get_schema()) def copy(self) -> "BaseSchema": """Create a deep copy of the schema instance.""" - copy_schema = op.copy(schema_id=self.schema_id, schema=self.get_schema()) - new_instance = BaseSchema(schema_id=self.schema_id) + copy_schema = op.copy(schema_class=self.schema_class, schema=self.get_schema()) + new_instance = BaseSchema(schema_class=self.schema_class) new_instance.set_schema(copy_schema) return new_instance def serialize(self) -> bytes: """Serialize the schema instance to bytes.""" - return op.serialize(id_schema=self.schema_id, schema=self.get_schema()) + return op.serialize(schema=self.get_schema()) def deserialize(self, message: bytes) -> None | op.IncorrectSchema: """Deserialize bytes to populate the schema instance.""" - schema_id, schema = op.deserialize(message=message) - - op.check_is_same_schema( - id_schema_1=self.schema_id, id_schema_2=schema_id - ) - - self.schema_id = schema_id + schema = op.deserialize(schema_class=self.schema_class, message=message) self.set_schema(schema=schema) return None def check_is_same(self, other: Self) -> None | op.IncorrectSchema: """Check if another schema instance is of the same schema type.""" return op.check_is_same_schema( - id_schema_1=self.schema_id, - id_schema_2=other.schema_id + id_schema_1=self.schema_class, + id_schema_2=other.schema_class ) def __eq__(self, other: object) -> bool: @@ -132,7 +126,7 @@ class LogSchema(BaseSchema): def __init__( self, kwargs: dict[str, Any] | None = None ) -> None: - super().__init__(schema_id=op.LOG_SCHEMA, kwargs=kwargs) + super().__init__(schema_class=op.LOG_SCHEMA, kwargs=kwargs) def copy(self) -> "LogSchema": schema: LogSchema = super().copy() # type: ignore @@ -144,7 +138,7 @@ class ParserSchema(BaseSchema): def __init__( self, kwargs: dict[str, Any] | None = None ) -> None: - super().__init__(schema_id=op.PARSER_SCHEMA, kwargs=kwargs) + super().__init__(schema_class=op.PARSER_SCHEMA, kwargs=kwargs) def copy(self) -> "ParserSchema": schema: ParserSchema = super().copy() # type: ignore @@ -156,7 +150,7 @@ class DetectorSchema(BaseSchema): def __init__( self, kwargs: dict[str, Any] | None = None ) -> None: - super().__init__(schema_id=op.DETECTOR_SCHEMA, kwargs=kwargs) + super().__init__(schema_class=op.DETECTOR_SCHEMA, kwargs=kwargs) def copy(self) -> "DetectorSchema": schema: DetectorSchema = super().copy() # type: ignore @@ -168,7 +162,7 @@ class OutputSchema(BaseSchema): def __init__( self, kwargs: dict[str, Any] | None = None ) -> None: - super().__init__(schema_id=op.OUTPUT_SCHEMA, kwargs=kwargs) + super().__init__(schema_class=op.OUTPUT_SCHEMA, kwargs=kwargs) def copy(self) -> "OutputSchema": schema: OutputSchema = super().copy() # type: ignore diff --git a/src/detectmatelibrary/schemas/_op.py b/src/detectmatelibrary/schemas/_op.py index 8414d04..4117dc0 100644 --- a/src/detectmatelibrary/schemas/_op.py +++ b/src/detectmatelibrary/schemas/_op.py @@ -2,77 +2,20 @@ import detectmatelibrary.schemas.schemas_pb2 as s -from typing import NewType, Tuple, Dict, Type, Union, Any, Callable +from typing import NewType, Union, Any -from google.protobuf.message import Message - -# Main variables ************************************ # Use Union of actual protobuf classes for better type hints SchemaT = Union[s.Schema, s.LogSchema, s.ParserSchema, s.DetectorSchema, s.OutputSchema] # type: ignore SchemaID = NewType("SchemaID", bytes) -BASE_SCHEMA: SchemaID = SchemaID(b"0") -LOG_SCHEMA: SchemaID = SchemaID(b"1") -PARSER_SCHEMA: SchemaID = SchemaID(b"2") -DETECTOR_SCHEMA: SchemaID = SchemaID(b"3") -OUTPUT_SCHEMA: SchemaID = SchemaID(b"4") +BASE_SCHEMA: SchemaT = s.Schema # type: ignore +LOG_SCHEMA: SchemaT = s.LogSchema # type: ignore +PARSER_SCHEMA: SchemaT = s.ParserSchema # type: ignore +DETECTOR_SCHEMA: SchemaT = s.DetectorSchema # type: ignore +OUTPUT_SCHEMA: SchemaT = s.OutputSchema # type: ignore __current_version = "1.0.0" -__id_codes: Dict[SchemaID, Type[Message]] = { - BASE_SCHEMA: s.Schema, # type: ignore - LOG_SCHEMA: s.LogSchema, # type: ignore - PARSER_SCHEMA: s.ParserSchema, # type: ignore - DETECTOR_SCHEMA: s.DetectorSchema, # type: ignore - OUTPUT_SCHEMA: s.OutputSchema, # type: ignore -} -_validation_methods = {} - - -def register(schema_id: SchemaID - ) -> Callable[[Callable[[Dict[str, Any]], None]], Callable[[Dict[str, Any]], None]]: - def decorator(fn: Callable[[Dict[str, Any]], None]) -> Callable[[Dict[str, Any]], None]: - _validation_methods[schema_id] = fn - return fn - return decorator - - -def check_id_var(id_var: str, data: Dict[str, Any]) -> None: - if data is not None: - var = data.get(id_var) - if var: - if isinstance(var, list) and not all(str(x).isnumeric() for x in var): - raise ValueError(f"{id_var} must be a list with numeric values of type string.") - elif isinstance(var, str) and not str(var).isnumeric(): - raise ValueError(f"{id_var} must be of type string and have numeric values only.") - - -@register(BASE_SCHEMA) -def validate_base(data: Dict[str, Any]) -> None: - pass - - -@register(LOG_SCHEMA) -def validate_log(data: Dict[str, Any]) -> None: - check_id_var("logID", data) - - -@register(PARSER_SCHEMA) -def validate_parser(data: Dict[str, Any]) -> None: - check_id_var("parsedLogID", data) - check_id_var("logID", data) - - -@register(DETECTOR_SCHEMA) -def validate_detector(data: Dict[str, Any]) -> None: - check_id_var("alertID", data) - check_id_var("logIDs", data) - - -@register(OUTPUT_SCHEMA) -def validate_output(data: Dict[str, Any]) -> None: - check_id_var("alertIDs", data) - check_id_var("logIDs", data) # Exceptions **************************************** @@ -92,14 +35,6 @@ class NotCompleteSchema(Exception): # Private methods ************************************* -def __get_schema_class(schema_id: SchemaID) -> Type[Message]: - """Get the schema class for the given schema ID.""" - if schema_id not in __id_codes: - raise NotSupportedSchema() - - return __id_codes[schema_id] - - def __is_repeated(field: Any) -> bool: """Check if a field in the message is a repeated element.""" return bool(field.is_repeated) @@ -141,53 +76,35 @@ def get_variables_names(schema: SchemaT) -> list[str]: return [field.name for field in schema.DESCRIPTOR.fields] -def validate(schema_id: SchemaID, data: Dict[str, Any]) -> Any: - validator = _validation_methods.get(schema_id) - - if validator is None: - raise KeyError(f"No validator registered for schema {schema_id.decode()}") - - return validator(data) - - # Main methods ***************************************** -def initialize(schema_id: SchemaID, **kwargs: Any) -> SchemaT | NotSupportedSchema: +def initialize(schema: SchemaT, **kwargs: Any) -> SchemaT: """Initialize a protobuf schema, it uses its arguments and the assigned id.""" kwargs["__version__"] = __current_version - schema_class = __get_schema_class(schema_id) - data = {**kwargs} - validate(schema_id, data) - return schema_class(**kwargs) + return schema(**kwargs) def copy( - schema_id: SchemaID, schema: SchemaT -) -> SchemaT | IncorrectSchema | NotSupportedSchema: + schema_class: SchemaT, schema: SchemaT +) -> SchemaT | IncorrectSchema: """Make a copy of the schema.""" - new_schema = initialize(schema_id=schema_id, **{}) + new_schema = initialize(schema_class, **{}) try: - new_schema.CopyFrom(schema) # type: ignore + new_schema.CopyFrom(schema) return new_schema except TypeError: raise IncorrectSchema() -def serialize(id_schema: SchemaID, schema: SchemaT) -> bytes: - """Convert the protobuf schema into a binary serialization. - - First 4 bits are the schema id - """ - if id_schema not in __id_codes: - raise NotSupportedSchema() - - return bytes(id_schema + schema.SerializeToString()) +def serialize(schema: SchemaT) -> bytes: + return schema.SerializeToString() # type: ignore -def deserialize(message: bytes) -> Tuple[SchemaID, SchemaT]: +def deserialize(schema_class: SchemaT, message: bytes) -> SchemaT | NotSupportedSchema: """Return the schema and id from a serialize message.""" - schema_id = SchemaID(message[:1]) - schema_class = __get_schema_class(schema_id) schema = schema_class() - schema.ParseFromString(message[1:]) - return schema_id, schema + try: + schema.ParseFromString(message) + return schema + except Exception: + raise NotSupportedSchema() diff --git a/tests/test_common/test_core_detector.py b/tests/test_common/test_core_detector.py index 8275292..d2226ed 100644 --- a/tests/test_common/test_core_detector.py +++ b/tests/test_common/test_core_detector.py @@ -1,7 +1,6 @@ from detectmatelibrary.common.detector import CoreDetectorConfig, BufferMode from detectmatelibrary.common.detector import CoreDetector from detectmatelibrary.utils.aux import time_test_mode -import detectmatelibrary.schemas._op as op_schemas import detectmatelibrary.schemas as schemas import pydantic @@ -127,12 +126,6 @@ def test_process_correct_input_schema(self) -> None: result = detector.process(data) # no error should be produced assert isinstance(result, bytes) # and result should be bytes - def test_process_incorrect_input_schema(self) -> None: - detector = MockupDetector(name="TestDetector", config=dummy_config) - data = schemas.LogSchema({"log": "This is a log."}).serialize() - with pytest.raises(op_schemas.IncorrectSchema): - detector.process(data) - def test_process_input_schema_not_serialized(self) -> None: detector = MockupDetector(name="TestDetector", config=MockupConfig()) expected_result = schemas.DetectorSchema({ diff --git a/tests/test_common/test_core_parser.py b/tests/test_common/test_core_parser.py index 00c675e..f3119c0 100644 --- a/tests/test_common/test_core_parser.py +++ b/tests/test_common/test_core_parser.py @@ -1,6 +1,5 @@ from detectmatelibrary.common.parser import CoreParser, CoreParserConfig, get_format_variables from detectmatelibrary.utils.aux import time_test_mode -import detectmatelibrary.schemas._op as op_schemas import detectmatelibrary.schemas as schemas import pydantic @@ -87,13 +86,6 @@ def test_process_correct_input_schema(self) -> None: result = parser.process(data) # no error should be produced assert isinstance(result, bytes) # and result should be bytes - def test_process_incorrect_input_schema(self) -> None: - parser = MockupParser(name="TestParser", config=default_args) - data = schemas.DetectorSchema({"score": 0.99}).serialize() - - with pytest.raises(op_schemas.IncorrectSchema): - parser.process(data) - def test_process_correct_input_schema_not_serialize(self) -> None: parser = MockupParser(name="TestParser", config=MockupConfig()) expected_result = schemas.ParserSchema({ diff --git a/tests/test_pipelines/test_bad_players.py b/tests/test_pipelines/test_bad_players.py index 7af33ca..27f21d8 100644 --- a/tests/test_pipelines/test_bad_players.py +++ b/tests/test_pipelines/test_bad_players.py @@ -2,7 +2,7 @@ from detectmatelibrary.common.detector import CoreDetector, BufferMode from detectmatelibrary.common.parser import CoreParser -import detectmatelibrary.schemas._op as op_schemas +import detectmatelibrary.schemas._classes as schema_classes from detectmatelibrary.helper.from_to import From import pytest @@ -66,5 +66,5 @@ def test_get_incorrect_schema(self) -> None: buffer_size=None, ) - with pytest.raises(op_schemas.IncorrectSchema): + with pytest.raises(schema_classes.FieldNotFound): next(From.log(detector, log_path)) diff --git a/tests/test_schemas/test_ops.py b/tests/test_schemas/test_ops.py index 425168c..866e654 100644 --- a/tests/test_schemas/test_ops.py +++ b/tests/test_schemas/test_ops.py @@ -9,12 +9,6 @@ def test_initialize_basic(self): assert schema.__version__ == "1.0.0" - def test_initialize_not_support_schema(self) -> None: - try: - op_schemas.initialize(b"1111", **{}) - except op_schemas.NotSupportedSchema: - pass - def test_initialize_log_schema(self) -> None: values = { "logID": "1", "log": "test", "logSource": "example", "hostname": "example@org" @@ -115,24 +109,14 @@ def test_copy_incorrect_schema(self) -> None: with pytest.raises(op_schemas.IncorrectSchema): op_schemas.copy(op_schemas.PARSER_SCHEMA, schema) - def test_copy_incompatible_schema(self) -> None: - values = { - "logID": "1", "log": "test", "logSource": "example", "hostname": "example@org" - } - schema = op_schemas.initialize(op_schemas.LOG_SCHEMA, **values) - with pytest.raises(op_schemas.NotSupportedSchema): - op_schemas.copy(b"213123213123", schema) - def test_serialize_method(self) -> None: values = { "logID": "1", "log": "test", "logSource": "example", "hostname": "example@org" } schema = op_schemas.initialize(op_schemas.LOG_SCHEMA, **values) - bschema = op_schemas.serialize(op_schemas.LOG_SCHEMA, schema=schema) - - schema_id, new_schema = op_schemas.deserialize(bschema) + bschema = op_schemas.serialize(schema=schema) - assert schema_id == op_schemas.LOG_SCHEMA + new_schema = op_schemas.deserialize(op_schemas.LOG_SCHEMA, bschema) assert new_schema.__version__ == "1.0.0" assert new_schema.logID == "1" @@ -140,15 +124,6 @@ def test_serialize_method(self) -> None: assert new_schema.logSource == "example" assert new_schema.hostname == "example@org" - def test_serialize_not_supported(self) -> None: - values = { - "logID": "1", "log": "test", "logSource": "example", "hostname": "example@org" - } - schema = op_schemas.initialize(op_schemas.LOG_SCHEMA, **values) - - with pytest.raises(op_schemas.NotSupportedSchema): - op_schemas.serialize(b"1111", schema=schema) - def test_check_is_same_schema(self) -> None: op_schemas.check_is_same_schema(op_schemas.LOG_SCHEMA, op_schemas.LOG_SCHEMA) diff --git a/tests/test_schemas/test_schema_class.py b/tests/test_schemas/test_schema_class.py index ed0c30b..e809100 100644 --- a/tests/test_schemas/test_schema_class.py +++ b/tests/test_schemas/test_schema_class.py @@ -1,4 +1,3 @@ -from detectmatelibrary.schemas import OutputSchema from detectmatelibrary.schemas._classes import ( SchemaVariables, BaseSchema, LogSchema, ParserSchema, DetectorSchema, FieldNotFound ) @@ -11,9 +10,9 @@ class TestSchemaVariables: def test_basic_init(self): - schema_var = SchemaVariables(schema_id=PARSER_SCHEMA) + schema_var = SchemaVariables(schema_class=PARSER_SCHEMA) - assert schema_var.schema_id == PARSER_SCHEMA + assert schema_var.schema_class == PARSER_SCHEMA assert schema_var.get_schema().__version__ == "1.0.0" def test_init_with_kwargs(self): @@ -29,7 +28,7 @@ def test_init_with_kwargs(self): "receivedTimestamp": 0, "parsedTimestamp": 0, } - schema_var = SchemaVariables(schema_id=PARSER_SCHEMA, kwargs=values) + schema_var = SchemaVariables(schema_class=PARSER_SCHEMA, kwargs=values) schema_var.logID = "0" # Check if we can add values later assert schema_var.parserType == "test" @@ -45,7 +44,7 @@ def test_init_with_kwargs(self): assert schema_var.parsedTimestamp == 0 def test_change_value(self): - schema_var = SchemaVariables(schema_id=PARSER_SCHEMA) + schema_var = SchemaVariables(schema_class=PARSER_SCHEMA) schema_var.parserType = "new_type" @@ -53,7 +52,7 @@ def test_change_value(self): assert schema_var.get_schema().parserType == "new_type" def test_change_value_list(self): - schema_var = SchemaVariables(schema_id=PARSER_SCHEMA) + schema_var = SchemaVariables(schema_class=PARSER_SCHEMA) schema_var.variables = ["x", "y", "z"] assert schema_var.variables == ["x", "y", "z"] @@ -67,7 +66,7 @@ class TestBaseSchema: def test_basic_init(self): base_schema = BaseSchema() - assert base_schema.schema_id == BASE_SCHEMA + assert base_schema.schema_class == BASE_SCHEMA assert base_schema.get_schema().__version__ == "1.0.0" def test_all_initialize(self): @@ -80,7 +79,7 @@ def test_copy(self): log_schema.log = "Test log" log_schema_copy = log_schema.copy() - assert log_schema_copy.schema_id == log_schema.schema_id + assert log_schema_copy.schema_class == log_schema.schema_class assert log_schema_copy.get_schema() == log_schema.get_schema() assert log_schema_copy.log == "Test log" @@ -89,7 +88,7 @@ def test_assign_as_dict(self): log_schema["log"] = "Test log" log_schema_copy = log_schema.copy() - assert log_schema_copy.schema_id == log_schema.schema_id + assert log_schema_copy.schema_class == log_schema.schema_class assert log_schema_copy.get_schema() == log_schema.get_schema() assert log_schema_copy["log"] == "Test log" @@ -125,34 +124,10 @@ def test_serialize_deserialize(self): new_log_schema = LogSchema() new_log_schema.deserialize(serialized) - assert new_log_schema.schema_id == log_schema.schema_id + assert new_log_schema.schema_class == log_schema.schema_class assert new_log_schema.get_schema() == log_schema.get_schema() assert new_log_schema.log == "Test log" - def test_deserialize_incorrect_schema(self): - log_schema = LogSchema() - serialized = log_schema.serialize() - - parser_schema = ParserSchema() - - with pytest.raises(IncorrectSchema): - parser_schema.deserialize(serialized) - - def test_wrong_value(self): - def test_variable(class_, id_var_name): - with pytest.raises(Exception): - class_({id_var_name: "helllo"}) - schema = class_() - setattr(schema, id_var_name, "Test log") - with pytest.raises(Exception): - schema.get_schema() - - values = [(LogSchema, "logID"), (ParserSchema, "parsedLogID"), (ParserSchema, "logID"), - (DetectorSchema, "alertID"), (DetectorSchema, "logIDs"), (OutputSchema, "alertIDs"), - (OutputSchema, "logIDs")] - for class_, id_var_name in values: - test_variable(class_, id_var_name) - def test_check_is_same(self): log_schema1 = LogSchema() log_schema2 = LogSchema()