From 102ee9ff51a99e27ca1a984ddd357327815a20b0 Mon Sep 17 00:00:00 2001 From: clintval Date: Fri, 14 Mar 2025 17:04:45 -1000 Subject: [PATCH] feat: keep track of line numbers when making exceptions --- tests/test_reader.py | 6 +++--- typeline/_reader.py | 21 +++++++++++---------- typeline/_writer.py | 15 ++++++--------- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/tests/test_reader.py b/tests/test_reader.py index d4cabb9..bacfefa 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -207,8 +207,8 @@ def test_reader_raises_exception_for_failed_type_coercion(tmp_path: Path) -> Non """Test the reader raises an exception for failed type coercion.""" (tmp_path / "test.txt").write_text( "\n".join([ - "field1\tfield2\tfield3\n", - "1\tname\tBOMB\n", + "field1\tfield2\tfield3", + "1\tname\tBOMB", ]) ) @@ -217,7 +217,7 @@ def test_reader_raises_exception_for_failed_type_coercion(tmp_path: Path) -> Non pytest.raises( DecodeError, match=( - r"Could not load delimited data line into JSON\-like format\." + r"Could not load delimited data into JSON\-like format on line 2\." + r" Built improperly formatted JSON\:" + r" \{\"field1\"\:1\,\"field2\"\:\"name\"\,\"field3\"\:BOMB\}\." + r" Original exception\: JSON is malformed\:" diff --git a/typeline/_reader.py b/typeline/_reader.py index c0bae8f..82c5aa9 100644 --- a/typeline/_reader.py +++ b/typeline/_reader.py @@ -9,6 +9,7 @@ from dataclasses import fields as fields_of from dataclasses import is_dataclass from io import TextIOWrapper +from os import linesep from pathlib import Path from types import NoneType from types import TracebackType @@ -62,6 +63,7 @@ def __init__( # Initialize and save internal attributes of this class. self._handle: TextIOWrapper = handle + self._line_count: int = 0 self._record_type: type[RecordType] = record_type self._comment_prefixes: set[str] = comment_prefixes @@ -70,22 +72,21 @@ def __init__( self._header: list[str] = [field.name for field in self._fields] self._field_types: list[type | str | Any] = [field.type for field in self._fields] - # Build a JSON decoder for intermediate data conversion (after delimited, before dataclass). - self._decoder: JSONDecoder[Any] = JSONDecoder(strict=False) + # Build a JSON decoder for intermediate data conversion (after delimited; before dataclass). + self._decoder: JSONDecoder[dict[str, JsonType]] = JSONDecoder(strict=False) # Build the delimited dictionary reader, filtering out any comment lines along the way. self._reader: DictReader[str] = DictReader( self._filter_out_comments(handle), - fieldnames=self._header if not header else None, delimiter=self.delimiter, + fieldnames=self._header if not header else None, + lineterminator=linesep, quotechar="'", quoting=csv.QUOTE_MINIMAL, ) # Protect the user from the case where a header was specified, but a data line was found! - if self._reader.fieldnames is not None and ( - set(self._reader.fieldnames) != set(self._header) - ): + if self._reader.fieldnames is not None and self._reader.fieldnames != self._header: raise ValueError("Fields of header do not match fields of dataclass!") @property @@ -113,8 +114,8 @@ def __exit__( def _filter_out_comments(self, lines: Iterator[str]) -> Iterator[str]: """Yield only lines in an iterator that do not start with a comment prefix.""" for line in lines: - stripped: str = line.strip() - if not stripped: + self._line_count += 1 + if not (stripped := line.strip()): continue elif any(stripped.startswith(prefix) for prefix in self._comment_prefixes): continue @@ -126,7 +127,7 @@ def __iter__(self) -> Iterator[RecordType]: for record in self._reader: as_builtins = self._csv_dict_to_json(record) try: - yield convert(as_builtins, self._record_type, strict=False) + yield convert(as_builtins, self._record_type, strict=False, str_keys=True) except ValidationError as exception: raise ValidationError( "Could not parse JSON-like object into requested structure:" @@ -152,7 +153,7 @@ def _csv_dict_to_json(self, record: dict[str, str]) -> dict[str, JsonType]: as_builtins: dict[str, JsonType] = self._decoder.decode(json_string) except DecodeError as exception: raise DecodeError( - "Could not load delimited data line into JSON-like format." + f"Could not load delimited data into JSON-like format on line {self._line_count}." + f" Built improperly formatted JSON: {json_string}." + f" Original exception: {exception}." ) from exception diff --git a/typeline/_writer.py b/typeline/_writer.py index fd4c236..c8f3714 100644 --- a/typeline/_writer.py +++ b/typeline/_writer.py @@ -44,14 +44,14 @@ def __init__(self, handle: TextIOWrapper, record_type: type[RecordType]) -> None self._handle: TextIOWrapper = handle self._record_type: type[RecordType] = record_type - # Inspect the record type and save the fields, field names, and field types. + # Inspect the record type and save the fields and field names. self._fields: tuple[Field[Any], ...] = fields_of(record_type) - self._header: list[str] = [field.name for field in fields_of(record_type)] + self._header: list[str] = [field.name for field in self._fields] - # Build a JSON encoder for intermediate data conversion (after dataclass, before delimited). + # Build a JSON encoder for intermediate data conversion (after dataclass; before delimited). self._encoder: JSONEncoder = JSONEncoder() - # Build the delimited dictionary reader, filtering out any comment lines along the way. + # Build the delimited dictionary writer which will use platform-dependent newlines. self._writer: DictWriter[str] = DictWriter( handle, fieldnames=self._header, @@ -84,17 +84,14 @@ def __exit__( return None def _encode(self, item: Any) -> Any: - """A callback for overriding the encoding of builtin types and custom types.""" - if isinstance(item, tuple): - return list(item) # pyright: ignore[reportUnknownVariableType, reportUnknownArgumentType] + """A custom encoder that can pre-process an item prior to serialization.""" return item def write(self, record: RecordType) -> None: """Write the record to the open file-like object.""" if not isinstance(record, self._record_type): raise ValueError( - f"Expected {self._record_type.__name__} but found" - + f" {record.__class__.__qualname__}!" + f"Expected {self._record_type.__name__} but found {record.__class__.__qualname__}!" ) encoded = {name: self._encode(getattr(record, name)) for name in self._header}