From 7a284d9ab45722ed601a8505318b19eb1015e154 Mon Sep 17 00:00:00 2001
From: Enkidu93 <lowryec17@gcc.edu>
Date: Fri, 6 Feb 2026 17:37:50 -0500
Subject: [PATCH 1/3] Port unported updates since Machine
 df7d6e9c0bf1de8cba9462fba89208e6546db8fe

---
 machine/corpora/__init__.py                   |   8 +-
 .../file_paratext_project_file_handler.py     |  18 +-
 ...xt_project_versification_error_detector.py |   4 +-
 machine/corpora/paratext_project_settings.py  |   4 +-
 ...ject_versification_error_detector_base.py} |  16 +-
 ...scripture_ref_usfm_parser_handler_base.py} |  69 +++--
 machine/corpora/update_usfm_parser_handler.py |  22 +-
 machine/corpora/usfm_parser.py                |   7 +
 machine/corpora/usfm_parser_state.py          |   5 +-
 machine/corpora/usfm_text_base.py             |   4 +-
 .../zip_paratext_project_file_handler.py      |  10 +-
 ...paratext_project_versification_detector.py |   4 +-
 machine/scripture/verse_ref.py                |   2 -
 pyproject.toml                                |   2 +-
 ...place_markers_usfm_update_block_handler.py |  87 +++++--
 .../test_update_usfm_parser_handler.py        | 113 +++++++++
 tests/corpora/test_usfm_memory_text.py        | 238 ++++++++++++++++++
 tests/scripture/test_verse_ref.py             |   2 -
 .../data/usfm/Tes/{custom.vrs => Custom.vrs}  |   0
 ...xt_project_versification_error_detector.py |   4 +-
 20 files changed, 552 insertions(+), 67 deletions(-)
 rename machine/corpora/{paratext_project_versification_error_detector.py => paratext_project_versification_error_detector_base.py} (77%)
 rename machine/corpora/{scripture_ref_usfm_parser_handler.py => scripture_ref_usfm_parser_handler_base.py} (81%)
 rename tests/testutils/data/usfm/Tes/{custom.vrs => Custom.vrs} (100%)

diff --git a/machine/corpora/__init__.py b/machine/corpora/__init__.py
index 99a69191..d07e52ee 100644
--- a/machine/corpora/__init__.py
+++ b/machine/corpora/__init__.py
@@ -28,12 +28,12 @@
 from .paratext_project_settings_parser_base import ParatextProjectSettingsParserBase
 from .paratext_project_terms_parser_base import KeyTerm, ParatextProjectTermsParserBase
 from .paratext_project_text_updater_base import ParatextProjectTextUpdaterBase
-from .paratext_project_versification_error_detector import ParatextProjectVersificationErrorDetector
+from .paratext_project_versification_error_detector_base import ParatextProjectVersificationErrorDetectorBase
 from .paratext_text_corpus import ParatextTextCorpus
 from .place_markers_usfm_update_block_handler import PlaceMarkersAlignmentInfo, PlaceMarkersUsfmUpdateBlockHandler
 from .scripture_element import ScriptureElement
 from .scripture_ref import EMPTY_SCRIPTURE_REF, ScriptureRef
-from .scripture_ref_usfm_parser_handler import ScriptureRefUsfmParserHandler, ScriptureTextType
+from .scripture_ref_usfm_parser_handler_base import ScriptureRefUsfmParserHandlerBase, ScriptureTextType
 from .scripture_text_corpus import (
     ScriptureTextCorpus,
     create_versification_ref_corpus,
@@ -139,7 +139,7 @@
     "ParatextProjectSettingsParserBase",
     "ParatextProjectTermsParserBase",
     "ParatextProjectTextUpdaterBase",
-    "ParatextProjectVersificationErrorDetector",
+    "ParatextProjectVersificationErrorDetectorBase",
     "ParatextTextCorpus",
     "parse_usfm",
     "PlaceMarkersAlignmentInfo",
@@ -147,7 +147,7 @@
     "RtlReferenceOrder",
     "ScriptureElement",
     "ScriptureRef",
-    "ScriptureRefUsfmParserHandler",
+    "ScriptureRefUsfmParserHandlerBase",
     "ScriptureTextCorpus",
     "ScriptureTextType",
     "StandardParallelTextCorpus",
diff --git a/machine/corpora/file_paratext_project_file_handler.py b/machine/corpora/file_paratext_project_file_handler.py
index 8cdc3dd2..a9846645 100644
--- a/machine/corpora/file_paratext_project_file_handler.py
+++ b/machine/corpora/file_paratext_project_file_handler.py
@@ -1,3 +1,4 @@
+import os
 from pathlib import Path
 from typing import BinaryIO, Optional
 
@@ -11,17 +12,28 @@ def __init__(self, project_dir: StrPath) -> None:
         self._project_dir = Path(project_dir)
 
     def exists(self, file_name: str) -> bool:
-        return (self._project_dir / file_name).is_file()
+        for actual_file_name in os.listdir(self._project_dir):
+            if actual_file_name.lower() == file_name.lower():
+                return True
+        return False
 
     def open(self, file_name: str) -> BinaryIO:
+        for actual_file_name in os.listdir(self._project_dir):
+            if actual_file_name.lower() == file_name.lower():
+                return open(self._project_dir / actual_file_name, "rb")
         return open(self._project_dir / file_name, "rb")
 
     def find(self, extension: str) -> Optional[Path]:
         return next(self._project_dir.glob(f"*{extension}"), None)
 
     def create_stylesheet(self, file_name: str) -> UsfmStylesheet:
-        custom_stylesheet_filename = self._project_dir / "custom.sty"
+        custom_stylesheet_file_name = "custom.sty"
+        for actual_file_name in os.listdir(self._project_dir):
+            if actual_file_name.lower() == custom_stylesheet_file_name:
+                custom_stylesheet_file_name = actual_file_name
+                break
+        custom_stylesheet_path = self._project_dir / custom_stylesheet_file_name
         return UsfmStylesheet(
             file_name,
-            custom_stylesheet_filename if custom_stylesheet_filename.is_file() else None,
+            custom_stylesheet_path if custom_stylesheet_path.is_file() else None,
         )
diff --git a/machine/corpora/file_paratext_project_versification_error_detector.py b/machine/corpora/file_paratext_project_versification_error_detector.py
index 4e2cdac3..5f451894 100644
--- a/machine/corpora/file_paratext_project_versification_error_detector.py
+++ b/machine/corpora/file_paratext_project_versification_error_detector.py
@@ -1,10 +1,10 @@
 from ..utils.typeshed import StrPath
 from .file_paratext_project_file_handler import FileParatextProjectFileHandler
 from .file_paratext_project_settings_parser import FileParatextProjectSettingsParser
-from .paratext_project_versification_error_detector import ParatextProjectVersificationErrorDetector
+from .paratext_project_versification_error_detector_base import ParatextProjectVersificationErrorDetectorBase
 
 
-class FileParatextProjectVersificationErrorDetector(ParatextProjectVersificationErrorDetector):
+class FileParatextProjectVersificationErrorDetector(ParatextProjectVersificationErrorDetectorBase):
     def __init__(self, project_dir: StrPath) -> None:
         super().__init__(
             FileParatextProjectFileHandler(project_dir), FileParatextProjectSettingsParser(project_dir).parse()
diff --git a/machine/corpora/paratext_project_settings.py b/machine/corpora/paratext_project_settings.py
index ad86b303..5f747257 100644
--- a/machine/corpora/paratext_project_settings.py
+++ b/machine/corpora/paratext_project_settings.py
@@ -53,9 +53,9 @@ def get_book_file_name(self, book_id: str) -> str:
             book_part = _get_book_file_name_digits(book_id) + book_id
         return self.file_name_prefix + book_part + self.file_name_suffix
 
-    def get_all_scripture_book_file_names(self) -> Iterable[str]:
+    def get_all_scripture_book_ids(self) -> Iterable[str]:
         for book_id in get_scripture_books():
-            yield self.get_book_file_name(book_id)
+            yield book_id
 
 
 def _get_book_file_name_digits(book_id: str) -> str:
diff --git a/machine/corpora/paratext_project_versification_error_detector.py b/machine/corpora/paratext_project_versification_error_detector_base.py
similarity index 77%
rename from machine/corpora/paratext_project_versification_error_detector.py
rename to machine/corpora/paratext_project_versification_error_detector_base.py
index 64ceee32..4b96be59 100644
--- a/machine/corpora/paratext_project_versification_error_detector.py
+++ b/machine/corpora/paratext_project_versification_error_detector_base.py
@@ -1,5 +1,6 @@
-from typing import List, Optional, Union
+from typing import List, Optional, Set, Union
 
+from ..scripture.canon import book_id_to_number
 from .paratext_project_file_handler import ParatextProjectFileHandler
 from .paratext_project_settings import ParatextProjectSettings
 from .paratext_project_settings_parser_base import ParatextProjectSettingsParserBase
@@ -7,7 +8,7 @@
 from .usfm_versification_error_detector import UsfmVersificationError, UsfmVersificationErrorDetector
 
 
-class ParatextProjectVersificationErrorDetector:
+class ParatextProjectVersificationErrorDetectorBase:
     def __init__(
         self,
         paratext_project_file_handler: ParatextProjectFileHandler,
@@ -20,14 +21,19 @@ def __init__(
             self._settings = settings
 
     def get_usfm_versification_errors(
-        self,
-        handler: Optional[UsfmVersificationErrorDetector] = None,
+        self, handler: Optional[UsfmVersificationErrorDetector] = None, books: Optional[Set[int]] = None
     ) -> List[UsfmVersificationError]:
         handler = handler or UsfmVersificationErrorDetector(self._settings)
-        for file_name in self._settings.get_all_scripture_book_file_names():
+        for book_id in self._settings.get_all_scripture_book_ids():
+
+            file_name = self._settings.get_book_file_name(book_id)
+
             if not self._paratext_project_file_handler.exists(file_name):
                 continue
 
+            if books is not None and not book_id_to_number(book_id) in books:
+                continue
+
             with self._paratext_project_file_handler.open(file_name) as sfm_file:
                 usfm: str = sfm_file.read().decode(self._settings.encoding)
             try:
diff --git a/machine/corpora/scripture_ref_usfm_parser_handler.py b/machine/corpora/scripture_ref_usfm_parser_handler_base.py
similarity index 81%
rename from machine/corpora/scripture_ref_usfm_parser_handler.py
rename to machine/corpora/scripture_ref_usfm_parser_handler_base.py
index efd2962d..fc88dcf1 100644
--- a/machine/corpora/scripture_ref_usfm_parser_handler.py
+++ b/machine/corpora/scripture_ref_usfm_parser_handler_base.py
@@ -22,10 +22,14 @@ class ScriptureTextType(Enum):
 
 
 def _is_embed_style(marker: Optional[str]) -> bool:
-    return marker is not None and (marker.strip("*") in _EMBED_STYLES or marker.startswith("z"))
+    return marker is not None and marker.strip("*") in _EMBED_STYLES
 
 
-class ScriptureRefUsfmParserHandler(UsfmParserHandler, ABC):
+def is_private_use_marker(marker: str):
+    return marker is not None and marker.startswith("z")
+
+
+class ScriptureRefUsfmParserHandlerBase(UsfmParserHandler, ABC):
     def __init__(self) -> None:
         self._cur_verse_ref: VerseRef = VerseRef()
         self._cur_elements_stack: List[ScriptureElement] = []
@@ -46,22 +50,29 @@ def chapter(self, state: UsfmParserState, number: str, marker: str, alt_number:
     def verse(
         self, state: UsfmParserState, number: str, marker: str, alt_number: Optional[str], pub_number: Optional[str]
     ) -> None:
-        if state.verse_ref == self._cur_verse_ref and not self._duplicate_verse:
-            self._end_verse_text(state, self._create_verse_refs())
-            # ignore duplicate verses
-            self._duplicate_verse = True
+        # Non-Latin numbers are implicitly handled
+
+        if state.chapter_has_verse_zero and state.verse_ref.verse_num == 0:
+            # Fall through for the special case of verse 0 being specified in the USFM
+            pass
+        elif state.verse_ref == self._cur_verse_ref and not self._duplicate_verse:
+            if state.verse_ref.verse_num > 0:
+                self._end_verse_text(state, self._create_verse_refs())
+                # ignore duplicate verses
+                self._duplicate_verse = True
+            return
         elif are_overlapping_verse_ranges(verse1=number, verse2=self._cur_verse_ref.verse):
             # merge overlapping verse ranges in to one range
             verse_ref: VerseRef = self._cur_verse_ref.copy()
             verse_ref.verse = merge_verse_ranges(number, self._cur_verse_ref.verse)
             self._update_verse_ref(verse_ref, marker)
+            return
+        if self._current_text_type == ScriptureTextType.NONVERSE:
+            self._end_non_verse_text_wrapper(state)
         else:
-            if self._current_text_type == ScriptureTextType.NONVERSE:
-                self._end_non_verse_text_wrapper(state)
-            elif self._current_text_type == ScriptureTextType.VERSE:
-                self._end_verse_text_wrapper(state)
-            self._update_verse_ref(state.verse_ref, marker)
-            self._start_verse_text_wrapper(state)
+            self._end_verse_text_wrapper(state)
+        self._update_verse_ref(state.verse_ref, marker)
+        self._start_verse_text_wrapper(state)
 
     def start_para(
         self,
@@ -70,6 +81,10 @@ def start_para(
         unknown: Optional[bool],
         attributes: Optional[Sequence[UsfmAttribute]],
     ) -> None:
+        # ignore private-use markers
+        if is_private_use_marker(marker):
+            return
+
         if self._cur_verse_ref.is_default:
             self._update_verse_ref(state.verse_ref, marker)
         if not state.is_verse_text:
@@ -77,6 +92,10 @@ def start_para(
             self._start_non_verse_text_wrapper(state)
 
     def end_para(self, state: UsfmParserState, marker: str) -> None:
+        # ignore private-use markers
+        if is_private_use_marker(marker):
+            return
+
         if self._current_text_type == ScriptureTextType.NONVERSE:
             self._end_parent_element()
             self._end_non_verse_text_wrapper(state)
@@ -126,6 +145,10 @@ def opt_break(self, state: UsfmParserState) -> None:
     def start_char(
         self, state: UsfmParserState, marker: str, unknown: bool, attributes: Optional[Sequence[UsfmAttribute]]
     ) -> None:
+        # ignore private-use markers
+        if is_private_use_marker(marker):
+            return
+
         # if we hit a character marker in a verse paragraph and we aren't in a verse, then start a non-verse segment
         self._check_convert_verse_para_to_non_verse(state)
 
@@ -135,6 +158,10 @@ def start_char(
     def end_char(
         self, state: UsfmParserState, marker: str, attributes: Optional[Sequence[UsfmAttribute]], closed: bool
     ) -> None:
+        # ignore private-use markers
+        if is_private_use_marker(marker):
+            return
+
         if _is_embed_style(marker):
             self._end_embed_text_wrapper(state)
 
@@ -162,9 +189,9 @@ def _start_verse_text_wrapper(self, state: UsfmParserState) -> None:
         self._start_verse_text(state, self._create_verse_refs())
 
     def _end_verse_text_wrapper(self, state: UsfmParserState) -> None:
-        if not self._duplicate_verse and self._cur_verse_ref.verse_num > 0:
+        if not self._duplicate_verse and (self._cur_verse_ref.verse_num > 0 or state.chapter_has_verse_zero):
             self._end_verse_text(state, self._create_verse_refs())
-        if self._cur_verse_ref.verse_num > 0:
+        if self._cur_verse_ref.verse_num > 0 or state.chapter_has_verse_zero:
             self._cur_text_type_stack.pop()
 
     def _start_non_verse_text_wrapper(self, state: UsfmParserState) -> None:
@@ -177,7 +204,17 @@ def _end_non_verse_text_wrapper(self, state: UsfmParserState) -> None:
         self._cur_text_type_stack.pop()
 
     def _update_verse_ref(self, verse_ref: VerseRef, marker: str) -> None:
-        if not are_overlapping_verse_ranges(verse_ref, self._cur_verse_ref):
+        if (
+            self._cur_verse_ref.verse_num == 0
+            and verse_ref.verse_num == 0
+            and not verse_ref.has_multiple
+            and marker == "v"
+        ):
+            # As the verse 0 marker appears in the middle of verse 0,
+            # we should not break the position of the current element stack by clearing it.
+            # Instead, we just need to pop the current element off the stack.
+            self._cur_elements_stack.pop()
+        elif not are_overlapping_verse_ranges(verse_ref, self._cur_verse_ref):
             self._cur_elements_stack.clear()
             self._cur_elements_stack.append(ScriptureElement(0, marker))
         self._cur_verse_ref = verse_ref.copy()
@@ -239,6 +276,8 @@ def _check_convert_verse_para_to_non_verse(self, state: UsfmParserState) -> None
             and para_tag.marker != "tr"
             and state.is_verse_para
             and self._cur_verse_ref.verse_num == 0
+            and not state.chapter_has_verse_zero
+            and not is_private_use_marker(para_tag.marker)
         ):
             self._start_parent_element(para_tag.marker)
             self._start_non_verse_text_wrapper(state)
diff --git a/machine/corpora/update_usfm_parser_handler.py b/machine/corpora/update_usfm_parser_handler.py
index fafabcc2..903ab3d1 100644
--- a/machine/corpora/update_usfm_parser_handler.py
+++ b/machine/corpora/update_usfm_parser_handler.py
@@ -3,7 +3,7 @@
 
 from ..scripture.verse_ref import IgnoreSegmentsVerseRef, VerseRef, Versification
 from .scripture_ref import ScriptureRef
-from .scripture_ref_usfm_parser_handler import ScriptureRefUsfmParserHandler, ScriptureTextType
+from .scripture_ref_usfm_parser_handler_base import ScriptureRefUsfmParserHandlerBase, ScriptureTextType
 from .usfm_parser_state import UsfmParserState
 from .usfm_stylesheet import UsfmStylesheet
 from .usfm_tag import UsfmTextType
@@ -38,7 +38,11 @@ def __init__(self, refs: Sequence[ScriptureRef], text: str, metadata: Optional[d
         self.metadata = metadata
 
 
-class UpdateUsfmParserHandler(ScriptureRefUsfmParserHandler):
+def sanitize_verse_data(verse_data: str) -> str:
+    return verse_data.replace("\u200F", "")
+
+
+class UpdateUsfmParserHandler(ScriptureRefUsfmParserHandlerBase):
     def __init__(
         self,
         rows: Optional[Sequence[UpdateUsfmRow]] = None,
@@ -319,10 +323,16 @@ def _end_non_verse_text(self, state: UsfmParserState, scripture_ref: ScriptureRe
         self._end_update_block(state, [scripture_ref])
 
     def _end_embed_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None:
+        # If this embed is outside an update block, create an update block just for this embed
+        embed_outside_of_block = len(self._update_block_stack) == 0
+        if embed_outside_of_block:
+            self._start_update_block([scripture_ref])
         self._update_block_stack[-1].add_embed(
             self._embed_tokens, marked_for_removal=self._embed_behavior == UpdateUsfmMarkerBehavior.STRIP
         )
         self._embed_tokens.clear()
+        if embed_outside_of_block:
+            self._end_update_block(state, [scripture_ref])
 
     def get_usfm(self, stylesheet: Union[str, UsfmStylesheet] = "usfm.sty") -> str:
         if isinstance(stylesheet, str):
@@ -349,6 +359,12 @@ def _advance_rows(self, seg_scr_refs: Sequence[ScriptureRef]) -> Tuple[List[str]
         row_texts: List[str] = []
         row_metadata = None
         source_index: int = 0
+
+        # Handle the special case of verse 0: although its row comes first in the rows,
+        # it may be requested after other segments of the verse, so rewind the row index.
+        if len(seg_scr_refs) > 0 and seg_scr_refs[0].verse_num == 0 and len(seg_scr_refs[0].path) == 0:
+            self._verse_row_index = 0
+
         while self._verse_row_index < len(self._verse_rows) and source_index < len(seg_scr_refs):
             compare: int = 0
             row = self._rows[self._verse_rows[self._verse_row_index]]
@@ -378,6 +394,8 @@ def _collect_updatable_tokens(self, state: UsfmParserState) -> None:
         self._use_updated_text()
         while self._token_index <= state.index + state.special_token_count:
             token = state.tokens[self._token_index]
+            if token.type == UsfmTokenType.VERSE and token.data is not None:
+                token.data = sanitize_verse_data(token.data)
             if self._current_text_type == ScriptureTextType.EMBED:
                 self._embed_tokens.append(token)
             elif (
diff --git a/machine/corpora/usfm_parser.py b/machine/corpora/usfm_parser.py
index e13acfed..a37d5396 100644
--- a/machine/corpora/usfm_parser.py
+++ b/machine/corpora/usfm_parser.py
@@ -223,6 +223,8 @@ def process_token(self) -> bool:
             verse_ref = self.state.verse_ref
             verse_ref.chapter = token.data
             verse_ref.verse_num = 0
+            self.state.chapter_has_verse_zero = False
+
             # Verse offset is not zeroed for chapter 1, as it is part of intro
             if verse_ref.chapter_num != 1:
                 self.state.verse_offset = 0
@@ -261,7 +263,12 @@ def process_token(self) -> bool:
 
             assert token.data is not None
             verse_ref = self.state.verse_ref
+            prev_verse_num = verse_ref.verse_num
             verse_ref.verse = token.data
+            if verse_ref.verse_num == 0:  # This token is \v 0
+                self.state.chapter_has_verse_zero = True
+            elif verse_ref.verse_num == -1:  # Ignore invalid verse numbers
+                verse_ref.verse_num = prev_verse_num
             self.state.verse_offset = 0
 
             if self.handler is not None:
diff --git a/machine/corpora/usfm_parser_state.py b/machine/corpora/usfm_parser_state.py
index 3d0b9e82..f0ef8a74 100644
--- a/machine/corpora/usfm_parser_state.py
+++ b/machine/corpora/usfm_parser_state.py
@@ -37,6 +37,7 @@ def __init__(self, stylesheet: UsfmStylesheet, versification: Versification, tok
         self._tokens = tokens
         self.index = -1
         self.special_token = False
+        self.chapter_has_verse_zero = False
         self._special_token_count: int = 0
 
     @property
@@ -108,8 +109,8 @@ def is_verse_para(self) -> bool:
 
     @property
     def is_verse_text(self) -> bool:
-        # anything before verse 1 is not verse text
-        if self.verse_ref.verse_num == 0:
+        # anything before verse 1 is not verse text, unless the USFM specified verse 0
+        if self.verse_ref.verse_num == 0 and not self.chapter_has_verse_zero:
             return False
 
         # Sidebars and notes are not verse text
diff --git a/machine/corpora/usfm_text_base.py b/machine/corpora/usfm_text_base.py
index 830802ca..ee400909 100644
--- a/machine/corpora/usfm_text_base.py
+++ b/machine/corpora/usfm_text_base.py
@@ -7,7 +7,7 @@
 from ..utils.string_utils import has_sentence_ending
 from .corpora_utils import gen
 from .scripture_ref import ScriptureRef
-from .scripture_ref_usfm_parser_handler import ScriptureRefUsfmParserHandler, ScriptureTextType
+from .scripture_ref_usfm_parser_handler_base import ScriptureRefUsfmParserHandlerBase, ScriptureTextType
 from .scripture_text import ScriptureText
 from .stream_container import StreamContainer
 from .text_row import TextRow
@@ -76,7 +76,7 @@ def _read_usfm(self) -> str:
             return reader.read()
 
 
-class _TextRowCollector(ScriptureRefUsfmParserHandler):
+class _TextRowCollector(ScriptureRefUsfmParserHandlerBase):
     def __init__(self, text: UsfmTextBase) -> None:
         super().__init__()
 
diff --git a/machine/corpora/zip_paratext_project_file_handler.py b/machine/corpora/zip_paratext_project_file_handler.py
index f97c46d5..3f430048 100644
--- a/machine/corpora/zip_paratext_project_file_handler.py
+++ b/machine/corpora/zip_paratext_project_file_handler.py
@@ -13,7 +13,10 @@ def __init__(self, archive: ZipFile) -> None:
         self._archive = archive
 
     def exists(self, file_name: str) -> bool:
-        return file_name in self._archive.namelist()
+        for actual_entry_name in self._archive.namelist():
+            if actual_entry_name.lower() == file_name.lower():
+                return True
+        return False
 
     def find(self, extension: str) -> Optional[str]:
         for entry in self._archive.namelist():
@@ -22,8 +25,9 @@ def find(self, extension: str) -> Optional[str]:
         return None
 
     def open(self, file_name: str) -> Optional[BinaryIO]:
-        if file_name in self._archive.namelist():
-            return BytesIO(self._archive.read(file_name))
+        for actual_entry_name in self._archive.namelist():
+            if actual_entry_name.lower() == file_name.lower():
+                return BytesIO(self._archive.read(actual_entry_name))
         return None
 
     def create_stylesheet(self, file_name: str) -> UsfmStylesheet:
diff --git a/machine/corpora/zip_paratext_project_versification_detector.py b/machine/corpora/zip_paratext_project_versification_detector.py
index cf4bf66e..ccb287c2 100644
--- a/machine/corpora/zip_paratext_project_versification_detector.py
+++ b/machine/corpora/zip_paratext_project_versification_detector.py
@@ -1,10 +1,10 @@
 from zipfile import ZipFile
 
-from .paratext_project_versification_error_detector import ParatextProjectVersificationErrorDetector
+from .paratext_project_versification_error_detector_base import ParatextProjectVersificationErrorDetectorBase
 from .zip_paratext_project_file_handler import ZipParatextProjectFileHandler
 from .zip_paratext_project_settings_parser import ZipParatextProjectSettingsParser
 
 
-class ZipParatextProjectVersificationErrorDetector(ParatextProjectVersificationErrorDetector):
+class ZipParatextProjectVersificationErrorDetector(ParatextProjectVersificationErrorDetectorBase):
     def __init__(self, archive: ZipFile):
         super().__init__(ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser(archive).parse())
diff --git a/machine/scripture/verse_ref.py b/machine/scripture/verse_ref.py
index f35ea0fe..86ca1407 100644
--- a/machine/scripture/verse_ref.py
+++ b/machine/scripture/verse_ref.py
@@ -149,8 +149,6 @@ def verse_num(self) -> int:
 
     @verse_num.setter
     def verse_num(self, value: int) -> None:
-        if value < 0:
-            raise ValueError("The verse number cannot be negative.")
         self._verse_num = value
         self._verse = None
 
diff --git a/pyproject.toml b/pyproject.toml
index d337c7f5..c3a19faa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,7 @@ reportMissingModuleSource = false
 
 [tool.poetry]
 name = "sil-machine"
-version = "1.8.5"
+version = "1.8.6"
 description = "A natural language processing library that is focused on providing tools for resource-poor languages."
 license = "MIT"
 authors = ["SIL International"]
diff --git a/tests/corpora/test_place_markers_usfm_update_block_handler.py b/tests/corpora/test_place_markers_usfm_update_block_handler.py
index 5f75c635..cc4c7385 100644
--- a/tests/corpora/test_place_markers_usfm_update_block_handler.py
+++ b/tests/corpora/test_place_markers_usfm_update_block_handler.py
@@ -50,7 +50,7 @@ def test_paragraph_markers() -> None:
 \p Este texto está en inglés
 \p y esta prueba es para marcadores de párrafo.
 """
-    assess(target, result)
+    assert_usfm_equals(target, result)
 
 
 def test_style_markers() -> None:
@@ -81,7 +81,7 @@ def test_style_markers() -> None:
 \c 1
 \v 1 Esta es la \w primera\w* oración. Este texto está en \w inglés\w* y esta prueba es \w para\w* marcadores de estilo.
 """
-    assess(target, result)
+    assert_usfm_equals(target, result)
 
     align_info = PlaceMarkersAlignmentInfo(
         source_tokens=[t for t in TOKENIZER.tokenize(source)],
@@ -103,7 +103,7 @@ def test_style_markers() -> None:
 \c 1
 \v 1 Esta es la primera oración. Este texto está en inglés y esta prueba es para marcadores de estilo.
 """
-    assess(target, result)
+    assert_usfm_equals(target, result)
 
 
 # NOTE: Not currently updating embeds, will need to change test when we do
@@ -143,7 +143,7 @@ def test_embeds() -> None:
 \v 5 New verse 5 \f \fr 1.5 \ft A \+w stylish\+w* note \f*
 \v 6 New verse 6 \f \fr 1.6 \ft Another \+w stylish\+w* note \f*
 """
-    assess(target, result)
+    assert_usfm_equals(target, result)
 
     target = update_usfm(
         rows,
@@ -160,7 +160,7 @@ def test_embeds() -> None:
 \v 5 New verse 5
 \v 6 New verse 6
 """
-    assess(target, result)
+    assert_usfm_equals(target, result)
 
 
 def test_trailing_empty_paragraphs() -> None:
@@ -200,7 +200,7 @@ def test_trailing_empty_paragraphs() -> None:
 \b
 \q1
 """
-    assess(target, result)
+    assert_usfm_equals(target, result)
 
 
 def test_headers() -> None:
@@ -283,7 +283,7 @@ def test_headers() -> None:
 \v 3 Y
 \s1 Updated header
 """
-    assess(target, result)
+    assert_usfm_equals(target, result)
 
 
 def test_consecutive_markers() -> None:
@@ -320,7 +320,7 @@ def test_consecutive_markers() -> None:
 \v 1 New verse 1
 \p \qt \+w WORD\+w*\qt*
 """
-    assess(target, result)
+    assert_usfm_equals(target, result)
 
 
 def test_verse_ranges() -> None:
@@ -356,7 +356,7 @@ def test_verse_ranges() -> None:
 \v 1-5 New verse range text
 \p new paragraph 2
 """
-    assess(target, result)
+    assert_usfm_equals(target, result)
 
 
 def test_no_update() -> None:
@@ -392,7 +392,7 @@ def test_no_update() -> None:
 \c 1
 \v 1 New paragraph 1 New paragraph 2
 """
-    assess(target, result)
+    assert_usfm_equals(target, result)
 
     # No alignment
     rows = [
@@ -422,7 +422,7 @@ def test_no_update() -> None:
 \v 1 New paragraph 1 New paragraph 2
 \p
 """
-    assess(target, result)
+    assert_usfm_equals(target, result)
 
     # No text update
     rows = []
@@ -437,7 +437,7 @@ def test_no_update() -> None:
 \v 1 Old paragraph 1
 \p Old paragraph 2
 """
-    assess(target, result)
+    assert_usfm_equals(target, result)
 
 
 def test_split_tokens() -> None:
@@ -475,7 +475,7 @@ def test_split_tokens() -> None:
 \p words split
 \p words split
 """
-    assess(target, result)
+    assert_usfm_equals(target, result)
 
 
 def test_no_text() -> None:
@@ -510,7 +510,7 @@ def test_no_text() -> None:
 \c 1
 \v 1  \w \w*
 """
-    assess(target, result)
+    assert_usfm_equals(target, result)
 
 
 def test_consecutive_substring() -> None:
@@ -546,7 +546,7 @@ def test_consecutive_substring() -> None:
 \v 1 string
 \p ring
 """
-    assess(target, result)
+    assert_usfm_equals(target, result)
 
 
 def test_verses_out_of_order() -> None:
@@ -597,7 +597,7 @@ def test_verses_out_of_order() -> None:
 \v 1 new verse 1
 \p new paragraph 2
 """
-    assess(target, result)
+    assert_usfm_equals(target, result)
 
 
 def test_strip_paragraphs_with_header() -> None:
@@ -638,7 +638,58 @@ def test_strip_paragraphs_with_header() -> None:
 \p
 \v 2 verse 2
 """
-    assess(target, result)
+    assert_usfm_equals(target, result)
+
+
+def test_support_verse_zero():
+    # Note: Verse 0 has an empty paragraph because the paragraph occurs before the verse text,
+    # so it is not included in the verse text as the paragraphs of the other verses are.
+    rows = [
+        UpdateUsfmRow(scr_ref("MAT 1:0"), "New verse 0"),
+        UpdateUsfmRow(scr_ref("MAT 1:0/1:mt"), "New book header"),
+        UpdateUsfmRow(scr_ref("MAT 1:0/2:s"), "New chapter header"),
+        UpdateUsfmRow(scr_ref("MAT 1:0/3:p"), ""),
+        UpdateUsfmRow(scr_ref("MAT 1:0/4:ms"), "New major section header"),
+        UpdateUsfmRow(scr_ref("MAT 1:0/5:s"), "New section header 1"),
+        UpdateUsfmRow(scr_ref("MAT 1:1"), "New verse 1"),
+        UpdateUsfmRow(scr_ref("MAT 1:1/1:s"), "New section header 2"),
+        UpdateUsfmRow(scr_ref("MAT 1:2"), "New verse 2"),
+        UpdateUsfmRow(scr_ref("MAT 1:3"), "New verse 3"),
+    ]
+    usfm = r"""\id MAT
+\mt Old book header
+\c 1
+\s Old chapter header
+\p
+\v 0 Old verse 0
+\ms Old major section header
+\s Old section header 1
+\p
+\v 1 Old verse 1
+\s Old section header 2
+\p
+\v 2 Old verse 2
+\v 3 Old verse 3
+"""
+
+    target = update_usfm(rows, usfm, update_block_handlers=[PlaceMarkersUsfmUpdateBlockHandler()])
+
+    result = r"""\id MAT
+\mt New book header
+\c 1
+\s New chapter header
+\p
+\v 0 New verse 0
+\ms New major section header
+\s New section header 1
+\p
+\v 1 New verse 1
+\s New section header 2
+\p
+\v 2 New verse 2
+\v 3 New verse 3
+"""
+    assert_usfm_equals(target, result)
 
 
 def scr_ref(*refs: str) -> List[ScriptureRef]:
@@ -683,7 +734,7 @@ def update_usfm(
     return updater.get_usfm()
 
 
-def assess(target: Optional[str], truth: str) -> None:
+def assert_usfm_equals(target: Optional[str], truth: str) -> None:
     assert target is not None
     for target_line, truth_line in zip(target.split("\n"), truth.split("\n")):
         assert target_line.strip() == truth_line.strip()
diff --git a/tests/corpora/test_update_usfm_parser_handler.py b/tests/corpora/test_update_usfm_parser_handler.py
index 202f5223..a9c1cdc1 100644
--- a/tests/corpora/test_update_usfm_parser_handler.py
+++ b/tests/corpora/test_update_usfm_parser_handler.py
@@ -916,6 +916,42 @@ def test_update_block_verse_range() -> None:
     )
 
 
+def test_update_block_verse_range_right_to_left_marker() -> None:
+    rows = [
+        UpdateUsfmRow(
+            scr_ref("MAT 1:1", "MAT 1:2", "MAT 1:3"),
+            str("Update 1-3"),
+        ),
+    ]
+    usfm = (
+        r"""\id MAT - Test
+\c 1
+\v 1"""
+        + "\u200f"
+        + """-3 verse 1 through 3
+"""
+    )
+
+    update_block_handler = _TestUsfmUpdateBlockHandler()
+    updated_usfm = update_usfm(
+        rows, usfm, embed_behavior=UpdateUsfmMarkerBehavior.PRESERVE, update_block_handlers=[update_block_handler]
+    )
+    expected_usfm = r"""\id MAT - Test
+\c 1
+\v 1-3 Update 1-3
+"""
+    assert_usfm_equals(updated_usfm, expected_usfm)
+    assert len(update_block_handler.blocks) == 1
+
+    update_block = update_block_handler.blocks[0]
+    assert_update_block_equals(
+        update_block,
+        ["MAT 1:1", "MAT 1:2", "MAT 1:3"],
+        (UsfmUpdateBlockElementType.TEXT, "Update 1-3 ", False),
+        (UsfmUpdateBlockElementType.TEXT, "verse 1 through 3 ", True),
+    )
+
+
 def test_update_block_footnote_preserve_embeds() -> None:
     rows = [
         UpdateUsfmRow(
@@ -1381,6 +1417,83 @@ def test_pass_remark():
     assert_usfm_equals(target, result)
 
 
+def test_update_block_footnote_in_published_chapter_number():
+    rows = [UpdateUsfmRow(scr_ref("ESG 1:0/2:s"), "Update 1")]
+    usfm = r"""\id ESG - Test
+\c 1
+\cp A \f + \fr A.1-3: \ft Some note.\f*
+\s Heading 1
+"""
+    update_block_handler = _TestUsfmUpdateBlockHandler()
+    target = update_usfm(
+        rows,
+        usfm,
+        update_block_handlers=[update_block_handler],
+        text_behavior=UpdateUsfmTextBehavior.STRIP_EXISTING,
+        paragraph_behavior=UpdateUsfmMarkerBehavior.PRESERVE,
+        embed_behavior=UpdateUsfmMarkerBehavior.PRESERVE,
+        style_behavior=UpdateUsfmMarkerBehavior.PRESERVE,
+    )
+
+    result = r"""\id ESG
+\c 1
+\cp A \f + \fr A.1-3: \ft Some note.\f*
+\s Update 1
+"""
+    assert_usfm_equals(target, result)
+
+    assert len(update_block_handler.blocks) == 2
+    assert_update_block_equals(
+        update_block_handler.blocks[0],
+        ["ESG 1:0/1:f"],
+        (UsfmUpdateBlockElementType.EMBED, r"\f + \fr A.1-3: \ft Some note.\f*", False),
+    )
+    assert_update_block_equals(
+        update_block_handler.blocks[1],
+        ["ESG 1:0/2:s"],
+        (UsfmUpdateBlockElementType.TEXT, "Update 1 ", False),
+        (UsfmUpdateBlockElementType.TEXT, "Heading 1 ", True),
+    )
+
+
+def test_update_block_footnote_at_start_of_chapter_with_preceding_text():
+    rows = [UpdateUsfmRow(scr_ref("ESG 1:0/2:s"), "Update 1")]
+    usfm = r"""\id ESG - Test
+\c 1
+Text 1\f + \fr A.1-3: \ft Some note.\f*
+\s Heading 1
+"""
+    update_block_handler = _TestUsfmUpdateBlockHandler()
+    target = update_usfm(
+        rows,
+        usfm,
+        update_block_handlers=[update_block_handler],
+        text_behavior=UpdateUsfmTextBehavior.PREFER_NEW,
+        paragraph_behavior=UpdateUsfmMarkerBehavior.PRESERVE,
+        embed_behavior=UpdateUsfmMarkerBehavior.PRESERVE,
+        style_behavior=UpdateUsfmMarkerBehavior.PRESERVE,
+    )
+
+    result = r"""\id ESG - Test
+\c 1 Text 1\f + \fr A.1-3: \ft Some note.\f*
+\s Update 1
+"""
+    assert_usfm_equals(target, result)
+
+    assert len(update_block_handler.blocks) == 2
+    assert_update_block_equals(
+        update_block_handler.blocks[0],
+        ["ESG 1:0/1:f"],
+        (UsfmUpdateBlockElementType.EMBED, r"\f + \fr A.1-3: \ft Some note.\f*", False),
+    )
+    assert_update_block_equals(
+        update_block_handler.blocks[1],
+        ["ESG 1:0/2:s"],
+        (UsfmUpdateBlockElementType.TEXT, "Update 1 ", False),
+        (UsfmUpdateBlockElementType.TEXT, "Heading 1 ", True),
+    )
+
+
 def scr_ref(*refs: str) -> List[ScriptureRef]:
     return [ScriptureRef.parse(ref) for ref in refs]
 
diff --git a/tests/corpora/test_usfm_memory_text.py b/tests/corpora/test_usfm_memory_text.py
index 183eb418..aa89412d 100644
--- a/tests/corpora/test_usfm_memory_text.py
+++ b/tests/corpora/test_usfm_memory_text.py
@@ -208,6 +208,27 @@ def test_get_rows_paragraph_before_nonverse_paragraph() -> None:
     assert rows[2].text == "header"
 
 
+def test_get_rows_verse_zero():
+    rows: List[TextRow] = get_rows(
+        r"""\id MAT - Test
+\h
+\mt
+\c 1
+\p \v 0
+\s
+\p \v 1 Verse one.
+"""
+    )
+
+    assert len(rows) == 2, str.join(",", [tr.text for tr in rows])
+
+    assert rows[0].ref == ScriptureRef.parse("MAT 1:0")
+    assert rows[0].text == ""
+
+    assert rows[1].ref == ScriptureRef.parse("MAT 1:1")
+    assert rows[1].text == "Verse one."
+
+
 def test_get_rows_style_starting_nonverse_paragraph_after_empty_paragraph() -> None:
     rows: List[TextRow] = get_rows(
         r"""\id MAT - Test
@@ -227,6 +248,223 @@ def test_get_rows_style_starting_nonverse_paragraph_after_empty_paragraph() -> N
     assert rows[2].text == "\\w header\\w*"
 
 
+def test_get_rows_verse_zero_with_text():
+    rows: List[TextRow] = get_rows(
+        r"""\id MAT - Test
+\h
+\mt
+\c 1
+\p \v 0 Verse zero.
+\s
+\p \v 1 Verse one.
+"""
+    )
+
+    assert len(rows) == 2, str.join(",", [tr.text for tr in rows])
+
+    assert rows[0].ref == ScriptureRef.parse("MAT 1:0")
+    assert rows[0].text == "Verse zero."
+
+    assert rows[1].ref == ScriptureRef.parse("MAT 1:1")
+    assert rows[1].text == "Verse one."
+
+
+def test_get_rows_private_use_marker():
+    rows: List[TextRow] = get_rows(
+        r"""\id MAT - Test English Apocrypha
+\zmt Ignore this paragraph
+\mt1 Test English Apocrypha
+\pc Copyright Statement \zimagecopyrights
+\pc Further copyright statements
+""",
+        include_all_text=True,
+    )
+
+    assert len(rows) == 3, str.join(",", [tr.text for tr in rows])
+
+    assert rows[1].ref == ScriptureRef.parse("MAT 1:0/2:pc")
+    assert rows[1].text == "Copyright Statement"
+
+
+def test_get_rows_verse_range_with_right_to_left_marker():
+    rows: List[TextRow] = get_rows(
+        r"""\id MAT - Test
+\h
+\mt
+\c 1
+\v 1"""
+        + "\u200f"
+        + r"""-2 Verse one and two.
+"""
+    )
+
+    assert len(rows) == 2, str.join(",", [tr.text for tr in rows])
+
+    assert rows[0].ref == ScriptureRef.parse("MAT 1:1")
+    assert rows[0].text == "Verse one and two."
+
+    assert rows[1].ref == ScriptureRef.parse("MAT 1:2")
+
+
+def test_get_rows_non_latin_verse_number():
+    rows: List[TextRow] = get_rows(
+        r"""\id MAT - Test
+\c 1
+\p
+\v १ Verse 1
+\v 3,৪ Verses 3 and 4
+\p
+""",
+        include_all_text=True,
+    )
+
+    assert len(rows) == 4, str.join(",", [tr.text for tr in rows])
+
+    assert rows[0].ref == ScriptureRef.parse("MAT 1:0/1:p")
+    assert rows[0].text == ""
+
+    assert rows[1].ref == ScriptureRef.parse("MAT 1:1")
+    assert rows[1].text == "Verse 1"
+
+    assert rows[2].ref == ScriptureRef.parse("MAT 1:3")
+    assert rows[2].text == "Verses 3 and 4"
+
+    assert rows[3].ref == ScriptureRef.parse("MAT 1:৪")
+    assert rows[3].text == ""
+
+
+def test_get_rows_empty_verse_number():
+    rows: List[TextRow] = get_rows(
+        r"""\id MAT - Test
+\c 1
+\p
+\v
+\b
+""",
+        include_all_text=True,
+    )
+
+    assert len(rows) == 2, str.join(",", [tr.text for tr in rows])
+
+    assert rows[0].ref == ScriptureRef.parse("MAT 1:0/1:p")
+    assert rows[0].text == ""
+
+    assert rows[1].ref == ScriptureRef.parse("MAT 1:0/2:b")
+    assert rows[1].text == ""
+
+
+def test_get_rows_multiple_empty_verse_numbers():
+    rows: List[TextRow] = get_rows(
+        r"""\id MAT - Test
+\c 1
+\p
+\v
+\p
+\v
+\p
+\v
+\p
+""",
+        include_all_text=True,
+    )
+
+    assert len(rows) == 4, str.join(",", [tr.text for tr in rows])
+
+    for i, row in enumerate(rows):
+        assert row.ref == ScriptureRef.parse(f"MAT 1:0/{i+1}:p")
+        assert row.text == ""
+
+
+def test_get_rows_empty_verse_number_with_text():
+    rows: List[TextRow] = get_rows(
+        r"""\id MAT - Test
+\c 1
+\s heading text
+\v  \vn 1 verse text
+""",
+        include_all_text=True,
+    )
+
+    assert len(rows) == 2, str.join(",", [tr.text for tr in rows])
+
+    assert rows[0].ref == ScriptureRef.parse("MAT 1:0/1:s")
+    assert rows[0].text == "heading text"
+
+    assert rows[1].ref == ScriptureRef.parse("MAT 1:0/2:vn")
+    assert rows[1].text == "1 verse text"
+
+
+def test_get_rows_empty_verse_number_mid_verse():
+    rows: List[TextRow] = get_rows(
+        r"""\id MAT - Test
+\c 1
+\p
+\v 1 verse 1 text
+\v
+\v 2 verse 2 text
+""",
+        include_all_text=True,
+    )
+
+    assert len(rows) == 3, str.join(",", [tr.text for tr in rows])
+
+    assert rows[0].ref == ScriptureRef.parse("MAT 1:0/1:p")
+    assert rows[0].text == ""
+
+    assert rows[1].ref == ScriptureRef.parse("MAT 1:1")
+    assert rows[1].text == "verse 1 text"
+
+    assert rows[2].ref == ScriptureRef.parse("MAT 1:2")
+    assert rows[2].text == "verse 2 text"
+
+
+def test_get_rows_invalid_verse_numbers():
+    rows: List[TextRow] = get_rows(
+        r"""\id MAT - Test
+\c 1
+\p
+\v BK1 text goes here
+\v BK 2 text goes here
+\v BK 3 text goes here
+\v BK 4 text goes here
+""",
+        include_all_text=True,
+    )
+
+    assert len(rows) == 1, str.join(",", [tr.text for tr in rows])
+
+    assert rows[0].ref == ScriptureRef.parse("MAT 1:0/1:p")
+    assert rows[0].text == "text goes here 2 text goes here 3 text goes here 4 text goes here"
+
+
+def test_get_rows_incomplete_verse_range():
+    rows: List[TextRow] = get_rows(
+        r"""\id MAT - Test
+\c 1
+\s heading text
+\p
+\q1
+\v 1,
+\q1 verse 1 text
+""",
+        include_all_text=True,
+    )
+
+    assert len(rows) == 4, str.join(",", [tr.text for tr in rows])
+
+    assert rows[0].ref == ScriptureRef.parse("MAT 1:0/1:s")
+    assert rows[0].text == "heading text"
+
+    assert rows[1].ref == ScriptureRef.parse("MAT 1:0/2:p")
+    assert rows[1].text == ""
+
+    assert rows[2].ref == ScriptureRef.parse("MAT 1:1/3:q1")
+    assert rows[2].text == ""
+
+    assert rows[3].ref == ScriptureRef.parse("MAT 1:1/4:q1")
+    assert rows[3].text == "verse 1 text"
+
+
 def get_rows(usfm: str, include_markers: bool = False, include_all_text: bool = False) -> List[TextRow]:
     text = UsfmMemoryText(
         UsfmStylesheet("usfm.sty"),
diff --git a/tests/scripture/test_verse_ref.py b/tests/scripture/test_verse_ref.py
index 0b0a9ed0..911225ad 100644
--- a/tests/scripture/test_verse_ref.py
+++ b/tests/scripture/test_verse_ref.py
@@ -189,8 +189,6 @@ def test_invalid() -> None:
         VerseRef(LAST_BOOK + 1, 1, 1)
     with raises(ValueError):
         VerseRef(2, -42, 1)
-    with raises(ValueError):
-        VerseRef(2, 1, -4)
     with raises(ValueError):
         VerseRef.from_string("MAT 1:")
     with raises(ValueError):
diff --git a/tests/testutils/data/usfm/Tes/custom.vrs b/tests/testutils/data/usfm/Tes/Custom.vrs
similarity index 100%
rename from tests/testutils/data/usfm/Tes/custom.vrs
rename to tests/testutils/data/usfm/Tes/Custom.vrs
diff --git a/tests/testutils/memory_paratext_project_versification_error_detector.py b/tests/testutils/memory_paratext_project_versification_error_detector.py
index 62911113..512ee930 100644
--- a/tests/testutils/memory_paratext_project_versification_error_detector.py
+++ b/tests/testutils/memory_paratext_project_versification_error_detector.py
@@ -1,10 +1,10 @@
 from typing import Dict, Optional
 
-from machine.corpora import ParatextProjectSettings, ParatextProjectVersificationErrorDetector
+from machine.corpora import ParatextProjectSettings, ParatextProjectVersificationErrorDetectorBase
 
 from .memory_paratext_project_file_handler import DefaultParatextProjectSettings, MemoryParatextProjectFileHandler
 
 
-class MemoryParatextProjectVersificationErrorDetector(ParatextProjectVersificationErrorDetector):
+class MemoryParatextProjectVersificationErrorDetector(ParatextProjectVersificationErrorDetectorBase):
     def __init__(self, settings: Optional[ParatextProjectSettings], files: Dict[str, str]) -> None:
         super().__init__(MemoryParatextProjectFileHandler(files), settings or DefaultParatextProjectSettings())

From 55be044639010b9d32eda95ff7fea2ea44e94398 Mon Sep 17 00:00:00 2001
From: Enkidu93 <lowryec17@gcc.edu>
Date: Mon, 9 Feb 2026 19:44:24 -0500
Subject: [PATCH 2/3] Additional changes for consistency with Machine

---
 .../file_paratext_project_file_handler.py     | 25 ++++++++++---------
 .../corpora/paratext_backup_terms_corpus.py   |  3 ++-
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/machine/corpora/file_paratext_project_file_handler.py b/machine/corpora/file_paratext_project_file_handler.py
index a9846645..7482d448 100644
--- a/machine/corpora/file_paratext_project_file_handler.py
+++ b/machine/corpora/file_paratext_project_file_handler.py
@@ -12,28 +12,29 @@ def __init__(self, project_dir: StrPath) -> None:
         self._project_dir = Path(project_dir)
 
     def exists(self, file_name: str) -> bool:
-        for actual_file_name in os.listdir(self._project_dir):
-            if actual_file_name.lower() == file_name.lower():
-                return True
-        return False
+        return self._get_file_name(file_name) is not None
 
     def open(self, file_name: str) -> BinaryIO:
-        for actual_file_name in os.listdir(self._project_dir):
-            if actual_file_name.lower() == file_name.lower():
-                return open(self._project_dir / actual_file_name, "rb")
+        actual_file_name = self._get_file_name(file_name)
+        if actual_file_name is not None:
+            file_name = actual_file_name
         return open(self._project_dir / file_name, "rb")
 
     def find(self, extension: str) -> Optional[Path]:
         return next(self._project_dir.glob(f"*{extension}"), None)
 
     def create_stylesheet(self, file_name: str) -> UsfmStylesheet:
-        custom_stylesheet_file_name = "custom.sty"
-        for actual_file_name in os.listdir(self._project_dir):
-            if actual_file_name.lower() == custom_stylesheet_file_name:
-                custom_stylesheet_file_name = actual_file_name
-                break
+        custom_stylesheet_file_name = self._get_file_name("custom.sty")
+        if custom_stylesheet_file_name is None:
+            custom_stylesheet_file_name = "custom.sty"
         custom_stylesheet_path = self._project_dir / custom_stylesheet_file_name
         return UsfmStylesheet(
             file_name,
             custom_stylesheet_path if custom_stylesheet_path.is_file() else None,
         )
+
+    def _get_file_name(self, case_insensitive_file_name: str) -> Optional[str]:
+        for actual_file_name in os.listdir(self._project_dir):
+            if actual_file_name.lower() == case_insensitive_file_name.lower():
+                return actual_file_name
+        return None
diff --git a/machine/corpora/paratext_backup_terms_corpus.py b/machine/corpora/paratext_backup_terms_corpus.py
index ee80a8e8..75e32b0f 100644
--- a/machine/corpora/paratext_backup_terms_corpus.py
+++ b/machine/corpora/paratext_backup_terms_corpus.py
@@ -29,8 +29,9 @@ def __init__(self, filename: StrPath, term_categories: Sequence[str], use_term_g
             text = MemoryText(
                 text_id,
                 [
-                    TextRow(text_id, key_term.id, key_term.renderings, content_type=TextRowContentType.WORD)
+                    TextRow(text_id, key_term.id, [rendering], content_type=TextRowContentType.WORD)
                     for key_term in key_terms
+                    for rendering in key_term.renderings
                 ],
             )
             self._add_text(text)

From a8544a803f3058d0894447de44be1465d229cc6f Mon Sep 17 00:00:00 2001
From: Enkidu93 <lowryec17@gcc.edu>
Date: Mon, 9 Feb 2026 19:50:42 -0500
Subject: [PATCH 3/3] Address reviewer comments

---
 .../scripture_ref_usfm_parser_handler_base.py        | 12 ++++++------
 machine/corpora/update_usfm_parser_handler.py        |  4 ++--
 machine/scripture/verse_ref.py                       |  2 ++
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/machine/corpora/scripture_ref_usfm_parser_handler_base.py b/machine/corpora/scripture_ref_usfm_parser_handler_base.py
index fc88dcf1..30000595 100644
--- a/machine/corpora/scripture_ref_usfm_parser_handler_base.py
+++ b/machine/corpora/scripture_ref_usfm_parser_handler_base.py
@@ -25,7 +25,7 @@ def _is_embed_style(marker: Optional[str]) -> bool:
     return marker is not None and marker.strip("*") in _EMBED_STYLES
 
 
-def is_private_use_marker(marker: str):
+def _is_private_use_marker(marker: str) -> bool:
     return marker is not None and marker.startswith("z")
 
 
@@ -82,7 +82,7 @@ def start_para(
         attributes: Optional[Sequence[UsfmAttribute]],
     ) -> None:
         # ignore private-use markers
-        if is_private_use_marker(marker):
+        if _is_private_use_marker(marker):
             return
 
         if self._cur_verse_ref.is_default:
@@ -93,7 +93,7 @@ def start_para(
 
     def end_para(self, state: UsfmParserState, marker: str) -> None:
         # ignore private-use markers
-        if is_private_use_marker(marker):
+        if _is_private_use_marker(marker):
             return
 
         if self._current_text_type == ScriptureTextType.NONVERSE:
@@ -146,7 +146,7 @@ def start_char(
         self, state: UsfmParserState, marker: str, unknown: bool, attributes: Optional[Sequence[UsfmAttribute]]
     ) -> None:
         # ignore private-use markers
-        if is_private_use_marker(marker):
+        if _is_private_use_marker(marker):
             return
 
         # if we hit a character marker in a verse paragraph and we aren't in a verse, then start a non-verse segment
@@ -159,7 +159,7 @@ def end_char(
         self, state: UsfmParserState, marker: str, attributes: Optional[Sequence[UsfmAttribute]], closed: bool
     ) -> None:
         # ignore private-use markers
-        if is_private_use_marker(marker):
+        if _is_private_use_marker(marker):
             return
 
         if _is_embed_style(marker):
@@ -277,7 +277,7 @@ def _check_convert_verse_para_to_non_verse(self, state: UsfmParserState) -> None
             and state.is_verse_para
             and self._cur_verse_ref.verse_num == 0
             and not state.chapter_has_verse_zero
-            and not is_private_use_marker(para_tag.marker)
+            and not _is_private_use_marker(para_tag.marker)
         ):
             self._start_parent_element(para_tag.marker)
             self._start_non_verse_text_wrapper(state)
diff --git a/machine/corpora/update_usfm_parser_handler.py b/machine/corpora/update_usfm_parser_handler.py
index 903ab3d1..9d95850c 100644
--- a/machine/corpora/update_usfm_parser_handler.py
+++ b/machine/corpora/update_usfm_parser_handler.py
@@ -38,7 +38,7 @@ def __init__(self, refs: Sequence[ScriptureRef], text: str, metadata: Optional[d
         self.metadata = metadata
 
 
-def sanitize_verse_data(verse_data: str) -> str:
+def _sanitize_verse_data(verse_data: str) -> str:
     return verse_data.replace("\u200F", "")
 
 
@@ -395,7 +395,7 @@ def _collect_updatable_tokens(self, state: UsfmParserState) -> None:
         while self._token_index <= state.index + state.special_token_count:
             token = state.tokens[self._token_index]
             if token.type == UsfmTokenType.VERSE and token.data is not None:
-                token.data = sanitize_verse_data(token.data)
+                token.data = _sanitize_verse_data(token.data)
             if self._current_text_type == ScriptureTextType.EMBED:
                 self._embed_tokens.append(token)
             elif (
diff --git a/machine/scripture/verse_ref.py b/machine/scripture/verse_ref.py
index 86ca1407..f35ea0fe 100644
--- a/machine/scripture/verse_ref.py
+++ b/machine/scripture/verse_ref.py
@@ -149,6 +149,8 @@ def verse_num(self) -> int:
 
     @verse_num.setter
     def verse_num(self, value: int) -> None:
+        if value < 0:
+            raise ValueError("The verse number cannot be negative.")
         self._verse_num = value
         self._verse = None