sillsdev · ddaspit · Apr 24, 2026 · Apr 23, 2026 · Apr 23, 2026 · Apr 23, 2026
diff --git a/machine/corpora/usfm_file_text_corpus.py b/machine/corpora/usfm_file_text_corpus.py
@@ -42,7 +42,10 @@ def _get_id(filename: StrPath, encoding: str) -> Optional[str]:
             if line.startswith("\\id "):
                 id = line[4:]
                 index = id.find(" ")
+                # When no space occurs at or before index 3 and the id has 3+ characters, truncate it to 3.
+                if (index == -1 or index > 3) and len(id) >= 3:
+                    index = 3
                 if index != -1:
-                    id = id[:index]
+                    id = id[:index].upper()
                 return id.strip().upper()
     return None
diff --git a/machine/corpora/usfm_parser.py b/machine/corpora/usfm_parser.py
@@ -176,6 +176,8 @@ def process_token(self) -> bool:
             # Code is always upper case
             assert token.data is not None
             code = token.data.upper()
+            if len(code) > 3:
+                code = code[:3]
 
             # Update verse ref. Leave book alone if not empty to prevent parsing errors on books with bad id lines.
             verse_ref = self.state.verse_ref

diff --git a/machine/corpora/usfm_text_base.py b/machine/corpora/usfm_text_base.py
@@ -91,6 +91,9 @@ def rows(self) -> Iterable[TextRow]:
 
     def start_book(self, state: UsfmParserState, marker: str, code: str) -> None:
         super().start_book(state, marker, code)
+        if state.verse_ref.book != "" and state.verse_ref.book != code:
+            # Ignore \id markers that don't match the book code in the verse ref, if it was set
+            return
         if code not in ALL_BOOK_IDS:
             raise ValueError(f"The book {code} is not a valid book id.")
         if code != self._text.id:

diff --git a/tests/corpora/test_usfm_memory_text.py b/tests/corpora/test_usfm_memory_text.py
@@ -1,5 +1,6 @@
 from typing import List
 
+import pytest
 from testutils.corpora_test_helpers import scripture_ref
 
 from machine.corpora import ScriptureRef, TextRow, UsfmMemoryText
@@ -465,6 +466,70 @@ def test_get_rows_incomplete_verse_range():
     assert rows[3].text == "verse 1 text"
 
 
+def test_get_rows_book_code_different_to_filename() -> None:
+    with pytest.raises(RuntimeError):
+        get_rows(
+            r"""\id LUK - Test
+\c 1
+\v 1 Verse 1 Text
+""",
+            include_all_text=True,
+        )
+
+
+def test_get_rows_book_code_invalid() -> None:
+    with pytest.raises(RuntimeError):
+        get_rows(
+            r"""\id ZZZ - Test
+\c 1
+\v 1 Verse 1 Text
+""",
+            include_all_text=True,
+        )
+
+
+def test_get_rows_book_code_truncated() -> None:
+    with pytest.raises(RuntimeError):
+        get_rows(
+            r"""\id MA
+\c 1
+\v 1 Verse 1 Text
+""",
+            include_all_text=True,
+        )
+
+
+def test_get_rows_book_code_multiple() -> None:
+    rows: List[TextRow] = get_rows(
+        r"""\id MAT
+\id LUK
+\c 1
+\v 1 Verse 1 Text
+""",
+        include_all_text=True,
+    )
+
+    assert len(rows) == 1
+
+    assert rows[0].ref == ScriptureRef.parse("MAT 1:1"), str.join(",", [str(tr.ref) for tr in rows])
+    assert rows[0].text == "Verse 1 Text", str.join(",", [tr.text for tr in rows])
+
+
+def test_get_rows_book_code_no_space() -> None:
+    rows: List[TextRow] = get_rows(
+        r"""\id Matthew
+\c 1
+\v 1 Verse 1 Text
+""",
+        include_all_text=True,
+    )
+
+    assert len(rows) == 1
+
+    assert rows[0].ref == ScriptureRef.parse("MAT 1:1"), str.join(",", [str(tr.ref) for tr in rows])
+    assert rows[0].text == "Verse 1 Text", str.join(",", [tr.text for tr in rows])
+
+
 def get_rows(usfm: str, include_markers: bool = False, include_all_text: bool = False) -> List[TextRow]:
     text = UsfmMemoryText(
         UsfmStylesheet("usfm.sty"),

diff --git a/tests/testutils/data/usfm/Tes/03LEVTes.SFM b/tests/testutils/data/usfm/Tes/03LEVTes.SFM
@@ -1,4 +1,4 @@
-\id lev - Test
+\id Leviticus
 \h Leviticus
 \mt Leviticus
 \c 14

diff --git a/tests/testutils/data/usfm/Tes/131CHTes.SFM b/tests/testutils/data/usfm/Tes/131CHTes.SFM
@@ -1,4 +1,4 @@
-\id 1CH - Test
+\id 1CH
 \h 1 Chronicles
 \mt 1 Chronicles
 \c 12