Skip to content

Commit 837f0f8

Browse files
author
AI Assistant
committed
feat: native support for tracked changes (w:ins in paragraph.text, w:delText in run.text)
1 parent e454546 commit 837f0f8

File tree

4 files changed

+58
-3
lines changed

4 files changed

+58
-3
lines changed

src/docx/oxml/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
register_element_cls("w:ptab", CT_PTab)
7777
register_element_cls("w:r", CT_R)
7878
register_element_cls("w:t", CT_Text)
79+
register_element_cls("w:delText", CT_Text)
7980

8081
# ---------------------------------------------------------------------------
8182
# header/footer-related mappings

src/docx/oxml/text/paragraph.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def clear_content(self):
5757
@property
5858
def inner_content_elements(self) -> List[CT_R | CT_Hyperlink]:
5959
"""Run and hyperlink children of the `w:p` element, in document order."""
60-
return self.xpath("./w:r | ./w:hyperlink")
60+
return self.xpath("./w:r | ./w:hyperlink | ./w:ins/w:r")
6161

6262
@property
6363
def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
@@ -99,7 +99,7 @@ def text(self): # pyright: ignore[reportIncompatibleMethodOverride]
9999
Inner-content child elements like `w:r` and `w:hyperlink` are translated to
100100
their text equivalent.
101101
"""
102-
return "".join(e.text for e in self.xpath("w:r | w:hyperlink"))
102+
return "".join(e.text for e in self.xpath("w:r | w:hyperlink | w:ins/w:r"))
103103

104104
def _insert_pPr(self, pPr: CT_PPr) -> CT_PPr:
105105
self.insert(0, pPr)

src/docx/oxml/text/run.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class CT_R(BaseOxmlElement):
3535
cr = ZeroOrMore("w:cr")
3636
drawing = ZeroOrMore("w:drawing")
3737
t = ZeroOrMore("w:t")
38+
delText = ZeroOrMore("w:delText")
3839
tab = ZeroOrMore("w:tab")
3940

4041
def add_t(self, text: str) -> CT_Text:
@@ -75,6 +76,7 @@ def iter_items() -> Iterator[str | CT_Drawing | CT_LastRenderedPageBreak]:
7576
" | w:noBreakHyphen"
7677
" | w:ptab"
7778
" | w:t"
79+
" | w:delText"
7880
" | w:tab"
7981
):
8082
if isinstance(e, (CT_Drawing, CT_LastRenderedPageBreak)):
@@ -134,7 +136,7 @@ def text(self) -> str:
134136
equivalent.
135137
"""
136138
return "".join(
137-
str(e) for e in self.xpath("w:br | w:cr | w:noBreakHyphen | w:ptab | w:t | w:tab")
139+
str(e) for e in self.xpath("w:br | w:cr | w:noBreakHyphen | w:ptab | w:t | w:delText | w:tab")
138140
)
139141

140142
@text.setter

tests/test_tracked_changes_fix.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
from docx.oxml import parse_xml
2+
from docx.text.paragraph import Paragraph
3+
from docx.text.run import Run
4+
5+
class DescribeTrackedChanges:
6+
def it_includes_insertions_in_paragraph_text(self):
7+
"""
8+
paragraph.text includes text within <w:ins> tags.
9+
"""
10+
xml = (
11+
'<w:p xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml">'
12+
' <w:r><w:t>Start </w:t></w:r>'
13+
' <w:ins w:id="1" w:author="Me" w:date="2023-01-01T00:00:00Z">'
14+
' <w:r><w:t>Inserted</w:t></w:r>'
15+
' </w:ins>'
16+
' <w:r><w:t> End</w:t></w:r>'
17+
'</w:p>'
18+
)
19+
p = Paragraph(parse_xml(xml), None)
20+
# Expected: "Start Inserted End"
21+
# Before Fix: "Start  End" (two spaces: "Start " + " End", inserted run skipped)
22+
assert p.text == "Start Inserted End"
23+
24+
def it_excludes_deletions_in_paragraph_text(self):
25+
"""
26+
paragraph.text still excludes text within <w:del> tags (standard behavior).
27+
"""
28+
xml = (
29+
'<w:p xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">'
30+
' <w:r><w:t>Start </w:t></w:r>'
31+
' <w:del w:id="2" w:author="Me" w:date="2023-01-01T00:00:00Z">'
32+
' <w:r><w:delText>Deleted</w:delText></w:r>'
33+
' </w:del>'
34+
' <w:r><w:t>End</w:t></w:r>'
35+
'</w:p>'
36+
)
37+
p = Paragraph(parse_xml(xml), None)
38+
assert p.text == "Start End"
39+
40+
def it_includes_deletion_text_in_run(self):
41+
"""
42+
run.text includes <w:delText> content (e.g. for provenance extraction).
43+
"""
44+
xml = (
45+
'<w:r xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">'
46+
' <w:delText>Deleted Content</w:delText>'
47+
'</w:r>'
48+
)
49+
r = Run(parse_xml(xml), None)
50+
# Expected: "Deleted Content"
51+
# Before Fix: ""
52+
assert r.text == "Deleted Content"

0 commit comments

Comments
 (0)