From 2c88262a255d3afa14a49bfe73363aae2f4be60d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20R=C3=BC=C3=9Fler?= Date: Sun, 7 Dec 2025 18:40:03 +0100 Subject: [PATCH 1/5] Update `gix-blame` to `imara-diff` 0.2 --- gix-blame/Cargo.toml | 2 +- gix-blame/src/file/function.rs | 105 ++++++++++++++------------------- gix-blame/tests/blame.rs | 5 +- 3 files changed, 48 insertions(+), 64 deletions(-) diff --git a/gix-blame/Cargo.toml b/gix-blame/Cargo.toml index 4417277cfe6..4fc8f775d32 100644 --- a/gix-blame/Cargo.toml +++ b/gix-blame/Cargo.toml @@ -15,7 +15,7 @@ gix-commitgraph = { version = "^0.30.0", path = "../gix-commitgraph" } gix-revwalk = { version = "^0.23.0", path = "../gix-revwalk" } gix-trace = { version = "^0.1.14", path = "../gix-trace" } gix-date = { version = "^0.11.0", path = "../gix-date" } -gix-diff = { version = "^0.55.0", path = "../gix-diff", default-features = false, features = ["blob"] } +gix-diff = { version = "^0.55.0", path = "../gix-diff", default-features = false, features = ["blob", "blob-experimental"] } gix-object = { version = "^0.52.0", path = "../gix-object" } gix-hash = { version = "^0.20.0", path = "../gix-hash" } gix-worktree = { version = "^0.44.0", path = "../gix-worktree", default-features = false, features = ["attributes"] } diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index 773ac1425cb..649b27db84f 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -1,6 +1,9 @@ -use std::{num::NonZeroU32, ops::Range}; +use std::num::NonZeroU32; -use gix_diff::{blob::intern::TokenSource, tree::Visit}; +use gix_diff::{ + blob::{intern::TokenSource, v2::Hunk}, + tree::Visit, +}; use gix_hash::ObjectId; use gix_object::{ bstr::{BStr, BString}, @@ -758,85 +761,65 @@ fn blob_changes( diff_algorithm: gix_diff::blob::Algorithm, stats: &mut Statistics, ) -> Result, Error> { - /// Record all [`Change`]s to learn about additions, deletions and unchanged portions of a *Source File*. - struct ChangeRecorder { - last_seen_after_end: u32, - hunks: Vec, - total_number_of_lines: u32, - } + resource_cache.set_resource( + previous_oid, + gix_object::tree::EntryKind::Blob, + previous_file_path, + gix_diff::blob::ResourceKind::OldOrSource, + &odb, + )?; + resource_cache.set_resource( + oid, + gix_object::tree::EntryKind::Blob, + file_path, + gix_diff::blob::ResourceKind::NewOrDestination, + &odb, + )?; - impl ChangeRecorder { - /// `total_number_of_lines` is used to fill in the last unchanged hunk if needed - /// so that the entire file is represented by [`Change`]. - fn new(total_number_of_lines: u32) -> Self { - ChangeRecorder { - last_seen_after_end: 0, - hunks: Vec::new(), - total_number_of_lines, - } - } - } + let outcome = resource_cache.prepare_diff()?; + let input = gix_diff::blob::v2::InternedInput::new( + outcome.old.data.as_slice().unwrap_or_default(), + outcome.new.data.as_slice().unwrap_or_default(), + ); + + let diff_algorithm: gix_diff::blob::v2::Algorithm = match diff_algorithm { + gix_diff::blob::Algorithm::Histogram => gix_diff::blob::v2::Algorithm::Histogram, + gix_diff::blob::Algorithm::Myers => gix_diff::blob::v2::Algorithm::Myers, + gix_diff::blob::Algorithm::MyersMinimal => gix_diff::blob::v2::Algorithm::MyersMinimal, + }; + let mut diff = gix_diff::blob::v2::Diff::compute(diff_algorithm, &input); + diff.postprocess_lines(&input); - impl gix_diff::blob::Sink for ChangeRecorder { - type Out = Vec; + let changes = diff + .hunks() + .fold((Vec::new(), 0), |(mut hunks, mut last_seen_after_end), hunk| { + let Hunk { before, after } = hunk; - fn process_change(&mut self, before: Range, after: Range) { // This checks for unchanged hunks. - if after.start > self.last_seen_after_end { - self.hunks - .push(Change::Unchanged(self.last_seen_after_end..after.start)); + if after.start > last_seen_after_end { + hunks.push(Change::Unchanged(last_seen_after_end..after.start)); } match (!before.is_empty(), !after.is_empty()) { (_, true) => { - self.hunks.push(Change::AddedOrReplaced( + hunks.push(Change::AddedOrReplaced( after.start..after.end, before.end - before.start, )); } (true, false) => { - self.hunks.push(Change::Deleted(after.start, before.end - before.start)); + hunks.push(Change::Deleted(after.start, before.end - before.start)); } (false, false) => unreachable!("BUG: imara-diff provided a non-change"), } - self.last_seen_after_end = after.end; - } - - fn finish(mut self) -> Self::Out { - if self.total_number_of_lines > self.last_seen_after_end { - self.hunks - .push(Change::Unchanged(self.last_seen_after_end..self.total_number_of_lines)); - } - self.hunks - } - } - resource_cache.set_resource( - previous_oid, - gix_object::tree::EntryKind::Blob, - previous_file_path, - gix_diff::blob::ResourceKind::OldOrSource, - &odb, - )?; - resource_cache.set_resource( - oid, - gix_object::tree::EntryKind::Blob, - file_path, - gix_diff::blob::ResourceKind::NewOrDestination, - &odb, - )?; + last_seen_after_end = after.end; - let outcome = resource_cache.prepare_diff()?; - let input = gix_diff::blob::intern::InternedInput::new( - tokens_for_diffing(outcome.old.data.as_slice().unwrap_or_default()), - tokens_for_diffing(outcome.new.data.as_slice().unwrap_or_default()), - ); - let number_of_lines_in_destination = input.after.len(); - let change_recorder = ChangeRecorder::new(number_of_lines_in_destination as u32); + (hunks, last_seen_after_end) + }); - let res = gix_diff::blob::diff(diff_algorithm, &input, change_recorder); stats.blobs_diffed += 1; - Ok(res) + Ok(changes.0) } fn find_path_entry_in_commit( diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs index bd9e1f50c35..0a6b851d91d 100644 --- a/gix-blame/tests/blame.rs +++ b/gix-blame/tests/blame.rs @@ -292,11 +292,12 @@ mktest!( 3 ); -/// As of 2024-09-24, these tests are expected to fail. +/// As of 2024-09-24, the Myers-related test was expected to fail. Both tests were initially +/// written when diffing was done by `imara-diff` 0.1. After updating to `imara-diff` 0.2 on +/// 2025-12-07, the Myers-related test started passing. /// /// Context: https://github.com/Byron/gitoxide/pull/1453#issuecomment-2371013904 #[test] -#[should_panic = "empty-lines-myers"] fn diff_disparity() { for case in ["empty-lines-myers", "empty-lines-histogram"] { let Fixture { From ed9c43753dfbff421c6b267264ae7f4cc3439b46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20R=C3=BC=C3=9Fler?= Date: Sat, 13 Dec 2025 14:55:30 +0100 Subject: [PATCH 2/5] Add feature flag `blob-experimental` to `gix-blame` --- gix-blame/Cargo.toml | 7 ++- gix-blame/src/file/function.rs | 104 +++++++++++++++++++++++++++++++-- gix-blame/tests/blame.rs | 22 +++++-- 3 files changed, 124 insertions(+), 9 deletions(-) diff --git a/gix-blame/Cargo.toml b/gix-blame/Cargo.toml index 4fc8f775d32..51bc903424c 100644 --- a/gix-blame/Cargo.toml +++ b/gix-blame/Cargo.toml @@ -10,12 +10,17 @@ authors = ["Christoph Rüßler ", "Sebastian Thi edition = "2021" rust-version = "1.82" +[features] +## An experimental use of the v0.2 branch of `imara-diff` to allow trying it out, and for writing tests against it more easily. +## We will decide later how it should actually be exposed. +blob-experimental = ["gix-diff/blob-experimental"] + [dependencies] gix-commitgraph = { version = "^0.30.0", path = "../gix-commitgraph" } gix-revwalk = { version = "^0.23.0", path = "../gix-revwalk" } gix-trace = { version = "^0.1.14", path = "../gix-trace" } gix-date = { version = "^0.11.0", path = "../gix-date" } -gix-diff = { version = "^0.55.0", path = "../gix-diff", default-features = false, features = ["blob", "blob-experimental"] } +gix-diff = { version = "^0.55.0", path = "../gix-diff", default-features = false, features = ["blob"] } gix-object = { version = "^0.52.0", path = "../gix-object" } gix-hash = { version = "^0.20.0", path = "../gix-hash" } gix-worktree = { version = "^0.44.0", path = "../gix-worktree", default-features = false, features = ["attributes"] } diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index 649b27db84f..ba925364657 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -1,9 +1,9 @@ use std::num::NonZeroU32; -use gix_diff::{ - blob::{intern::TokenSource, v2::Hunk}, - tree::Visit, -}; +#[cfg(feature = "blob-experimental")] +use gix_diff::blob::v2::Hunk; + +use gix_diff::{blob::intern::TokenSource, tree::Visit}; use gix_hash::ObjectId; use gix_object::{ bstr::{BStr, BString}, @@ -751,6 +751,102 @@ fn tree_diff_with_rewrites_at_file_path( } #[allow(clippy::too_many_arguments)] +#[cfg(not(feature = "blob-experimental"))] +fn blob_changes( + odb: impl gix_object::Find + gix_object::FindHeader, + resource_cache: &mut gix_diff::blob::Platform, + oid: ObjectId, + previous_oid: ObjectId, + file_path: &BStr, + previous_file_path: &BStr, + diff_algorithm: gix_diff::blob::Algorithm, + stats: &mut Statistics, +) -> Result, Error> { + /// Record all [`Change`]s to learn about additions, deletions and unchanged portions of a *Source File*. + struct ChangeRecorder { + last_seen_after_end: u32, + hunks: Vec, + total_number_of_lines: u32, + } + + impl ChangeRecorder { + /// `total_number_of_lines` is used to fill in the last unchanged hunk if needed + /// so that the entire file is represented by [`Change`]. + fn new(total_number_of_lines: u32) -> Self { + ChangeRecorder { + last_seen_after_end: 0, + hunks: Vec::new(), + total_number_of_lines, + } + } + } + + use std::ops::Range; + + impl gix_diff::blob::Sink for ChangeRecorder { + type Out = Vec; + + fn process_change(&mut self, before: Range, after: Range) { + // This checks for unchanged hunks. + if after.start > self.last_seen_after_end { + self.hunks + .push(Change::Unchanged(self.last_seen_after_end..after.start)); + } + + match (!before.is_empty(), !after.is_empty()) { + (_, true) => { + self.hunks.push(Change::AddedOrReplaced( + after.start..after.end, + before.end - before.start, + )); + } + (true, false) => { + self.hunks.push(Change::Deleted(after.start, before.end - before.start)); + } + (false, false) => unreachable!("BUG: imara-diff provided a non-change"), + } + self.last_seen_after_end = after.end; + } + + fn finish(mut self) -> Self::Out { + if self.total_number_of_lines > self.last_seen_after_end { + self.hunks + .push(Change::Unchanged(self.last_seen_after_end..self.total_number_of_lines)); + } + self.hunks + } + } + + resource_cache.set_resource( + previous_oid, + gix_object::tree::EntryKind::Blob, + previous_file_path, + gix_diff::blob::ResourceKind::OldOrSource, + &odb, + )?; + resource_cache.set_resource( + oid, + gix_object::tree::EntryKind::Blob, + file_path, + gix_diff::blob::ResourceKind::NewOrDestination, + &odb, + )?; + + let outcome = resource_cache.prepare_diff()?; + let input = gix_diff::blob::intern::InternedInput::new( + tokens_for_diffing(outcome.old.data.as_slice().unwrap_or_default()), + tokens_for_diffing(outcome.new.data.as_slice().unwrap_or_default()), + ); + let number_of_lines_in_destination = input.after.len(); + let change_recorder = ChangeRecorder::new(number_of_lines_in_destination as u32); + + let res = gix_diff::blob::diff(diff_algorithm, &input, change_recorder); + stats.blobs_diffed += 1; + Ok(res) +} + +#[allow(clippy::too_many_arguments)] +#[cfg(feature = "blob-experimental")] fn blob_changes( odb: impl gix_object::Find + gix_object::FindHeader, resource_cache: &mut gix_diff::blob::Platform, diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs index 0a6b851d91d..6d5434a40bd 100644 --- a/gix-blame/tests/blame.rs +++ b/gix-blame/tests/blame.rs @@ -292,13 +292,27 @@ mktest!( 3 ); -/// As of 2024-09-24, the Myers-related test was expected to fail. Both tests were initially -/// written when diffing was done by `imara-diff` 0.1. After updating to `imara-diff` 0.2 on -/// 2025-12-07, the Myers-related test started passing. +/// As of 2024-09-24, the Myers-related test is expected to fail. Both tests use `imara-diff` 0.1 +/// under the hood. /// /// Context: https://github.com/Byron/gitoxide/pull/1453#issuecomment-2371013904 #[test] -fn diff_disparity() { +#[should_panic = "empty-lines-myers"] +#[cfg(not(feature = "blob-experimental"))] +fn diff_disparity_imara_diff_v1() { + diff_disparity_base() +} + +/// As of 2025-12-07, both tests are expected to pass. They use `imara-diff` 0.2 under the hood. +/// +/// Context: https://github.com/Byron/gitoxide/pull/1453#issuecomment-2371013904 +#[test] +#[cfg(feature = "blob-experimental")] +fn diff_disparity_imara_diff_v2() { + diff_disparity_base() +} + +fn diff_disparity_base() { for case in ["empty-lines-myers", "empty-lines-histogram"] { let Fixture { odb, From ca9e4b9626b69bf480ac07b54ea618db53ddd897 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20R=C3=BC=C3=9Fler?= Date: Sun, 14 Dec 2025 09:57:32 +0100 Subject: [PATCH 3/5] Thanks clippy --- gix-blame/tests/blame.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs index 6d5434a40bd..3fe22d348c0 100644 --- a/gix-blame/tests/blame.rs +++ b/gix-blame/tests/blame.rs @@ -300,7 +300,7 @@ mktest!( #[should_panic = "empty-lines-myers"] #[cfg(not(feature = "blob-experimental"))] fn diff_disparity_imara_diff_v1() { - diff_disparity_base() + diff_disparity_base(); } /// As of 2025-12-07, both tests are expected to pass. They use `imara-diff` 0.2 under the hood. @@ -309,7 +309,7 @@ fn diff_disparity_imara_diff_v1() { #[test] #[cfg(feature = "blob-experimental")] fn diff_disparity_imara_diff_v2() { - diff_disparity_base() + diff_disparity_base(); } fn diff_disparity_base() { From fcbdd65c8e2ac9de24560f185d88f0c3fc4c9e42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20R=C3=BC=C3=9Fler?= Date: Sun, 14 Dec 2025 09:57:44 +0100 Subject: [PATCH 4/5] Run `blob-experimental` tests for `gix-blame` --- justfile | 1 + 1 file changed, 1 insertion(+) diff --git a/justfile b/justfile index eeb7a76229e..bae0747ef61 100755 --- a/justfile +++ b/justfile @@ -173,6 +173,7 @@ unit-tests: cargo nextest run -p gix-transport --features async-client --no-fail-fast cargo nextest run -p gix-protocol --features blocking-client --no-fail-fast cargo nextest run -p gix-protocol --features async-client --no-fail-fast + cargo nextest run -p gix-blame --features blob-experimental --no-fail-fast cargo nextest run -p gix --no-default-features --no-fail-fast cargo nextest run -p gix --no-default-features --features basic,comfort,max-performance-safe --no-fail-fast cargo nextest run -p gix --no-default-features --features basic,extras,comfort,need-more-recent-msrv --no-fail-fast From 0b7c1ddc1b619c2dbe0ec3c0f5ab80d1cc6ffadb Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 17 Dec 2025 06:34:52 +0100 Subject: [PATCH 5/5] refactor - fix probably critical, and untested error related to the last hunk --- gix-blame/src/file/function.rs | 59 ++++++++++++++++++---------------- gix-blame/tests/blame.rs | 2 +- 2 files changed, 32 insertions(+), 29 deletions(-) diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index ba925364657..9af23e665e6 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -1,8 +1,5 @@ use std::num::NonZeroU32; -#[cfg(feature = "blob-experimental")] -use gix_diff::blob::v2::Hunk; - use gix_diff::{blob::intern::TokenSource, tree::Visit}; use gix_hash::ObjectId; use gix_object::{ @@ -762,6 +759,8 @@ fn blob_changes( diff_algorithm: gix_diff::blob::Algorithm, stats: &mut Statistics, ) -> Result, Error> { + use std::ops::Range; + /// Record all [`Change`]s to learn about additions, deletions and unchanged portions of a *Source File*. struct ChangeRecorder { last_seen_after_end: u32, @@ -781,8 +780,6 @@ fn blob_changes( } } - use std::ops::Range; - impl gix_diff::blob::Sink for ChangeRecorder { type Out = Vec; @@ -857,6 +854,8 @@ fn blob_changes( diff_algorithm: gix_diff::blob::Algorithm, stats: &mut Statistics, ) -> Result, Error> { + use gix_diff::blob::v2::Hunk; + resource_cache.set_resource( previous_oid, gix_object::tree::EntryKind::Blob, @@ -886,36 +885,40 @@ fn blob_changes( let mut diff = gix_diff::blob::v2::Diff::compute(diff_algorithm, &input); diff.postprocess_lines(&input); - let changes = diff - .hunks() - .fold((Vec::new(), 0), |(mut hunks, mut last_seen_after_end), hunk| { - let Hunk { before, after } = hunk; + let mut last_seen_after_end = 0; + let mut changes = diff.hunks().fold(Vec::new(), |mut hunks, hunk| { + let Hunk { before, after } = hunk; - // This checks for unchanged hunks. - if after.start > last_seen_after_end { - hunks.push(Change::Unchanged(last_seen_after_end..after.start)); - } + // This checks for unchanged hunks. + if after.start > last_seen_after_end { + hunks.push(Change::Unchanged(last_seen_after_end..after.start)); + } - match (!before.is_empty(), !after.is_empty()) { - (_, true) => { - hunks.push(Change::AddedOrReplaced( - after.start..after.end, - before.end - before.start, - )); - } - (true, false) => { - hunks.push(Change::Deleted(after.start, before.end - before.start)); - } - (false, false) => unreachable!("BUG: imara-diff provided a non-change"), + match (!before.is_empty(), !after.is_empty()) { + (_, true) => { + hunks.push(Change::AddedOrReplaced( + after.start..after.end, + before.end - before.start, + )); + } + (true, false) => { + hunks.push(Change::Deleted(after.start, before.end - before.start)); } + (false, false) => unreachable!("BUG: imara-diff provided a non-change"), + } - last_seen_after_end = after.end; + last_seen_after_end = after.end; - (hunks, last_seen_after_end) - }); + hunks + }); + + let total_number_of_lines = input.after.len() as u32; + if input.after.len() > last_seen_after_end as usize { + changes.push(Change::Unchanged(last_seen_after_end..total_number_of_lines)); + } stats.blobs_diffed += 1; - Ok(changes.0) + Ok(changes) } fn find_path_entry_in_commit( diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs index 3fe22d348c0..3b404a96153 100644 --- a/gix-blame/tests/blame.rs +++ b/gix-blame/tests/blame.rs @@ -303,7 +303,7 @@ fn diff_disparity_imara_diff_v1() { diff_disparity_base(); } -/// As of 2025-12-07, both tests are expected to pass. They use `imara-diff` 0.2 under the hood. +/// As of 2025-12-07, both algorithms are expected to pass. They use `imara-diff` 0.2 under the hood. /// /// Context: https://github.com/Byron/gitoxide/pull/1453#issuecomment-2371013904 #[test]