From 5aae1b07b8c770259afd3af8f0c77fe1007d4501 Mon Sep 17 00:00:00 2001 From: Austin Gill Date: Sun, 15 Feb 2026 11:34:31 -0600 Subject: [PATCH 1/3] Fix TGF emitter when node IDs contain whitespace The TGF parser splits on any whitespace, not just tabs, so if node IDs contain spaces, we couldn't round trip them through TGF. --- crates/csvizmo-depgraph/src/emit/tgf.rs | 47 ++++++++++++++++++++++-- crates/csvizmo-depgraph/tests/depconv.rs | 8 ++-- 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/crates/csvizmo-depgraph/src/emit/tgf.rs b/crates/csvizmo-depgraph/src/emit/tgf.rs index 253c4f5..a56f522 100644 --- a/crates/csvizmo-depgraph/src/emit/tgf.rs +++ b/crates/csvizmo-depgraph/src/emit/tgf.rs @@ -2,13 +2,29 @@ use std::io::Write; use crate::DepGraph; +/// Replace whitespace in a node ID with underscores so it survives TGF roundtripping. +/// +/// TGF uses whitespace to separate tokens, so bare whitespace inside an ID +/// would be mis-parsed on re-read. Tabs and spaces are both replaced. +fn sanitize_id(id: &str) -> String { + if id.contains(|c: char| c.is_ascii_whitespace()) { + id.chars() + .map(|c| if c.is_ascii_whitespace() { '_' } else { c }) + .collect() + } else { + id.to_string() + } +} + /// Emit a [`DepGraph`] as TGF (Trivial Graph Format). /// /// Preserves node IDs, node labels, edge endpoints, and edge labels. /// Graph-level attrs, node attrs, and edge attrs are silently dropped -/// (TGF has no syntax for them). +/// (TGF has no syntax for them). Whitespace in node IDs is replaced +/// with underscores to ensure the output can be parsed back. pub fn emit(graph: &DepGraph, writer: &mut dyn Write) -> eyre::Result<()> { for (id, info) in graph.all_nodes() { + let id = sanitize_id(id); if info.label != *id { writeln!(writer, "{id}\t{}", info.label)?; } else { @@ -19,9 +35,11 @@ pub fn emit(graph: &DepGraph, writer: &mut dyn Write) -> eyre::Result<()> { writeln!(writer, "#")?; for edge in graph.all_edges() { + let from = sanitize_id(&edge.from); + let to = sanitize_id(&edge.to); match &edge.label { - Some(label) => writeln!(writer, "{}\t{}\t{label}", edge.from, edge.to)?, - None => writeln!(writer, "{}\t{}", edge.from, edge.to)?, + Some(label) => writeln!(writer, "{from}\t{to}\t{label}")?, + None => writeln!(writer, "{from}\t{to}")?, } } @@ -106,6 +124,29 @@ mod tests { assert_eq!(output, "a\tAlpha\nb\n#\na\tb\tuses\n"); } + #[test] + fn whitespace_in_ids_replaced_with_underscores() { + let mut nodes = IndexMap::new(); + nodes.insert("my app v1.0".into(), NodeInfo::new("my app")); + nodes.insert("lib foo v2.0".into(), NodeInfo::new("lib foo")); + let graph = DepGraph { + nodes, + edges: vec![Edge { + from: "my app v1.0".into(), + to: "lib foo v2.0".into(), + ..Default::default() + }], + ..Default::default() + }; + let mut buf = Vec::new(); + emit(&graph, &mut buf).unwrap(); + let output = String::from_utf8(buf).unwrap(); + assert_eq!( + output, + "my_app_v1.0\tmy app\nlib_foo_v2.0\tlib foo\n#\nmy_app_v1.0\tlib_foo_v2.0\n" + ); + } + #[test] fn subgraph_nodes_and_edges_included() { let graph = DepGraph { diff --git a/crates/csvizmo-depgraph/tests/depconv.rs b/crates/csvizmo-depgraph/tests/depconv.rs index 891a281..dc7b8a8 100644 --- a/crates/csvizmo-depgraph/tests/depconv.rs +++ b/crates/csvizmo-depgraph/tests/depconv.rs @@ -522,22 +522,22 @@ fn cargo_tree_auto_detect() { assert_eq!(node_lines.len(), 69); assert_eq!(edge_lines.len(), 111); - // Root node should be first + // Root node should be first (spaces in IDs become underscores in TGF) assert!( - node_lines[0].starts_with("csvizmo-depgraph v0.5.0\t"), + node_lines[0].starts_with("csvizmo-depgraph_v0.5.0\t"), "root node should be first, got: {}", node_lines[0] ); // A known dependency should appear as a node assert!( - node_lines.iter().any(|l| l.starts_with("clap v4.5.57\t")), + node_lines.iter().any(|l| l.starts_with("clap_v4.5.57\t")), "clap should be in node list" ); // A known edge should exist assert!( - edge_lines.contains(&"csvizmo-depgraph v0.5.0\tclap v4.5.57"), + edge_lines.contains(&"csvizmo-depgraph_v0.5.0\tclap_v4.5.57"), "root -> clap edge should exist" ); } From 288eede29b18456dc3a99eba39e4b5e0e2b73296 Mon Sep 17 00:00:00 2001 From: Austin Gill Date: Sun, 15 Feb 2026 11:17:04 -0600 Subject: [PATCH 2/3] Add depfilter between subcommand --- .../csvizmo-depgraph/src/algorithm/between.rs | 279 ++++++++++++++++++ crates/csvizmo-depgraph/src/algorithm/mod.rs | 1 + crates/csvizmo-depgraph/src/bin/depfilter.rs | 4 + crates/csvizmo-depgraph/tests/depfilter.rs | 161 ++++++++++ 4 files changed, 445 insertions(+) create mode 100644 crates/csvizmo-depgraph/src/algorithm/between.rs diff --git a/crates/csvizmo-depgraph/src/algorithm/between.rs b/crates/csvizmo-depgraph/src/algorithm/between.rs new file mode 100644 index 0000000..47502ae --- /dev/null +++ b/crates/csvizmo-depgraph/src/algorithm/between.rs @@ -0,0 +1,279 @@ +use std::collections::HashSet; + +use clap::Parser; +use petgraph::Direction; +use petgraph::graph::NodeIndex; + +use super::{MatchKey, build_globset}; +use crate::{DepGraph, FlatGraphView}; + +#[derive(Clone, Debug, Default, Parser)] +pub struct BetweenArgs { + /// Glob pattern selecting query endpoints (can be repeated, OR logic) + #[clap(short, long)] + pub pattern: Vec, + + /// Match patterns against 'id' or 'label' + #[clap(long, default_value_t = MatchKey::default())] + pub key: MatchKey, +} + +impl BetweenArgs { + pub fn pattern(mut self, p: impl Into) -> Self { + self.pattern.push(p.into()); + self + } + + pub fn key(mut self, k: MatchKey) -> Self { + self.key = k; + self + } +} + +/// Extract the subgraph formed by all directed paths between any pair of matched query nodes. +/// +/// For matched query nodes q1..qk, computes forward and backward reachability from each, +/// then for each ordered pair (qi, qj) collects nodes on directed paths from qi to qj +/// via `forward(qi) & backward(qj)`. The union of all pairwise results is the keep set. +pub fn between(graph: &DepGraph, args: &BetweenArgs) -> eyre::Result { + let globset = build_globset(&args.pattern)?; + let view = FlatGraphView::new(graph); + + // Match query nodes by glob pattern (OR logic). + let matched: Vec = graph + .all_nodes() + .iter() + .filter_map(|(id, info)| { + let text = match args.key { + MatchKey::Id => id.as_str(), + MatchKey::Label => info.label.as_str(), + }; + if globset.is_match(text) { + view.id_to_idx.get(id.as_str()).copied() + } else { + None + } + }) + .collect(); + + // Need at least 2 matched nodes to have a path between them. + if matched.len() < 2 { + return Ok(view.filter(&HashSet::new())); + } + + // BFS forward and backward from each query node. + let forwards: Vec> = matched + .iter() + .map(|&q| view.bfs([q], Direction::Outgoing, None)) + .collect(); + let backwards: Vec> = matched + .iter() + .map(|&q| view.bfs([q], Direction::Incoming, None)) + .collect(); + + // Pairwise intersect: for each pair (i, j) where i != j, nodes on directed paths + // from qi to qj are in forward(qi) & backward(qj). + let mut keep = HashSet::new(); + for (i, fwd) in forwards.iter().enumerate() { + for (j, bwd) in backwards.iter().enumerate() { + if i == j { + continue; + } + for &node in fwd { + if bwd.contains(&node) { + keep.insert(node); + } + } + } + } + + Ok(view.filter(&keep)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{DepGraph, Edge, NodeInfo}; + + fn make_graph( + nodes: &[(&str, &str)], + edges: &[(&str, &str)], + subgraphs: Vec, + ) -> DepGraph { + DepGraph { + nodes: nodes + .iter() + .map(|(id, label)| (id.to_string(), NodeInfo::new(*label))) + .collect(), + edges: edges + .iter() + .map(|(from, to)| Edge { + from: from.to_string(), + to: to.to_string(), + ..Default::default() + }) + .collect(), + subgraphs, + ..Default::default() + } + } + + fn sorted_node_ids(graph: &DepGraph) -> Vec<&str> { + let mut ids: Vec<&str> = graph.nodes.keys().map(|s| s.as_str()).collect(); + ids.sort(); + ids + } + + fn sorted_edge_pairs(graph: &DepGraph) -> Vec<(&str, &str)> { + let mut pairs: Vec<(&str, &str)> = graph + .edges + .iter() + .map(|e| (e.from.as_str(), e.to.as_str())) + .collect(); + pairs.sort(); + pairs + } + + #[test] + fn direct_path() { + // a -> b: between a and b yields both + let g = make_graph(&[("a", "a"), ("b", "b")], &[("a", "b")], vec![]); + let args = BetweenArgs::default().pattern("a").pattern("b"); + let result = between(&g, &args).unwrap(); + assert_eq!(sorted_node_ids(&result), vec!["a", "b"]); + assert_eq!(sorted_edge_pairs(&result), vec![("a", "b")]); + } + + #[test] + fn intermediate_nodes() { + // a -> b -> c: between a and c includes intermediate b + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c")], + &[("a", "b"), ("b", "c")], + vec![], + ); + let args = BetweenArgs::default().pattern("a").pattern("c"); + let result = between(&g, &args).unwrap(); + assert_eq!(sorted_node_ids(&result), vec!["a", "b", "c"]); + assert_eq!(sorted_edge_pairs(&result), vec![("a", "b"), ("b", "c")]); + } + + #[test] + fn no_path_returns_empty() { + // a -> b, c -> d: no path between a and c + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], + &[("a", "b"), ("c", "d")], + vec![], + ); + let args = BetweenArgs::default().pattern("a").pattern("c"); + let result = between(&g, &args).unwrap(); + assert!(result.nodes.is_empty()); + assert!(result.edges.is_empty()); + } + + #[test] + fn diamond() { + // a -> b -> d, a -> c -> d: between a and d includes both paths + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], + &[("a", "b"), ("a", "c"), ("b", "d"), ("c", "d")], + vec![], + ); + let args = BetweenArgs::default().pattern("a").pattern("d"); + let result = between(&g, &args).unwrap(); + assert_eq!(sorted_node_ids(&result), vec!["a", "b", "c", "d"]); + assert_eq!( + sorted_edge_pairs(&result), + vec![("a", "b"), ("a", "c"), ("b", "d"), ("c", "d")] + ); + } + + #[test] + fn multiple_query_nodes() { + // a -> b -> c -> d: between a, b, and d includes everything on paths a->b, a->d, b->d + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], + &[("a", "b"), ("b", "c"), ("c", "d")], + vec![], + ); + let args = BetweenArgs::default() + .pattern("a") + .pattern("b") + .pattern("d"); + let result = between(&g, &args).unwrap(); + assert_eq!(sorted_node_ids(&result), vec!["a", "b", "c", "d"]); + } + + #[test] + fn no_match_returns_empty() { + let g = make_graph(&[("a", "a"), ("b", "b")], &[("a", "b")], vec![]); + let args = BetweenArgs::default().pattern("nonexistent"); + let result = between(&g, &args).unwrap(); + assert!(result.nodes.is_empty()); + assert!(result.edges.is_empty()); + } + + #[test] + fn single_match_returns_empty() { + // Only one node matches -- need at least 2 for a path + let g = make_graph(&[("a", "a"), ("b", "b")], &[("a", "b")], vec![]); + let args = BetweenArgs::default().pattern("a"); + let result = between(&g, &args).unwrap(); + assert!(result.nodes.is_empty()); + assert!(result.edges.is_empty()); + } + + #[test] + fn cycle() { + // a -> b -> c -> a: between a and c includes all nodes in the cycle + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c")], + &[("a", "b"), ("b", "c"), ("c", "a")], + vec![], + ); + let args = BetweenArgs::default().pattern("a").pattern("c"); + let result = between(&g, &args).unwrap(); + assert_eq!(sorted_node_ids(&result), vec!["a", "b", "c"]); + } + + #[test] + fn match_by_id() { + let g = make_graph(&[("1", "libfoo"), ("2", "libbar")], &[("1", "2")], vec![]); + let args = BetweenArgs::default() + .pattern("1") + .pattern("2") + .key(MatchKey::Id); + let result = between(&g, &args).unwrap(); + assert_eq!(sorted_node_ids(&result), vec!["1", "2"]); + assert_eq!(sorted_edge_pairs(&result), vec![("1", "2")]); + } + + #[test] + fn excludes_unrelated_nodes() { + // a -> b -> c, d -> e: between a and c should not include d or e + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d"), ("e", "e")], + &[("a", "b"), ("b", "c"), ("d", "e")], + vec![], + ); + let args = BetweenArgs::default().pattern("a").pattern("c"); + let result = between(&g, &args).unwrap(); + assert_eq!(sorted_node_ids(&result), vec!["a", "b", "c"]); + assert_eq!(sorted_edge_pairs(&result), vec![("a", "b"), ("b", "c")]); + } + + #[test] + fn glob_matching_multiple_nodes() { + // a -> b -> c: glob "?" matches a, b, c -- all pairs have paths + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c")], + &[("a", "b"), ("b", "c")], + vec![], + ); + let args = BetweenArgs::default().pattern("?"); + let result = between(&g, &args).unwrap(); + assert_eq!(sorted_node_ids(&result), vec!["a", "b", "c"]); + assert_eq!(sorted_edge_pairs(&result), vec![("a", "b"), ("b", "c")]); + } +} diff --git a/crates/csvizmo-depgraph/src/algorithm/mod.rs b/crates/csvizmo-depgraph/src/algorithm/mod.rs index f5b85db..c9f9708 100644 --- a/crates/csvizmo-depgraph/src/algorithm/mod.rs +++ b/crates/csvizmo-depgraph/src/algorithm/mod.rs @@ -1,3 +1,4 @@ +pub mod between; pub mod filter; pub mod select; diff --git a/crates/csvizmo-depgraph/src/bin/depfilter.rs b/crates/csvizmo-depgraph/src/bin/depfilter.rs index 0cb8114..db23190 100644 --- a/crates/csvizmo-depgraph/src/bin/depfilter.rs +++ b/crates/csvizmo-depgraph/src/bin/depfilter.rs @@ -3,6 +3,7 @@ use std::path::PathBuf; use clap::{Parser, Subcommand}; use csvizmo_depgraph::algorithm; +use csvizmo_depgraph::algorithm::between::BetweenArgs; use csvizmo_depgraph::algorithm::filter::FilterArgs; use csvizmo_depgraph::algorithm::select::SelectArgs; use csvizmo_depgraph::emit::OutputFormat; @@ -46,6 +47,8 @@ enum Command { Select(SelectArgs), /// Remove nodes matching patterns and optionally cascade to dependencies/ancestors Filter(FilterArgs), + /// Extract the subgraph of all directed paths between matched query nodes + Between(BetweenArgs), } fn main() -> eyre::Result<()> { @@ -94,6 +97,7 @@ fn main() -> eyre::Result<()> { let graph = match &args.command { Command::Select(select_args) => algorithm::select::select(&graph, select_args)?, Command::Filter(filter_args) => algorithm::filter::filter(&graph, filter_args)?, + Command::Between(between_args) => algorithm::between::between(&graph, between_args)?, }; let mut output = get_output_writer(&output_path)?; diff --git a/crates/csvizmo-depgraph/tests/depfilter.rs b/crates/csvizmo-depgraph/tests/depfilter.rs index e09c5f3..f1ed0e8 100644 --- a/crates/csvizmo-depgraph/tests/depfilter.rs +++ b/crates/csvizmo-depgraph/tests/depfilter.rs @@ -452,3 +452,164 @@ digraph { let stdout = String::from_utf8_lossy(&output.stdout); assert_eq!(stdout, "2\tlibbar\n3\tmyapp\n#\n3\t2\n"); } + +// -- between integration tests -- + +#[test] +fn between_two_nodes() { + // a -> b -> c: between a and c includes intermediate b + let graph = "a\nb\nc\n#\na\tb\nb\tc\n"; + let output = tool!("depfilter") + .args([ + "between", + "-p", + "a", + "-p", + "c", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\nb\nc\n#\na\tb\nb\tc\n"); +} + +#[test] +fn between_by_id() { + let output = tool!("depfilter") + .args([ + "between", + "-p", + "1", + "-p", + "2", + "--key", + "id", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(SIMPLE_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "1\tlibfoo\n2\tlibbar\n#\n1\t2\n"); +} + +#[test] +fn between_glob_multiple_nodes() { + // a -> b -> c: glob "?" matches all three, paths exist between all pairs + let graph = "a\nb\nc\n#\na\tb\nb\tc\n"; + let output = tool!("depfilter") + .args([ + "between", + "-p", + "?", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\nb\nc\n#\na\tb\nb\tc\n"); +} + +#[test] +fn between_no_matching_patterns() { + let graph = "a\nb\n#\na\tb\n"; + let output = tool!("depfilter") + .args([ + "between", + "-p", + "nonexistent", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "#\n"); +} + +#[test] +fn between_no_path() { + // a -> b, c -> d: no path between a and c + let graph = "a\nb\nc\nd\n#\na\tb\nc\td\n"; + let output = tool!("depfilter") + .args([ + "between", + "-p", + "a", + "-p", + "c", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "#\n"); +} + +#[test] +fn between_cargo_metadata_fixture() { + // Use the real cargo-metadata.json fixture to test between on a non-trivial graph. + // csvizmo-depgraph depends on clap both directly and via csvizmo-utils, + // so csvizmo-utils is an intermediate node on a path to clap. + // clap in turn depends on clap_builder, clap_derive, and clap_builder -> clap_lex. + let input = include_str!("../../../data/depconv/cargo-metadata.json"); + let output = tool!("depfilter") + .args([ + "between", + "-p", + "csvizmo-depgraph", + "-p", + "clap*", + "--input-format", + "cargo-metadata", + "--output-format", + "tgf", + ]) + .write_stdin(input) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!( + stdout, + "\ +clap_4.5.57\tclap +clap_builder_4.5.57\tclap_builder +clap_derive_4.5.55\tclap_derive +clap_lex_0.7.7\tclap_lex +csvizmo-depgraph_0.5.0\tcsvizmo-depgraph +csvizmo-utils_0.5.0\tcsvizmo-utils +# +clap_4.5.57\tclap_builder_4.5.57 +clap_4.5.57\tclap_derive_4.5.55 +clap_builder_4.5.57\tclap_lex_0.7.7 +csvizmo-depgraph_0.5.0\tclap_4.5.57 +csvizmo-depgraph_0.5.0\tcsvizmo-utils_0.5.0 +csvizmo-utils_0.5.0\tclap_4.5.57 +" + ); +} From a18a33153c7a3ea322dea2eeae93e336e0e071ec Mon Sep 17 00:00:00 2001 From: Austin Gill Date: Sun, 15 Feb 2026 11:50:47 -0600 Subject: [PATCH 3/3] Add cycles subcommand to depfilter --- .../csvizmo-depgraph/src/algorithm/cycles.rs | 253 ++++++++++++++++++ crates/csvizmo-depgraph/src/algorithm/mod.rs | 1 + crates/csvizmo-depgraph/src/bin/depfilter.rs | 4 + crates/csvizmo-depgraph/tests/depfilter.rs | 157 +++++++++++ 4 files changed, 415 insertions(+) create mode 100644 crates/csvizmo-depgraph/src/algorithm/cycles.rs diff --git a/crates/csvizmo-depgraph/src/algorithm/cycles.rs b/crates/csvizmo-depgraph/src/algorithm/cycles.rs new file mode 100644 index 0000000..94abb2c --- /dev/null +++ b/crates/csvizmo-depgraph/src/algorithm/cycles.rs @@ -0,0 +1,253 @@ +use std::collections::{HashMap, HashSet}; + +use clap::Parser; +use indexmap::IndexMap; +use petgraph::algo::tarjan_scc; +use petgraph::graph::NodeIndex; + +use crate::{DepGraph, Edge, FlatGraphView, NodeInfo}; + +/// Detect cycles (strongly connected components with 2+ nodes) in the graph. +#[derive(Clone, Debug, Default, Parser)] +pub struct CyclesArgs {} + +/// Find all cycles in the dependency graph and output them as subgraphs. +/// +/// Uses Tarjan's SCC algorithm to find strongly connected components. SCCs with 2+ nodes +/// are reported as cycles, each in its own subgraph named `cycle_0`, `cycle_1`, etc. +/// Edges between different cycle SCCs appear at the top level. Self-loops (SCCs with 1 +/// node) are ignored. If no cycles exist, returns an empty graph. +pub fn cycles(graph: &DepGraph, _args: &CyclesArgs) -> eyre::Result { + let view = FlatGraphView::new(graph); + let sccs = tarjan_scc(&view.pg); + + // Filter to SCCs with 2+ nodes (ignore self-loops). + let cycle_sccs: Vec> = sccs + .into_iter() + .filter(|scc| scc.len() >= 2) + .map(|scc| scc.into_iter().collect()) + .collect(); + + if cycle_sccs.is_empty() { + return Ok(DepGraph::default()); + } + + // Map each cycle node to its SCC index. + let mut node_to_scc: HashMap = HashMap::new(); + for (i, scc) in cycle_sccs.iter().enumerate() { + for &node in scc { + node_to_scc.insert(node, i); + } + } + + let all_nodes = graph.all_nodes(); + let all_edges = graph.all_edges(); + + // Build a subgraph per SCC, preserving original node/edge order. + let mut subgraphs = Vec::new(); + for (i, scc) in cycle_sccs.iter().enumerate() { + let nodes: IndexMap = all_nodes + .iter() + .filter(|(id, _)| { + view.id_to_idx + .get(id.as_str()) + .is_some_and(|idx| scc.contains(idx)) + }) + .map(|(id, info)| (id.clone(), info.clone())) + .collect(); + + let edges: Vec = all_edges + .iter() + .filter(|e| { + let from = view.id_to_idx.get(e.from.as_str()); + let to = view.id_to_idx.get(e.to.as_str()); + match (from, to) { + (Some(f), Some(t)) => scc.contains(f) && scc.contains(t), + _ => false, + } + }) + .cloned() + .collect(); + + subgraphs.push(DepGraph { + id: Some(format!("cycle_{i}")), + nodes, + edges, + ..Default::default() + }); + } + + // Cross-cycle edges: both endpoints in cycle nodes but in different SCCs. + let cross_edges: Vec = all_edges + .iter() + .filter(|e| { + let from = view.id_to_idx.get(e.from.as_str()); + let to = view.id_to_idx.get(e.to.as_str()); + match (from, to) { + (Some(f), Some(t)) => match (node_to_scc.get(f), node_to_scc.get(t)) { + (Some(sf), Some(st)) => sf != st, + _ => false, + }, + _ => false, + } + }) + .cloned() + .collect(); + + Ok(DepGraph { + edges: cross_edges, + subgraphs, + ..Default::default() + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_graph(nodes: &[(&str, &str)], edges: &[(&str, &str)]) -> DepGraph { + DepGraph { + nodes: nodes + .iter() + .map(|(id, label)| (id.to_string(), NodeInfo::new(*label))) + .collect(), + edges: edges + .iter() + .map(|(from, to)| Edge { + from: from.to_string(), + to: to.to_string(), + ..Default::default() + }) + .collect(), + ..Default::default() + } + } + + fn sorted_node_ids(graph: &DepGraph) -> Vec<&str> { + let mut ids: Vec<&str> = graph.nodes.keys().map(|s| s.as_str()).collect(); + ids.sort(); + ids + } + + fn sorted_edge_pairs(graph: &DepGraph) -> Vec<(&str, &str)> { + let mut pairs: Vec<(&str, &str)> = graph + .edges + .iter() + .map(|e| (e.from.as_str(), e.to.as_str())) + .collect(); + pairs.sort(); + pairs + } + + #[test] + fn no_cycles_returns_empty() { + // a -> b -> c: DAG, no cycles + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c")], + &[("a", "b"), ("b", "c")], + ); + let result = cycles(&g, &CyclesArgs::default()).unwrap(); + assert!(result.nodes.is_empty()); + assert!(result.edges.is_empty()); + assert!(result.subgraphs.is_empty()); + } + + #[test] + fn simple_cycle() { + // a -> b -> c -> a + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c")], + &[("a", "b"), ("b", "c"), ("c", "a")], + ); + let result = cycles(&g, &CyclesArgs::default()).unwrap(); + assert!(result.nodes.is_empty(), "no top-level nodes"); + assert!(result.edges.is_empty(), "no cross-cycle edges"); + assert_eq!(result.subgraphs.len(), 1); + + let sg = &result.subgraphs[0]; + assert_eq!(sg.id.as_deref(), Some("cycle_0")); + assert_eq!(sorted_node_ids(sg), vec!["a", "b", "c"]); + assert_eq!( + sorted_edge_pairs(sg), + vec![("a", "b"), ("b", "c"), ("c", "a")] + ); + } + + #[test] + fn self_loop_ignored() { + // a -> a: self-loop is an SCC of size 1, should be ignored + let g = make_graph(&[("a", "a")], &[("a", "a")]); + let result = cycles(&g, &CyclesArgs::default()).unwrap(); + assert!(result.nodes.is_empty()); + assert!(result.edges.is_empty()); + assert!(result.subgraphs.is_empty()); + } + + #[test] + fn multiple_disjoint_cycles() { + // cycle1: a -> b -> a + // cycle2: c -> d -> c + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], + &[("a", "b"), ("b", "a"), ("c", "d"), ("d", "c")], + ); + let result = cycles(&g, &CyclesArgs::default()).unwrap(); + assert_eq!(result.subgraphs.len(), 2); + + // Collect all cycle nodes across subgraphs. + let mut all_cycle_nodes: Vec<&str> = result + .subgraphs + .iter() + .flat_map(|sg| sg.nodes.keys().map(|s| s.as_str())) + .collect(); + all_cycle_nodes.sort(); + assert_eq!(all_cycle_nodes, vec!["a", "b", "c", "d"]); + + // Each subgraph has 2 nodes. + for sg in &result.subgraphs { + assert_eq!(sg.nodes.len(), 2); + assert_eq!(sg.edges.len(), 2); + } + } + + #[test] + fn mixed_graph_excludes_acyclic_nodes() { + // x -> a -> b -> a, b -> y: only a and b are in a cycle + let g = make_graph( + &[("x", "x"), ("a", "a"), ("b", "b"), ("y", "y")], + &[("x", "a"), ("a", "b"), ("b", "a"), ("b", "y")], + ); + let result = cycles(&g, &CyclesArgs::default()).unwrap(); + assert_eq!(result.subgraphs.len(), 1); + + let sg = &result.subgraphs[0]; + assert_eq!(sorted_node_ids(sg), vec!["a", "b"]); + assert_eq!(sorted_edge_pairs(sg), vec![("a", "b"), ("b", "a")]); + // x and y are not in any cycle + assert!(result.nodes.is_empty()); + } + + #[test] + fn cross_cycle_edges() { + // cycle1: a -> b -> a + // cycle2: c -> d -> c + // cross edge: b -> c + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], + &[("a", "b"), ("b", "a"), ("c", "d"), ("d", "c"), ("b", "c")], + ); + let result = cycles(&g, &CyclesArgs::default()).unwrap(); + assert_eq!(result.subgraphs.len(), 2); + // The cross-cycle edge b -> c should be at the top level. + assert_eq!(sorted_edge_pairs(&result), vec![("b", "c")]); + } + + #[test] + fn empty_graph() { + let g = DepGraph::default(); + let result = cycles(&g, &CyclesArgs::default()).unwrap(); + assert!(result.nodes.is_empty()); + assert!(result.edges.is_empty()); + assert!(result.subgraphs.is_empty()); + } +} diff --git a/crates/csvizmo-depgraph/src/algorithm/mod.rs b/crates/csvizmo-depgraph/src/algorithm/mod.rs index c9f9708..d43e1b5 100644 --- a/crates/csvizmo-depgraph/src/algorithm/mod.rs +++ b/crates/csvizmo-depgraph/src/algorithm/mod.rs @@ -1,4 +1,5 @@ pub mod between; +pub mod cycles; pub mod filter; pub mod select; diff --git a/crates/csvizmo-depgraph/src/bin/depfilter.rs b/crates/csvizmo-depgraph/src/bin/depfilter.rs index db23190..29190c8 100644 --- a/crates/csvizmo-depgraph/src/bin/depfilter.rs +++ b/crates/csvizmo-depgraph/src/bin/depfilter.rs @@ -4,6 +4,7 @@ use std::path::PathBuf; use clap::{Parser, Subcommand}; use csvizmo_depgraph::algorithm; use csvizmo_depgraph::algorithm::between::BetweenArgs; +use csvizmo_depgraph::algorithm::cycles::CyclesArgs; use csvizmo_depgraph::algorithm::filter::FilterArgs; use csvizmo_depgraph::algorithm::select::SelectArgs; use csvizmo_depgraph::emit::OutputFormat; @@ -49,6 +50,8 @@ enum Command { Filter(FilterArgs), /// Extract the subgraph of all directed paths between matched query nodes Between(BetweenArgs), + /// Detect cycles (strongly connected components) and output each as a subgraph + Cycles(CyclesArgs), } fn main() -> eyre::Result<()> { @@ -98,6 +101,7 @@ fn main() -> eyre::Result<()> { Command::Select(select_args) => algorithm::select::select(&graph, select_args)?, Command::Filter(filter_args) => algorithm::filter::filter(&graph, filter_args)?, Command::Between(between_args) => algorithm::between::between(&graph, between_args)?, + Command::Cycles(cycles_args) => algorithm::cycles::cycles(&graph, cycles_args)?, }; let mut output = get_output_writer(&output_path)?; diff --git a/crates/csvizmo-depgraph/tests/depfilter.rs b/crates/csvizmo-depgraph/tests/depfilter.rs index f1ed0e8..5518e30 100644 --- a/crates/csvizmo-depgraph/tests/depfilter.rs +++ b/crates/csvizmo-depgraph/tests/depfilter.rs @@ -613,3 +613,160 @@ csvizmo-utils_0.5.0\tclap_4.5.57 " ); } + +// -- cycles integration tests -- + +#[test] +fn cycles_dag_no_cycles() { + // a -> b -> c: no cycles, empty output + let graph = "a\nb\nc\n#\na\tb\nb\tc\n"; + let output = tool!("depfilter") + .args(["cycles", "--input-format", "tgf", "--output-format", "tgf"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "#\n"); +} + +#[test] +fn cycles_simple_three_node() { + // a -> b -> c -> a: single cycle with all three nodes + let graph = "a\nb\nc\n#\na\tb\nb\tc\nc\ta\n"; + let output = tool!("depfilter") + .args(["cycles", "--input-format", "tgf", "--output-format", "dot"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!( + stdout, + "\ +digraph { + subgraph cluster_cycle_0 { + label=\"cycle_0\"; + a; + b; + c; + a -> b; + b -> c; + c -> a; + } +} +" + ); +} + +#[test] +fn cycles_multiple_disjoint() { + // cycle1: a <-> b, cycle2: c <-> d (no edges between them) + let graph = "a\nb\nc\nd\n#\na\tb\nb\ta\nc\td\nd\tc\n"; + let output = tool!("depfilter") + .args(["cycles", "--input-format", "tgf", "--output-format", "dot"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + // Both cycles appear as separate subgraphs, no cross-cycle edges. + assert_eq!( + stdout, + "\ +digraph { + subgraph cluster_cycle_0 { + label=\"cycle_0\"; + a; + b; + a -> b; + b -> a; + } + subgraph cluster_cycle_1 { + label=\"cycle_1\"; + c; + d; + c -> d; + d -> c; + } +} +" + ); +} + +#[test] +fn cycles_mixed_graph_excludes_acyclic() { + // x -> a <-> b -> y: only a and b form a cycle; x and y excluded + let graph = "x\na\nb\ny\n#\nx\ta\na\tb\nb\ta\nb\ty\n"; + let output = tool!("depfilter") + .args(["cycles", "--input-format", "tgf", "--output-format", "dot"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!( + stdout, + "\ +digraph { + subgraph cluster_cycle_0 { + label=\"cycle_0\"; + a; + b; + a -> b; + b -> a; + } +} +" + ); +} + +#[test] +fn cycles_cross_cycle_edges_at_top_level() { + // cycle1: a <-> b, cycle2: c <-> d, cross-edge: b -> c + let graph = "a\nb\nc\nd\n#\na\tb\nb\ta\nc\td\nd\tc\nb\tc\n"; + let output = tool!("depfilter") + .args(["cycles", "--input-format", "tgf", "--output-format", "dot"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + // tarjan_scc returns SCCs in reverse topological order: c,d before a,b + assert_eq!( + stdout, + "\ +digraph { + subgraph cluster_cycle_0 { + label=\"cycle_0\"; + c; + d; + c -> d; + d -> c; + } + subgraph cluster_cycle_1 { + label=\"cycle_1\"; + a; + b; + a -> b; + b -> a; + } + b -> c; +} +" + ); +} + +#[test] +fn cycles_self_loop_ignored() { + // a -> a: self-loop is not a cycle (SCC size 1) + let graph = "a\n#\na\ta\n"; + let output = tool!("depfilter") + .args(["cycles", "--input-format", "tgf", "--output-format", "tgf"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "#\n"); +}