diff --git a/crates/csvizmo-depgraph/src/algorithm/between.rs b/crates/csvizmo-depgraph/src/algorithm/between.rs new file mode 100644 index 0000000..47502ae --- /dev/null +++ b/crates/csvizmo-depgraph/src/algorithm/between.rs @@ -0,0 +1,279 @@ +use std::collections::HashSet; + +use clap::Parser; +use petgraph::Direction; +use petgraph::graph::NodeIndex; + +use super::{MatchKey, build_globset}; +use crate::{DepGraph, FlatGraphView}; + +#[derive(Clone, Debug, Default, Parser)] +pub struct BetweenArgs { + /// Glob pattern selecting query endpoints (can be repeated, OR logic) + #[clap(short, long)] + pub pattern: Vec, + + /// Match patterns against 'id' or 'label' + #[clap(long, default_value_t = MatchKey::default())] + pub key: MatchKey, +} + +impl BetweenArgs { + pub fn pattern(mut self, p: impl Into) -> Self { + self.pattern.push(p.into()); + self + } + + pub fn key(mut self, k: MatchKey) -> Self { + self.key = k; + self + } +} + +/// Extract the subgraph formed by all directed paths between any pair of matched query nodes. +/// +/// For matched query nodes q1..qk, computes forward and backward reachability from each, +/// then for each ordered pair (qi, qj) collects nodes on directed paths from qi to qj +/// via `forward(qi) & backward(qj)`. The union of all pairwise results is the keep set. +pub fn between(graph: &DepGraph, args: &BetweenArgs) -> eyre::Result { + let globset = build_globset(&args.pattern)?; + let view = FlatGraphView::new(graph); + + // Match query nodes by glob pattern (OR logic). + let matched: Vec = graph + .all_nodes() + .iter() + .filter_map(|(id, info)| { + let text = match args.key { + MatchKey::Id => id.as_str(), + MatchKey::Label => info.label.as_str(), + }; + if globset.is_match(text) { + view.id_to_idx.get(id.as_str()).copied() + } else { + None + } + }) + .collect(); + + // Need at least 2 matched nodes to have a path between them. + if matched.len() < 2 { + return Ok(view.filter(&HashSet::new())); + } + + // BFS forward and backward from each query node. + let forwards: Vec> = matched + .iter() + .map(|&q| view.bfs([q], Direction::Outgoing, None)) + .collect(); + let backwards: Vec> = matched + .iter() + .map(|&q| view.bfs([q], Direction::Incoming, None)) + .collect(); + + // Pairwise intersect: for each pair (i, j) where i != j, nodes on directed paths + // from qi to qj are in forward(qi) & backward(qj). + let mut keep = HashSet::new(); + for (i, fwd) in forwards.iter().enumerate() { + for (j, bwd) in backwards.iter().enumerate() { + if i == j { + continue; + } + for &node in fwd { + if bwd.contains(&node) { + keep.insert(node); + } + } + } + } + + Ok(view.filter(&keep)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{DepGraph, Edge, NodeInfo}; + + fn make_graph( + nodes: &[(&str, &str)], + edges: &[(&str, &str)], + subgraphs: Vec, + ) -> DepGraph { + DepGraph { + nodes: nodes + .iter() + .map(|(id, label)| (id.to_string(), NodeInfo::new(*label))) + .collect(), + edges: edges + .iter() + .map(|(from, to)| Edge { + from: from.to_string(), + to: to.to_string(), + ..Default::default() + }) + .collect(), + subgraphs, + ..Default::default() + } + } + + fn sorted_node_ids(graph: &DepGraph) -> Vec<&str> { + let mut ids: Vec<&str> = graph.nodes.keys().map(|s| s.as_str()).collect(); + ids.sort(); + ids + } + + fn sorted_edge_pairs(graph: &DepGraph) -> Vec<(&str, &str)> { + let mut pairs: Vec<(&str, &str)> = graph + .edges + .iter() + .map(|e| (e.from.as_str(), e.to.as_str())) + .collect(); + pairs.sort(); + pairs + } + + #[test] + fn direct_path() { + // a -> b: between a and b yields both + let g = make_graph(&[("a", "a"), ("b", "b")], &[("a", "b")], vec![]); + let args = BetweenArgs::default().pattern("a").pattern("b"); + let result = between(&g, &args).unwrap(); + assert_eq!(sorted_node_ids(&result), vec!["a", "b"]); + assert_eq!(sorted_edge_pairs(&result), vec![("a", "b")]); + } + + #[test] + fn intermediate_nodes() { + // a -> b -> c: between a and c includes intermediate b + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c")], + &[("a", "b"), ("b", "c")], + vec![], + ); + let args = BetweenArgs::default().pattern("a").pattern("c"); + let result = between(&g, &args).unwrap(); + assert_eq!(sorted_node_ids(&result), vec!["a", "b", "c"]); + assert_eq!(sorted_edge_pairs(&result), vec![("a", "b"), ("b", "c")]); + } + + #[test] + fn no_path_returns_empty() { + // a -> b, c -> d: no path between a and c + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], + &[("a", "b"), ("c", "d")], + vec![], + ); + let args = BetweenArgs::default().pattern("a").pattern("c"); + let result = between(&g, &args).unwrap(); + assert!(result.nodes.is_empty()); + assert!(result.edges.is_empty()); + } + + #[test] + fn diamond() { + // a -> b -> d, a -> c -> d: between a and d includes both paths + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], + &[("a", "b"), ("a", "c"), ("b", "d"), ("c", "d")], + vec![], + ); + let args = BetweenArgs::default().pattern("a").pattern("d"); + let result = between(&g, &args).unwrap(); + assert_eq!(sorted_node_ids(&result), vec!["a", "b", "c", "d"]); + assert_eq!( + sorted_edge_pairs(&result), + vec![("a", "b"), ("a", "c"), ("b", "d"), ("c", "d")] + ); + } + + #[test] + fn multiple_query_nodes() { + // a -> b -> c -> d: between a, b, and d includes everything on paths a->b, a->d, b->d + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], + &[("a", "b"), ("b", "c"), ("c", "d")], + vec![], + ); + let args = BetweenArgs::default() + .pattern("a") + .pattern("b") + .pattern("d"); + let result = between(&g, &args).unwrap(); + assert_eq!(sorted_node_ids(&result), vec!["a", "b", "c", "d"]); + } + + #[test] + fn no_match_returns_empty() { + let g = make_graph(&[("a", "a"), ("b", "b")], &[("a", "b")], vec![]); + let args = BetweenArgs::default().pattern("nonexistent"); + let result = between(&g, &args).unwrap(); + assert!(result.nodes.is_empty()); + assert!(result.edges.is_empty()); + } + + #[test] + fn single_match_returns_empty() { + // Only one node matches -- need at least 2 for a path + let g = make_graph(&[("a", "a"), ("b", "b")], &[("a", "b")], vec![]); + let args = BetweenArgs::default().pattern("a"); + let result = between(&g, &args).unwrap(); + assert!(result.nodes.is_empty()); + assert!(result.edges.is_empty()); + } + + #[test] + fn cycle() { + // a -> b -> c -> a: between a and c includes all nodes in the cycle + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c")], + &[("a", "b"), ("b", "c"), ("c", "a")], + vec![], + ); + let args = BetweenArgs::default().pattern("a").pattern("c"); + let result = between(&g, &args).unwrap(); + assert_eq!(sorted_node_ids(&result), vec!["a", "b", "c"]); + } + + #[test] + fn match_by_id() { + let g = make_graph(&[("1", "libfoo"), ("2", "libbar")], &[("1", "2")], vec![]); + let args = BetweenArgs::default() + .pattern("1") + .pattern("2") + .key(MatchKey::Id); + let result = between(&g, &args).unwrap(); + assert_eq!(sorted_node_ids(&result), vec!["1", "2"]); + assert_eq!(sorted_edge_pairs(&result), vec![("1", "2")]); + } + + #[test] + fn excludes_unrelated_nodes() { + // a -> b -> c, d -> e: between a and c should not include d or e + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d"), ("e", "e")], + &[("a", "b"), ("b", "c"), ("d", "e")], + vec![], + ); + let args = BetweenArgs::default().pattern("a").pattern("c"); + let result = between(&g, &args).unwrap(); + assert_eq!(sorted_node_ids(&result), vec!["a", "b", "c"]); + assert_eq!(sorted_edge_pairs(&result), vec![("a", "b"), ("b", "c")]); + } + + #[test] + fn glob_matching_multiple_nodes() { + // a -> b -> c: glob "?" matches a, b, c -- all pairs have paths + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c")], + &[("a", "b"), ("b", "c")], + vec![], + ); + let args = BetweenArgs::default().pattern("?"); + let result = between(&g, &args).unwrap(); + assert_eq!(sorted_node_ids(&result), vec!["a", "b", "c"]); + assert_eq!(sorted_edge_pairs(&result), vec![("a", "b"), ("b", "c")]); + } +} diff --git a/crates/csvizmo-depgraph/src/algorithm/cycles.rs b/crates/csvizmo-depgraph/src/algorithm/cycles.rs new file mode 100644 index 0000000..94abb2c --- /dev/null +++ b/crates/csvizmo-depgraph/src/algorithm/cycles.rs @@ -0,0 +1,253 @@ +use std::collections::{HashMap, HashSet}; + +use clap::Parser; +use indexmap::IndexMap; +use petgraph::algo::tarjan_scc; +use petgraph::graph::NodeIndex; + +use crate::{DepGraph, Edge, FlatGraphView, NodeInfo}; + +/// Detect cycles (strongly connected components with 2+ nodes) in the graph. +#[derive(Clone, Debug, Default, Parser)] +pub struct CyclesArgs {} + +/// Find all cycles in the dependency graph and output them as subgraphs. +/// +/// Uses Tarjan's SCC algorithm to find strongly connected components. SCCs with 2+ nodes +/// are reported as cycles, each in its own subgraph named `cycle_0`, `cycle_1`, etc. +/// Edges between different cycle SCCs appear at the top level. Self-loops (SCCs with 1 +/// node) are ignored. If no cycles exist, returns an empty graph. +pub fn cycles(graph: &DepGraph, _args: &CyclesArgs) -> eyre::Result { + let view = FlatGraphView::new(graph); + let sccs = tarjan_scc(&view.pg); + + // Filter to SCCs with 2+ nodes (ignore self-loops). + let cycle_sccs: Vec> = sccs + .into_iter() + .filter(|scc| scc.len() >= 2) + .map(|scc| scc.into_iter().collect()) + .collect(); + + if cycle_sccs.is_empty() { + return Ok(DepGraph::default()); + } + + // Map each cycle node to its SCC index. + let mut node_to_scc: HashMap = HashMap::new(); + for (i, scc) in cycle_sccs.iter().enumerate() { + for &node in scc { + node_to_scc.insert(node, i); + } + } + + let all_nodes = graph.all_nodes(); + let all_edges = graph.all_edges(); + + // Build a subgraph per SCC, preserving original node/edge order. + let mut subgraphs = Vec::new(); + for (i, scc) in cycle_sccs.iter().enumerate() { + let nodes: IndexMap = all_nodes + .iter() + .filter(|(id, _)| { + view.id_to_idx + .get(id.as_str()) + .is_some_and(|idx| scc.contains(idx)) + }) + .map(|(id, info)| (id.clone(), info.clone())) + .collect(); + + let edges: Vec = all_edges + .iter() + .filter(|e| { + let from = view.id_to_idx.get(e.from.as_str()); + let to = view.id_to_idx.get(e.to.as_str()); + match (from, to) { + (Some(f), Some(t)) => scc.contains(f) && scc.contains(t), + _ => false, + } + }) + .cloned() + .collect(); + + subgraphs.push(DepGraph { + id: Some(format!("cycle_{i}")), + nodes, + edges, + ..Default::default() + }); + } + + // Cross-cycle edges: both endpoints in cycle nodes but in different SCCs. + let cross_edges: Vec = all_edges + .iter() + .filter(|e| { + let from = view.id_to_idx.get(e.from.as_str()); + let to = view.id_to_idx.get(e.to.as_str()); + match (from, to) { + (Some(f), Some(t)) => match (node_to_scc.get(f), node_to_scc.get(t)) { + (Some(sf), Some(st)) => sf != st, + _ => false, + }, + _ => false, + } + }) + .cloned() + .collect(); + + Ok(DepGraph { + edges: cross_edges, + subgraphs, + ..Default::default() + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_graph(nodes: &[(&str, &str)], edges: &[(&str, &str)]) -> DepGraph { + DepGraph { + nodes: nodes + .iter() + .map(|(id, label)| (id.to_string(), NodeInfo::new(*label))) + .collect(), + edges: edges + .iter() + .map(|(from, to)| Edge { + from: from.to_string(), + to: to.to_string(), + ..Default::default() + }) + .collect(), + ..Default::default() + } + } + + fn sorted_node_ids(graph: &DepGraph) -> Vec<&str> { + let mut ids: Vec<&str> = graph.nodes.keys().map(|s| s.as_str()).collect(); + ids.sort(); + ids + } + + fn sorted_edge_pairs(graph: &DepGraph) -> Vec<(&str, &str)> { + let mut pairs: Vec<(&str, &str)> = graph + .edges + .iter() + .map(|e| (e.from.as_str(), e.to.as_str())) + .collect(); + pairs.sort(); + pairs + } + + #[test] + fn no_cycles_returns_empty() { + // a -> b -> c: DAG, no cycles + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c")], + &[("a", "b"), ("b", "c")], + ); + let result = cycles(&g, &CyclesArgs::default()).unwrap(); + assert!(result.nodes.is_empty()); + assert!(result.edges.is_empty()); + assert!(result.subgraphs.is_empty()); + } + + #[test] + fn simple_cycle() { + // a -> b -> c -> a + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c")], + &[("a", "b"), ("b", "c"), ("c", "a")], + ); + let result = cycles(&g, &CyclesArgs::default()).unwrap(); + assert!(result.nodes.is_empty(), "no top-level nodes"); + assert!(result.edges.is_empty(), "no cross-cycle edges"); + assert_eq!(result.subgraphs.len(), 1); + + let sg = &result.subgraphs[0]; + assert_eq!(sg.id.as_deref(), Some("cycle_0")); + assert_eq!(sorted_node_ids(sg), vec!["a", "b", "c"]); + assert_eq!( + sorted_edge_pairs(sg), + vec![("a", "b"), ("b", "c"), ("c", "a")] + ); + } + + #[test] + fn self_loop_ignored() { + // a -> a: self-loop is an SCC of size 1, should be ignored + let g = make_graph(&[("a", "a")], &[("a", "a")]); + let result = cycles(&g, &CyclesArgs::default()).unwrap(); + assert!(result.nodes.is_empty()); + assert!(result.edges.is_empty()); + assert!(result.subgraphs.is_empty()); + } + + #[test] + fn multiple_disjoint_cycles() { + // cycle1: a -> b -> a + // cycle2: c -> d -> c + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], + &[("a", "b"), ("b", "a"), ("c", "d"), ("d", "c")], + ); + let result = cycles(&g, &CyclesArgs::default()).unwrap(); + assert_eq!(result.subgraphs.len(), 2); + + // Collect all cycle nodes across subgraphs. + let mut all_cycle_nodes: Vec<&str> = result + .subgraphs + .iter() + .flat_map(|sg| sg.nodes.keys().map(|s| s.as_str())) + .collect(); + all_cycle_nodes.sort(); + assert_eq!(all_cycle_nodes, vec!["a", "b", "c", "d"]); + + // Each subgraph has 2 nodes. + for sg in &result.subgraphs { + assert_eq!(sg.nodes.len(), 2); + assert_eq!(sg.edges.len(), 2); + } + } + + #[test] + fn mixed_graph_excludes_acyclic_nodes() { + // x -> a -> b -> a, b -> y: only a and b are in a cycle + let g = make_graph( + &[("x", "x"), ("a", "a"), ("b", "b"), ("y", "y")], + &[("x", "a"), ("a", "b"), ("b", "a"), ("b", "y")], + ); + let result = cycles(&g, &CyclesArgs::default()).unwrap(); + assert_eq!(result.subgraphs.len(), 1); + + let sg = &result.subgraphs[0]; + assert_eq!(sorted_node_ids(sg), vec!["a", "b"]); + assert_eq!(sorted_edge_pairs(sg), vec![("a", "b"), ("b", "a")]); + // x and y are not in any cycle + assert!(result.nodes.is_empty()); + } + + #[test] + fn cross_cycle_edges() { + // cycle1: a -> b -> a + // cycle2: c -> d -> c + // cross edge: b -> c + let g = make_graph( + &[("a", "a"), ("b", "b"), ("c", "c"), ("d", "d")], + &[("a", "b"), ("b", "a"), ("c", "d"), ("d", "c"), ("b", "c")], + ); + let result = cycles(&g, &CyclesArgs::default()).unwrap(); + assert_eq!(result.subgraphs.len(), 2); + // The cross-cycle edge b -> c should be at the top level. + assert_eq!(sorted_edge_pairs(&result), vec![("b", "c")]); + } + + #[test] + fn empty_graph() { + let g = DepGraph::default(); + let result = cycles(&g, &CyclesArgs::default()).unwrap(); + assert!(result.nodes.is_empty()); + assert!(result.edges.is_empty()); + assert!(result.subgraphs.is_empty()); + } +} diff --git a/crates/csvizmo-depgraph/src/algorithm/mod.rs b/crates/csvizmo-depgraph/src/algorithm/mod.rs index f5b85db..d43e1b5 100644 --- a/crates/csvizmo-depgraph/src/algorithm/mod.rs +++ b/crates/csvizmo-depgraph/src/algorithm/mod.rs @@ -1,3 +1,5 @@ +pub mod between; +pub mod cycles; pub mod filter; pub mod select; diff --git a/crates/csvizmo-depgraph/src/bin/depfilter.rs b/crates/csvizmo-depgraph/src/bin/depfilter.rs index 0cb8114..29190c8 100644 --- a/crates/csvizmo-depgraph/src/bin/depfilter.rs +++ b/crates/csvizmo-depgraph/src/bin/depfilter.rs @@ -3,6 +3,8 @@ use std::path::PathBuf; use clap::{Parser, Subcommand}; use csvizmo_depgraph::algorithm; +use csvizmo_depgraph::algorithm::between::BetweenArgs; +use csvizmo_depgraph::algorithm::cycles::CyclesArgs; use csvizmo_depgraph::algorithm::filter::FilterArgs; use csvizmo_depgraph::algorithm::select::SelectArgs; use csvizmo_depgraph::emit::OutputFormat; @@ -46,6 +48,10 @@ enum Command { Select(SelectArgs), /// Remove nodes matching patterns and optionally cascade to dependencies/ancestors Filter(FilterArgs), + /// Extract the subgraph of all directed paths between matched query nodes + Between(BetweenArgs), + /// Detect cycles (strongly connected components) and output each as a subgraph + Cycles(CyclesArgs), } fn main() -> eyre::Result<()> { @@ -94,6 +100,8 @@ fn main() -> eyre::Result<()> { let graph = match &args.command { Command::Select(select_args) => algorithm::select::select(&graph, select_args)?, Command::Filter(filter_args) => algorithm::filter::filter(&graph, filter_args)?, + Command::Between(between_args) => algorithm::between::between(&graph, between_args)?, + Command::Cycles(cycles_args) => algorithm::cycles::cycles(&graph, cycles_args)?, }; let mut output = get_output_writer(&output_path)?; diff --git a/crates/csvizmo-depgraph/src/emit/tgf.rs b/crates/csvizmo-depgraph/src/emit/tgf.rs index 253c4f5..a56f522 100644 --- a/crates/csvizmo-depgraph/src/emit/tgf.rs +++ b/crates/csvizmo-depgraph/src/emit/tgf.rs @@ -2,13 +2,29 @@ use std::io::Write; use crate::DepGraph; +/// Replace whitespace in a node ID with underscores so it survives TGF roundtripping. +/// +/// TGF uses whitespace to separate tokens, so bare whitespace inside an ID +/// would be mis-parsed on re-read. Tabs and spaces are both replaced. +fn sanitize_id(id: &str) -> String { + if id.contains(|c: char| c.is_ascii_whitespace()) { + id.chars() + .map(|c| if c.is_ascii_whitespace() { '_' } else { c }) + .collect() + } else { + id.to_string() + } +} + /// Emit a [`DepGraph`] as TGF (Trivial Graph Format). /// /// Preserves node IDs, node labels, edge endpoints, and edge labels. /// Graph-level attrs, node attrs, and edge attrs are silently dropped -/// (TGF has no syntax for them). +/// (TGF has no syntax for them). Whitespace in node IDs is replaced +/// with underscores to ensure the output can be parsed back. pub fn emit(graph: &DepGraph, writer: &mut dyn Write) -> eyre::Result<()> { for (id, info) in graph.all_nodes() { + let id = sanitize_id(id); if info.label != *id { writeln!(writer, "{id}\t{}", info.label)?; } else { @@ -19,9 +35,11 @@ pub fn emit(graph: &DepGraph, writer: &mut dyn Write) -> eyre::Result<()> { writeln!(writer, "#")?; for edge in graph.all_edges() { + let from = sanitize_id(&edge.from); + let to = sanitize_id(&edge.to); match &edge.label { - Some(label) => writeln!(writer, "{}\t{}\t{label}", edge.from, edge.to)?, - None => writeln!(writer, "{}\t{}", edge.from, edge.to)?, + Some(label) => writeln!(writer, "{from}\t{to}\t{label}")?, + None => writeln!(writer, "{from}\t{to}")?, } } @@ -106,6 +124,29 @@ mod tests { assert_eq!(output, "a\tAlpha\nb\n#\na\tb\tuses\n"); } + #[test] + fn whitespace_in_ids_replaced_with_underscores() { + let mut nodes = IndexMap::new(); + nodes.insert("my app v1.0".into(), NodeInfo::new("my app")); + nodes.insert("lib foo v2.0".into(), NodeInfo::new("lib foo")); + let graph = DepGraph { + nodes, + edges: vec![Edge { + from: "my app v1.0".into(), + to: "lib foo v2.0".into(), + ..Default::default() + }], + ..Default::default() + }; + let mut buf = Vec::new(); + emit(&graph, &mut buf).unwrap(); + let output = String::from_utf8(buf).unwrap(); + assert_eq!( + output, + "my_app_v1.0\tmy app\nlib_foo_v2.0\tlib foo\n#\nmy_app_v1.0\tlib_foo_v2.0\n" + ); + } + #[test] fn subgraph_nodes_and_edges_included() { let graph = DepGraph { diff --git a/crates/csvizmo-depgraph/tests/depconv.rs b/crates/csvizmo-depgraph/tests/depconv.rs index 891a281..dc7b8a8 100644 --- a/crates/csvizmo-depgraph/tests/depconv.rs +++ b/crates/csvizmo-depgraph/tests/depconv.rs @@ -522,22 +522,22 @@ fn cargo_tree_auto_detect() { assert_eq!(node_lines.len(), 69); assert_eq!(edge_lines.len(), 111); - // Root node should be first + // Root node should be first (spaces in IDs become underscores in TGF) assert!( - node_lines[0].starts_with("csvizmo-depgraph v0.5.0\t"), + node_lines[0].starts_with("csvizmo-depgraph_v0.5.0\t"), "root node should be first, got: {}", node_lines[0] ); // A known dependency should appear as a node assert!( - node_lines.iter().any(|l| l.starts_with("clap v4.5.57\t")), + node_lines.iter().any(|l| l.starts_with("clap_v4.5.57\t")), "clap should be in node list" ); // A known edge should exist assert!( - edge_lines.contains(&"csvizmo-depgraph v0.5.0\tclap v4.5.57"), + edge_lines.contains(&"csvizmo-depgraph_v0.5.0\tclap_v4.5.57"), "root -> clap edge should exist" ); } diff --git a/crates/csvizmo-depgraph/tests/depfilter.rs b/crates/csvizmo-depgraph/tests/depfilter.rs index e09c5f3..5518e30 100644 --- a/crates/csvizmo-depgraph/tests/depfilter.rs +++ b/crates/csvizmo-depgraph/tests/depfilter.rs @@ -452,3 +452,321 @@ digraph { let stdout = String::from_utf8_lossy(&output.stdout); assert_eq!(stdout, "2\tlibbar\n3\tmyapp\n#\n3\t2\n"); } + +// -- between integration tests -- + +#[test] +fn between_two_nodes() { + // a -> b -> c: between a and c includes intermediate b + let graph = "a\nb\nc\n#\na\tb\nb\tc\n"; + let output = tool!("depfilter") + .args([ + "between", + "-p", + "a", + "-p", + "c", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\nb\nc\n#\na\tb\nb\tc\n"); +} + +#[test] +fn between_by_id() { + let output = tool!("depfilter") + .args([ + "between", + "-p", + "1", + "-p", + "2", + "--key", + "id", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(SIMPLE_GRAPH) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "1\tlibfoo\n2\tlibbar\n#\n1\t2\n"); +} + +#[test] +fn between_glob_multiple_nodes() { + // a -> b -> c: glob "?" matches all three, paths exist between all pairs + let graph = "a\nb\nc\n#\na\tb\nb\tc\n"; + let output = tool!("depfilter") + .args([ + "between", + "-p", + "?", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "a\nb\nc\n#\na\tb\nb\tc\n"); +} + +#[test] +fn between_no_matching_patterns() { + let graph = "a\nb\n#\na\tb\n"; + let output = tool!("depfilter") + .args([ + "between", + "-p", + "nonexistent", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "#\n"); +} + +#[test] +fn between_no_path() { + // a -> b, c -> d: no path between a and c + let graph = "a\nb\nc\nd\n#\na\tb\nc\td\n"; + let output = tool!("depfilter") + .args([ + "between", + "-p", + "a", + "-p", + "c", + "--input-format", + "tgf", + "--output-format", + "tgf", + ]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "#\n"); +} + +#[test] +fn between_cargo_metadata_fixture() { + // Use the real cargo-metadata.json fixture to test between on a non-trivial graph. + // csvizmo-depgraph depends on clap both directly and via csvizmo-utils, + // so csvizmo-utils is an intermediate node on a path to clap. + // clap in turn depends on clap_builder, clap_derive, and clap_builder -> clap_lex. + let input = include_str!("../../../data/depconv/cargo-metadata.json"); + let output = tool!("depfilter") + .args([ + "between", + "-p", + "csvizmo-depgraph", + "-p", + "clap*", + "--input-format", + "cargo-metadata", + "--output-format", + "tgf", + ]) + .write_stdin(input) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!( + stdout, + "\ +clap_4.5.57\tclap +clap_builder_4.5.57\tclap_builder +clap_derive_4.5.55\tclap_derive +clap_lex_0.7.7\tclap_lex +csvizmo-depgraph_0.5.0\tcsvizmo-depgraph +csvizmo-utils_0.5.0\tcsvizmo-utils +# +clap_4.5.57\tclap_builder_4.5.57 +clap_4.5.57\tclap_derive_4.5.55 +clap_builder_4.5.57\tclap_lex_0.7.7 +csvizmo-depgraph_0.5.0\tclap_4.5.57 +csvizmo-depgraph_0.5.0\tcsvizmo-utils_0.5.0 +csvizmo-utils_0.5.0\tclap_4.5.57 +" + ); +} + +// -- cycles integration tests -- + +#[test] +fn cycles_dag_no_cycles() { + // a -> b -> c: no cycles, empty output + let graph = "a\nb\nc\n#\na\tb\nb\tc\n"; + let output = tool!("depfilter") + .args(["cycles", "--input-format", "tgf", "--output-format", "tgf"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "#\n"); +} + +#[test] +fn cycles_simple_three_node() { + // a -> b -> c -> a: single cycle with all three nodes + let graph = "a\nb\nc\n#\na\tb\nb\tc\nc\ta\n"; + let output = tool!("depfilter") + .args(["cycles", "--input-format", "tgf", "--output-format", "dot"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!( + stdout, + "\ +digraph { + subgraph cluster_cycle_0 { + label=\"cycle_0\"; + a; + b; + c; + a -> b; + b -> c; + c -> a; + } +} +" + ); +} + +#[test] +fn cycles_multiple_disjoint() { + // cycle1: a <-> b, cycle2: c <-> d (no edges between them) + let graph = "a\nb\nc\nd\n#\na\tb\nb\ta\nc\td\nd\tc\n"; + let output = tool!("depfilter") + .args(["cycles", "--input-format", "tgf", "--output-format", "dot"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + // Both cycles appear as separate subgraphs, no cross-cycle edges. + assert_eq!( + stdout, + "\ +digraph { + subgraph cluster_cycle_0 { + label=\"cycle_0\"; + a; + b; + a -> b; + b -> a; + } + subgraph cluster_cycle_1 { + label=\"cycle_1\"; + c; + d; + c -> d; + d -> c; + } +} +" + ); +} + +#[test] +fn cycles_mixed_graph_excludes_acyclic() { + // x -> a <-> b -> y: only a and b form a cycle; x and y excluded + let graph = "x\na\nb\ny\n#\nx\ta\na\tb\nb\ta\nb\ty\n"; + let output = tool!("depfilter") + .args(["cycles", "--input-format", "tgf", "--output-format", "dot"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!( + stdout, + "\ +digraph { + subgraph cluster_cycle_0 { + label=\"cycle_0\"; + a; + b; + a -> b; + b -> a; + } +} +" + ); +} + +#[test] +fn cycles_cross_cycle_edges_at_top_level() { + // cycle1: a <-> b, cycle2: c <-> d, cross-edge: b -> c + let graph = "a\nb\nc\nd\n#\na\tb\nb\ta\nc\td\nd\tc\nb\tc\n"; + let output = tool!("depfilter") + .args(["cycles", "--input-format", "tgf", "--output-format", "dot"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + // tarjan_scc returns SCCs in reverse topological order: c,d before a,b + assert_eq!( + stdout, + "\ +digraph { + subgraph cluster_cycle_0 { + label=\"cycle_0\"; + c; + d; + c -> d; + d -> c; + } + subgraph cluster_cycle_1 { + label=\"cycle_1\"; + a; + b; + a -> b; + b -> a; + } + b -> c; +} +" + ); +} + +#[test] +fn cycles_self_loop_ignored() { + // a -> a: self-loop is not a cycle (SCC size 1) + let graph = "a\n#\na\ta\n"; + let output = tool!("depfilter") + .args(["cycles", "--input-format", "tgf", "--output-format", "tgf"]) + .write_stdin(graph) + .captured_output() + .unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, "#\n"); +}