diff options
author | Wilfred Hughes <me@wilfred.me.uk> | 2023-08-18 09:11:42 -0700 |
---|---|---|
committer | Wilfred Hughes <me@wilfred.me.uk> | 2023-08-18 09:11:42 -0700 |
commit | 4fa55054310280a209e6825b51dd737890422262 (patch) | |
tree | 2aa55d45ed004452f3bbeef9eefdf946ebe943c9 | |
parent | b6d8ecbd4f3f863b2c587ff367f5409787f7c2f8 (diff) |
-rw-r--r-- | Cargo.lock | 7 | ||||
-rw-r--r-- | Cargo.toml | 1 | ||||
-rw-r--r-- | src/diff/changes.rs | 46 | ||||
-rw-r--r-- | src/diff/dijkstra.rs | 19 | ||||
-rw-r--r-- | src/diff/graph.rs | 42 | ||||
-rw-r--r-- | src/parse/syntax.rs | 39 |
6 files changed, 101 insertions, 53 deletions
diff --git a/Cargo.lock b/Cargo.lock index f83139601..df5500598 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -245,6 +245,7 @@ dependencies = [ "glob", "hashbrown 0.12.3", "humansize", + "id-arena", "itertools", "lazy_static", "libc", @@ -384,6 +385,12 @@ dependencies = [ ] [[package]] +name = "id-arena" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25a2bc672d1148e28034f176e01fffebb08b35768468cc954630da77a1449005" + +[[package]] name = "indexmap" version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" diff --git a/Cargo.toml b/Cargo.toml index 9eece16f1..b650be654 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,6 +66,7 @@ hashbrown = "0.12.3" humansize = "2.1.3" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" +id-arena = "2.2.1" [dev-dependencies] # assert_cmd 2.0.6 requires rust 1.60 diff --git a/src/diff/changes.rs b/src/diff/changes.rs index 0234be2bc..aae3a0497 100644 --- a/src/diff/changes.rs +++ b/src/diff/changes.rs @@ -2,14 +2,14 @@ use crate::{ hash::DftHashMap, - parse::syntax::{Syntax, SyntaxId}, + parse::syntax::{Syntax, SyntaxArena, SyntaxArenaId, SyntaxId}, }; #[derive(PartialEq, Eq, Clone, Copy)] pub enum ChangeKind<'a> { - Unchanged(&'a Syntax<'a>), - ReplacedComment(&'a Syntax<'a>, &'a Syntax<'a>), - ReplacedString(&'a Syntax<'a>, &'a Syntax<'a>), + Unchanged(SyntaxArenaId<'a>), + ReplacedComment(SyntaxArenaId<'a>, SyntaxArenaId<'a>), + ReplacedString(SyntaxArenaId<'a>, SyntaxArenaId<'a>), Novel, } @@ -19,21 +19,36 @@ pub struct ChangeMap<'a> { } impl<'a> ChangeMap<'a> { - pub fn insert(&mut self, node: &'a Syntax<'a>, ck: ChangeKind<'a>) { + pub fn insert( + &mut self, + node_arena: &SyntaxArena<'a>, + node_id: SyntaxArenaId<'a>, + ck: ChangeKind<'a>, + ) { + let node = &node_arena[node_id]; self.changes.insert(node.id(), ck); } - pub fn get(&self, node: &Syntax<'a>) -> Option<ChangeKind<'a>> { + pub fn get( + &self, + node_arena: &SyntaxArena<'a>, + node_id: SyntaxArenaId<'a>, + ) -> Option<ChangeKind<'a>> { + let node = &node_arena[node_id]; self.changes.get(&node.id()).copied() } } pub fn insert_deep_unchanged<'a>( - node: &'a Syntax<'a>, - opposite_node: &'a Syntax<'a>, + node_arena: &SyntaxArena<'a>, + node_id: SyntaxArenaId<'a>, + opposite_node_id: SyntaxArenaId<'a>, change_map: &mut ChangeMap<'a>, ) { - change_map.insert(node, ChangeKind::Unchanged(opposite_node)); + let node = &node_arena[node_id]; + let opposite_node = &node_arena[opposite_node_id]; + + change_map.insert(node_arena, node_id, ChangeKind::Unchanged(opposite_node_id)); match (node, opposite_node) { ( @@ -47,7 +62,7 @@ pub fn insert_deep_unchanged<'a>( }, ) => { for (child, opposite_child) in node_children.iter().zip(opposite_children) { - insert_deep_unchanged(child, opposite_child, change_map); + insert_deep_unchanged(node_arena, *child, *opposite_child, change_map); } } (Syntax::Atom { .. }, Syntax::Atom { .. }) => {} @@ -55,12 +70,17 @@ pub fn insert_deep_unchanged<'a>( } } -pub fn insert_deep_novel<'a>(node: &'a Syntax<'a>, change_map: &mut ChangeMap<'a>) { - change_map.insert(node, ChangeKind::Novel); +pub fn insert_deep_novel<'a>( + node_arena: &SyntaxArena<'a>, + node_id: SyntaxArenaId<'a>, + change_map: &mut ChangeMap<'a>, +) { + let node = &node_arena[node_id]; + change_map.insert(node_arena, node_id, ChangeKind::Novel); if let Syntax::List { children, .. } = node { for child in children.iter() { - insert_deep_novel(child, change_map); + insert_deep_novel(node_arena, *child, change_map); } } } diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 6e311da60..a705b0fcf 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -7,7 +7,7 @@ use crate::{ diff::changes::ChangeMap, diff::graph::{populate_change_map, set_neighbours, Edge, Vertex}, hash::DftHashMap, - parse::syntax::Syntax, + parse::syntax::{Syntax, SyntaxArena}, }; use bumpalo::Bump; use itertools::Itertools; @@ -155,7 +155,7 @@ fn edge_between<'s, 'b>(before: &Vertex<'s, 'b>, after: &Vertex<'s, 'b>) -> Edge } /// What is the total number of AST nodes? -fn node_count(root: Option<&Syntax>) -> u32 { +fn node_count<'a>(node_arena: &SyntaxArena<'a>, root: Option<&Syntax>) -> u32 { let mut node = root; let mut count = 0; while let Some(current_node) = node { @@ -167,38 +167,39 @@ fn node_count(root: Option<&Syntax>) -> u32 { }; count += current_count; - node = current_node.next_sibling(); + node = current_node.next_sibling().map(|id| &node_arena[id]); } count } /// How many top-level AST nodes do we have? -fn tree_count(root: Option<&Syntax>) -> u32 { +fn tree_count<'a>(node_arena: &SyntaxArena<'a>, root: Option<&Syntax>) -> u32 { let mut node = root; let mut count = 0; while let Some(current_node) = node { count += 1; - node = current_node.next_sibling(); + node = current_node.next_sibling().map(|id| &node_arena[id]); } count } pub fn mark_syntax<'a>( + node_arena: &SyntaxArena<'a>, lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>, change_map: &mut ChangeMap<'a>, graph_limit: usize, ) -> Result<(), ExceededGraphLimit> { - let lhs_node_count = node_count(lhs_syntax) as usize; - let rhs_node_count = node_count(rhs_syntax) as usize; + let lhs_node_count = node_count(node_arena, lhs_syntax) as usize; + let rhs_node_count = node_count(node_arena, rhs_syntax) as usize; info!( "LHS nodes: {} ({} toplevel), RHS nodes: {} ({} toplevel)", lhs_node_count, - tree_count(lhs_syntax), + tree_count(node_arena, lhs_syntax), rhs_node_count, - tree_count(rhs_syntax), + tree_count(node_arena, rhs_syntax), ); // When there are a large number of changes, we end up building a diff --git a/src/diff/graph.rs b/src/diff/graph.rs index f9b0ddbcb..becf43f7a 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -16,7 +16,7 @@ use crate::{ stack::Stack, }, hash::DftHashMap, - parse::syntax::{AtomKind, Syntax, SyntaxId}, + parse::syntax::{AtomKind, Syntax, SyntaxArena, SyntaxArenaId, SyntaxId}, }; use Edge::*; @@ -408,18 +408,22 @@ fn looks_like_punctuation(node: &Syntax) -> bool { /// Pop as many parents of `lhs_node` and `rhs_node` as /// possible. Return the new syntax nodes and parents. fn pop_all_parents<'s>( - lhs_node: Option<&'s Syntax<'s>>, - rhs_node: Option<&'s Syntax<'s>>, + node_arena: &SyntaxArena<'s>, + lhs_node_id: Option<SyntaxArenaId<'s>>, + rhs_node_id: Option<SyntaxArenaId<'s>>, lhs_parent_id: Option<SyntaxId>, rhs_parent_id: Option<SyntaxId>, parents: &Stack<EnteredDelimiter<'s>>, ) -> ( - Option<&'s Syntax<'s>>, - Option<&'s Syntax<'s>>, + Option<SyntaxArenaId<'s>>, + Option<SyntaxArenaId<'s>>, Option<SyntaxId>, Option<SyntaxId>, Stack<EnteredDelimiter<'s>>, ) { + let lhs_node = lhs_node_id.map(|id| &node_arena[id]); + let rhs_node = rhs_node_id.map(|id| &node_arena[id]); + let mut lhs_node = lhs_node; let mut rhs_node = rhs_node; let mut lhs_parent_id = lhs_parent_id; @@ -432,8 +436,8 @@ fn pop_all_parents<'s>( // Move to next after LHS parent. // Continue from sibling of parent. - lhs_node = lhs_parent.next_sibling(); - lhs_parent_id = lhs_parent.parent().map(Syntax::id); + lhs_node = lhs_parent.next_sibling().map(|id| &node_arena[id]); + lhs_parent_id = lhs_parent.parent().map(|id| node_arena[id].id()); parents = parents_next; continue; } @@ -444,8 +448,8 @@ fn pop_all_parents<'s>( // Move to next after RHS parent. // Continue from sibling of parent. - rhs_node = rhs_parent.next_sibling(); - rhs_parent_id = rhs_parent.parent().map(Syntax::id); + rhs_node = rhs_parent.next_sibling().map(|id| &node_arena[id]); + rhs_parent_id = rhs_parent.parent().map(|id| node_arena[id].id()); parents = parents_next; continue; } @@ -457,10 +461,12 @@ fn pop_all_parents<'s>( // Continue from sibling of parent. if let Some((lhs_parent, rhs_parent, parents_next)) = try_pop_both(&parents) { - lhs_node = lhs_parent.next_sibling(); - rhs_node = rhs_parent.next_sibling(); - lhs_parent_id = lhs_parent.parent().map(Syntax::id); - rhs_parent_id = rhs_parent.parent().map(Syntax::id); + lhs_node = lhs_parent.next_sibling().map(|id| &node_arena[id]); + lhs_parent_id = lhs_parent.parent().map(|id| node_arena[id].id()); + + rhs_node = rhs_parent.next_sibling().map(|id| &node_arena[id]); + rhs_parent_id = rhs_parent.parent().map(|id| node_arena[id].id()); + parents = parents_next; continue; } @@ -469,12 +475,19 @@ fn pop_all_parents<'s>( break; } - (lhs_node, rhs_node, lhs_parent_id, rhs_parent_id, parents) + ( + lhs_node_id, + rhs_node_id, + lhs_parent_id, + rhs_parent_id, + parents, + ) } /// Compute the neighbours of `v` if we haven't previously done so, /// and write them to the .neighbours cell inside `v`. pub fn set_neighbours<'s, 'b>( + node_arena: &SyntaxArena<'s>, v: &Vertex<'s, 'b>, alloc: &'b Bump, seen: &mut DftHashMap<&Vertex<'s, 'b>, Vec<&'b Vertex<'s, 'b>>>, @@ -496,6 +509,7 @@ pub fn set_neighbours<'s, 'b>( // Both nodes are equal, the happy case. let (lhs_syntax, rhs_syntax, lhs_parent_id, rhs_parent_id, parents) = pop_all_parents( + node_arena, lhs_syntax.next_sibling(), rhs_syntax.next_sibling(), v.lhs_parent_id, diff --git a/src/parse/syntax.rs b/src/parse/syntax.rs index 79d9bd171..2a05eb9fe 100644 --- a/src/parse/syntax.rs +++ b/src/parse/syntax.rs @@ -4,6 +4,8 @@ use std::{cell::Cell, env, fmt, hash::Hash, num::NonZeroU32}; use typed_arena::Arena; +use id_arena::Arena as IdArena; +use id_arena::Id; use crate::{ diff::changes::ChangeKind, @@ -42,6 +44,9 @@ impl<'a> fmt::Debug for ChangeKind<'a> { } } +pub type SyntaxArena<'a> = IdArena<Syntax<'a>>; +pub type SyntaxArenaId<'a> = Id<Syntax<'a>>; + pub type SyntaxId = NonZeroU32; /// Fields that are common to both `Syntax::List` and `Syntax::Atom`. @@ -98,7 +103,7 @@ pub enum Syntax<'a> { info: SyntaxInfo<'a>, open_position: Vec<SingleLineSpan>, open_content: String, - children: Vec<&'a Syntax<'a>>, + children: Vec<SyntaxArenaId<'a>>, close_position: Vec<SingleLineSpan>, close_content: String, num_descendants: u32, @@ -193,10 +198,10 @@ impl<'a> Syntax<'a> { arena: &'a Arena<Syntax<'a>>, open_content: &str, open_position: Vec<SingleLineSpan>, - children: Vec<&'a Syntax<'a>>, + children: Vec<SyntaxArenaId<'a>>, close_content: &str, close_position: Vec<SingleLineSpan>, - ) -> &'a Syntax<'a> { + ) -> SyntaxArenaId<'a> { // Skip empty atoms: they aren't displayed, so there's no // point making our syntax tree bigger. These occur when we're // parsing incomplete or malformed programs. @@ -247,7 +252,7 @@ impl<'a> Syntax<'a> { mut position: Vec<SingleLineSpan>, mut content: &str, kind: AtomKind, - ) -> &'a Syntax<'a> { + ) -> SyntaxArenaId<'a> { // If a parser hasn't cleaned up \r on CRLF files with // comments, discard it. if content.ends_with('\r') { @@ -273,11 +278,11 @@ impl<'a> Syntax<'a> { } } - pub fn parent(&self) -> Option<&'a Syntax<'a>> { + pub fn parent(&self) -> Option<SyntaxArenaId<'a>> { self.info().parent.get() } - pub fn next_sibling(&self) -> Option<&'a Syntax<'a>> { + pub fn next_sibling(&self) -> Option<SyntaxArenaId<'a>> { self.info().next_sibling.get() } @@ -330,7 +335,7 @@ impl<'a> Syntax<'a> { } } -pub fn comment_positions<'a>(nodes: &[&'a Syntax<'a>]) -> Vec<SingleLineSpan> { +pub fn comment_positions<'a>(nodes: &[SyntaxArenaId<'a>]) -> Vec<SingleLineSpan> { fn walk_comment_positions(node: &Syntax<'_>, positions: &mut Vec<SingleLineSpan>) { match node { List { children, .. } => { @@ -355,13 +360,13 @@ pub fn comment_positions<'a>(nodes: &[&'a Syntax<'a>]) -> Vec<SingleLineSpan> { } /// Initialise all the fields in `SyntaxInfo`. -pub fn init_all_info<'a>(lhs_roots: &[&'a Syntax<'a>], rhs_roots: &[&'a Syntax<'a>]) { +pub fn init_all_info<'a>(lhs_roots: &[SyntaxArenaId<'a>], rhs_roots: &[SyntaxArenaId<'a>]) { init_info(lhs_roots, rhs_roots); init_next_prev(lhs_roots); init_next_prev(rhs_roots); } -fn init_info<'a>(lhs_roots: &[&'a Syntax<'a>], rhs_roots: &[&'a Syntax<'a>]) { +fn init_info<'a>(lhs_roots: &[SyntaxArenaId<'a>], rhs_roots: &[SyntaxArenaId<'a>]) { let mut id = NonZeroU32::new(1).unwrap(); init_info_on_side(lhs_roots, &mut id); init_info_on_side(rhs_roots, &mut id); @@ -437,7 +442,7 @@ fn set_num_after(nodes: &[&Syntax], parent_num_after: usize) { } } } -pub fn init_next_prev<'a>(roots: &[&'a Syntax<'a>]) { +pub fn init_next_prev<'a>(roots: &[SyntaxArenaId<'a>]) { set_prev_sibling(roots); set_next_sibling(roots); set_prev(roots, None); @@ -445,7 +450,7 @@ pub fn init_next_prev<'a>(roots: &[&'a Syntax<'a>]) { /// Set all the `SyntaxInfo` values for all the `roots` on a single /// side (LHS or RHS). -fn init_info_on_side<'a>(roots: &[&'a Syntax<'a>], next_id: &mut SyntaxId) { +fn init_info_on_side<'a>(roots: &[SyntaxArenaId<'a>], next_id: &mut SyntaxId) { set_parent(roots, None); set_num_ancestors(roots, 0); set_num_after(roots, 0); @@ -492,7 +497,7 @@ fn set_content_is_unique(nodes: &[&Syntax]) { set_content_is_unique_from_counts(nodes, &counts); } -fn set_prev_sibling<'a>(nodes: &[&'a Syntax<'a>]) { +fn set_prev_sibling<'a>(nodes: &[SyntaxArenaId<'a>]) { let mut prev = None; for node in nodes { @@ -505,7 +510,7 @@ fn set_prev_sibling<'a>(nodes: &[&'a Syntax<'a>]) { } } -fn set_next_sibling<'a>(nodes: &[&'a Syntax<'a>]) { +fn set_next_sibling<'a>(nodes: &[SyntaxArenaId<'a>]) { for (i, node) in nodes.iter().enumerate() { let sibling = nodes.get(i + 1).copied(); node.info().next_sibling.set(sibling); @@ -518,7 +523,7 @@ fn set_next_sibling<'a>(nodes: &[&'a Syntax<'a>]) { /// For every syntax node in the tree, mark the previous node /// according to a preorder traversal. -fn set_prev<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) { +fn set_prev<'a>(nodes: &[SyntaxArenaId<'a>], parent: Option<SyntaxArenaId<'a>>) { for (i, node) in nodes.iter().enumerate() { let node_prev = if i == 0 { parent } else { Some(nodes[i - 1]) }; @@ -529,7 +534,7 @@ fn set_prev<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) { } } -fn set_parent<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) { +fn set_parent<'a>(nodes: &[SyntaxArenaId<'a>], parent: Option<SyntaxArenaId<'a>>) { for node in nodes { node.info().parent.set(parent); if let List { children, .. } = node { @@ -928,7 +933,7 @@ impl MatchedPos { /// Walk `nodes` and return a vec of all the changed positions. pub fn change_positions<'a>( - nodes: &[&'a Syntax<'a>], + nodes: &[SyntaxArenaId<'a>], change_map: &ChangeMap<'a>, ) -> Vec<MatchedPos> { let mut positions = Vec::new(); @@ -937,7 +942,7 @@ pub fn change_positions<'a>( } fn change_positions_<'a>( - nodes: &[&'a Syntax<'a>], + nodes: &[SyntaxArenaId<'a>], change_map: &ChangeMap<'a>, positions: &mut Vec<MatchedPos>, ) { |