summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWilfred Hughes <me@wilfred.me.uk>2023-08-18 09:11:42 -0700
committerWilfred Hughes <me@wilfred.me.uk>2023-08-18 09:11:42 -0700
commit4fa55054310280a209e6825b51dd737890422262 (patch)
tree2aa55d45ed004452f3bbeef9eefdf946ebe943c9
parentb6d8ecbd4f3f863b2c587ff367f5409787f7c2f8 (diff)
-rw-r--r--Cargo.lock7
-rw-r--r--Cargo.toml1
-rw-r--r--src/diff/changes.rs46
-rw-r--r--src/diff/dijkstra.rs19
-rw-r--r--src/diff/graph.rs42
-rw-r--r--src/parse/syntax.rs39
6 files changed, 101 insertions, 53 deletions
diff --git a/Cargo.lock b/Cargo.lock
index f83139601..df5500598 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -245,6 +245,7 @@ dependencies = [
"glob",
"hashbrown 0.12.3",
"humansize",
+ "id-arena",
"itertools",
"lazy_static",
"libc",
@@ -384,6 +385,12 @@ dependencies = [
]
[[package]]
+name = "id-arena"
+version = "2.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "25a2bc672d1148e28034f176e01fffebb08b35768468cc954630da77a1449005"
+
+[[package]]
name = "indexmap"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
index 9eece16f1..b650be654 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -66,6 +66,7 @@ hashbrown = "0.12.3"
humansize = "2.1.3"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
+id-arena = "2.2.1"
[dev-dependencies]
# assert_cmd 2.0.6 requires rust 1.60
diff --git a/src/diff/changes.rs b/src/diff/changes.rs
index 0234be2bc..aae3a0497 100644
--- a/src/diff/changes.rs
+++ b/src/diff/changes.rs
@@ -2,14 +2,14 @@
use crate::{
hash::DftHashMap,
- parse::syntax::{Syntax, SyntaxId},
+ parse::syntax::{Syntax, SyntaxArena, SyntaxArenaId, SyntaxId},
};
#[derive(PartialEq, Eq, Clone, Copy)]
pub enum ChangeKind<'a> {
- Unchanged(&'a Syntax<'a>),
- ReplacedComment(&'a Syntax<'a>, &'a Syntax<'a>),
- ReplacedString(&'a Syntax<'a>, &'a Syntax<'a>),
+ Unchanged(SyntaxArenaId<'a>),
+ ReplacedComment(SyntaxArenaId<'a>, SyntaxArenaId<'a>),
+ ReplacedString(SyntaxArenaId<'a>, SyntaxArenaId<'a>),
Novel,
}
@@ -19,21 +19,36 @@ pub struct ChangeMap<'a> {
}
impl<'a> ChangeMap<'a> {
- pub fn insert(&mut self, node: &'a Syntax<'a>, ck: ChangeKind<'a>) {
+ pub fn insert(
+ &mut self,
+ node_arena: &SyntaxArena<'a>,
+ node_id: SyntaxArenaId<'a>,
+ ck: ChangeKind<'a>,
+ ) {
+ let node = &node_arena[node_id];
self.changes.insert(node.id(), ck);
}
- pub fn get(&self, node: &Syntax<'a>) -> Option<ChangeKind<'a>> {
+ pub fn get(
+ &self,
+ node_arena: &SyntaxArena<'a>,
+ node_id: SyntaxArenaId<'a>,
+ ) -> Option<ChangeKind<'a>> {
+ let node = &node_arena[node_id];
self.changes.get(&node.id()).copied()
}
}
pub fn insert_deep_unchanged<'a>(
- node: &'a Syntax<'a>,
- opposite_node: &'a Syntax<'a>,
+ node_arena: &SyntaxArena<'a>,
+ node_id: SyntaxArenaId<'a>,
+ opposite_node_id: SyntaxArenaId<'a>,
change_map: &mut ChangeMap<'a>,
) {
- change_map.insert(node, ChangeKind::Unchanged(opposite_node));
+ let node = &node_arena[node_id];
+ let opposite_node = &node_arena[opposite_node_id];
+
+ change_map.insert(node_arena, node_id, ChangeKind::Unchanged(opposite_node_id));
match (node, opposite_node) {
(
@@ -47,7 +62,7 @@ pub fn insert_deep_unchanged<'a>(
},
) => {
for (child, opposite_child) in node_children.iter().zip(opposite_children) {
- insert_deep_unchanged(child, opposite_child, change_map);
+ insert_deep_unchanged(node_arena, *child, *opposite_child, change_map);
}
}
(Syntax::Atom { .. }, Syntax::Atom { .. }) => {}
@@ -55,12 +70,17 @@ pub fn insert_deep_unchanged<'a>(
}
}
-pub fn insert_deep_novel<'a>(node: &'a Syntax<'a>, change_map: &mut ChangeMap<'a>) {
- change_map.insert(node, ChangeKind::Novel);
+pub fn insert_deep_novel<'a>(
+ node_arena: &SyntaxArena<'a>,
+ node_id: SyntaxArenaId<'a>,
+ change_map: &mut ChangeMap<'a>,
+) {
+ let node = &node_arena[node_id];
+ change_map.insert(node_arena, node_id, ChangeKind::Novel);
if let Syntax::List { children, .. } = node {
for child in children.iter() {
- insert_deep_novel(child, change_map);
+ insert_deep_novel(node_arena, *child, change_map);
}
}
}
diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs
index 6e311da60..a705b0fcf 100644
--- a/src/diff/dijkstra.rs
+++ b/src/diff/dijkstra.rs
@@ -7,7 +7,7 @@ use crate::{
diff::changes::ChangeMap,
diff::graph::{populate_change_map, set_neighbours, Edge, Vertex},
hash::DftHashMap,
- parse::syntax::Syntax,
+ parse::syntax::{Syntax, SyntaxArena},
};
use bumpalo::Bump;
use itertools::Itertools;
@@ -155,7 +155,7 @@ fn edge_between<'s, 'b>(before: &Vertex<'s, 'b>, after: &Vertex<'s, 'b>) -> Edge
}
/// What is the total number of AST nodes?
-fn node_count(root: Option<&Syntax>) -> u32 {
+fn node_count<'a>(node_arena: &SyntaxArena<'a>, root: Option<&Syntax>) -> u32 {
let mut node = root;
let mut count = 0;
while let Some(current_node) = node {
@@ -167,38 +167,39 @@ fn node_count(root: Option<&Syntax>) -> u32 {
};
count += current_count;
- node = current_node.next_sibling();
+ node = current_node.next_sibling().map(|id| &node_arena[id]);
}
count
}
/// How many top-level AST nodes do we have?
-fn tree_count(root: Option<&Syntax>) -> u32 {
+fn tree_count<'a>(node_arena: &SyntaxArena<'a>, root: Option<&Syntax>) -> u32 {
let mut node = root;
let mut count = 0;
while let Some(current_node) = node {
count += 1;
- node = current_node.next_sibling();
+ node = current_node.next_sibling().map(|id| &node_arena[id]);
}
count
}
pub fn mark_syntax<'a>(
+ node_arena: &SyntaxArena<'a>,
lhs_syntax: Option<&'a Syntax<'a>>,
rhs_syntax: Option<&'a Syntax<'a>>,
change_map: &mut ChangeMap<'a>,
graph_limit: usize,
) -> Result<(), ExceededGraphLimit> {
- let lhs_node_count = node_count(lhs_syntax) as usize;
- let rhs_node_count = node_count(rhs_syntax) as usize;
+ let lhs_node_count = node_count(node_arena, lhs_syntax) as usize;
+ let rhs_node_count = node_count(node_arena, rhs_syntax) as usize;
info!(
"LHS nodes: {} ({} toplevel), RHS nodes: {} ({} toplevel)",
lhs_node_count,
- tree_count(lhs_syntax),
+ tree_count(node_arena, lhs_syntax),
rhs_node_count,
- tree_count(rhs_syntax),
+ tree_count(node_arena, rhs_syntax),
);
// When there are a large number of changes, we end up building a
diff --git a/src/diff/graph.rs b/src/diff/graph.rs
index f9b0ddbcb..becf43f7a 100644
--- a/src/diff/graph.rs
+++ b/src/diff/graph.rs
@@ -16,7 +16,7 @@ use crate::{
stack::Stack,
},
hash::DftHashMap,
- parse::syntax::{AtomKind, Syntax, SyntaxId},
+ parse::syntax::{AtomKind, Syntax, SyntaxArena, SyntaxArenaId, SyntaxId},
};
use Edge::*;
@@ -408,18 +408,22 @@ fn looks_like_punctuation(node: &Syntax) -> bool {
/// Pop as many parents of `lhs_node` and `rhs_node` as
/// possible. Return the new syntax nodes and parents.
fn pop_all_parents<'s>(
- lhs_node: Option<&'s Syntax<'s>>,
- rhs_node: Option<&'s Syntax<'s>>,
+ node_arena: &SyntaxArena<'s>,
+ lhs_node_id: Option<SyntaxArenaId<'s>>,
+ rhs_node_id: Option<SyntaxArenaId<'s>>,
lhs_parent_id: Option<SyntaxId>,
rhs_parent_id: Option<SyntaxId>,
parents: &Stack<EnteredDelimiter<'s>>,
) -> (
- Option<&'s Syntax<'s>>,
- Option<&'s Syntax<'s>>,
+ Option<SyntaxArenaId<'s>>,
+ Option<SyntaxArenaId<'s>>,
Option<SyntaxId>,
Option<SyntaxId>,
Stack<EnteredDelimiter<'s>>,
) {
+ let lhs_node = lhs_node_id.map(|id| &node_arena[id]);
+ let rhs_node = rhs_node_id.map(|id| &node_arena[id]);
+
let mut lhs_node = lhs_node;
let mut rhs_node = rhs_node;
let mut lhs_parent_id = lhs_parent_id;
@@ -432,8 +436,8 @@ fn pop_all_parents<'s>(
// Move to next after LHS parent.
// Continue from sibling of parent.
- lhs_node = lhs_parent.next_sibling();
- lhs_parent_id = lhs_parent.parent().map(Syntax::id);
+ lhs_node = lhs_parent.next_sibling().map(|id| &node_arena[id]);
+ lhs_parent_id = lhs_parent.parent().map(|id| node_arena[id].id());
parents = parents_next;
continue;
}
@@ -444,8 +448,8 @@ fn pop_all_parents<'s>(
// Move to next after RHS parent.
// Continue from sibling of parent.
- rhs_node = rhs_parent.next_sibling();
- rhs_parent_id = rhs_parent.parent().map(Syntax::id);
+ rhs_node = rhs_parent.next_sibling().map(|id| &node_arena[id]);
+ rhs_parent_id = rhs_parent.parent().map(|id| node_arena[id].id());
parents = parents_next;
continue;
}
@@ -457,10 +461,12 @@ fn pop_all_parents<'s>(
// Continue from sibling of parent.
if let Some((lhs_parent, rhs_parent, parents_next)) = try_pop_both(&parents) {
- lhs_node = lhs_parent.next_sibling();
- rhs_node = rhs_parent.next_sibling();
- lhs_parent_id = lhs_parent.parent().map(Syntax::id);
- rhs_parent_id = rhs_parent.parent().map(Syntax::id);
+ lhs_node = lhs_parent.next_sibling().map(|id| &node_arena[id]);
+ lhs_parent_id = lhs_parent.parent().map(|id| node_arena[id].id());
+
+ rhs_node = rhs_parent.next_sibling().map(|id| &node_arena[id]);
+ rhs_parent_id = rhs_parent.parent().map(|id| node_arena[id].id());
+
parents = parents_next;
continue;
}
@@ -469,12 +475,19 @@ fn pop_all_parents<'s>(
break;
}
- (lhs_node, rhs_node, lhs_parent_id, rhs_parent_id, parents)
+ (
+ lhs_node_id,
+ rhs_node_id,
+ lhs_parent_id,
+ rhs_parent_id,
+ parents,
+ )
}
/// Compute the neighbours of `v` if we haven't previously done so,
/// and write them to the .neighbours cell inside `v`.
pub fn set_neighbours<'s, 'b>(
+ node_arena: &SyntaxArena<'s>,
v: &Vertex<'s, 'b>,
alloc: &'b Bump,
seen: &mut DftHashMap<&Vertex<'s, 'b>, Vec<&'b Vertex<'s, 'b>>>,
@@ -496,6 +509,7 @@ pub fn set_neighbours<'s, 'b>(
// Both nodes are equal, the happy case.
let (lhs_syntax, rhs_syntax, lhs_parent_id, rhs_parent_id, parents) = pop_all_parents(
+ node_arena,
lhs_syntax.next_sibling(),
rhs_syntax.next_sibling(),
v.lhs_parent_id,
diff --git a/src/parse/syntax.rs b/src/parse/syntax.rs
index 79d9bd171..2a05eb9fe 100644
--- a/src/parse/syntax.rs
+++ b/src/parse/syntax.rs
@@ -4,6 +4,8 @@
use std::{cell::Cell, env, fmt, hash::Hash, num::NonZeroU32};
use typed_arena::Arena;
+use id_arena::Arena as IdArena;
+use id_arena::Id;
use crate::{
diff::changes::ChangeKind,
@@ -42,6 +44,9 @@ impl<'a> fmt::Debug for ChangeKind<'a> {
}
}
+pub type SyntaxArena<'a> = IdArena<Syntax<'a>>;
+pub type SyntaxArenaId<'a> = Id<Syntax<'a>>;
+
pub type SyntaxId = NonZeroU32;
/// Fields that are common to both `Syntax::List` and `Syntax::Atom`.
@@ -98,7 +103,7 @@ pub enum Syntax<'a> {
info: SyntaxInfo<'a>,
open_position: Vec<SingleLineSpan>,
open_content: String,
- children: Vec<&'a Syntax<'a>>,
+ children: Vec<SyntaxArenaId<'a>>,
close_position: Vec<SingleLineSpan>,
close_content: String,
num_descendants: u32,
@@ -193,10 +198,10 @@ impl<'a> Syntax<'a> {
arena: &'a Arena<Syntax<'a>>,
open_content: &str,
open_position: Vec<SingleLineSpan>,
- children: Vec<&'a Syntax<'a>>,
+ children: Vec<SyntaxArenaId<'a>>,
close_content: &str,
close_position: Vec<SingleLineSpan>,
- ) -> &'a Syntax<'a> {
+ ) -> SyntaxArenaId<'a> {
// Skip empty atoms: they aren't displayed, so there's no
// point making our syntax tree bigger. These occur when we're
// parsing incomplete or malformed programs.
@@ -247,7 +252,7 @@ impl<'a> Syntax<'a> {
mut position: Vec<SingleLineSpan>,
mut content: &str,
kind: AtomKind,
- ) -> &'a Syntax<'a> {
+ ) -> SyntaxArenaId<'a> {
// If a parser hasn't cleaned up \r on CRLF files with
// comments, discard it.
if content.ends_with('\r') {
@@ -273,11 +278,11 @@ impl<'a> Syntax<'a> {
}
}
- pub fn parent(&self) -> Option<&'a Syntax<'a>> {
+ pub fn parent(&self) -> Option<SyntaxArenaId<'a>> {
self.info().parent.get()
}
- pub fn next_sibling(&self) -> Option<&'a Syntax<'a>> {
+ pub fn next_sibling(&self) -> Option<SyntaxArenaId<'a>> {
self.info().next_sibling.get()
}
@@ -330,7 +335,7 @@ impl<'a> Syntax<'a> {
}
}
-pub fn comment_positions<'a>(nodes: &[&'a Syntax<'a>]) -> Vec<SingleLineSpan> {
+pub fn comment_positions<'a>(nodes: &[SyntaxArenaId<'a>]) -> Vec<SingleLineSpan> {
fn walk_comment_positions(node: &Syntax<'_>, positions: &mut Vec<SingleLineSpan>) {
match node {
List { children, .. } => {
@@ -355,13 +360,13 @@ pub fn comment_positions<'a>(nodes: &[&'a Syntax<'a>]) -> Vec<SingleLineSpan> {
}
/// Initialise all the fields in `SyntaxInfo`.
-pub fn init_all_info<'a>(lhs_roots: &[&'a Syntax<'a>], rhs_roots: &[&'a Syntax<'a>]) {
+pub fn init_all_info<'a>(lhs_roots: &[SyntaxArenaId<'a>], rhs_roots: &[SyntaxArenaId<'a>]) {
init_info(lhs_roots, rhs_roots);
init_next_prev(lhs_roots);
init_next_prev(rhs_roots);
}
-fn init_info<'a>(lhs_roots: &[&'a Syntax<'a>], rhs_roots: &[&'a Syntax<'a>]) {
+fn init_info<'a>(lhs_roots: &[SyntaxArenaId<'a>], rhs_roots: &[SyntaxArenaId<'a>]) {
let mut id = NonZeroU32::new(1).unwrap();
init_info_on_side(lhs_roots, &mut id);
init_info_on_side(rhs_roots, &mut id);
@@ -437,7 +442,7 @@ fn set_num_after(nodes: &[&Syntax], parent_num_after: usize) {
}
}
}
-pub fn init_next_prev<'a>(roots: &[&'a Syntax<'a>]) {
+pub fn init_next_prev<'a>(roots: &[SyntaxArenaId<'a>]) {
set_prev_sibling(roots);
set_next_sibling(roots);
set_prev(roots, None);
@@ -445,7 +450,7 @@ pub fn init_next_prev<'a>(roots: &[&'a Syntax<'a>]) {
/// Set all the `SyntaxInfo` values for all the `roots` on a single
/// side (LHS or RHS).
-fn init_info_on_side<'a>(roots: &[&'a Syntax<'a>], next_id: &mut SyntaxId) {
+fn init_info_on_side<'a>(roots: &[SyntaxArenaId<'a>], next_id: &mut SyntaxId) {
set_parent(roots, None);
set_num_ancestors(roots, 0);
set_num_after(roots, 0);
@@ -492,7 +497,7 @@ fn set_content_is_unique(nodes: &[&Syntax]) {
set_content_is_unique_from_counts(nodes, &counts);
}
-fn set_prev_sibling<'a>(nodes: &[&'a Syntax<'a>]) {
+fn set_prev_sibling<'a>(nodes: &[SyntaxArenaId<'a>]) {
let mut prev = None;
for node in nodes {
@@ -505,7 +510,7 @@ fn set_prev_sibling<'a>(nodes: &[&'a Syntax<'a>]) {
}
}
-fn set_next_sibling<'a>(nodes: &[&'a Syntax<'a>]) {
+fn set_next_sibling<'a>(nodes: &[SyntaxArenaId<'a>]) {
for (i, node) in nodes.iter().enumerate() {
let sibling = nodes.get(i + 1).copied();
node.info().next_sibling.set(sibling);
@@ -518,7 +523,7 @@ fn set_next_sibling<'a>(nodes: &[&'a Syntax<'a>]) {
/// For every syntax node in the tree, mark the previous node
/// according to a preorder traversal.
-fn set_prev<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) {
+fn set_prev<'a>(nodes: &[SyntaxArenaId<'a>], parent: Option<SyntaxArenaId<'a>>) {
for (i, node) in nodes.iter().enumerate() {
let node_prev = if i == 0 { parent } else { Some(nodes[i - 1]) };
@@ -529,7 +534,7 @@ fn set_prev<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) {
}
}
-fn set_parent<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) {
+fn set_parent<'a>(nodes: &[SyntaxArenaId<'a>], parent: Option<SyntaxArenaId<'a>>) {
for node in nodes {
node.info().parent.set(parent);
if let List { children, .. } = node {
@@ -928,7 +933,7 @@ impl MatchedPos {
/// Walk `nodes` and return a vec of all the changed positions.
pub fn change_positions<'a>(
- nodes: &[&'a Syntax<'a>],
+ nodes: &[SyntaxArenaId<'a>],
change_map: &ChangeMap<'a>,
) -> Vec<MatchedPos> {
let mut positions = Vec::new();
@@ -937,7 +942,7 @@ pub fn change_positions<'a>(
}
fn change_positions_<'a>(
- nodes: &[&'a Syntax<'a>],
+ nodes: &[SyntaxArenaId<'a>],
change_map: &ChangeMap<'a>,
positions: &mut Vec<MatchedPos>,
) {