summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWilfred Hughes <me@wilfred.me.uk>2023-08-18 00:54:07 -0700
committerWilfred Hughes <me@wilfred.me.uk>2023-08-18 08:44:33 -0700
commitaffce75a37ab620980be0c54626b0079ebb17940 (patch)
tree7653138cecbc970b2a3405886051b1a927fb5259
parentb6d8ecbd4f3f863b2c587ff367f5409787f7c2f8 (diff)
Use id-arena for Vertexid_arena_for_vertex
-rw-r--r--Cargo.lock7
-rw-r--r--Cargo.toml1
-rw-r--r--src/diff/dijkstra.rs148
-rw-r--r--src/diff/graph.rs67
4 files changed, 126 insertions, 97 deletions
diff --git a/Cargo.lock b/Cargo.lock
index f83139601..df5500598 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -245,6 +245,7 @@ dependencies = [
"glob",
"hashbrown 0.12.3",
"humansize",
+ "id-arena",
"itertools",
"lazy_static",
"libc",
@@ -384,6 +385,12 @@ dependencies = [
]
[[package]]
+name = "id-arena"
+version = "2.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "25a2bc672d1148e28034f176e01fffebb08b35768468cc954630da77a1449005"
+
+[[package]]
name = "indexmap"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
index 9eece16f1..b650be654 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -66,6 +66,7 @@ hashbrown = "0.12.3"
humansize = "2.1.3"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
+id-arena = "2.2.1"
[dev-dependencies]
# assert_cmd 2.0.6 requires rust 1.60
diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs
index 6e311da60..029c28465 100644
--- a/src/diff/dijkstra.rs
+++ b/src/diff/dijkstra.rs
@@ -5,11 +5,11 @@ use std::{cmp::Reverse, env};
use crate::{
diff::changes::ChangeMap,
- diff::graph::{populate_change_map, set_neighbours, Edge, Vertex},
+ diff::graph::{populate_change_map, set_neighbours, Edge, Vertex, VertexId},
hash::DftHashMap,
parse::syntax::Syntax,
};
-use bumpalo::Bump;
+use id_arena::Arena;
use itertools::Itertools;
use radix_heap::RadixHeapMap;
@@ -17,32 +17,36 @@ use radix_heap::RadixHeapMap;
pub struct ExceededGraphLimit {}
/// Return the shortest route from `start` to the end vertex.
-fn shortest_vertex_path<'s, 'b>(
- start: &'b Vertex<'s, 'b>,
- vertex_arena: &'b Bump,
+fn shortest_vertex_path<'s>(
+ start: VertexId<'s>,
+ vertex_arena: &mut Arena<Vertex<'s>>,
size_hint: usize,
graph_limit: usize,
-) -> Result<Vec<&'b Vertex<'s, 'b>>, ExceededGraphLimit> {
+) -> Result<Vec<VertexId<'s>>, ExceededGraphLimit> {
// We want to visit nodes with the shortest distance first, but
// RadixHeapMap is a max-heap. Ensure nodes are wrapped with
// Reverse to flip comparisons.
- let mut heap: RadixHeapMap<Reverse<_>, &'b Vertex<'s, 'b>> = RadixHeapMap::new();
+ let mut heap: RadixHeapMap<Reverse<_>, VertexId<'s>> = RadixHeapMap::new();
heap.push(Reverse(0), start);
let mut seen = DftHashMap::default();
seen.reserve(size_hint);
- let end: &'b Vertex<'s, 'b> = loop {
+ let end: VertexId<'s> = loop {
match heap.pop() {
- Some((Reverse(distance), current)) => {
- if current.is_end() {
- break current;
+ Some((Reverse(distance), current_id)) => {
+ {
+ if (vertex_arena[current_id]).is_end() {
+ break current_id;
+ }
}
- set_neighbours(current, vertex_arena, &mut seen);
+ set_neighbours(current_id, vertex_arena, &mut seen);
+ let current = &vertex_arena[current_id];
for neighbour in current.neighbours.borrow().as_ref().unwrap() {
- let (edge, next) = neighbour;
+ let (edge, next_id) = neighbour;
+ let next = &vertex_arena[*next_id];
let distance_to_next = distance + edge.cost();
let found_shorter_route = match next.predecessor.get() {
@@ -51,15 +55,16 @@ fn shortest_vertex_path<'s, 'b>(
};
if found_shorter_route {
- next.predecessor.replace(Some((distance_to_next, current)));
- heap.push(Reverse(distance_to_next), next);
+ next.predecessor
+ .replace(Some((distance_to_next, current_id)));
+ heap.push(Reverse(distance_to_next), *next_id);
}
}
if seen.len() > graph_limit {
info!(
- "Reached graph limit, arena consumed {} bytes",
- vertex_arena.allocated_bytes(),
+ "Reached graph limit, arena allocated {} items",
+ vertex_arena.len(),
);
return Err(ExceededGraphLimit {});
}
@@ -69,38 +74,39 @@ fn shortest_vertex_path<'s, 'b>(
};
info!(
- "Saw {} vertices (a Vertex is {} bytes), arena consumed {} bytes, with {} vertices left on heap.",
+ "Saw {} vertices (a Vertex is {} bytes), arena allocated {} items, with {} vertices left on heap.",
seen.len(),
std::mem::size_of::<Vertex>(),
- vertex_arena.allocated_bytes(),
+ vertex_arena.len(),
heap.len(),
);
let mut current = Some((0, end));
- let mut vertex_route: Vec<&'b Vertex<'s, 'b>> = vec![];
- while let Some((_, node)) = current {
- vertex_route.push(node);
- current = node.predecessor.get();
+ let mut vertex_route: Vec<VertexId<'s>> = vec![];
+ while let Some((_, node_id)) = current {
+ vertex_route.push(node_id);
+ current = vertex_arena[node_id].predecessor.get();
}
vertex_route.reverse();
Ok(vertex_route)
}
-fn shortest_path_with_edges<'s, 'b>(
- route: &[&'b Vertex<'s, 'b>],
-) -> Vec<(Edge, &'b Vertex<'s, 'b>)> {
+fn shortest_path_with_edges<'s>(
+ route: &[VertexId<'s>],
+ vertex_arena: &Arena<Vertex<'s>>,
+) -> Vec<(Edge, VertexId<'s>)> {
let mut prev = route.first().expect("Expected non-empty route");
let mut cost = 0;
let mut res = vec![];
- for vertex in route.iter().skip(1) {
- let edge = edge_between(prev, vertex);
+ for vertex_id in route.iter().skip(1) {
+ let edge = edge_between(*prev, *vertex_id, vertex_arena);
res.push((edge, *prev));
cost += edge.cost();
- prev = vertex;
+ prev = vertex_id;
}
debug!("Found a path of {} with cost {}.", route.len(), cost);
@@ -111,27 +117,34 @@ fn shortest_path_with_edges<'s, 'b>(
///
/// The vec returned does not return the very last vertex. This is
/// necessary because a route of N vertices only has N-1 edges.
-fn shortest_path<'s, 'b>(
- start: Vertex<'s, 'b>,
- vertex_arena: &'b Bump,
+fn shortest_path<'s>(
+ start: Vertex<'s>,
+ vertex_arena: &mut Arena<Vertex<'s>>,
size_hint: usize,
graph_limit: usize,
-) -> Result<Vec<(Edge, &'b Vertex<'s, 'b>)>, ExceededGraphLimit> {
- let start: &'b Vertex<'s, 'b> = vertex_arena.alloc(start);
+) -> Result<Vec<(Edge, VertexId<'s>)>, ExceededGraphLimit> {
+ let start: VertexId<'s> = vertex_arena.alloc(start);
let vertex_path = shortest_vertex_path(start, vertex_arena, size_hint, graph_limit)?;
- Ok(shortest_path_with_edges(&vertex_path))
+ Ok(shortest_path_with_edges(&vertex_path, vertex_arena))
}
-fn edge_between<'s, 'b>(before: &Vertex<'s, 'b>, after: &Vertex<'s, 'b>) -> Edge {
- assert_ne!(before, after);
+fn edge_between<'s>(
+ before_id: VertexId<'s>,
+ after_id: VertexId<'s>,
+ vertex_arena: &Arena<Vertex<'s>>,
+) -> Edge {
+ assert_ne!(before_id, after_id);
+ let before = &vertex_arena[before_id];
+ let after = &vertex_arena[after_id];
let mut shortest_edge: Option<Edge> = None;
if let Some(neighbours) = &*before.neighbours.borrow() {
for neighbour in neighbours {
- let (edge, next) = *neighbour;
+ let (edge, next_id) = *neighbour;
+
// If there are multiple edges that can take us to `next`,
// prefer the shortest.
- if *next == *after {
+ if next_id == after_id {
let is_shorter = match shortest_edge {
Some(prev_edge) => edge.cost() < prev_edge.cost(),
None => true,
@@ -213,9 +226,9 @@ pub fn mark_syntax<'a>(
let size_hint = std::cmp::min(lhs_node_count * rhs_node_count, graph_limit);
let start = Vertex::new(lhs_syntax, rhs_syntax);
- let vertex_arena = Bump::new();
+ let mut vertex_arena = Arena::new();
- let route = shortest_path(start, &vertex_arena, size_hint, graph_limit)?;
+ let route = shortest_path(start, &mut vertex_arena, size_hint, graph_limit)?;
let print_length = if env::var("DFT_VERBOSE").is_ok() {
50
@@ -227,7 +240,8 @@ pub fn mark_syntax<'a>(
print_length,
route
.iter()
- .map(|(edge, v)| {
+ .map(|(edge, v_id)| {
+ let v = &vertex_arena[*v_id];
format!(
"{:20} {:20} --- {:3} {:?}",
v.lhs_syntax
@@ -242,7 +256,7 @@ pub fn mark_syntax<'a>(
.collect_vec()
);
- populate_change_map(&route, change_map);
+ populate_change_map(&route, &vertex_arena, change_map);
Ok(())
}
@@ -258,7 +272,7 @@ mod tests {
};
use itertools::Itertools;
- use typed_arena::Arena;
+ use typed_arena::Arena as TypedArena;
fn pos_helper(line: u32) -> Vec<SingleLineSpan> {
vec![SingleLineSpan {
@@ -270,7 +284,7 @@ mod tests {
#[test]
fn identical_atoms() {
- let arena = Arena::new();
+ let arena = TypedArena::new();
let lhs = Syntax::new_atom(&arena, pos_helper(0), "foo", AtomKind::Normal);
// Same content as LHS.
@@ -278,8 +292,8 @@ mod tests {
init_all_info(&[lhs], &[rhs]);
let start = Vertex::new(Some(lhs), Some(rhs));
- let vertex_arena = Bump::new();
- let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
+ let mut vertex_arena = Arena::<Vertex>::new();
+ let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let actions = route.iter().map(|(action, _)| *action).collect_vec();
assert_eq!(
@@ -293,7 +307,7 @@ mod tests {
#[test]
fn extra_atom_lhs() {
- let arena = Arena::new();
+ let arena = TypedArena::new();
let lhs = vec![Syntax::new_list(
&arena,
@@ -320,8 +334,8 @@ mod tests {
init_all_info(&lhs, &rhs);
let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
- let vertex_arena = Bump::new();
- let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
+ let mut vertex_arena = Arena::new();
+ let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let actions = route.iter().map(|(action, _)| *action).collect_vec();
assert_eq!(
@@ -337,7 +351,7 @@ mod tests {
#[test]
fn repeated_atoms() {
- let arena = Arena::new();
+ let arena = TypedArena::new();
let lhs = vec![Syntax::new_list(
&arena,
@@ -362,8 +376,8 @@ mod tests {
init_all_info(&lhs, &rhs);
let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
- let vertex_arena = Bump::new();
- let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
+ let mut vertex_arena = Arena::new();
+ let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let actions = route.iter().map(|(action, _)| *action).collect_vec();
assert_eq!(
@@ -380,7 +394,7 @@ mod tests {
#[test]
fn atom_after_empty_list() {
- let arena = Arena::new();
+ let arena = TypedArena::new();
let lhs = vec![Syntax::new_list(
&arena,
@@ -408,8 +422,8 @@ mod tests {
init_all_info(&lhs, &rhs);
let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
- let vertex_arena = Bump::new();
- let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
+ let mut vertex_arena = Arena::new();
+ let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let actions = route.iter().map(|(action, _)| *action).collect_vec();
assert_eq!(
@@ -431,7 +445,7 @@ mod tests {
#[test]
fn replace_similar_comment() {
- let arena = Arena::new();
+ let arena = TypedArena::new();
let lhs = vec![Syntax::new_atom(
&arena,
@@ -449,8 +463,8 @@ mod tests {
init_all_info(&lhs, &rhs);
let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
- let vertex_arena = Bump::new();
- let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
+ let mut vertex_arena = Arena::new();
+ let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let actions = route.iter().map(|(action, _)| *action).collect_vec();
assert_eq!(
@@ -463,7 +477,7 @@ mod tests {
#[test]
fn replace_very_different_comment() {
- let arena = Arena::new();
+ let arena = TypedArena::new();
let lhs = vec![Syntax::new_atom(
&arena,
@@ -481,8 +495,8 @@ mod tests {
init_all_info(&lhs, &rhs);
let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
- let vertex_arena = Bump::new();
- let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
+ let mut vertex_arena = Arena::new();
+ let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let actions = route.iter().map(|(action, _)| *action).collect_vec();
assert_eq!(
@@ -495,7 +509,7 @@ mod tests {
#[test]
fn replace_comment_prefer_most_similar() {
- let arena = Arena::new();
+ let arena = TypedArena::new();
let lhs = vec![
Syntax::new_atom(
@@ -521,8 +535,8 @@ mod tests {
init_all_info(&lhs, &rhs);
let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
- let vertex_arena = Bump::new();
- let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
+ let mut vertex_arena = Arena::new();
+ let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let actions = route.iter().map(|(action, _)| *action).collect_vec();
assert_eq!(
@@ -538,7 +552,7 @@ mod tests {
#[test]
fn mark_syntax_equal_atoms() {
- let arena = Arena::new();
+ let arena = TypedArena::new();
let lhs = Syntax::new_atom(&arena, pos_helper(1), "foo", AtomKind::Normal);
let rhs = Syntax::new_atom(&arena, pos_helper(1), "foo", AtomKind::Normal);
init_all_info(&[lhs], &[rhs]);
@@ -552,7 +566,7 @@ mod tests {
#[test]
fn mark_syntax_different_atoms() {
- let arena = Arena::new();
+ let arena = TypedArena::new();
let lhs = Syntax::new_atom(&arena, pos_helper(1), "foo", AtomKind::Normal);
let rhs = Syntax::new_atom(&arena, pos_helper(1), "bar", AtomKind::Normal);
init_all_info(&[lhs], &[rhs]);
diff --git a/src/diff/graph.rs b/src/diff/graph.rs
index f9b0ddbcb..847852735 100644
--- a/src/diff/graph.rs
+++ b/src/diff/graph.rs
@@ -1,7 +1,7 @@
//! A graph representation for computing tree diffs.
-use bumpalo::Bump;
use hashbrown::hash_map::RawEntryMut;
+use id_arena::{Arena, Id};
use std::{
cell::{Cell, RefCell},
cmp::min,
@@ -49,9 +49,9 @@ use Edge::*;
/// ^ ^
/// ```
#[derive(Debug, Clone)]
-pub struct Vertex<'s, 'b> {
- pub neighbours: RefCell<Option<Vec<(Edge, &'b Vertex<'s, 'b>)>>>,
- pub predecessor: Cell<Option<(u32, &'b Vertex<'s, 'b>)>>,
+pub struct Vertex<'s> {
+ pub neighbours: RefCell<Option<Vec<(Edge, VertexId<'s>)>>>,
+ pub predecessor: Cell<Option<(u32, VertexId<'s>)>>,
// TODO: experiment with storing SyntaxId only, and have a HashMap
// from SyntaxId to &Syntax.
pub lhs_syntax: Option<&'s Syntax<'s>>,
@@ -61,7 +61,9 @@ pub struct Vertex<'s, 'b> {
rhs_parent_id: Option<SyntaxId>,
}
-impl<'s, 'b> PartialEq for Vertex<'s, 'b> {
+pub type VertexId<'s> = Id<Vertex<'s>>;
+
+impl<'s> PartialEq for Vertex<'s> {
fn eq(&self, other: &Self) -> bool {
// Strictly speaking, we should compare the whole
// EnteredDelimiter stack, not just the immediate
@@ -99,9 +101,9 @@ impl<'s, 'b> PartialEq for Vertex<'s, 'b> {
b0 && b1 && b2
}
}
-impl<'s, 'b> Eq for Vertex<'s, 'b> {}
+impl<'s> Eq for Vertex<'s> {}
-impl<'s, 'b> Hash for Vertex<'s, 'b> {
+impl<'s> Hash for Vertex<'s> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.lhs_syntax.map(|node| node.id()).hash(state);
self.rhs_syntax.map(|node| node.id()).hash(state);
@@ -247,7 +249,7 @@ fn push_rhs_delimiter<'s>(
}
}
-impl<'s, 'b> Vertex<'s, 'b> {
+impl<'s> Vertex<'s> {
pub fn is_end(&self) -> bool {
self.lhs_syntax.is_none() && self.rhs_syntax.is_none() && self.parents.is_empty()
}
@@ -344,11 +346,11 @@ impl Edge {
}
}
-fn allocate_if_new<'s, 'b>(
- v: Vertex<'s, 'b>,
- alloc: &'b Bump,
- seen: &mut DftHashMap<&Vertex<'s, 'b>, Vec<&'b Vertex<'s, 'b>>>,
-) -> &'b Vertex<'s, 'b> {
+fn allocate_if_new<'a, 's>(
+ v: Vertex<'s>,
+ alloc: &'a mut Arena<Vertex<'s>>,
+ seen: &mut DftHashMap<Vertex<'s>, Vec<VertexId<'s>>>,
+) -> VertexId<'s> {
// We use the entry API so that we only need to do a single lookup
// for access and insert.
match seen.raw_entry_mut().from_key(&v) {
@@ -359,15 +361,16 @@ fn allocate_if_new<'s, 'b>(
// nestings for each syntax node pair.
if let Some(allocated) = existing.last() {
if existing.len() >= 2 {
- return allocated;
+ return *allocated;
}
}
// If we have seen exactly this graph node before, even
// considering parenthesis matching, return it.
- for existing_node in existing.iter() {
+ for existing_node_id in existing.iter() {
+ let existing_node = &alloc[*existing_node_id];
if existing_node.parents == v.parents {
- return existing_node;
+ return *existing_node_id;
}
}
@@ -378,18 +381,18 @@ fn allocate_if_new<'s, 'b>(
allocated
}
RawEntryMut::Vacant(vacant) => {
- let allocated = alloc.alloc(v);
+ let allocated_id = alloc.alloc(v.clone());
// We know that this vec will never have more than 2
// nodes, and this code is very hot, so reserve.
//
// We still use a vec to enable experiments with the value
// of how many possible parenthesis nestings to explore.
- let mut existing: Vec<&'b Vertex<'s, 'b>> = Vec::with_capacity(2);
- existing.push(allocated);
+ let mut existing: Vec<VertexId<'s>> = Vec::with_capacity(2);
+ existing.push(allocated_id);
- vacant.insert(allocated, existing);
- allocated
+ vacant.insert(v, existing);
+ allocated_id
}
}
}
@@ -474,20 +477,22 @@ fn pop_all_parents<'s>(
/// Compute the neighbours of `v` if we haven't previously done so,
/// and write them to the .neighbours cell inside `v`.
-pub fn set_neighbours<'s, 'b>(
- v: &Vertex<'s, 'b>,
- alloc: &'b Bump,
- seen: &mut DftHashMap<&Vertex<'s, 'b>, Vec<&'b Vertex<'s, 'b>>>,
+pub fn set_neighbours<'a, 's>(
+ v_id: VertexId<'s>,
+ alloc: &'a mut Arena<Vertex<'s>>,
+ seen: &mut DftHashMap<Vertex<'s>, Vec<VertexId<'s>>>,
) {
- if v.neighbours.borrow().is_some() {
+ if alloc[v_id].neighbours.borrow().is_some() {
return;
}
// There are only seven pushes in this function, so that's sufficient.
- let mut res: Vec<(Edge, &Vertex)> = Vec::with_capacity(7);
+ let mut res: Vec<(Edge, VertexId<'s>)> = Vec::with_capacity(7);
+ let v = &alloc[v_id];
if let (Some(lhs_syntax), Some(rhs_syntax)) = (&v.lhs_syntax, &v.rhs_syntax) {
if lhs_syntax == rhs_syntax {
+ let v = &alloc[v_id];
let depth_difference = (lhs_syntax.num_ancestors() as i32
- rhs_syntax.num_ancestors() as i32)
.unsigned_abs();
@@ -769,11 +774,13 @@ pub fn set_neighbours<'s, 'b>(
v.neighbours.replace(Some(res));
}
-pub fn populate_change_map<'s, 'b>(
- route: &[(Edge, &'b Vertex<'s, 'b>)],
+pub fn populate_change_map<'s>(
+ route: &[(Edge, VertexId<'s>)],
+ vertex_arena: &Arena<Vertex<'s>>,
change_map: &mut ChangeMap<'s>,
) {
- for (e, v) in route {
+ for (e, v_id) in route {
+ let v = &vertex_arena[*v_id];
match e {
UnchangedNode { .. } => {
// No change on this node or its children.