From fa9f48ae8d53ce6ffda9e57fe1b278852348992f Mon Sep 17 00:00:00 2001 From: rabite Date: Sun, 24 May 2020 21:00:04 +0200 Subject: add alloc.rs and iter.rs --- src/alloc.rs | 195 +++++++++++++++++++++++++ src/iter.rs | 464 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 659 insertions(+) create mode 100644 src/alloc.rs create mode 100644 src/iter.rs diff --git a/src/alloc.rs b/src/alloc.rs new file mode 100644 index 0000000..2e8b4b3 --- /dev/null +++ b/src/alloc.rs @@ -0,0 +1,195 @@ +use bumpalo::{collections::Vec, Bump}; +use crossbeam::utils::{Backoff, CachePadded}; + +use std::path::PathBuf; +use std::ptr::NonNull; +use std::sync::atomic::{AtomicBool, Ordering}; + +// Concurrent access to the Bump allocator is prevented through the +// use of a simple atomics based spin loop. +unsafe impl Send for Allocator {} +unsafe impl Sync for Allocator {} + +#[derive(Debug)] +pub struct Allocator { + bump: Bump, + lock: CachePadded, +} + +pub struct Mem { + ptr: NonNull, + cap: usize, + len: usize, +} + +pub enum RawAlloc { + PathBuf(Mem), + String(Mem), +} + +impl Allocator { + pub fn new() -> Allocator { + Allocator { + bump: Bump::new(), + lock: CachePadded::new(AtomicBool::new(false)), + } + } + + #[inline] + pub fn pathbuf(&self, size: usize) -> RawAlloc { + self.lock(); + let vec = Vec::with_capacity_in(size, &self.bump); + self.unlock(); + let mem = Mem { + ptr: unsafe { NonNull::new_unchecked(vec.into_bump_slice_mut().as_mut_ptr()) }, + cap: size, + len: 0, + }; + + RawAlloc::PathBuf(mem) + } + + #[inline] + pub fn string(&self, size: usize) -> RawAlloc { + self.lock(); + let vec = Vec::with_capacity_in(size, &self.bump); + self.unlock(); + let mem = Mem { + ptr: unsafe { NonNull::new_unchecked(vec.into_bump_slice_mut().as_mut_ptr()) }, + cap: size, + len: 0, + }; + + RawAlloc::String(mem) + } + + // Only reason this ISN'T wrong is that capacity will always be + // larger than size. Otherwise a race during reallocation could + // cause bad things to happen. + #[inline] + pub fn tmpfiles(&self, cap: usize) -> Vec { + self.lock(); + let vec = Vec::with_capacity_in(cap, &self.bump); + self.unlock(); + vec + } + + #[inline] + pub fn dentbuf(&self, cap: usize) -> *mut u8 { + let align = std::mem::align_of::(); + let layout = std::alloc::Layout::from_size_align(cap, align).unwrap(); + self.lock(); + let buf = self.bump.alloc_layout(layout); + self.unlock(); + buf.as_ptr() + } + + #[inline] + fn lock(&self) { + let backoff = Backoff::new(); + while self.lock.compare_and_swap(false, true, Ordering::Acquire) { + backoff.snooze(); + } + } + + #[inline] + fn unlock(&self) { + self.lock.store(false, Ordering::Release); + } +} + +impl RawAlloc { + #[inline] + fn get_mem(&mut self) -> &mut Mem { + let mem = match self { + RawAlloc::PathBuf(mem) => mem, + RawAlloc::String(mem) => mem, + }; + mem + } + + #[inline] + pub fn write(&mut self, src: &[u8]) { + let mem = self.get_mem(); + let len = mem.len; + let src_len = src.len(); + + if (mem.len + src_len) > mem.cap { + let mem_bytes = unsafe { std::slice::from_raw_parts(mem.ptr.as_ptr(), len) }; + + let mem_string = String::from_utf8_lossy(mem_bytes); + let src_string = String::from_utf8_lossy(src); + panic!( + "Can't write this much into {:?}\nWanted to add:{}", + mem_string, src_string + ); + }; + + mem.len += src_len; + + unsafe { + let ptr = mem.ptr.as_ptr().offset(len as isize); + ptr.copy_from_nonoverlapping(src.as_ptr(), src_len) + } + } + + #[inline] + pub fn finalize_pathbuf(mut self) -> PathBuf { + let mem = self.get_mem(); + let len = mem.len; + let cap = mem.cap; + let ptr = mem.ptr.as_ptr(); + + let pathbuf = unsafe { + let mut stdvec = std::vec::Vec::from_raw_parts(ptr, len, cap); + let ptr = &mut stdvec as *mut std::vec::Vec; + std::mem::forget(stdvec); + ptr.cast::().read() + }; + return pathbuf; + } + + // #[inline] + // #[target_feature(enable = "avx2")] + // unsafe fn verify_string(&self) -> bool { + // if let RawAlloc::String(mem) = self { + // let ptr = mem.ptr.as_ptr(); + // let len = mem.len; + // let slice = // unsafe { + // std::slice::from_raw_parts(ptr, len); + // //}; + + // let valid = is_utf8::lemire::avx::is_utf8_ascii_path(slice); + // return valid; + + // } else { panic!("Called verify_string() on non-string allocation!"); } + // } + + pub fn finalize_string(mut self) -> String { + let mem = self.get_mem(); + let len = mem.len; + let cap = mem.cap; + let ptr = mem.ptr.as_ptr(); + + let string = unsafe { + // match self.verify_string() { + // true => + String::from_raw_parts(ptr, len, cap) // , + // false => { + // let slice = std::slice::from_raw_parts(ptr, len); + // String::from_utf8_lossy(slice).to_string() + // } + // } + }; + + return string; + } +} + +impl Drop for Allocator { + fn drop(&mut self) { + // if Arc::strong_count(&self.bump) <= 1 { + self.bump.reset(); + // } + } +} diff --git a/src/iter.rs b/src/iter.rs new file mode 100644 index 0000000..f0b9a16 --- /dev/null +++ b/src/iter.rs @@ -0,0 +1,464 @@ +use splay_tree::set::{SplaySet, VecLike, VecLikeMut}; + +use crate::files::File; +use std::sync::RwLockWriteGuard; + +pub struct FileIter<'a> { + files: &'a mut SplaySet, + current: Option<&'a File>, + filter_fn: Box bool>, + done: bool, + n: usize, +} + +impl<'a> FileIter<'a> { + pub fn new(files: &'a mut SplaySet, filter_fn: Box bool>) -> Self { + let mut iter = FileIter { + files: files, + current: None, + filter_fn, + done: false, + n: 0, + }; + + let file = iter.find_first_nonhidden(); + iter.current = file; + iter + } + + pub fn start_from(mut self, file: &'a File) -> Self { + // let mut first = file.clone(); + + // while let Some(current) = self.files.find_less(&first) { + // if (self.filter_fn)(current) == true { + // first = current.clone(); + // break; + // } else { + // first = current.clone(); + // } + // } + dbg!("STARTING FROM"); + dbg!(&file); + self.current = Some(file); + self + } + + pub fn seek_back(mut self, n: usize) -> Self { + if n > 0 { + dbg!(n); + dbg!(&self.current); + self.nth_back(n - 1); + dbg!(&self.current); + } + self + } + + fn find_prev_nonhidden(&mut self, of: &'a File) -> Option<&'a File> { + let mut prev = of; + while let Some(current) = self.files.find_less(&prev).extend() { + prev = current; + if (self.filter_fn)(current) == true { + break; + } + } + + if of != prev { + Some(prev) + } else { + None + } + } + + fn find_next_nonhidden(&mut self, of: &'a File) -> Option<&'a File> { + let mut next = of; + while let Some(current) = self.files.find_upper_bound(&next).extend() { + next = current; + if (self.filter_fn)(current) == true { + break; + } + } + + if of != next { + Some(next) + } else { + None + } + } + + fn find_first_nonhidden(&mut self) -> Option<&'a File> { + self.files.smallest().extend().map(|f| { + let mut first = f; + while let Some(current) = self.files.find_upper_bound(&first).extend() { + first = current; + if (self.filter_fn)(current) == true { + break; + } + } + first + }) + } +} + +impl<'a> Iterator for FileIter<'a> { + type Item = &'a File; + + fn next(&mut self) -> Option<&'a File> { + // if self.done || self.n > self.files.len() { + // self.done = true; + // return None; + // } + + if !self.done && self.current.is_none() { + self.current = self.find_first_nonhidden(); + } + + self.current.take().map(|f| { + self.find_next_nonhidden(f) + .map(|f| self.current = Some(f)) + .or_else(|| { + self.done = true; + None + }); + f + }) + } +} + +impl<'a> DoubleEndedIterator for FileIter<'a> { + fn next_back(&mut self) -> Option<&'a File> { + // dbg!(&self.current); + // self.current + // .clone() + // .as_ref() + // .and_then(|f| unsafe { + // std::mem::transmute::<_, Option<&File>>(self.files.find_less(f)) + // .and_then(|f: &File| { + // self.current = Some(f.clone()); + // Some(f) + // }) + // .filter(|f| (self.filter_fn)(f)) + // .or_else(|| self.next_back()) + // }) + + self.current.and_then(|f| { + self.find_prev_nonhidden(f) + .extend() + .map(|f| self.current = Some(f)) + .and_then(|_| self.current) + // .or_else(|| { self.current = None; None }); + // self.current + }) + } +} + +pub struct FileIterMut<'a> { + files: VecLikeMut<'a, File>, + pos: usize, + filter_fn: Box bool>, +} + +impl<'a, 'b: 'a> FileIterMut<'a> { + pub fn new(files: &'b mut SplaySet, filter_fn: Box bool>) -> Self { + FileIterMut { + files: files.as_vec_like_mut(), + pos: 0, + filter_fn, + } + } + + pub fn set_raw_pos(mut self, pos: usize) -> Self { + self.pos = pos; + self + } + + pub fn seek_back(mut self, n: usize) -> Self { + if n > 0 { + self.nth_back(n - 1); + } + self + } + + pub fn unfiltered(mut self) -> Self { + self.filter_fn = Box::new(|_| true); + self + } +} + +impl<'a> Iterator for FileIterMut<'a> { + type Item = &'a mut File; + + fn next(&mut self) -> Option<&'a mut File> { + let file = self.files.get_mut(self.pos).map(|f| { + let f = f as *mut _; + let f = unsafe { &mut *f }; + f + }); + self.pos += 1; + + match file { + Some(file) => match (self.filter_fn)(file) { + false => self.next(), + true => Some(file), + }, + None => None, + } + } +} + +impl<'a> DoubleEndedIterator for FileIterMut<'a> { + fn next_back(&mut self) -> Option<&'a mut File> { + if self.pos == 0 { + return None; + } + + self.pos -= 1; + + let file = self.files.get_mut(self.pos).map(|f| { + let f = f as *mut _; + let f = unsafe { &mut *f }; + f + }); + + match file { + Some(file) => match (self.filter_fn)(file) { + false => self.next_back(), + true => Some(file), + }, + None => None, + } + } +} + +trait LifeTimeExtended { + fn extend<'a>(self) -> Option<&'a File>; +} + +impl LifeTimeExtended for Option<&File> { + fn extend<'a>(self) -> Option<&'a File> { + self.and_then(|f| unsafe { + let f = f as *const File; + Some(&*f) + }) + } +} + +// pub struct FileIter<'a> { +// files: &'a mut SplaySet, +// current: File, +// filter_fn: Box bool>, +// done: bool, +// n: usize, +// } + +// impl<'a> FileIter<'a> { +// pub fn new(files: &'a mut SplaySet, filter_fn: Box bool>) -> Self { +// let mut first = files.smallest().cloned().unwrap(); + +// while let Some(current) = files.find_upper_bound(&first) { +// if (filter_fn)(current) == true { +// break; +// } else { +// first = current.clone(); +// } +// } + +// FileIter { +// files: files, +// current: first, +// filter_fn, +// done: false, +// n: 0 +// } +// } + +// pub fn start_from(mut self, file: &File) -> Self { +// let mut first = file.clone(); + +// while let Some(current) = self.files.find_less(&first) { +// if (self.filter_fn)(current) == true { +// first = current.clone(); +// break; +// } else { +// first = current.clone(); +// } +// } +// dbg!("STARTING FROM"); +// dbg!(&file); +// self.current = first; +// self +// } + +// pub fn seek_back(mut self, n: usize) -> Self { +// if n > 0 { +// dbg!(n); +// dbg!(&self.current); +// self.nth_back(n-1); +// dbg!(&self.current); +// } +// self +// } + +// fn find_prev_nonhidden(&mut self) { + +// } +// } + +// impl<'a> Iterator for FileIter<'a> { +// type Item=&'a File; + +// fn next(&mut self) -> Option<&'a File> { +// //dbg!(&self.current); +// self.n+=1; +// if self.done || self.n > self.files.len() { +// self.done = true; +// return None; +// } + +// self.files.find_upper_bound(&self.current) +// .extend() +// .and_then(|f| { +// self.current = f.clone(); +// Some(f) +// }) +// .filter(|f| (self.filter_fn)(f)) +// .or_else(|| self.next()) +// } +// } + +// impl<'a> DoubleEndedIterator for FileIter<'a> { +// fn next_back(&mut self) -> Option<&'a File> { +// // dbg!(&self.current); +// // self.current +// // .clone() +// // .as_ref() +// // .and_then(|f| unsafe { +// // std::mem::transmute::<_, Option<&File>>(self.files.find_less(f)) +// // .and_then(|f: &File| { +// // self.current = Some(f.clone()); +// // Some(f) +// // }) +// // .filter(|f| (self.filter_fn)(f)) +// // .or_else(|| self.next_back()) +// // }) + +// self.files.find_less(&self.current) +// .extend() +// .and_then(|f| { +// self.current = f.clone(); +// dbg!(&self.current.name); +// if (self.filter_fn)(f) == true { +// Some(f) +// } else { +// self.next_back() +// } +// }) +// .or_else(|| { +// self.current = self.files.smallest().unwrap().clone(); +// None +// }) + +// } +// } + +// pub struct FileIterMut<'a> { +// files: VecLikeMut<'a, File>, +// pos: usize, +// filter_fn: Box bool>, +// } + +// impl<'a, 'b: 'a> FileIterMut<'a> { +// pub fn new(files: &'b mut SplaySet, filter_fn: Box bool>) -> Self { +// FileIterMut { +// files: files.as_vec_like_mut(), +// pos: 0, +// filter_fn +// } +// } + +// pub fn set_raw_pos(mut self, pos: usize) -> Self { +// self.pos = pos; +// self +// } + +// pub fn seek_back(mut self, n: usize) -> Self { +// if n > 0 { +// self.nth_back(n-1); +// } +// self +// } + +// pub fn unfiltered(mut self) -> Self { +// self.filter_fn = Box::new(|_| true); +// self +// } +// } + +// impl<'a> Iterator for FileIterMut<'a> { +// type Item=&'a mut File; + +// fn next(&mut self) -> Option<&'a mut File> { +// let file = self.files.get_mut(self.pos) +// .map(|f| { +// let f = f as *mut _; +// let f = unsafe { +// &mut *f +// }; +// f +// }); +// self.pos += 1; + +// match file { +// Some(file) => { +// match (self.filter_fn)(file) { +// false => self.next(), +// true => Some(file) +// } +// } +// None => None +// } +// } +// } + +// impl<'a> DoubleEndedIterator for FileIterMut<'a> { +// fn next_back(&mut self) -> Option<&'a mut File> { +// if self.pos == 0 { +// return None; +// } + +// self.pos -= 1; + +// let file = self.files.get_mut(self.pos) +// .map(|f| { +// let f = f as *mut _; +// let f = unsafe { +// &mut *f +// }; +// f +// }); + +// match file { +// Some(file) => { +// match (self.filter_fn)(file) { +// false => self.next_back(), +// true => Some(file) +// } +// } +// None => None +// } +// } +// } + +// trait LifeTimeExtended { +// fn extend<'a>(self) -> Option<&'a File>; +// } + +// impl LifeTimeExtended for Option<&File> { +// fn extend<'a>(self) -> Option<&'a File> { +// self.and_then(|f| { +// unsafe { +// let f = f as *const File; +// Some(&*f) +// } +// }) +// } +// } -- cgit v1.2.3