From cec9956a01d922f6ce2e5dee3da003512597e3ed Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Thu, 6 Sep 2018 10:10:40 +0900 Subject: Issue/389 (#405) * Setting up the dependency. * Completed README --- Cargo.toml | 11 ++++++- README.md | 4 +++ appveyor.yml | 4 +-- ci/script.sh | 2 +- run-tests.sh | 2 ++ src/common/bitset.rs | 6 ++-- src/core/segment_reader.rs | 3 ++ src/directory/ram_directory.rs | 3 ++ src/fastfield/mod.rs | 2 +- src/indexer/index_writer.rs | 73 +++++++++++++++++++++++++++++++----------- src/lib.rs | 4 +++ 11 files changed, 87 insertions(+), 27 deletions(-) create mode 100755 run-tests.sh diff --git a/Cargo.toml b/Cargo.toml index ab767d3..098ab91 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,6 +48,7 @@ census = "0.1" fnv = "1.0.6" owned-read = "0.4" failure = "0.1" +fail = "0.2" [target.'cfg(windows)'.dependencies] winapi = "0.2" @@ -60,12 +61,20 @@ opt-level = 3 debug = false lto = true debug-assertions = false +overflow-checks = false + +[profile.test] +debug-assertions = true +overflow-checks = true [features] -default = ["mmap"] +# by default no-fail is disabled. We manually enable it when running test. +default = ["mmap", "no_fail"] mmap = ["fst/mmap", "atomicwrites"] lz4-compression = ["lz4"] +no_fail = ["fail/no_fail"] [badges] travis-ci = { repository = "tantivy-search/tantivy" } + diff --git a/README.md b/README.md index 499a124..0ce522a 100644 --- a/README.md +++ b/README.md @@ -78,6 +78,10 @@ To check out and run tests, you can simply run : cd tantivy cargo build +## Running tests + +Some tests will not run with just `cargo test` because of `fail-rs`. +To run the tests exhaustively, run `./run-tests.sh`. # Contribute diff --git a/appveyor.yml b/appveyor.yml index a3bd2ac..685b04d 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -18,5 +18,5 @@ install: build: false test_script: - - REM SET RUST_LOG=tantivy,test & cargo test --verbose - - REM SET RUST_BACKTRACE=1 & cargo build --examples \ No newline at end of file + - REM SET RUST_LOG=tantivy,test & cargo test --verbose --no-default-features --features mmap -- --test-threads 1 + - REM SET RUST_BACKTRACE=1 & cargo build --examples diff --git a/ci/script.sh b/ci/script.sh index b563457..0939344 100644 --- a/ci/script.sh +++ b/ci/script.sh @@ -16,7 +16,7 @@ main() { return fi echo "Test" - cross test --target $TARGET + cross test --target $TARGET --no-default-features --features mmap -- --test-threads 1 fi for example in $(ls examples/*.rs) do diff --git a/run-tests.sh b/run-tests.sh new file mode 100755 index 0000000..fc2944d --- /dev/null +++ b/run-tests.sh @@ -0,0 +1,2 @@ +#!/bin/bash +cargo test --no-default-features --features mmap -- --test-threads 1 diff --git a/src/common/bitset.rs b/src/common/bitset.rs index 73f03c4..326e7ce 100644 --- a/src/common/bitset.rs +++ b/src/common/bitset.rs @@ -266,14 +266,14 @@ mod tests { #[test] fn test_bitset_large() { - let arr = generate_nonunique_unsorted(1_000_000, 50_000); + let arr = generate_nonunique_unsorted(100_000, 5_000); let mut btreeset: BTreeSet = BTreeSet::new(); - let mut bitset = BitSet::with_max_value(1_000_000); + let mut bitset = BitSet::with_max_value(100_000); for el in arr { btreeset.insert(el); bitset.insert(el); } - for i in 0..1_000_000 { + for i in 0..100_000 { assert_eq!(btreeset.contains(&i), bitset.contains(i)); } assert_eq!(btreeset.len(), bitset.len()); diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 37b9503..517e153 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -225,6 +225,8 @@ impl SegmentReader { let store_source = segment.open_read(SegmentComponent::STORE)?; let store_reader = StoreReader::from_source(store_source); + fail_point!("SegmentReader::open#middle"); + let postings_source = segment.open_read(SegmentComponent::POSTINGS)?; let postings_composite = CompositeFile::open(&postings_source)?; @@ -432,6 +434,7 @@ mod test { use schema::{SchemaBuilder, Term, STORED, TEXT}; use DocId; + #[test] fn test_alive_docs_iterator() { let mut schema_builder = SchemaBuilder::new(); diff --git a/src/directory/ram_directory.rs b/src/directory/ram_directory.rs index 4e55da5..1b40970 100644 --- a/src/directory/ram_directory.rs +++ b/src/directory/ram_directory.rs @@ -195,6 +195,9 @@ impl Directory for RAMDirectory { } fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> { + fail_point!("RAMDirectory::atomic_write", |msg| { + Err(io::Error::new(io::ErrorKind::Other, msg.unwrap_or("Undefined".to_string()))) + }); let path_buf = PathBuf::from(path); let mut vec_writer = VecWriter::new(path_buf.clone(), self.fs.clone()); self.fs.write(path_buf, &Vec::new())?; diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index e3599ba..fdb0294 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -370,7 +370,7 @@ mod tests { pub fn generate_permutation() -> Vec { let seed: [u8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; let mut rng = XorShiftRng::from_seed(seed); - let mut permutation: Vec = (0u64..1_000_000u64).collect(); + let mut permutation: Vec = (0u64..100_000u64).collect(); rng.shuffle(&mut permutation); permutation } diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 5af4ed6..3e11c4c 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -301,25 +301,31 @@ fn index_documents( let last_docstamp: u64 = *(doc_opstamps.last().unwrap()); - let doc_to_opstamps = DocToOpstampMapping::from(doc_opstamps); - let segment_reader = SegmentReader::open(segment)?; - let mut deleted_bitset = BitSet::with_capacity(num_docs as usize); - let may_have_deletes = compute_deleted_bitset( - &mut deleted_bitset, - &segment_reader, - &mut delete_cursor, - &doc_to_opstamps, - last_docstamp, - )?; - - let segment_entry = SegmentEntry::new(segment_meta, delete_cursor, { - if may_have_deletes { - Some(deleted_bitset) - } else { - None - } - }); - + let segment_entry: SegmentEntry; + + if delete_cursor.get().is_some() { + let doc_to_opstamps = DocToOpstampMapping::from(doc_opstamps); + let segment_reader = SegmentReader::open(segment)?; + let mut deleted_bitset = BitSet::with_capacity(num_docs as usize); + let may_have_deletes = compute_deleted_bitset( + &mut deleted_bitset, + &segment_reader, + &mut delete_cursor, + &doc_to_opstamps, + last_docstamp, + )?; + segment_entry = SegmentEntry::new(segment_meta, delete_cursor, { + if may_have_deletes { + Some(deleted_bitset) + } else { + None + } + }); + } else { + // if there are no delete operation in the queue, no need + // to even open the segment. + segment_entry = SegmentEntry::new(segment_meta, delete_cursor, None); + } Ok(segment_updater.add_segment(generation, segment_entry)) } @@ -858,4 +864,33 @@ mod tests { assert_eq!(initial_table_size(1_000_000_000), 19); } + + #[cfg(not(feature="no_fail"))] + #[test] + fn test_write_commit_fails() { + use fail; + let mut schema_builder = schema::SchemaBuilder::default(); + let text_field = schema_builder.add_text_field("text", schema::TEXT); + let index = Index::create_in_ram(schema_builder.build()); + + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + for _ in 0..100 { + index_writer.add_document(doc!(text_field => "a")); + } + index_writer.commit().unwrap(); + fail::cfg("RAMDirectory::atomic_write", "return(error_write_failed)").unwrap(); + for _ in 0..100 { + index_writer.add_document(doc!(text_field => "b")); + } + assert!(index_writer.commit().is_err()); + index.load_searchers().unwrap(); + let num_docs_containing = |s: &str| { + let searcher = index.searcher(); + let term_a = Term::from_field_text(text_field, s); + searcher.doc_freq(&term_a) + }; + assert_eq!(num_docs_containing("a"), 100); + assert_eq!(num_docs_containing("b"), 0); + fail::cfg("RAMDirectory::atomic_write", "off").unwrap(); + } } diff --git a/src/lib.rs b/src/lib.rs index 985d68a..e5a75cd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -173,6 +173,9 @@ extern crate tinysegmenter; #[macro_use] extern crate downcast; +#[macro_use] +extern crate fail; + #[cfg(test)] mod functional_test; @@ -946,3 +949,4 @@ mod tests { } } } + -- cgit v1.2.3