diff options
80 files changed, 1489 insertions, 863 deletions
diff --git a/.travis.yml b/.travis.yml index 9fc2578..c4c59cd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -47,6 +47,7 @@ matrix: before_install: - set -e - rustup self update + - rustup component add rustfmt install: - sh ci/install.sh @@ -60,6 +61,7 @@ before_script: script: - bash ci/script.sh + - cargo fmt --all -- --check before_deploy: - sh ci/before_deploy.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f74923..a0df3a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,19 @@ Tantivy 0.11.0 ===================== - Added f64 field. Internally reuse u64 code the same way i64 does (@fdb-hiroshima) +- Various bugfixes in the query parser. + - Better handling of hyphens in query parser. (#609) + - Better handling of whitespaces. +- Closes #498 - add support for Elastic-style unbounded range queries for alphanumeric types eg. "title:>hello", "weight:>=70.5", "height:<200" (@petr-tik) +- API change around `Box<BoxableTokenizer>`. See detail in #629 +- Avoid rebuilding Regex automaton whenever a regex query is reused. #639 (@brainlock) +- Add footer with some metadata to index files. #605 (@fdb-hiroshima) + +## How to update? + +- `Box<dyn BoxableTokenizer>` has been replaced by a `BoxedTokenizer` struct. +- Regex are now compiled when the `RegexQuery` instance is built. As a result, it can now return +an error and handling the `Result` is required. Tantivy 0.10.2 @@ -1,6 +1,6 @@ [package] name = "tantivy" -version = "0.10.2" +version = "0.11.0" authors = ["Paul Masurel <paul.masurel@gmail.com>"] license = "MIT" categories = ["database-implementations", "data-structures"] @@ -15,8 +15,9 @@ edition = "2018" [dependencies] base64 = "0.10.0" byteorder = "1.0" -once_cell = "0.2" -regex = "1.0" +crc32fast = "1.2.0" +once_cell = "1.0" +regex ={version = "1.3.0", default-features = false, features = ["std"]} tantivy-fst = "0.1" memmap = {version = "0.7", optional=true} lz4 = {version="1.20", optional=true} @@ -24,8 +25,6 @@ snap = {version="0.2"} atomicwrites = {version="0.2.2", optional=true} tempfile = "3.0" log = "0.4" -combine = ">=3.6.0,<4.0.0" -tempdir = "0.3" serde = "1.0" serde_derive = "1.0" serde_json = "1.0" @@ -36,13 +35,14 @@ levenshtein_automata = {version="0.1", features=["fst_automaton"]} notify = {version="4", optional=true} bit-set = "0.5" uuid = { version = "0.7.2", features = ["v4", "serde"] } -crossbeam = "0.5" +crossbeam = "0.7" futures = "0.1" futures-cpupool = "0.1" owning_ref = "0.4" stable_deref_trait = "1.0.0" rust-stemmers = "1.1" downcast-rs = { version="1.0" } +tantivy-query-grammar = { path="./query-grammar" } bitpacking = {version="0.8", default-features = false, features=["bitpacker4x"]} census = "0.2" fnv = "1.0.6" @@ -81,6 +81,9 @@ failpoints = ["fail/failpoints"] unstable = [] # useful for benches. wasm-bindgen = ["uuid/wasm-bindgen"] +[workspace] +members = ["query-grammar"] + [badges] travis-ci = { repository = "tantivy-search/tantivy" } @@ -88,7 +91,6 @@ travis-ci = { repository = "tantivy-search/tantivy" } version = "0.3" features = ["failpoints"] - # Following the "fail" crate best practises, we isolate # tests that define specific behavior in fail check points # in a different binary. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..05f0f44 --- /dev/null +++ b/Makefile @@ -0,0 +1,3 @@ +test: + echo "Run test only... No examples." + cargo test --tests --lib diff --git a/ci/script.sh b/ci/script.sh index 9f3cf88..87f8902 100644 --- a/ci/script.sh +++ b/ci/script.sh @@ -7,7 +7,7 @@ set -ex main() { if [ ! -z $CODECOV ]; then echo "Codecov" - cargo build --verbose && cargo coverage --verbose && bash <(curl -s https://codecov.io/bash) -s target/kcov + cargo build --verbose && cargo coverage --verbose --all && bash <(curl -s https://codecov.io/bash) -s target/kcov else echo "Build" cross build --target $TARGET @@ -15,7 +15,8 @@ main() { return fi echo "Test" - cross test --target $TARGET --no-default-features --features mmap -- --test-threads 1 + cross test --target $TARGET --no-default-features --features mmap + cross test --target $TARGET --no-default-features --features mmap query-grammar fi for example in $(ls examples/*.rs) do diff --git a/examples/basic_search.rs b/examples/basic_search.rs index 416f86f..c8ac36c 100644 --- a/examples/basic_search.rs +++ b/examples/basic_search.rs @@ -5,26 +5,23 @@ // // We will : // - define our schema -// = create an index in a directory -// - index few documents in our index -// - search for the best document matchings "sea whale" -// - retrieve the best document original content. +// - create an index in a directory +// - index a few documents into our index +// - search for the best document matching a basic query +// - retrieve the best document's original content. // --- // Importing tantivy... -#[macro_use] -extern crate tantivy; use tantivy::collector::TopDocs; use tantivy::query::QueryParser; use tantivy::schema::*; -use tantivy::Index; -use tantivy::ReloadPolicy; -use tempdir::TempDir; +use tantivy::{doc, Index, ReloadPolicy}; +use tempfile::TempDir; fn main() -> tantivy::Result<()> { // Let's create a temporary directory for the // sake of this example - let index_path = TempDir::new("tantivy_example_dir")?; + let index_path = TempDir::new()?; // # Defining the schema // @@ -33,7 +30,7 @@ fn main() -> tantivy::Result<()> { // and for each field, its type and "the way it should // be indexed". - // first we need to define a schema ... + // First we need to define a schema ... let mut schema_builder = Schema::builder(); // Our first field is title. @@ -48,7 +45,7 @@ fn main() -> tantivy::Result<()> { // // `STORED` means that the field will also be saved // in a compressed, row-oriented key-value store. - // This store is useful to reconstruct the + // This store is useful for reconstructing the // documents that were selected during the search phase. schema_builder.add_text_field("title", TEXT | STORED); @@ -57,8 +54,7 @@ fn main() -> tantivy::Result<()> { // need to be able to be able to retrieve it // for our application. // - // We can make our index lighter and - // by omitting `STORED` flag. + // We can make our index lighter by omitting the `STORED` flag. schema_builder.add_text_field("body", TEXT); let schema = schema_builder.build(); @@ -71,7 +67,7 @@ fn main() -> tantivy::Result<()> { // with our schema in the directory. let index = Index::create_in_dir(&index_path, schema.clone())?; - // To insert document we need an index writer. + // To insert a document we will need an index writer. // There must be only one writer at a time. // This single `IndexWriter` is already // multithreaded. @@ -149,8 +145,8 @@ fn main() -> tantivy::Result<()> { // At this point our documents are not searchable. // // - // We need to call .commit() explicitly to force the - // index_writer to finish processing the documents in the queue, + // We need to call `.commit()` explicitly to force the + // `index_writer` to finish processing the documents in the queue, // flush the current index to the disk, and advertise // the existence of new documents. // @@ -162,14 +158,14 @@ fn main() -> tantivy::Result<()> { // persistently indexed. // // In the scenario of a crash or a power failure, - // tantivy behaves as if has rolled back to its last + // tantivy behaves as if it has rolled back to its last // commit. // # Searching // // ### Searcher // - // A reader is required to get search the index. + // A reader is required first in order to search an index. // It acts as a `Searcher` pool that reloads itself, // depending on a `ReloadPolicy`. // @@ -185,7 +181,7 @@ fn main() -> tantivy::Result<()> { // We now need to acquire a searcher. // - // A searcher points to snapshotted, immutable version of the index. + // A searcher points to a snapshotted, immutable version of the index. // // Some search experience might require more than // one query. Using the same searcher ensures that all of these queries will run on the @@ -205,7 +201,7 @@ fn main() -> tantivy::Result<()> { // in both title and body. let query_parser = QueryParser::for_index(&index, vec![title, body]); - // QueryParser may fail if the query is not in the right + // `QueryParser` may fail if the query is not in the right // format. For user facing applications, this can be a problem. // A ticket has been opened regarding this problem. let query = query_parser.parse_query("sea whale")?; @@ -221,7 +217,7 @@ fn main() -> tantivy::Result<()> { // // We are not interested in all of the documents but // only in the top 10. Keeping track of our top 10 best documents - // is the role of the TopDocs. + // is the role of the `TopDocs` collector. // We can now perform our query. let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?; diff --git a/examples/custom_collector.rs b/examples/custom_collector.rs index e63eb9f..c277ede 100644 --- a/examples/custom_collector.rs +++ b/examples/custom_collector.rs @@ -9,15 +9,12 @@ // --- // Importing tantivy... -#[macro_use] -extern crate tantivy; use tantivy::collector::{Collector, SegmentCollector}; use tantivy::fastfield::FastFieldReader; use tantivy::query::QueryParser; use tantivy::schema::Field; use tantivy::schema::{Schema, FAST, INDEXED, TEXT}; -use tantivy::SegmentReader; -use tantivy::{Index, TantivyError}; +use tantivy::{doc, Index, SegmentReader, TantivyError}; #[derive(Default)] struct Stats { diff --git a/examples/custom_tokenizer.rs b/examples/custom_tokenizer.rs index 5730adb..4db6d10 100644 --- a/examples/custom_tokenizer.rs +++ b/examples/custom_tokenizer.rs @@ -2,14 +2,11 @@ // // In this example, we'll see how to define a tokenizer pipeline // by aligning a bunch of `TokenFilter`. - -#[macro_use] -extern crate tantivy; use tantivy::collector::TopDocs; use tantivy::query::QueryParser; use tantivy::schema::*; use tantivy::tokenizer::NgramTokenizer; -use tantivy::Index; +use tantivy::{doc, Index}; fn main() -> tantivy::Result<()> { // # Defining the schema diff --git a/examples/deleting_updating_documents.rs b/examples/deleting_updating_documents.rs index 82fdd90..1eda6ce 100644 --- a/examples/deleting_updating_documents.rs +++ b/examples/deleting_updating_documents.rs @@ -8,13 +8,10 @@ // // --- // Importing tantivy... -#[macro_use] -extern crate tantivy; use tantivy::collector::TopDocs; use tantivy::query::TermQuery; use tantivy::schema::*; -use tantivy::Index; -use tantivy::IndexReader; +use tantivy::{doc, Index, IndexReader}; // A simple helper function to fetch a single document // given its id from our index. diff --git a/examples/faceted_search.rs b/examples/faceted_search.rs index 98e0a27..7ac67c3 100644 --- a/examples/faceted_search.rs +++ b/examples/faceted_search.rs @@ -12,17 +12,16 @@ // --- // Importing tantivy... -#[macro_use] -extern crate tantivy; use tantivy::collector::FacetCollector; use tantivy::query::AllQuery; use tantivy::schema::*; -use tantivy::Index; +use tantivy::{doc, Index}; +use tempfile::TempDir; fn main() -> tantivy::Result<()> { // Let's create a temporary directory for the // sake of this example - let index_path = TempDir::new("tantivy_facet_example_dir")?; + let index_path = TempDir::new()?; let mut schema_builder = Schema::builder(); schema_builder.add_text_field("name", TEXT | STORED); @@ -74,5 +73,3 @@ fn main() -> tantivy::Result<()> { Ok(()) } - -use tempdir::TempDir; diff --git a/examples/integer_range_search.rs b/examples/integer_range_search.rs index dea3145..12edd6e 100644 --- a/examples/integer_range_search.rs +++ b/examples/integer_range_search.rs @@ -2,14 +2,10 @@ // // Below is an example of creating an indexed integer field in your schema // You can use RangeQuery to get a Count of all occurrences in a given range. - -#[macro_use] -extern crate tantivy; use tantivy::collector::Count; use tantivy::query::RangeQuery; use tantivy::schema::{Schema, INDEXED}; -use tantivy::Index; -use tantivy::Result; +use tantivy::{doc, Index, Result}; fn run() -> Result<()> { // For the sake of simplicity, this schema will only have 1 field diff --git a/examples/iterating_docs_and_positions.rs b/examples/iterating_docs_and_positions.rs index 4668de3..0be84ec 100644 --- a/examples/iterating_docs_and_positions.rs +++ b/examples/iterating_docs_and_positions.rs @@ -9,11 +9,8 @@ // --- // Importing tantivy... -#[macro_use] -extern crate tantivy; use tantivy::schema::*; -use tantivy::Index; -use tantivy::{DocId, DocSet, Postings}; +use tantivy::{doc, DocId, DocSet, Index, Postings}; fn main() -> tantivy::Result<()> { // We first create a schema for the sake of the diff --git a/examples/mu |