diff options
author | Kornel <kornel@geekhood.net> | 2020-03-02 19:29:09 +0000 |
---|---|---|
committer | Kornel <kornel@geekhood.net> | 2020-03-02 19:41:02 +0000 |
commit | e9fb3905e14ff7f4db01a562dd54b61248b0948e (patch) | |
tree | 257aabca906db63a53ad152c35c70a3eb527137b | |
parent | 9e0abe2ccd949b7caabf906d06109641f931d460 (diff) |
Periodically reindex crates
-rw-r--r-- | crate_db/src/lib_crate_db.rs | 13 | ||||
-rw-r--r-- | kitchen_sink/src/lib_kitchen_sink.rs | 14 | ||||
-rw-r--r-- | reindex/src/bin/reindex_crates.rs | 2 |
3 files changed, 25 insertions, 4 deletions
diff --git a/crate_db/src/lib_crate_db.rs b/crate_db/src/lib_crate_db.rs index 3482e34..7fb11fb 100644 --- a/crate_db/src/lib_crate_db.rs +++ b/crate_db/src/lib_crate_db.rs @@ -1043,6 +1043,19 @@ impl CrateDb { }).await } + /// Crates overdue for an update + pub async fn crates_to_reindex(&self) -> FResult<Vec<Origin>> { + self.with_read("crates_to_reindex", |conn| { + let mut q = conn.prepare("SELECT origin FROM crates WHERE next_update < ?1 LIMIT 1000")?; + let timestamp = Utc::now().timestamp() as u32; + let q = q.query_map(&[×tamp], |r| { + let s = r.get_raw(0).as_str().unwrap(); + Ok(Origin::from_crates_io_name(s)) + })?.filter_map(|r| r.ok()); + Ok(q.collect()) + }).await + } + // returns an array of lowercase phrases fn extract_text_phrases(c: &CrateVersionData<'_>) -> Vec<(f64, String)> { let mut out = Vec::new(); diff --git a/kitchen_sink/src/lib_kitchen_sink.rs b/kitchen_sink/src/lib_kitchen_sink.rs index 0f95672..3e6bb16 100644 --- a/kitchen_sink/src/lib_kitchen_sink.rs +++ b/kitchen_sink/src/lib_kitchen_sink.rs @@ -474,7 +474,7 @@ impl KitchenSink { Ok(res) } - pub async fn all_new_crates(&self) -> CResult<Vec<RichCrate>> { + pub async fn crates_to_reindex(&self) -> CResult<Vec<RichCrate>> { let min_timestamp = self.crate_db.latest_crate_update_timestamp().await?.unwrap_or(0); let all = tokio::task::block_in_place(|| { self.index.crates_io_crates() // too slow to scan all GH crates @@ -483,7 +483,7 @@ impl KitchenSink { .filter_map(move |(name, _)| async move { self.rich_crate_async(&Origin::from_crates_io_name(&*name)).await.map_err(|e| eprintln!("{}: {}", name, e)).ok() }); - Ok(stream.filter(move |k| { + let mut crates = stream.filter(move |k| { let latest = k.versions().iter().map(|v| v.created_at.as_str()).max().unwrap_or(""); let res = if let Ok(timestamp) = DateTime::parse_from_rfc3339(latest) { timestamp.timestamp() >= min_timestamp as i64 @@ -493,7 +493,15 @@ impl KitchenSink { }; async move { res } }) - .collect::<Vec<_>>().await) + .collect::<Vec<_>>().await; + + let mut crates2 = futures::stream::iter(self.crate_db.crates_to_reindex().await?.into_iter()) + .filter_map(move |origin| async move { + self.rich_crate_async(&origin).await.map_err(|e| eprintln!("{:?}: {}", origin, e)).ok() + }).collect::<Vec<_>>().await; + + crates.append(&mut crates2); + Ok(crates) } pub fn crate_exists(&self, origin: &Origin) -> bool { diff --git a/reindex/src/bin/reindex_crates.rs b/reindex/src/bin/reindex_crates.rs index 0086bb0..cb96d21 100644 --- a/reindex/src/bin/reindex_crates.rs +++ b/reindex/src/bin/reindex_crates.rs @@ -80,7 +80,7 @@ async fn main() { } else if !specific.is_empty() { Either::Left(specific) } else { - Either::Right(handle.enter(|| futures::executor::block_on(crates.all_new_crates())).unwrap().into_iter().map(|c| c.origin().clone())) + Either::Right(handle.enter(|| futures::executor::block_on(crates.crates_to_reindex())).unwrap().into_iter().map(|c| c.origin().clone())) }; for (i, origin) in c.into_iter().enumerate() { if stopped() { |