summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKornel <kornel@geekhood.net>2020-03-02 19:29:09 +0000
committerKornel <kornel@geekhood.net>2020-03-02 19:41:02 +0000
commite9fb3905e14ff7f4db01a562dd54b61248b0948e (patch)
tree257aabca906db63a53ad152c35c70a3eb527137b
parent9e0abe2ccd949b7caabf906d06109641f931d460 (diff)
Periodically reindex crates
-rw-r--r--crate_db/src/lib_crate_db.rs13
-rw-r--r--kitchen_sink/src/lib_kitchen_sink.rs14
-rw-r--r--reindex/src/bin/reindex_crates.rs2
3 files changed, 25 insertions, 4 deletions
diff --git a/crate_db/src/lib_crate_db.rs b/crate_db/src/lib_crate_db.rs
index 3482e34..7fb11fb 100644
--- a/crate_db/src/lib_crate_db.rs
+++ b/crate_db/src/lib_crate_db.rs
@@ -1043,6 +1043,19 @@ impl CrateDb {
}).await
}
+ /// Crates overdue for an update
+ pub async fn crates_to_reindex(&self) -> FResult<Vec<Origin>> {
+ self.with_read("crates_to_reindex", |conn| {
+ let mut q = conn.prepare("SELECT origin FROM crates WHERE next_update < ?1 LIMIT 1000")?;
+ let timestamp = Utc::now().timestamp() as u32;
+ let q = q.query_map(&[&timestamp], |r| {
+ let s = r.get_raw(0).as_str().unwrap();
+ Ok(Origin::from_crates_io_name(s))
+ })?.filter_map(|r| r.ok());
+ Ok(q.collect())
+ }).await
+ }
+
// returns an array of lowercase phrases
fn extract_text_phrases(c: &CrateVersionData<'_>) -> Vec<(f64, String)> {
let mut out = Vec::new();
diff --git a/kitchen_sink/src/lib_kitchen_sink.rs b/kitchen_sink/src/lib_kitchen_sink.rs
index 0f95672..3e6bb16 100644
--- a/kitchen_sink/src/lib_kitchen_sink.rs
+++ b/kitchen_sink/src/lib_kitchen_sink.rs
@@ -474,7 +474,7 @@ impl KitchenSink {
Ok(res)
}
- pub async fn all_new_crates(&self) -> CResult<Vec<RichCrate>> {
+ pub async fn crates_to_reindex(&self) -> CResult<Vec<RichCrate>> {
let min_timestamp = self.crate_db.latest_crate_update_timestamp().await?.unwrap_or(0);
let all = tokio::task::block_in_place(|| {
self.index.crates_io_crates() // too slow to scan all GH crates
@@ -483,7 +483,7 @@ impl KitchenSink {
.filter_map(move |(name, _)| async move {
self.rich_crate_async(&Origin::from_crates_io_name(&*name)).await.map_err(|e| eprintln!("{}: {}", name, e)).ok()
});
- Ok(stream.filter(move |k| {
+ let mut crates = stream.filter(move |k| {
let latest = k.versions().iter().map(|v| v.created_at.as_str()).max().unwrap_or("");
let res = if let Ok(timestamp) = DateTime::parse_from_rfc3339(latest) {
timestamp.timestamp() >= min_timestamp as i64
@@ -493,7 +493,15 @@ impl KitchenSink {
};
async move { res }
})
- .collect::<Vec<_>>().await)
+ .collect::<Vec<_>>().await;
+
+ let mut crates2 = futures::stream::iter(self.crate_db.crates_to_reindex().await?.into_iter())
+ .filter_map(move |origin| async move {
+ self.rich_crate_async(&origin).await.map_err(|e| eprintln!("{:?}: {}", origin, e)).ok()
+ }).collect::<Vec<_>>().await;
+
+ crates.append(&mut crates2);
+ Ok(crates)
}
pub fn crate_exists(&self, origin: &Origin) -> bool {
diff --git a/reindex/src/bin/reindex_crates.rs b/reindex/src/bin/reindex_crates.rs
index 0086bb0..cb96d21 100644
--- a/reindex/src/bin/reindex_crates.rs
+++ b/reindex/src/bin/reindex_crates.rs
@@ -80,7 +80,7 @@ async fn main() {
} else if !specific.is_empty() {
Either::Left(specific)
} else {
- Either::Right(handle.enter(|| futures::executor::block_on(crates.all_new_crates())).unwrap().into_iter().map(|c| c.origin().clone()))
+ Either::Right(handle.enter(|| futures::executor::block_on(crates.crates_to_reindex())).unwrap().into_iter().map(|c| c.origin().clone()))
};
for (i, origin) in c.into_iter().enumerate() {
if stopped() {