From 149587f0bda4e79ea2d054f9f33636b3fbf7e504 Mon Sep 17 00:00:00 2001 From: Savanni D'Gerinel Date: Wed, 27 Dec 2023 16:13:47 -0500 Subject: [PATCH] Bind the ID to the record instead of keeping them separate --- emseries/src/lib.rs | 2 +- emseries/src/series.rs | 93 ++++++++++++++++++++++++++++----------- emseries/src/types.rs | 36 +++++++++------ emseries/tests/test_io.rs | 83 +++++++++++++++++----------------- 4 files changed, 131 insertions(+), 83 deletions(-) diff --git a/emseries/src/lib.rs b/emseries/src/lib.rs index 3973f63..8ea482e 100644 --- a/emseries/src/lib.rs +++ b/emseries/src/lib.rs @@ -76,4 +76,4 @@ mod types; pub use criteria::*; pub use series::Series; -pub use types::{EmseriesReadError, EmseriesWriteError, RecordId, Recordable, Timestamp}; +pub use types::{EmseriesReadError, EmseriesWriteError, Record, RecordId, Recordable, Timestamp}; diff --git a/emseries/src/series.rs b/emseries/src/series.rs index 22367ae..ee964e7 100644 --- a/emseries/src/series.rs +++ b/emseries/src/series.rs @@ -18,6 +18,7 @@ use serde::de::DeserializeOwned; use serde::ser::Serialize; use std::cmp::Ordering; use std::collections::HashMap; +use std::convert::TryFrom; use std::fs::File; use std::fs::OpenOptions; use std::io::{BufRead, BufReader, LineWriter, Write}; @@ -26,6 +27,43 @@ use std::iter::Iterator; use criteria::Criteria; use types::{EmseriesReadError, EmseriesWriteError, Record, RecordId, Recordable}; +// A RecordOnDisk, a private data structure, is useful for handling all of the on-disk +// representations of a record. Unlike [Record], this one can accept an empty data value to +// represent that the data may have been deleted. This is not made public because, so far as the +// user is concerned, any record in the system must have data associated with it. +#[derive(Clone, Deserialize, Serialize)] +struct RecordOnDisk { + id: RecordId, + data: Option, +} + +/* +impl FromStr for RecordOnDisk +where + T: Clone + Recordable + DeserializeOwned + Serialize, +{ + type Err = EmseriesReadError; + + fn from_str(line: &str) -> Result { + serde_json::from_str(line).map_err(EmseriesReadError::JSONParseError) + } +} +*/ + +impl TryFrom> for Record { + type Error = EmseriesReadError; + + fn try_from(disk_record: RecordOnDisk) -> Result { + match disk_record.data { + Some(data) => Ok(Record { + id: disk_record.id, + data, + }), + None => Err(Self::Error::RecordDeleted(disk_record.id)), + } + } +} + /// An open time series database. /// /// Any given database can store only one data type, T. The data type must be determined when the @@ -33,7 +71,7 @@ use types::{EmseriesReadError, EmseriesWriteError, Record, RecordId, Recordable} pub struct Series { //path: String, writer: LineWriter, - records: HashMap, + records: HashMap>, } impl Series @@ -62,20 +100,18 @@ where } /// Load a file and return all of the records in it. - fn load_file(f: &File) -> Result, EmseriesReadError> { - let mut records: HashMap = HashMap::new(); + fn load_file(f: &File) -> Result>, EmseriesReadError> { + let mut records: HashMap> = HashMap::new(); let reader = BufReader::new(f); for line in reader.lines() { match line { Ok(line_) => { - /* Can't create a JSONParseError because I can't actually create the underlying error. - fail_point!("parse-line", Err(Error::JSONParseError())) - */ - match line_.parse::>() { - Ok(record) => match record.data { - Some(val) => records.insert(record.id.clone(), val), - None => records.remove(&record.id.clone()), - }, + match serde_json::from_str::>(line_.as_ref()) + .map_err(EmseriesReadError::JSONParseError) + .and_then(|record| Record::try_from(record)) + { + Ok(record) => records.insert(record.id.clone(), record.clone()), + Err(EmseriesReadError::RecordDeleted(id)) => records.remove(&id), Err(err) => return Err(err), }; } @@ -89,16 +125,21 @@ where /// returned. pub fn put(&mut self, entry: T) -> Result { let uuid = RecordId::default(); - self.update(uuid.clone(), entry).map(|_| uuid) + let record = Record { + id: uuid.clone(), + data: entry, + }; + self.update(record)?; + Ok(uuid) } /// Update an existing record. The [RecordId] of the record passed into this function must match /// the [RecordId] of a record already in the database. - pub fn update(&mut self, uuid: RecordId, entry: T) -> Result<(), EmseriesWriteError> { - self.records.insert(uuid.clone(), entry.clone()); - let write_res = match serde_json::to_string(&Record { - id: uuid, - data: Some(entry), + pub fn update(&mut self, record: Record) -> Result<(), EmseriesWriteError> { + self.records.insert(record.id.clone(), record.clone()); + let write_res = match serde_json::to_string(&RecordOnDisk { + id: record.id, + data: Some(record.data), }) { Ok(rec_str) => self .writer @@ -124,7 +165,7 @@ where }; self.records.remove(uuid); - let rec: Record = Record { + let rec: RecordOnDisk = RecordOnDisk { id: uuid.clone(), data: None, }; @@ -138,8 +179,8 @@ where } /// Get all of the records in the database. - pub fn records(&self) -> impl Iterator { - self.records.iter() + pub fn records(&self) -> impl Iterator> { + self.records.values() } /* The point of having Search is so that a lot of internal optimizations can happen once the @@ -148,24 +189,24 @@ where pub fn search<'s>( &'s self, criteria: impl Criteria + 's, - ) -> impl Iterator + 's { - self.records().filter(move |&tr| criteria.apply(tr.1)) + ) -> impl Iterator> + 's { + self.records().filter(move |&tr| criteria.apply(&tr.data)) } /// Perform a search and sort the resulting records based on the comparison. - pub fn search_sorted<'s, C, CMP>(&'s self, criteria: C, compare: CMP) -> Vec<(&RecordId, &T)> + pub fn search_sorted<'s, C, CMP>(&'s self, criteria: C, compare: CMP) -> Vec<&'s Record> where C: Criteria + 's, - CMP: FnMut(&(&RecordId, &T), &(&RecordId, &T)) -> Ordering, + CMP: FnMut(&&Record, &&Record) -> Ordering, { let search_iter = self.search(criteria); - let mut records: Vec<(&RecordId, &T)> = search_iter.collect(); + let mut records: Vec<&Record> = search_iter.collect(); records.sort_by(compare); records } /// Get an exact record from the database based on unique id. - pub fn get(&self, uuid: &RecordId) -> Option { + pub fn get(&self, uuid: &RecordId) -> Option> { self.records.get(uuid).cloned() } diff --git a/emseries/src/types.rs b/emseries/src/types.rs index 61f1b8a..80f215d 100644 --- a/emseries/src/types.rs +++ b/emseries/src/types.rs @@ -28,6 +28,9 @@ pub enum EmseriesReadError { #[error("Error parsing JSON: {0}")] JSONParseError(serde_json::error::Error), + #[error("Record was deleted")] + RecordDeleted(RecordId), + /// Indicates a general IO error #[error("IO Error: {0}")] IOError(io::Error), @@ -171,28 +174,31 @@ impl fmt::Display for RecordId { } } -/// Every record contains a unique ID and then the primary data, which itself must implementd the -/// Recordable trait. -#[derive(Clone, Deserialize, Serialize)] +/// A record represents data that actually exists in the database. Users cannot make the record +/// directly, as the database will create them. +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] pub struct Record { - pub id: RecordId, - pub data: Option, + pub(crate) id: RecordId, + pub data: T, } -impl str::FromStr for Record -where - T: Clone + Recordable + DeserializeOwned + Serialize, -{ - type Err = EmseriesReadError; +impl Record { + pub fn date(&self) -> NaiveDate { + match self.data.timestamp() { + Timestamp::DateTime(dt) => dt.date_naive(), + Timestamp::Date(dt) => dt, + } + } - fn from_str(line: &str) -> Result { - serde_json::from_str(line).map_err(EmseriesReadError::JSONParseError) + pub fn timestamp(&self) -> Timestamp { + self.data.timestamp() } } #[cfg(test)] mod test { extern crate dimensioned; + extern crate serde_json; use self::dimensioned::si::{Kilogram, KG}; @@ -239,6 +245,7 @@ mod test { ); } + /* #[ignore] fn v_alpha_serialization() { const WEIGHT_ENTRY: &str = "{\"data\":{\"weight\":77.79109},\"date\":\"2003-11-10\",\"id\":\"3330c5b0-783f-4919-b2c4-8169c38f65ff\"}"; @@ -252,12 +259,13 @@ mod test { ); assert_eq!( rec.data, - Some(WeightRecord { + WeightRecord { date: NaiveDate::from_ymd_opt(2003, 11, 10).unwrap(), weight: Weight(77.79109 * KG), - }) + } ); } + */ #[test] fn serialization_output() { diff --git a/emseries/tests/test_io.rs b/emseries/tests/test_io.rs index 2cf34c0..77142c4 100644 --- a/emseries/tests/test_io.rs +++ b/emseries/tests/test_io.rs @@ -143,9 +143,9 @@ mod test { .with_timezone(&FixedOffset::east_opt(0).unwrap()) ) ); - assert_eq!(tr.duration, Duration(11040.0 * S)); - assert_eq!(tr.comments, String::from("long time ago")); - assert_eq!(tr, trips[0]); + assert_eq!(tr.data.duration, Duration(11040.0 * S)); + assert_eq!(tr.data.comments, String::from("long time ago")); + assert_eq!(tr.data, trips[0]); } } }) @@ -162,7 +162,7 @@ mod test { ts.put(trip.clone()).expect("expect a successful put"); } - let v: Vec<(&RecordId, &BikeTrip)> = ts + let v: Vec<&Record> = ts .search(exact_time(Timestamp::DateTime( UTC.with_ymd_and_hms(2011, 10, 31, 0, 0, 0) .unwrap() @@ -170,7 +170,7 @@ mod test { ))) .collect(); assert_eq!(v.len(), 1); - assert_eq!(*v[0].1, trips[1]); + assert_eq!(v[0].data, trips[1]); }) } @@ -185,7 +185,7 @@ mod test { ts.put(trip.clone()).expect("expect a successful put"); } - let v: Vec<(&RecordId, &BikeTrip)> = ts.search_sorted( + let v: Vec<&Record> = ts.search_sorted( time_range( Timestamp::DateTime( UTC.with_ymd_and_hms(2011, 10, 31, 0, 0, 0) @@ -200,12 +200,12 @@ mod test { ), true, ), - |l, r| l.1.timestamp().cmp(&r.1.timestamp()), + |l, r| l.timestamp().cmp(&r.timestamp()), ); assert_eq!(v.len(), 3); - assert_eq!(*v[0].1, trips[1]); - assert_eq!(*v[1].1, trips[2]); - assert_eq!(*v[2].1, trips[3]); + assert_eq!(v[0].data, trips[1]); + assert_eq!(v[1].data, trips[2]); + assert_eq!(v[2].data, trips[3]); }) } @@ -226,7 +226,7 @@ mod test { { let ts: Series = Series::open(&path).expect("expect the time series to open correctly"); - let v: Vec<(&RecordId, &BikeTrip)> = ts.search_sorted( + let v: Vec<&Record> = ts.search_sorted( time_range( Timestamp::DateTime( UTC.with_ymd_and_hms(2011, 10, 31, 0, 0, 0) @@ -241,12 +241,12 @@ mod test { ), true, ), - |l, r| l.1.timestamp().cmp(&r.1.timestamp()), + |l, r| l.timestamp().cmp(&r.timestamp()), ); assert_eq!(v.len(), 3); - assert_eq!(*v[0].1, trips[1]); - assert_eq!(*v[1].1, trips[2]); - assert_eq!(*v[2].1, trips[3]); + assert_eq!(v[0].data, trips[1]); + assert_eq!(v[1].data, trips[2]); + assert_eq!(v[2].data, trips[3]); } }) } @@ -268,7 +268,7 @@ mod test { { let mut ts: Series = Series::open(&path).expect("expect the time series to open correctly"); - let v: Vec<(&RecordId, &BikeTrip)> = ts.search_sorted( + let v: Vec<&Record> = ts.search_sorted( time_range( Timestamp::DateTime( UTC.with_ymd_and_hms(2011, 10, 31, 0, 0, 0) @@ -284,11 +284,11 @@ mod test { .into(), true, ), - |l, r| l.1.timestamp().cmp(&r.1.timestamp()), + |l, r| l.timestamp().cmp(&r.timestamp()), ); assert_eq!(v.len(), 2); - assert_eq!(*v[0].1, trips[1]); - assert_eq!(*v[1].1, trips[2]); + assert_eq!(v[0].data, trips[1]); + assert_eq!(v[1].data, trips[2]); ts.put(trips[3].clone()).expect("expect a successful put"); ts.put(trips[4].clone()).expect("expect a successful put"); } @@ -296,7 +296,7 @@ mod test { { let ts: Series = Series::open(&path).expect("expect the time series to open correctly"); - let v: Vec<(&RecordId, &BikeTrip)> = ts.search_sorted( + let v: Vec<&Record> = ts.search_sorted( time_range( Timestamp::DateTime( UTC.with_ymd_and_hms(2011, 10, 31, 0, 0, 0) @@ -311,13 +311,13 @@ mod test { ), true, ), - |l, r| l.1.timestamp().cmp(&r.1.timestamp()), + |l, r| l.timestamp().cmp(&r.timestamp()), ); assert_eq!(v.len(), 4); - assert_eq!(*v[0].1, trips[1]); - assert_eq!(*v[1].1, trips[2]); - assert_eq!(*v[2].1, trips[3]); - assert_eq!(*v[3].1, trips[4]); + assert_eq!(v[0].data, trips[1]); + assert_eq!(v[1].data, trips[2]); + assert_eq!(v[2].data, trips[3]); + assert_eq!(v[3].data, trips[4]); } }) } @@ -337,9 +337,8 @@ mod test { match ts.get(&trip_id) { None => assert!(false, "record not found"), Some(mut trip) => { - trip.distance = Distance(50000.0 * M); - ts.update(trip_id.clone(), trip) - .expect("expect record to update"); + trip.data.distance = Distance(50000.0 * M); + ts.update(trip).expect("expect record to update"); } }; @@ -347,12 +346,12 @@ mod test { None => assert!(false, "record not found"), Some(trip) => { assert_eq!( - trip.datetime, + trip.data.datetime, UTC.with_ymd_and_hms(2011, 11, 02, 0, 0, 0).unwrap() ); - assert_eq!(trip.distance, Distance(50000.0 * M)); - assert_eq!(trip.duration, Duration(7020.0 * S)); - assert_eq!(trip.comments, String::from("Do Some Distance!")); + assert_eq!(trip.data.distance, Distance(50000.0 * M)); + assert_eq!(trip.data.duration, Duration(7020.0 * S)); + assert_eq!(trip.data.comments, String::from("Do Some Distance!")); } } }) @@ -374,8 +373,8 @@ mod test { match ts.get(&trip_id) { None => assert!(false, "record not found"), Some(mut trip) => { - trip.distance = Distance(50000.0 * M); - ts.update(trip_id, trip).expect("expect record to update"); + trip.data.distance = Distance(50000.0 * M); + ts.update(trip).expect("expect record to update"); } }; } @@ -384,10 +383,10 @@ mod test { let ts: Series = Series::open(&path).expect("expect the time series to open correctly"); - let trips: Vec<(&RecordId, &BikeTrip)> = ts.records().collect(); + let trips: Vec<&Record> = ts.records().collect(); assert_eq!(trips.len(), 3); - let trips: Vec<(&RecordId, &BikeTrip)> = ts + let trips: Vec<&Record> = ts .search(exact_time(Timestamp::DateTime( UTC.with_ymd_and_hms(2011, 11, 02, 0, 0, 0) .unwrap() @@ -396,14 +395,14 @@ mod test { .collect(); assert_eq!(trips.len(), 1); assert_eq!( - trips[0].1.datetime, + trips[0].data.datetime, UTC.with_ymd_and_hms(2011, 11, 02, 0, 0, 0) .unwrap() .with_timezone(&FixedOffset::east_opt(0).unwrap()) ); - assert_eq!(trips[0].1.distance, Distance(50000.0 * M)); - assert_eq!(trips[0].1.duration, Duration(7020.0 * S)); - assert_eq!(trips[0].1.comments, String::from("Do Some Distance!")); + assert_eq!(trips[0].data.distance, Distance(50000.0 * M)); + assert_eq!(trips[0].data.duration, Duration(7020.0 * S)); + assert_eq!(trips[0].data.comments, String::from("Do Some Distance!")); } }) } @@ -421,14 +420,14 @@ mod test { ts.put(trips[2].clone()).expect("expect a successful put"); ts.delete(&trip_id).expect("successful delete"); - let recs: Vec<(&RecordId, &BikeTrip)> = ts.records().collect(); + let recs: Vec<&Record> = ts.records().collect(); assert_eq!(recs.len(), 2); } { let ts: Series = Series::open(&path).expect("expect the time series to open correctly"); - let recs: Vec<(&RecordId, &BikeTrip)> = ts.records().collect(); + let recs: Vec<&Record> = ts.records().collect(); assert_eq!(recs.len(), 2); } })