Bind a record to its unique id #130

Merged
savanni merged 2 commits from emseries/bound-records into main 2023-12-27 22:38:43 +00:00
4 changed files with 131 additions and 83 deletions
Showing only changes of commit 149587f0bd - Show all commits

View File

@ -76,4 +76,4 @@ mod types;
pub use criteria::*;
pub use series::Series;
pub use types::{EmseriesReadError, EmseriesWriteError, RecordId, Recordable, Timestamp};
pub use types::{EmseriesReadError, EmseriesWriteError, Record, RecordId, Recordable, Timestamp};

View File

@ -18,6 +18,7 @@ use serde::de::DeserializeOwned;
use serde::ser::Serialize;
use std::cmp::Ordering;
use std::collections::HashMap;
use std::convert::TryFrom;
use std::fs::File;
use std::fs::OpenOptions;
use std::io::{BufRead, BufReader, LineWriter, Write};
@ -26,6 +27,43 @@ use std::iter::Iterator;
use criteria::Criteria;
use types::{EmseriesReadError, EmseriesWriteError, Record, RecordId, Recordable};
/// Private on-disk form of a record, used for everything that is read from or written to the
/// series file.
///
/// Unlike [Record], the `data` field is optional: an empty value represents a record whose data
/// may have been deleted. The type is deliberately not public because, so far as the user is
/// concerned, any record in the system must have data associated with it.
#[derive(Clone, Deserialize, Serialize)]
struct RecordOnDisk<T>
where
    T: Clone + Recordable,
{
    id: RecordId,
    data: Option<T>,
}
/*
impl<T> FromStr for RecordOnDisk<T>
where
T: Clone + Recordable + DeserializeOwned + Serialize,
{
type Err = EmseriesReadError;
fn from_str(line: &str) -> Result<Self, Self::Err> {
serde_json::from_str(line).map_err(EmseriesReadError::JSONParseError)
}
}
*/
impl<T: Clone + Recordable> TryFrom<RecordOnDisk<T>> for Record<T> {
type Error = EmseriesReadError;
fn try_from(disk_record: RecordOnDisk<T>) -> Result<Self, Self::Error> {
match disk_record.data {
Some(data) => Ok(Record {
id: disk_record.id,
data,
}),
None => Err(Self::Error::RecordDeleted(disk_record.id)),
}
}
}
/// An open time series database.
///
/// Any given database can store only one data type, T. The data type must be determined when the
@ -33,7 +71,7 @@ use types::{EmseriesReadError, EmseriesWriteError, Record, RecordId, Recordable}
pub struct Series<T: Clone + Recordable + DeserializeOwned + Serialize> {
//path: String,
writer: LineWriter<File>,
records: HashMap<RecordId, T>,
records: HashMap<RecordId, Record<T>>,
}
impl<T> Series<T>
@ -62,20 +100,18 @@ where
}
/// Load a file and return all of the records in it.
fn load_file(f: &File) -> Result<HashMap<RecordId, T>, EmseriesReadError> {
let mut records: HashMap<RecordId, T> = HashMap::new();
fn load_file(f: &File) -> Result<HashMap<RecordId, Record<T>>, EmseriesReadError> {
let mut records: HashMap<RecordId, Record<T>> = HashMap::new();
let reader = BufReader::new(f);
for line in reader.lines() {
match line {
Ok(line_) => {
/* Can't create a JSONParseError because I can't actually create the underlying error.
fail_point!("parse-line", Err(Error::JSONParseError()))
*/
match line_.parse::<Record<T>>() {
Ok(record) => match record.data {
Some(val) => records.insert(record.id.clone(), val),
None => records.remove(&record.id.clone()),
},
match serde_json::from_str::<RecordOnDisk<T>>(line_.as_ref())
.map_err(EmseriesReadError::JSONParseError)
.and_then(|record| Record::try_from(record))
{
Ok(record) => records.insert(record.id.clone(), record.clone()),
Err(EmseriesReadError::RecordDeleted(id)) => records.remove(&id),
Err(err) => return Err(err),
};
}
@ -89,16 +125,21 @@ where
/// returned.
pub fn put(&mut self, entry: T) -> Result<RecordId, EmseriesWriteError> {
let uuid = RecordId::default();
self.update(uuid.clone(), entry).map(|_| uuid)
let record = Record {
id: uuid.clone(),
data: entry,
};
self.update(record)?;
Ok(uuid)
}
/// Update an existing record. The [RecordId] of the record passed into this function must match
/// the [RecordId] of a record already in the database.
pub fn update(&mut self, uuid: RecordId, entry: T) -> Result<(), EmseriesWriteError> {
self.records.insert(uuid.clone(), entry.clone());
let write_res = match serde_json::to_string(&Record {
id: uuid,
data: Some(entry),
pub fn update(&mut self, record: Record<T>) -> Result<(), EmseriesWriteError> {
self.records.insert(record.id.clone(), record.clone());
let write_res = match serde_json::to_string(&RecordOnDisk {
id: record.id,
data: Some(record.data),
}) {
Ok(rec_str) => self
.writer
@ -124,7 +165,7 @@ where
};
self.records.remove(uuid);
let rec: Record<T> = Record {
let rec: RecordOnDisk<T> = RecordOnDisk {
id: uuid.clone(),
data: None,
};
@ -138,8 +179,8 @@ where
}
/// Get all of the records in the database.
pub fn records(&self) -> impl Iterator<Item = (&RecordId, &T)> {
self.records.iter()
pub fn records(&self) -> impl Iterator<Item = &Record<T>> {
self.records.values()
}
/* The point of having Search is so that a lot of internal optimizations can happen once the
@ -148,24 +189,24 @@ where
pub fn search<'s>(
&'s self,
criteria: impl Criteria + 's,
) -> impl Iterator<Item = (&'s RecordId, &'s T)> + 's {
self.records().filter(move |&tr| criteria.apply(tr.1))
) -> impl Iterator<Item = &'s Record<T>> + 's {
self.records().filter(move |&tr| criteria.apply(&tr.data))
}
/// Perform a search and sort the resulting records based on the comparison.
pub fn search_sorted<'s, C, CMP>(&'s self, criteria: C, compare: CMP) -> Vec<(&RecordId, &T)>
pub fn search_sorted<'s, C, CMP>(&'s self, criteria: C, compare: CMP) -> Vec<&'s Record<T>>
where
C: Criteria + 's,
CMP: FnMut(&(&RecordId, &T), &(&RecordId, &T)) -> Ordering,
CMP: FnMut(&&Record<T>, &&Record<T>) -> Ordering,
{
let search_iter = self.search(criteria);
let mut records: Vec<(&RecordId, &T)> = search_iter.collect();
let mut records: Vec<&Record<T>> = search_iter.collect();
records.sort_by(compare);
records
}
/// Get an exact record from the database based on unique id.
pub fn get(&self, uuid: &RecordId) -> Option<T> {
pub fn get(&self, uuid: &RecordId) -> Option<Record<T>> {
self.records.get(uuid).cloned()
}

View File

@ -28,6 +28,9 @@ pub enum EmseriesReadError {
#[error("Error parsing JSON: {0}")]
JSONParseError(serde_json::error::Error),
#[error("Record was deleted")]
RecordDeleted(RecordId),
/// Indicates a general IO error
#[error("IO Error: {0}")]
IOError(io::Error),
@ -171,28 +174,31 @@ impl fmt::Display for RecordId {
}
}
/// Every record contains a unique ID and then the primary data, which itself must implement the
/// Recordable trait.
#[derive(Clone, Deserialize, Serialize)]
/// A record represents data that actually exists in the database. Users cannot construct records
/// directly, as the database will create them.
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
pub struct Record<T: Clone + Recordable> {
pub id: RecordId,
pub data: Option<T>,
pub(crate) id: RecordId,
pub data: T,
}
impl<T> str::FromStr for Record<T>
where
T: Clone + Recordable + DeserializeOwned + Serialize,
{
type Err = EmseriesReadError;
impl<T: Clone + Recordable> Record<T> {
pub fn date(&self) -> NaiveDate {
match self.data.timestamp() {
Timestamp::DateTime(dt) => dt.date_naive(),
Timestamp::Date(dt) => dt,
}
}
fn from_str(line: &str) -> Result<Self, Self::Err> {
serde_json::from_str(line).map_err(EmseriesReadError::JSONParseError)
pub fn timestamp(&self) -> Timestamp {
self.data.timestamp()
}
}
#[cfg(test)]
mod test {
extern crate dimensioned;
extern crate serde_json;
use self::dimensioned::si::{Kilogram, KG};
@ -239,6 +245,7 @@ mod test {
);
}
/*
#[ignore]
fn v_alpha_serialization() {
const WEIGHT_ENTRY: &str = "{\"data\":{\"weight\":77.79109},\"date\":\"2003-11-10\",\"id\":\"3330c5b0-783f-4919-b2c4-8169c38f65ff\"}";
@ -252,12 +259,13 @@ mod test {
);
assert_eq!(
rec.data,
Some(WeightRecord {
WeightRecord {
date: NaiveDate::from_ymd_opt(2003, 11, 10).unwrap(),
weight: Weight(77.79109 * KG),
})
}
);
}
*/
#[test]
fn serialization_output() {

View File

@ -143,9 +143,9 @@ mod test {
.with_timezone(&FixedOffset::east_opt(0).unwrap())
)
);
assert_eq!(tr.duration, Duration(11040.0 * S));
assert_eq!(tr.comments, String::from("long time ago"));
assert_eq!(tr, trips[0]);
assert_eq!(tr.data.duration, Duration(11040.0 * S));
assert_eq!(tr.data.comments, String::from("long time ago"));
assert_eq!(tr.data, trips[0]);
}
}
})
@ -162,7 +162,7 @@ mod test {
ts.put(trip.clone()).expect("expect a successful put");
}
let v: Vec<(&RecordId, &BikeTrip)> = ts
let v: Vec<&Record<BikeTrip>> = ts
.search(exact_time(Timestamp::DateTime(
UTC.with_ymd_and_hms(2011, 10, 31, 0, 0, 0)
.unwrap()
@ -170,7 +170,7 @@ mod test {
)))
.collect();
assert_eq!(v.len(), 1);
assert_eq!(*v[0].1, trips[1]);
assert_eq!(v[0].data, trips[1]);
})
}
@ -185,7 +185,7 @@ mod test {
ts.put(trip.clone()).expect("expect a successful put");
}
let v: Vec<(&RecordId, &BikeTrip)> = ts.search_sorted(
let v: Vec<&Record<BikeTrip>> = ts.search_sorted(
time_range(
Timestamp::DateTime(
UTC.with_ymd_and_hms(2011, 10, 31, 0, 0, 0)
@ -200,12 +200,12 @@ mod test {
),
true,
),
|l, r| l.1.timestamp().cmp(&r.1.timestamp()),
|l, r| l.timestamp().cmp(&r.timestamp()),
);
assert_eq!(v.len(), 3);
assert_eq!(*v[0].1, trips[1]);
assert_eq!(*v[1].1, trips[2]);
assert_eq!(*v[2].1, trips[3]);
assert_eq!(v[0].data, trips[1]);
assert_eq!(v[1].data, trips[2]);
assert_eq!(v[2].data, trips[3]);
})
}
@ -226,7 +226,7 @@ mod test {
{
let ts: Series<BikeTrip> =
Series::open(&path).expect("expect the time series to open correctly");
let v: Vec<(&RecordId, &BikeTrip)> = ts.search_sorted(
let v: Vec<&Record<BikeTrip>> = ts.search_sorted(
time_range(
Timestamp::DateTime(
UTC.with_ymd_and_hms(2011, 10, 31, 0, 0, 0)
@ -241,12 +241,12 @@ mod test {
),
true,
),
|l, r| l.1.timestamp().cmp(&r.1.timestamp()),
|l, r| l.timestamp().cmp(&r.timestamp()),
);
assert_eq!(v.len(), 3);
assert_eq!(*v[0].1, trips[1]);
assert_eq!(*v[1].1, trips[2]);
assert_eq!(*v[2].1, trips[3]);
assert_eq!(v[0].data, trips[1]);
assert_eq!(v[1].data, trips[2]);
assert_eq!(v[2].data, trips[3]);
}
})
}
@ -268,7 +268,7 @@ mod test {
{
let mut ts: Series<BikeTrip> =
Series::open(&path).expect("expect the time series to open correctly");
let v: Vec<(&RecordId, &BikeTrip)> = ts.search_sorted(
let v: Vec<&Record<BikeTrip>> = ts.search_sorted(
time_range(
Timestamp::DateTime(
UTC.with_ymd_and_hms(2011, 10, 31, 0, 0, 0)
@ -284,11 +284,11 @@ mod test {
.into(),
true,
),
|l, r| l.1.timestamp().cmp(&r.1.timestamp()),
|l, r| l.timestamp().cmp(&r.timestamp()),
);
assert_eq!(v.len(), 2);
assert_eq!(*v[0].1, trips[1]);
assert_eq!(*v[1].1, trips[2]);
assert_eq!(v[0].data, trips[1]);
assert_eq!(v[1].data, trips[2]);
ts.put(trips[3].clone()).expect("expect a successful put");
ts.put(trips[4].clone()).expect("expect a successful put");
}
@ -296,7 +296,7 @@ mod test {
{
let ts: Series<BikeTrip> =
Series::open(&path).expect("expect the time series to open correctly");
let v: Vec<(&RecordId, &BikeTrip)> = ts.search_sorted(
let v: Vec<&Record<BikeTrip>> = ts.search_sorted(
time_range(
Timestamp::DateTime(
UTC.with_ymd_and_hms(2011, 10, 31, 0, 0, 0)
@ -311,13 +311,13 @@ mod test {
),
true,
),
|l, r| l.1.timestamp().cmp(&r.1.timestamp()),
|l, r| l.timestamp().cmp(&r.timestamp()),
);
assert_eq!(v.len(), 4);
assert_eq!(*v[0].1, trips[1]);
assert_eq!(*v[1].1, trips[2]);
assert_eq!(*v[2].1, trips[3]);
assert_eq!(*v[3].1, trips[4]);
assert_eq!(v[0].data, trips[1]);
assert_eq!(v[1].data, trips[2]);
assert_eq!(v[2].data, trips[3]);
assert_eq!(v[3].data, trips[4]);
}
})
}
@ -337,9 +337,8 @@ mod test {
match ts.get(&trip_id) {
None => assert!(false, "record not found"),
Some(mut trip) => {
trip.distance = Distance(50000.0 * M);
ts.update(trip_id.clone(), trip)
.expect("expect record to update");
trip.data.distance = Distance(50000.0 * M);
ts.update(trip).expect("expect record to update");
}
};
@ -347,12 +346,12 @@ mod test {
None => assert!(false, "record not found"),
Some(trip) => {
assert_eq!(
trip.datetime,
trip.data.datetime,
UTC.with_ymd_and_hms(2011, 11, 02, 0, 0, 0).unwrap()
);
assert_eq!(trip.distance, Distance(50000.0 * M));
assert_eq!(trip.duration, Duration(7020.0 * S));
assert_eq!(trip.comments, String::from("Do Some Distance!"));
assert_eq!(trip.data.distance, Distance(50000.0 * M));
assert_eq!(trip.data.duration, Duration(7020.0 * S));
assert_eq!(trip.data.comments, String::from("Do Some Distance!"));
}
}
})
@ -374,8 +373,8 @@ mod test {
match ts.get(&trip_id) {
None => assert!(false, "record not found"),
Some(mut trip) => {
trip.distance = Distance(50000.0 * M);
ts.update(trip_id, trip).expect("expect record to update");
trip.data.distance = Distance(50000.0 * M);
ts.update(trip).expect("expect record to update");
}
};
}
@ -384,10 +383,10 @@ mod test {
let ts: Series<BikeTrip> =
Series::open(&path).expect("expect the time series to open correctly");
let trips: Vec<(&RecordId, &BikeTrip)> = ts.records().collect();
let trips: Vec<&Record<BikeTrip>> = ts.records().collect();
assert_eq!(trips.len(), 3);
let trips: Vec<(&RecordId, &BikeTrip)> = ts
let trips: Vec<&Record<BikeTrip>> = ts
.search(exact_time(Timestamp::DateTime(
UTC.with_ymd_and_hms(2011, 11, 02, 0, 0, 0)
.unwrap()
@ -396,14 +395,14 @@ mod test {
.collect();
assert_eq!(trips.len(), 1);
assert_eq!(
trips[0].1.datetime,
trips[0].data.datetime,
UTC.with_ymd_and_hms(2011, 11, 02, 0, 0, 0)
.unwrap()
.with_timezone(&FixedOffset::east_opt(0).unwrap())
);
assert_eq!(trips[0].1.distance, Distance(50000.0 * M));
assert_eq!(trips[0].1.duration, Duration(7020.0 * S));
assert_eq!(trips[0].1.comments, String::from("Do Some Distance!"));
assert_eq!(trips[0].data.distance, Distance(50000.0 * M));
assert_eq!(trips[0].data.duration, Duration(7020.0 * S));
assert_eq!(trips[0].data.comments, String::from("Do Some Distance!"));
}
})
}
@ -421,14 +420,14 @@ mod test {
ts.put(trips[2].clone()).expect("expect a successful put");
ts.delete(&trip_id).expect("successful delete");
let recs: Vec<(&RecordId, &BikeTrip)> = ts.records().collect();
let recs: Vec<&Record<BikeTrip>> = ts.records().collect();
assert_eq!(recs.len(), 2);
}
{
let ts: Series<BikeTrip> =
Series::open(&path).expect("expect the time series to open correctly");
let recs: Vec<(&RecordId, &BikeTrip)> = ts.records().collect();
let recs: Vec<&Record<BikeTrip>> = ts.records().collect();
assert_eq!(recs.len(), 2);
}
})