Bind a record to its unique id #130

Merged
savanni merged 2 commits from emseries/bound-records into main 2023-12-27 22:38:43 +00:00
4 changed files with 144 additions and 96 deletions

View File

@ -76,4 +76,4 @@ mod types;
pub use criteria::*;
pub use series::Series;
pub use types::{EmseriesReadError, EmseriesWriteError, Recordable, Timestamp, UniqueId};
pub use types::{EmseriesReadError, EmseriesWriteError, Record, RecordId, Recordable, Timestamp};

View File

@ -18,13 +18,51 @@ use serde::de::DeserializeOwned;
use serde::ser::Serialize;
use std::cmp::Ordering;
use std::collections::HashMap;
use std::convert::TryFrom;
use std::fs::File;
use std::fs::OpenOptions;
use std::io::{BufRead, BufReader, LineWriter, Write};
use std::iter::Iterator;
use criteria::Criteria;
use types::{EmseriesReadError, EmseriesWriteError, Record, Recordable, UniqueId};
use types::{EmseriesReadError, EmseriesWriteError, Record, RecordId, Recordable};
// A RecordOnDisk, a private data structure, is useful for handling all of the on-disk
// representations of a record. Unlike [Record], this one can accept an empty data value to
// represent that the data may have been deleted. This is not made public because, so far as the
// user is concerned, any record in the system must have data associated with it.
#[derive(Clone, Deserialize, Serialize)]
struct RecordOnDisk<T: Clone + Recordable> {
id: RecordId,
data: Option<T>,
}
/*
impl<T> FromStr for RecordOnDisk<T>
where
T: Clone + Recordable + DeserializeOwned + Serialize,
{
type Err = EmseriesReadError;
fn from_str(line: &str) -> Result<Self, Self::Err> {
serde_json::from_str(line).map_err(EmseriesReadError::JSONParseError)
}
}
*/
impl<T: Clone + Recordable> TryFrom<RecordOnDisk<T>> for Record<T> {
type Error = EmseriesReadError;
fn try_from(disk_record: RecordOnDisk<T>) -> Result<Self, Self::Error> {
match disk_record.data {
Some(data) => Ok(Record {
id: disk_record.id,
data,
}),
None => Err(Self::Error::RecordDeleted(disk_record.id)),
}
}
}
/// An open time series database.
///
@ -33,7 +71,7 @@ use types::{EmseriesReadError, EmseriesWriteError, Record, Recordable, UniqueId}
pub struct Series<T: Clone + Recordable + DeserializeOwned + Serialize> {
//path: String,
writer: LineWriter<File>,
records: HashMap<UniqueId, T>,
records: HashMap<RecordId, Record<T>>,
}
impl<T> Series<T>
@ -62,20 +100,18 @@ where
}
/// Load a file and return all of the records in it.
fn load_file(f: &File) -> Result<HashMap<UniqueId, T>, EmseriesReadError> {
let mut records: HashMap<UniqueId, T> = HashMap::new();
fn load_file(f: &File) -> Result<HashMap<RecordId, Record<T>>, EmseriesReadError> {
let mut records: HashMap<RecordId, Record<T>> = HashMap::new();
let reader = BufReader::new(f);
for line in reader.lines() {
match line {
Ok(line_) => {
/* Can't create a JSONParseError because I can't actually create the underlying error.
fail_point!("parse-line", Err(Error::JSONParseError()))
*/
match line_.parse::<Record<T>>() {
Ok(record) => match record.data {
Some(val) => records.insert(record.id.clone(), val),
None => records.remove(&record.id.clone()),
},
match serde_json::from_str::<RecordOnDisk<T>>(line_.as_ref())
.map_err(EmseriesReadError::JSONParseError)
.and_then(|record| Record::try_from(record))
{
Ok(record) => records.insert(record.id.clone(), record.clone()),
Err(EmseriesReadError::RecordDeleted(id)) => records.remove(&id),
Err(err) => return Err(err),
};
}
@ -87,18 +123,23 @@ where
/// Put a new record into the database. A unique id will be assigned to the record and
/// returned.
pub fn put(&mut self, entry: T) -> Result<UniqueId, EmseriesWriteError> {
let uuid = UniqueId::default();
self.update(uuid.clone(), entry).map(|_| uuid)
pub fn put(&mut self, entry: T) -> Result<RecordId, EmseriesWriteError> {
let uuid = RecordId::default();
let record = Record {
id: uuid.clone(),
data: entry,
};
self.update(record)?;
Ok(uuid)
}
/// Update an existing record. The `UniqueId` of the record passed into this function must match
/// the `UniqueId` of a record already in the database.
pub fn update(&mut self, uuid: UniqueId, entry: T) -> Result<(), EmseriesWriteError> {
self.records.insert(uuid.clone(), entry.clone());
let write_res = match serde_json::to_string(&Record {
id: uuid,
data: Some(entry),
/// Update an existing record. The [RecordId] of the record passed into this function must match
/// the [RecordId] of a record already in the database.
pub fn update(&mut self, record: Record<T>) -> Result<(), EmseriesWriteError> {
self.records.insert(record.id.clone(), record.clone());
let write_res = match serde_json::to_string(&RecordOnDisk {
id: record.id,
data: Some(record.data),
}) {
Ok(rec_str) => self
.writer
@ -118,13 +159,13 @@ where
/// Future note: while this deletes a record from the view, it only adds an entry to the
/// database that indicates `data: null`. If record histories ever become important, the record
/// and its entire history (including this delete) will still be available.
pub fn delete(&mut self, uuid: &UniqueId) -> Result<(), EmseriesWriteError> {
pub fn delete(&mut self, uuid: &RecordId) -> Result<(), EmseriesWriteError> {
if !self.records.contains_key(uuid) {
return Ok(());
};
self.records.remove(uuid);
let rec: Record<T> = Record {
let rec: RecordOnDisk<T> = RecordOnDisk {
id: uuid.clone(),
data: None,
};
@ -138,8 +179,8 @@ where
}
/// Get all of the records in the database.
pub fn records(&self) -> impl Iterator<Item = (&UniqueId, &T)> {
self.records.iter()
pub fn records(&self) -> impl Iterator<Item = &Record<T>> {
self.records.values()
}
/* The point of having Search is so that a lot of internal optimizations can happen once the
@ -148,29 +189,29 @@ where
pub fn search<'s>(
&'s self,
criteria: impl Criteria + 's,
) -> impl Iterator<Item = (&'s UniqueId, &'s T)> + 's {
self.records().filter(move |&tr| criteria.apply(tr.1))
) -> impl Iterator<Item = &'s Record<T>> + 's {
self.records().filter(move |&tr| criteria.apply(&tr.data))
}
/// Perform a search and sort the resulting records based on the comparison.
pub fn search_sorted<'s, C, CMP>(&'s self, criteria: C, compare: CMP) -> Vec<(&UniqueId, &T)>
pub fn search_sorted<'s, C, CMP>(&'s self, criteria: C, compare: CMP) -> Vec<&'s Record<T>>
where
C: Criteria + 's,
CMP: FnMut(&(&UniqueId, &T), &(&UniqueId, &T)) -> Ordering,
CMP: FnMut(&&Record<T>, &&Record<T>) -> Ordering,
{
let search_iter = self.search(criteria);
let mut records: Vec<(&UniqueId, &T)> = search_iter.collect();
let mut records: Vec<&Record<T>> = search_iter.collect();
records.sort_by(compare);
records
}
/// Get an exact record from the database based on unique id.
pub fn get(&self, uuid: &UniqueId) -> Option<T> {
pub fn get(&self, uuid: &RecordId) -> Option<Record<T>> {
self.records.get(uuid).cloned()
}
/*
pub fn remove(&self, uuid: UniqueId) -> Result<(), EmseriesError> {
pub fn remove(&self, uuid: RecordId) -> Result<(), EmseriesError> {
unimplemented!()
}
*/

View File

@ -28,6 +28,9 @@ pub enum EmseriesReadError {
#[error("Error parsing JSON: {0}")]
JSONParseError(serde_json::error::Error),
#[error("Record was deleted")]
RecordDeleted(RecordId),
/// Indicates a general IO error
#[error("IO Error: {0}")]
IOError(io::Error),
@ -145,54 +148,57 @@ pub trait Recordable {
///
/// This is a wrapper around a basic uuid with some extra convenience methods.
#[derive(Clone, Debug, Eq, Hash, PartialEq, Deserialize, Serialize)]
pub struct UniqueId(Uuid);
pub struct RecordId(Uuid);
impl Default for UniqueId {
impl Default for RecordId {
fn default() -> Self {
Self(Uuid::new_v4())
}
}
impl str::FromStr for UniqueId {
impl str::FromStr for RecordId {
type Err = EmseriesReadError;
/// Parse a UniqueId from a string. Raise UUIDParseError if the parsing fails.
/// Parse a RecordId from a string. Raise UUIDParseError if the parsing fails.
fn from_str(val: &str) -> Result<Self, Self::Err> {
Uuid::parse_str(val)
.map(UniqueId)
.map(RecordId)
.map_err(EmseriesReadError::UUIDParseError)
}
}
impl fmt::Display for UniqueId {
impl fmt::Display for RecordId {
/// Convert to a hyphenated string
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
write!(f, "{}", self.0.to_hyphenated())
}
}
/// Every record contains a unique ID and then the primary data, which itself must implementd the
/// Recordable trait.
#[derive(Clone, Deserialize, Serialize)]
/// A record represents data that actually exists in the database. Users cannot make the record
/// directly, as the database will create them.
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
pub struct Record<T: Clone + Recordable> {
pub id: UniqueId,
pub data: Option<T>,
pub(crate) id: RecordId,
pub data: T,
}
impl<T> str::FromStr for Record<T>
where
T: Clone + Recordable + DeserializeOwned + Serialize,
{
type Err = EmseriesReadError;
impl<T: Clone + Recordable> Record<T> {
pub fn date(&self) -> NaiveDate {
match self.data.timestamp() {
Timestamp::DateTime(dt) => dt.date_naive(),
Timestamp::Date(dt) => dt,
}
}
fn from_str(line: &str) -> Result<Self, Self::Err> {
serde_json::from_str(line).map_err(EmseriesReadError::JSONParseError)
pub fn timestamp(&self) -> Timestamp {
self.data.timestamp()
}
}
#[cfg(test)]
mod test {
extern crate dimensioned;
extern crate serde_json;
use self::dimensioned::si::{Kilogram, KG};
@ -239,6 +245,7 @@ mod test {
);
}
/*
#[ignore]
fn v_alpha_serialization() {
const WEIGHT_ENTRY: &str = "{\"data\":{\"weight\":77.79109},\"date\":\"2003-11-10\",\"id\":\"3330c5b0-783f-4919-b2c4-8169c38f65ff\"}";
@ -252,12 +259,13 @@ mod test {
);
assert_eq!(
rec.data,
Some(WeightRecord {
WeightRecord {
date: NaiveDate::from_ymd_opt(2003, 11, 10).unwrap(),
weight: Weight(77.79109 * KG),
})
}
);
}
*/
#[test]
fn serialization_output() {

View File

@ -143,9 +143,9 @@ mod test {
.with_timezone(&FixedOffset::east_opt(0).unwrap())
)
);
assert_eq!(tr.duration, Duration(11040.0 * S));
assert_eq!(tr.comments, String::from("long time ago"));
assert_eq!(tr, trips[0]);
assert_eq!(tr.data.duration, Duration(11040.0 * S));
assert_eq!(tr.data.comments, String::from("long time ago"));
assert_eq!(tr.data, trips[0]);
}
}
})
@ -162,7 +162,7 @@ mod test {
ts.put(trip.clone()).expect("expect a successful put");
}
let v: Vec<(&UniqueId, &BikeTrip)> = ts
let v: Vec<&Record<BikeTrip>> = ts
.search(exact_time(Timestamp::DateTime(
UTC.with_ymd_and_hms(2011, 10, 31, 0, 0, 0)
.unwrap()
@ -170,7 +170,7 @@ mod test {
)))
.collect();
assert_eq!(v.len(), 1);
assert_eq!(*v[0].1, trips[1]);
assert_eq!(v[0].data, trips[1]);
})
}
@ -185,7 +185,7 @@ mod test {
ts.put(trip.clone()).expect("expect a successful put");
}
let v: Vec<(&UniqueId, &BikeTrip)> = ts.search_sorted(
let v: Vec<&Record<BikeTrip>> = ts.search_sorted(
time_range(
Timestamp::DateTime(
UTC.with_ymd_and_hms(2011, 10, 31, 0, 0, 0)
@ -200,12 +200,12 @@ mod test {
),
true,
),
|l, r| l.1.timestamp().cmp(&r.1.timestamp()),
|l, r| l.timestamp().cmp(&r.timestamp()),
);
assert_eq!(v.len(), 3);
assert_eq!(*v[0].1, trips[1]);
assert_eq!(*v[1].1, trips[2]);
assert_eq!(*v[2].1, trips[3]);
assert_eq!(v[0].data, trips[1]);
assert_eq!(v[1].data, trips[2]);
assert_eq!(v[2].data, trips[3]);
})
}
@ -226,7 +226,7 @@ mod test {
{
let ts: Series<BikeTrip> =
Series::open(&path).expect("expect the time series to open correctly");
let v: Vec<(&UniqueId, &BikeTrip)> = ts.search_sorted(
let v: Vec<&Record<BikeTrip>> = ts.search_sorted(
time_range(
Timestamp::DateTime(
UTC.with_ymd_and_hms(2011, 10, 31, 0, 0, 0)
@ -241,12 +241,12 @@ mod test {
),
true,
),
|l, r| l.1.timestamp().cmp(&r.1.timestamp()),
|l, r| l.timestamp().cmp(&r.timestamp()),
);
assert_eq!(v.len(), 3);
assert_eq!(*v[0].1, trips[1]);
assert_eq!(*v[1].1, trips[2]);
assert_eq!(*v[2].1, trips[3]);
assert_eq!(v[0].data, trips[1]);
assert_eq!(v[1].data, trips[2]);
assert_eq!(v[2].data, trips[3]);
}
})
}
@ -268,7 +268,7 @@ mod test {
{
let mut ts: Series<BikeTrip> =
Series::open(&path).expect("expect the time series to open correctly");
let v: Vec<(&UniqueId, &BikeTrip)> = ts.search_sorted(
let v: Vec<&Record<BikeTrip>> = ts.search_sorted(
time_range(
Timestamp::DateTime(
UTC.with_ymd_and_hms(2011, 10, 31, 0, 0, 0)
@ -284,11 +284,11 @@ mod test {
.into(),
true,
),
|l, r| l.1.timestamp().cmp(&r.1.timestamp()),
|l, r| l.timestamp().cmp(&r.timestamp()),
);
assert_eq!(v.len(), 2);
assert_eq!(*v[0].1, trips[1]);
assert_eq!(*v[1].1, trips[2]);
assert_eq!(v[0].data, trips[1]);
assert_eq!(v[1].data, trips[2]);
ts.put(trips[3].clone()).expect("expect a successful put");
ts.put(trips[4].clone()).expect("expect a successful put");
}
@ -296,7 +296,7 @@ mod test {
{
let ts: Series<BikeTrip> =
Series::open(&path).expect("expect the time series to open correctly");
let v: Vec<(&UniqueId, &BikeTrip)> = ts.search_sorted(
let v: Vec<&Record<BikeTrip>> = ts.search_sorted(
time_range(
Timestamp::DateTime(
UTC.with_ymd_and_hms(2011, 10, 31, 0, 0, 0)
@ -311,13 +311,13 @@ mod test {
),
true,
),
|l, r| l.1.timestamp().cmp(&r.1.timestamp()),
|l, r| l.timestamp().cmp(&r.timestamp()),
);
assert_eq!(v.len(), 4);
assert_eq!(*v[0].1, trips[1]);
assert_eq!(*v[1].1, trips[2]);
assert_eq!(*v[2].1, trips[3]);
assert_eq!(*v[3].1, trips[4]);
assert_eq!(v[0].data, trips[1]);
assert_eq!(v[1].data, trips[2]);
assert_eq!(v[2].data, trips[3]);
assert_eq!(v[3].data, trips[4]);
}
})
}
@ -337,9 +337,8 @@ mod test {
match ts.get(&trip_id) {
None => assert!(false, "record not found"),
Some(mut trip) => {
trip.distance = Distance(50000.0 * M);
ts.update(trip_id.clone(), trip)
.expect("expect record to update");
trip.data.distance = Distance(50000.0 * M);
ts.update(trip).expect("expect record to update");
}
};
@ -347,12 +346,12 @@ mod test {
None => assert!(false, "record not found"),
Some(trip) => {
assert_eq!(
trip.datetime,
trip.data.datetime,
UTC.with_ymd_and_hms(2011, 11, 02, 0, 0, 0).unwrap()
);
assert_eq!(trip.distance, Distance(50000.0 * M));
assert_eq!(trip.duration, Duration(7020.0 * S));
assert_eq!(trip.comments, String::from("Do Some Distance!"));
assert_eq!(trip.data.distance, Distance(50000.0 * M));
assert_eq!(trip.data.duration, Duration(7020.0 * S));
assert_eq!(trip.data.comments, String::from("Do Some Distance!"));
}
}
})
@ -374,8 +373,8 @@ mod test {
match ts.get(&trip_id) {
None => assert!(false, "record not found"),
Some(mut trip) => {
trip.distance = Distance(50000.0 * M);
ts.update(trip_id, trip).expect("expect record to update");
trip.data.distance = Distance(50000.0 * M);
ts.update(trip).expect("expect record to update");
}
};
}
@ -384,10 +383,10 @@ mod test {
let ts: Series<BikeTrip> =
Series::open(&path).expect("expect the time series to open correctly");
let trips: Vec<(&UniqueId, &BikeTrip)> = ts.records().collect();
let trips: Vec<&Record<BikeTrip>> = ts.records().collect();
assert_eq!(trips.len(), 3);
let trips: Vec<(&UniqueId, &BikeTrip)> = ts
let trips: Vec<&Record<BikeTrip>> = ts
.search(exact_time(Timestamp::DateTime(
UTC.with_ymd_and_hms(2011, 11, 02, 0, 0, 0)
.unwrap()
@ -396,14 +395,14 @@ mod test {
.collect();
assert_eq!(trips.len(), 1);
assert_eq!(
trips[0].1.datetime,
trips[0].data.datetime,
UTC.with_ymd_and_hms(2011, 11, 02, 0, 0, 0)
.unwrap()
.with_timezone(&FixedOffset::east_opt(0).unwrap())
);
assert_eq!(trips[0].1.distance, Distance(50000.0 * M));
assert_eq!(trips[0].1.duration, Duration(7020.0 * S));
assert_eq!(trips[0].1.comments, String::from("Do Some Distance!"));
assert_eq!(trips[0].data.distance, Distance(50000.0 * M));
assert_eq!(trips[0].data.duration, Duration(7020.0 * S));
assert_eq!(trips[0].data.comments, String::from("Do Some Distance!"));
}
})
}
@ -421,14 +420,14 @@ mod test {
ts.put(trips[2].clone()).expect("expect a successful put");
ts.delete(&trip_id).expect("successful delete");
let recs: Vec<(&UniqueId, &BikeTrip)> = ts.records().collect();
let recs: Vec<&Record<BikeTrip>> = ts.records().collect();
assert_eq!(recs.len(), 2);
}
{
let ts: Series<BikeTrip> =
Series::open(&path).expect("expect the time series to open correctly");
let recs: Vec<(&UniqueId, &BikeTrip)> = ts.records().collect();
let recs: Vec<&Record<BikeTrip>> = ts.records().collect();
assert_eq!(recs.len(), 2);
}
})