commit 9deb17fa153c42d884db9bee4533007017c4b110
Author: Savanni D'Gerinel
Date:   Wed Apr 20 09:01:39 2022 -0400

    Re-extract emseries into its own project

diff --git a/emseries/Cargo.toml b/emseries/Cargo.toml
new file mode 100644
index 0000000..aa79cd6
--- /dev/null
+++ b/emseries/Cargo.toml
@@ -0,0 +1,30 @@
+[package]
+name = "emseries"
+version = "0.5.1"
+authors = ["Savanni D'Gerinel"]
+description = "an Embedded Time Series database"
+license = "BSD-3-Clause"
+documentation = "https://docs.rs/emseries"
+homepage = "https://github.com/luminescent-dreams/emseries"
+repository = "https://github.com/luminescent-dreams/emseries"
+categories = ["database-implementations"]
+
+include = [
+  "**/*.rs",
+  "Cargo.toml",
+  "fixtures/*",
+]
+
+[dependencies]
+chrono = { version = "0.4", features = ["serde"] }
+chrono-tz = { version = "0.4", features = ["serde"] }
+dimensioned = { version = "0.7", features = ["serde"] }
+serde = "1"
+serde_derive = "1"
+serde_json = "1.0"
+thiserror = "1.0"
+uuid = { version = "0.8", features = ["v4", "serde"] }
+yaml-rust = "0.4"
+
+[dev-dependencies]
+tempfile = "3.1"
diff --git a/emseries/crates-io.list b/emseries/crates-io.list
new file mode 100644
index 0000000..89c76ed
--- /dev/null
+++ b/emseries/crates-io.list
@@ -0,0 +1,39 @@
+aho-corasick-0.6.10
+bitflags-1.0.3
+cfg-if-0.1.4
+chrono-0.4.4
+chrono-tz-0.4.1
+dimensioned-0.7.0
+dtoa-0.4.3
+fuchsia-zircon-0.3.3
+fuchsia-zircon-sys-0.3.3
+generic-array-0.11.1
+itoa-0.4.2
+lazy_static-1.3.0
+libc-0.2.42
+linked-hash-map-0.5.1
+memchr-2.2.0
+num-integer-0.1.39
+num-traits-0.2.5
+parse-zoneinfo-0.1.1
+proc-macro2-0.4.17
+quote-0.6.8
+rand-0.4.2
+redox_syscall-0.1.40
+regex-0.2.11
+regex-syntax-0.5.6
+serde-1.0.70
+serde_derive-1.0.76
+serde_json-1.0.24
+syn-0.14.9
+thread_local-0.3.6
+time-0.1.40
+typenum-1.10.0
+ucd-util-0.1.3
+unicode-xid-0.1.0
+utf8-ranges-1.0.2
+uuid-0.6.5
+winapi-0.3.5
+winapi-i686-pc-windows-gnu-0.4.0
+winapi-x86_64-pc-windows-gnu-0.4.0
+yaml-rust-0.4.0
diff --git a/emseries/fixtures/weight.json b/emseries/fixtures/weight.json
new file mode 100644
index 0000000..8548963
--- /dev/null
+++ b/emseries/fixtures/weight.json
@@ -0,0 +1,2 @@
+{"data":{"weight":77.79109,"date":"2003-11-10T06:00:00.000000000000Z"},"id":"3330c5b0-783f-4919-b2c4-8169c38f65ff"}
+{"data":{"weight":77.56429,"date":"2003-11-11T06:00:00.000000000000Z"},"id":"54c10502-030e-43d2-9ca6-df2f9a5a5ddf"}
diff --git a/emseries/readme.md b/emseries/readme.md
new file mode 100644
index 0000000..919b506
--- /dev/null
+++ b/emseries/readme.md
@@ -0,0 +1,26 @@
+# EmSeries
+
+[![CircleCI](https://circleci.com/gh/luminescent-dreams/emseries.svg?style=svg)](https://circleci.com/gh/luminescent-dreams/emseries)
+
+EmSeries is an Embedded Time Series database. It is designed for small-scale applications which need to track time series data, but on a scale that does not justify extra database services. I use it for [Fitnesstrax](https://github.com/luminescent-dreams/fitnesstrax), which keeps track of my workout and biometric information, recorded only a few times a day.
+
+Documentation: [emseries - Rust](https://docs.rs/emseries/0.4.0/emseries/)
+
+## Features
+
+* Open a time series file directly in your application
+* Add, update, read, and delete records with an arbitrary JSON-friendly structure
+* Search for records by timestamp and optional tags
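+
+## Example
+
+A minimal sketch of how the pieces fit together (the `WeightRecord` type and the file name are illustrative, not part of the library):
+
+```rust
+#[macro_use]
+extern crate serde_derive;
+extern crate chrono;
+extern crate chrono_tz;
+extern crate emseries;
+
+use chrono::TimeZone;
+use chrono_tz::Etc::UTC;
+use emseries::{DateTimeTz, Recordable, Series};
+
+#[derive(Clone, Deserialize, Serialize)]
+struct WeightRecord {
+    date: DateTimeTz,
+    weight: f64,
+}
+
+impl Recordable for WeightRecord {
+    fn timestamp(&self) -> DateTimeTz {
+        self.date.clone()
+    }
+    fn tags(&self) -> Vec<String> {
+        Vec::new()
+    }
+}
+
+fn main() {
+    // Open (or create) a series file and append one record.
+    let mut series: Series<WeightRecord> =
+        Series::open("var/weight.json").expect("the series should open");
+    let id = series
+        .put(WeightRecord {
+            date: DateTimeTz(UTC.ymd(2022, 4, 20).and_hms(6, 0, 0)),
+            weight: 77.5,
+        })
+        .expect("the record should save");
+    println!("saved record {}", id);
+}
+```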
+
+## Future Plans
+
+* Indexing based on time and tags
+* Support databases larger than memory
+* Multi-process safety
+
+The actual extent of the features implemented will depend on how I and any others decide to use them.
+
+## Contributors
+
+* [m0n0chr0m3](https://github.com/m0n0chr0m3)
diff --git a/emseries/src/criteria.rs b/emseries/src/criteria.rs
new file mode 100644
index 0000000..b1c04cf
--- /dev/null
+++ b/emseries/src/criteria.rs
@@ -0,0 +1,119 @@
+use date_time_tz::DateTimeTz;
+use types::Recordable;
+
+/// This trait is used for constructing queries for searching the database.
+pub trait Criteria {
+    /// Apply this criteria element to a record, returning true only if the record matches the
+    /// criteria.
+    fn apply<T: Recordable>(&self, record: &T) -> bool;
+}
+
+
+/// Specify two criteria that must both be matched.
+pub struct And<A, B> {
+    pub lside: A,
+    pub rside: B,
+}
+
+
+impl<A, B> Criteria for And<A, B>
+where
+    A: Criteria,
+    B: Criteria,
+{
+    fn apply<T: Recordable>(&self, record: &T) -> bool {
+        self.lside.apply(record) && self.rside.apply(record)
+    }
+}
+
+
+/// Specify two criteria, either of which may be matched.
+pub struct Or<A, B> {
+    pub lside: A,
+    pub rside: B,
+}
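+
+// Or is declared above without a corresponding Criteria impl in this commit.
+// A minimal sketch of one, mirroring the And impl (an editorial addition, not
+// part of the original source):
+impl<A, B> Criteria for Or<A, B>
+where
+    A: Criteria,
+    B: Criteria,
+{
+    fn apply<T: Recordable>(&self, record: &T) -> bool {
+        self.lside.apply(record) || self.rside.apply(record)
+    }
+}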
+
+
+/// Specify the starting time for a search. This consists of a UTC timestamp and a specifier as to
+/// whether the exact time is included in the search criteria.
+pub struct StartTime {
+    pub time: DateTimeTz,
+    pub incl: bool,
+}
+
+
+impl Criteria for StartTime {
+    fn apply<T: Recordable>(&self, record: &T) -> bool {
+        if self.incl {
+            record.timestamp() >= self.time
+        } else {
+            record.timestamp() > self.time
+        }
+    }
+}
+
+
+/// Specify the ending time for a search. This consists of a UTC timestamp and a specifier as to
+/// whether the exact time is included in the search criteria.
+pub struct EndTime {
+    pub time: DateTimeTz,
+    pub incl: bool,
+}
+
+
+impl Criteria for EndTime {
+    fn apply<T: Recordable>(&self, record: &T) -> bool {
+        if self.incl {
+            record.timestamp() <= self.time
+        } else {
+            record.timestamp() < self.time
+        }
+    }
+}
+
+
+/// Specify a list of tags that must exist on the record.
+pub struct Tags {
+    pub tags: Vec<String>,
+}
+
+impl Criteria for Tags {
+    fn apply<T: Recordable>(&self, record: &T) -> bool {
+        let record_tags = record.tags();
+        self.tags.iter().all(|v| record_tags.contains(v))
+    }
+}
+
+
+/// Specify a criteria that searches for records matching an exact time.
+pub fn exact_time(time: DateTimeTz) -> And<StartTime, EndTime> {
+    And {
+        lside: StartTime {
+            time: time.clone(),
+            incl: true,
+        },
+        rside: EndTime { time, incl: true },
+    }
+}
+
+
+/// Specify a criteria that searches for all records within a time range.
+pub fn time_range(
+    start: DateTimeTz,
+    start_incl: bool,
+    end: DateTimeTz,
+    end_incl: bool,
+) -> And<StartTime, EndTime> {
+    And {
+        lside: StartTime {
+            time: start,
+            incl: start_incl,
+        },
+        rside: EndTime {
+            time: end,
+            incl: end_incl,
+        },
+    }
+}
diff --git a/emseries/src/date_time_tz.rs b/emseries/src/date_time_tz.rs
new file mode 100644
index 0000000..f9c7993
--- /dev/null
+++ b/emseries/src/date_time_tz.rs
@@ -0,0 +1,154 @@
+extern crate chrono;
+extern crate chrono_tz;
+
+use chrono::SecondsFormat;
+use chrono_tz::Etc::UTC;
+use serde::de::{self, Deserialize, Deserializer, Visitor};
+use serde::ser::{Serialize, Serializer};
+use std::fmt;
+
+/// This is a wrapper around date time objects, using timezones from the chrono-tz database and
+/// providing string representation and parsing of the form "<RFC3339 timestamp> <timezone name>",
+/// i.e., "2019-05-15T14:30:00Z US/Central". The to_string method and serde serialization will
+/// produce a string of this format. The parser will accept an RFC3339-only string of the forms
+/// "2019-05-15T14:30:00Z", "2019-05-15T14:30:00+00:00", and also an "RFC3339 Timezone Name"
+/// string.
+///
+/// The goal here is to generate time/date strings that are as close to unambiguous as possible
+/// (for earth's gravitational frame of reference). Clumping together the time, offset from UTC,
+/// and the named time zone allows future parsers to know the exact interpretation of the time in
+/// the frame of reference of the original recording.
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct DateTimeTz(pub chrono::DateTime<chrono_tz::Tz>);
+
+impl DateTimeTz {
+    pub fn map<F>(&self, f: F) -> DateTimeTz
+    where
+        F: FnOnce(chrono::DateTime<chrono_tz::Tz>) -> chrono::DateTime<chrono_tz::Tz>,
+    {
+        DateTimeTz(f(self.0))
+    }
+
+    pub fn to_string(&self) -> String {
+        if self.0.timezone() == UTC {
+            self.0.to_rfc3339_opts(SecondsFormat::Secs, true)
+        } else {
+            format!(
+                "{} {}",
+                self.0
+                    .with_timezone(&chrono_tz::Etc::UTC)
+                    .to_rfc3339_opts(SecondsFormat::Secs, true),
+                self.0.timezone().name()
+            )
+        }
+    }
+
+    pub fn from_str(s: &str) -> Result<DateTimeTz, chrono::ParseError> {
+        let v: Vec<&str> = s.split_terminator(" ").collect();
+        if v.len() == 2 {
+            let tz = v[1].parse::<chrono_tz::Tz>().unwrap();
+            chrono::DateTime::parse_from_rfc3339(v[0]).map(|ts| DateTimeTz(ts.with_timezone(&tz)))
+        } else {
+            chrono::DateTime::parse_from_rfc3339(v[0]).map(|ts| DateTimeTz(ts.with_timezone(&UTC)))
+        }
+    }
+}
+
+struct DateTimeTzVisitor;
+
+impl<'de> Visitor<'de> for DateTimeTzVisitor {
+    type Value = DateTimeTz;
+
+    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        formatter.write_str("a string date time representation that can be parsed")
+    }
+
+    fn visit_str<E: de::Error>(self, s: &str) -> Result<Self::Value, E> {
+        DateTimeTz::from_str(s).or(Err(E::custom(
+            "string is not a parsable datetime representation",
+        )))
+    }
+}
+
+impl Serialize for DateTimeTz {
+    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
+        serializer.serialize_str(&self.to_string())
+    }
+}
+
+impl<'de> Deserialize<'de> for DateTimeTz {
+    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
+        deserializer.deserialize_str(DateTimeTzVisitor)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    extern crate serde_json;
+
+    use chrono::TimeZone;
+    use chrono_tz::America::Phoenix;
+    use chrono_tz::Etc::UTC;
+    use chrono_tz::US::{Arizona, Central};
+    use date_time_tz::DateTimeTz;
+
+    #[test]
+    fn it_creates_timestamp_with_z() {
+        let t = DateTimeTz(UTC.ymd(2019, 5, 15).and_hms(12, 0, 0));
+        assert_eq!(t.to_string(), "2019-05-15T12:00:00Z");
+    }
+
+    #[test]
+    fn it_parses_utc_rfc3339_z() {
+        let t = DateTimeTz::from_str("2019-05-15T12:00:00Z").unwrap();
+        assert_eq!(t, DateTimeTz(UTC.ymd(2019, 5, 15).and_hms(12, 0, 0)));
+    }
+
+    #[test]
+    fn it_parses_rfc3339_with_offset() {
+        let t = DateTimeTz::from_str("2019-05-15T12:00:00-06:00").unwrap();
+        assert_eq!(t, DateTimeTz(UTC.ymd(2019, 5, 15).and_hms(18, 0, 0)));
+    }
+
+    #[test]
+    fn it_parses_rfc3339_with_tz() {
+        let t = DateTimeTz::from_str("2019-06-15T19:00:00Z US/Arizona").unwrap();
+        assert_eq!(t, DateTimeTz(UTC.ymd(2019, 6, 15).and_hms(19, 0, 0)));
+        assert_eq!(t, DateTimeTz(Arizona.ymd(2019, 6, 15).and_hms(12, 0, 0)));
+        assert_eq!(t, DateTimeTz(Central.ymd(2019, 6, 15).and_hms(14, 0, 0)));
+        assert_eq!(t.to_string(), "2019-06-15T19:00:00Z US/Arizona");
+    }
+
+    #[derive(Serialize)]
+    struct DemoStruct {
+        id: String,
+        dt: DateTimeTz,
+    }
+
+    // I used Arizona here specifically because large parts of Arizona do not honor DST, and so
+    // that adds in more ambiguity of the -0700 offset with Pacific time.
+    #[test]
+    fn it_json_serializes() {
+        let t = DateTimeTz::from_str("2019-06-15T19:00:00Z America/Phoenix").unwrap();
+        assert_eq!(
+            serde_json::to_string(&t).unwrap(),
+            "\"2019-06-15T19:00:00Z America/Phoenix\""
+        );
+
+        let demo = DemoStruct {
+            id: String::from("abcdefg"),
+            dt: t,
+        };
+        assert_eq!(
+            serde_json::to_string(&demo).unwrap(),
+            "{\"id\":\"abcdefg\",\"dt\":\"2019-06-15T19:00:00Z America/Phoenix\"}"
+        );
+    }
+
+    #[test]
+    fn it_json_parses() {
+        let t =
+            serde_json::from_str::<DateTimeTz>("\"2019-06-15T19:00:00Z America/Phoenix\"").unwrap();
+        assert_eq!(t, DateTimeTz(Phoenix.ymd(2019, 6, 15).and_hms(12, 0, 0)));
+    }
+}
diff --git a/emseries/src/interval.rs b/emseries/src/interval.rs
new file mode 100644
index 0000000..2a0c960
--- /dev/null
+++ b/emseries/src/interval.rs
@@ -0,0 +1,46 @@
+// NOTE: this module is a candidate for extraction into its own crate, or should be replaced with
+// an existing crate.
+
+/// Specify an interval across the data type T
+pub struct Interval<T> {
+    start: T,
+    start_incl: bool,
+    end: T,
+    end_incl: bool,
+}
+
+impl<T> Interval<T>
+where
+    T: Clone + Ord,
+{
+    /// Create a new interval from the start value to the end value, specifying inclusivity on
+    /// either end of the interval.
+    pub fn new(start: T, start_incl: bool, end: T, end_incl: bool) -> Interval<T> {
+        Interval {
+            start,
+            start_incl,
+            end,
+            end_incl,
+        }
+    }
+
+    /// Create an interval that matches *exactly* the specified value.
+    pub fn exact(val: T) -> Interval<T> {
+        Interval {
+            start: val.clone(),
+            start_incl: true,
+            end: val,
+            end_incl: true,
+        }
+    }
+
+    /// Test whether a value is included in the specified interval.
+    pub fn contains(&self, val: T) -> bool {
+        match (self.start_incl, self.end_incl) {
+            (true, true) => self.start <= val && val <= self.end,
+            (true, false) => self.start <= val && val < self.end,
+            (false, true) => self.start < val && val <= self.end,
+            (false, false) => self.start < val && val < self.end,
+        }
+    }
+}
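+
+#[cfg(test)]
+mod test {
+    // Illustrative tests added for this writeup (not part of the original
+    // commit); they sketch how the inclusivity flags drive `contains`.
+    use super::Interval;
+
+    #[test]
+    fn contains_respects_inclusivity() {
+        let half_open = Interval::new(1, true, 5, false);
+        assert!(half_open.contains(1));
+        assert!(half_open.contains(4));
+        assert!(!half_open.contains(5));
+    }
+
+    #[test]
+    fn exact_matches_only_the_value() {
+        let exact = Interval::exact(3);
+        assert!(exact.contains(3));
+        assert!(!exact.contains(2));
+    }
+}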
diff --git a/emseries/src/lib.rs b/emseries/src/lib.rs
new file mode 100644
index 0000000..4b503d2
--- /dev/null
+++ b/emseries/src/lib.rs
@@ -0,0 +1,69 @@
+/*! An Embedded Time Series Database
+
+This library provides a low-intensity time series database meant to be embedded inside of an
+application.
+
+From the signature of the series
+
+```text
+pub struct Series<T: Clone + Recordable + DeserializeOwned + Serialize> {
+```
+
+you can know that you must parameterize the series over the data type that you want to store,
+which must also have several traits implemented:
+
+```text
+#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
+struct BikeTrip {
+    datetime: DateTimeTz,
+    distance: Distance,
+    duration: Duration,
+    comments: String,
+}
+
+impl Recordable for BikeTrip {
+    fn timestamp(&self) -> DateTimeTz {
+        self.datetime.clone()
+    }
+    fn tags(&self) -> Vec<String> {
+        Vec::new()
+    }
+}
+```
+
+Recordable requires implementations for `timestamp` and `tags`, both of which can be used for
+searching for records, and both of which may be used for indexing in the future.
+
+The series can only store a single data type, but you can always store multiple data types by
+wrapping them into a single enum, as sketched below.
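+
+A sketch of that pattern (the enum and its variants are illustrative, not part of this crate):
+
+```text
+#[derive(Clone, Deserialize, Serialize)]
+enum HealthRecord {
+    Trip(BikeTrip),
+    Weight(WeightRecord),
+}
+
+impl Recordable for HealthRecord {
+    fn timestamp(&self) -> DateTimeTz {
+        match self {
+            HealthRecord::Trip(trip) => trip.timestamp(),
+            HealthRecord::Weight(weight) => weight.timestamp(),
+        }
+    }
+    fn tags(&self) -> Vec<String> {
+        match self {
+            HealthRecord::Trip(trip) => trip.tags(),
+            HealthRecord::Weight(weight) => weight.tags(),
+        }
+    }
+}
+```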
+
+Open the series:
+
+```text
+let mut ts: Series<BikeTrip> = Series::open("var/bike_trips.json")
+    .expect("expect the time series to open correctly");
+```
+
+The series file will be created if it does not already exist. If it does already exist, the existing data will be read into memory and made available.
+
+Note: all of the data is read into memory at once. For human-scale things, this probably takes up very little memory, but this software is not optimized for IoT-scale deployments. Additionally, this library assumes that only one process is writing to the file. Behavior with more than one process writing to the file is currently undefined.
+*/
+
+#[macro_use]
+extern crate serde_derive;
+extern crate chrono;
+extern crate chrono_tz;
+extern crate serde;
+extern crate serde_json;
+extern crate thiserror;
+extern crate uuid;
+
+mod criteria;
+mod date_time_tz;
+mod series;
+mod types;
+
+pub use criteria::*;
+pub use date_time_tz::DateTimeTz;
+pub use series::Series;
+pub use types::{EmseriesReadError, EmseriesWriteError, Recordable, UniqueId};
diff --git a/emseries/src/series.rs b/emseries/src/series.rs
new file mode 100644
index 0000000..e809860
--- /dev/null
+++ b/emseries/src/series.rs
@@ -0,0 +1,165 @@
+extern crate serde;
+extern crate serde_json;
+extern crate uuid;
+
+use serde::de::DeserializeOwned;
+use serde::ser::Serialize;
+use std::cmp::Ordering;
+use std::collections::HashMap;
+use std::fs::File;
+use std::fs::OpenOptions;
+use std::io::{BufRead, BufReader, LineWriter, Write};
+use std::iter::Iterator;
+
+use criteria::Criteria;
+use types::{EmseriesReadError, EmseriesWriteError, Record, Recordable, UniqueId};
+
+/// An open time series database.
+///
+/// Any given database can store only one data type, T. The data type must be determined when the
+/// database is opened.
+pub struct Series<T: Clone + Recordable + DeserializeOwned + Serialize> {
+    //path: String,
+    writer: LineWriter<File>,
+    records: HashMap<UniqueId, T>,
+}
+
+impl<T> Series<T>
+where
+    T: Clone + Recordable + DeserializeOwned + Serialize,
+{
+    /// Open a time series database at the specified path. `path` is the full path and filename
+    /// for the database.
+    pub fn open(path: &str) -> Result<Series<T>, EmseriesReadError> {
+        let f = OpenOptions::new()
+            .read(true)
+            .append(true)
+            .create(true)
+            .open(&path)
+            .map_err(EmseriesReadError::IOError)?;
+
+        let records = Series::load_file(&f)?;
+
+        let writer = LineWriter::new(f);
+
+        Ok(Series {
+            //path: String::from(path),
+            writer,
+            records,
+        })
+    }
+
+    /// Load a file and return all of the records in it.
+    fn load_file(f: &File) -> Result<HashMap<UniqueId, T>, EmseriesReadError> {
+        let mut records: HashMap<UniqueId, T> = HashMap::new();
+        let reader = BufReader::new(f);
+        for line in reader.lines() {
+            match line {
+                Ok(line_) => {
+                    /* Can't create a JSONParseError because I can't actually create the underlying error.
+                    fail_point!("parse-line", Err(Error::JSONParseError()))
+                    */
+                    match line_.parse::<Record<T>>() {
+                        Ok(record) => match record.data {
+                            Some(val) => records.insert(record.id.clone(), val),
+                            None => records.remove(&record.id),
+                        },
+                        Err(err) => return Err(err),
+                    };
+                }
+                Err(err) => return Err(EmseriesReadError::IOError(err)),
+            }
+        }
+        Ok(records)
+    }
+
+    /// Put a new record into the database. A unique id will be assigned to the record and
+    /// returned.
+    pub fn put(&mut self, entry: T) -> Result<UniqueId, EmseriesWriteError> {
+        let uuid = UniqueId::new();
+        self.update(uuid.clone(), entry).map(|_| uuid)
+    }
+
+    /// Update an existing record. The `UniqueId` of the record passed into this function must
+    /// match the `UniqueId` of a record already in the database.
+    pub fn update(&mut self, uuid: UniqueId, entry: T) -> Result<(), EmseriesWriteError> {
+        self.records.insert(uuid.clone(), entry.clone());
+        match serde_json::to_string(&Record {
+            id: uuid,
+            data: Some(entry),
+        }) {
+            Ok(rec_str) => self
+                .writer
+                .write_fmt(format_args!("{}\n", rec_str.as_str()))
+                .map_err(EmseriesWriteError::IOError),
+            Err(err) => Err(EmseriesWriteError::JSONWriteError(err)),
+        }
+    }
+
+    /// Delete a record from the database.
+    ///
+    /// Future note: while this deletes a record from the view, it only adds an entry to the
+    /// database that indicates `data: null`. If record histories ever become important, the
+    /// record and its entire history (including this delete) will still be available.
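+    ///
+    /// For illustration (an editorial note, not in the original docs): given this crate's
+    /// `Record` serialization, such a tombstone line would look like
+    /// `{"id":"3330c5b0-783f-4919-b2c4-8169c38f65ff","data":null}`.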
+    pub fn delete(&mut self, uuid: &UniqueId) -> Result<(), EmseriesWriteError> {
+        if !self.records.contains_key(uuid) {
+            return Ok(());
+        };
+        self.records.remove(uuid);
+
+        let rec: Record<T> = Record {
+            id: uuid.clone(),
+            data: None,
+        };
+        match serde_json::to_string(&rec) {
+            Ok(rec_str) => self
+                .writer
+                .write_fmt(format_args!("{}\n", rec_str.as_str()))
+                .map_err(EmseriesWriteError::IOError),
+            Err(err) => Err(EmseriesWriteError::JSONWriteError(err)),
+        }
+    }
+
+    /// Get all of the records in the database.
+    pub fn records<'s>(&'s self) -> impl Iterator<Item = (&'s UniqueId, &'s T)> + 's {
+        self.records.iter()
+    }
+
+    /* The point of having Search is so that a lot of internal optimizations can happen once the
+     * data sets start getting large. */
+    /// Perform a search on the records in a database, based on the given criteria.
+    pub fn search<'s>(
+        &'s self,
+        criteria: impl Criteria + 's,
+    ) -> impl Iterator<Item = (&'s UniqueId, &'s T)> + 's {
+        self.records().filter(move |&tr| criteria.apply(tr.1))
+    }
+
+    /// Perform a search and sort the resulting records based on the comparison.
+    pub fn search_sorted<'s, C, CMP>(&'s self, criteria: C, compare: CMP) -> Vec<(&UniqueId, &T)>
+    where
+        C: Criteria + 's,
+        CMP: FnMut(&(&UniqueId, &T), &(&UniqueId, &T)) -> Ordering,
+    {
+        let search_iter = self.search(criteria);
+        let mut records: Vec<(&UniqueId, &T)> = search_iter.collect();
+        records.sort_by(compare);
+        records
+    }
+
+    /// Get an exact record from the database based on its unique id.
+    pub fn get(&self, uuid: &UniqueId) -> Option<T> {
+        self.records.get(uuid).map(|v| v.clone())
+    }
+
+    /*
+    pub fn remove(&self, uuid: UniqueId) -> Result<(), EmseriesError> {
+        unimplemented!()
+    }
+    */
+}
diff --git a/emseries/src/types.rs b/emseries/src/types.rs
new file mode 100644
index 0000000..a89e88d
--- /dev/null
+++ b/emseries/src/types.rs
@@ -0,0 +1,166 @@
+use date_time_tz::DateTimeTz;
+use serde::de::DeserializeOwned;
+use serde::ser::Serialize;
+use std::{fmt, io, str};
+use thiserror::Error;
+use uuid::Uuid;
+
+#[derive(Debug, Error)]
+pub enum EmseriesReadError {
+    /// Indicates that the UUID specified is invalid and cannot be parsed
+    #[error("UUID failed to parse: {0}")]
+    UUIDParseError(uuid::Error),
+
+    /// Indicates an error in the JSON deserialization
+    #[error("Error parsing JSON: {0}")]
+    JSONParseError(serde_json::error::Error),
+
+    /// Indicates a general IO error
+    #[error("IO Error: {0}")]
+    IOError(io::Error),
+}
+
+#[derive(Debug, Error)]
+pub enum EmseriesWriteError {
+    /// Indicates a general IO error
+    #[error("IO Error: {0}")]
+    IOError(io::Error),
+
+    /// Indicates an error in the JSON serialization
+    #[error("Error generating a JSON string: {0}")]
+    JSONWriteError(serde_json::error::Error),
+}
+
+/// Any element to be put into the database needs to be Recordable. This is the common API that
+/// will aid in searching and later in indexing records.
+pub trait Recordable {
+    /// The timestamp for the record.
+    fn timestamp(&self) -> DateTimeTz;
+
+    /// A list of string tags that can be used for indexing. This list is defined per-type.
+    fn tags(&self) -> Vec<String>;
+}
+
+/// Uniquely identifies a record.
+///
+/// This is a wrapper around a basic uuid with some extra convenience methods.
+#[derive(Clone, Debug, Eq, Hash, PartialEq, Deserialize, Serialize)]
+pub struct UniqueId(Uuid);
+
+impl UniqueId {
+    /// Create a new V4 UUID (this is the most common type in use these days).
+    pub fn new() -> UniqueId {
+        let id = Uuid::new_v4();
+        UniqueId(id)
+    }
+}
+
+impl str::FromStr for UniqueId {
+    type Err = EmseriesReadError;
+
+    /// Parse a UniqueId from a string. Raises UUIDParseError if the parsing fails.
+    fn from_str(val: &str) -> Result<Self, Self::Err> {
+        Uuid::parse_str(val)
+            .map(UniqueId)
+            .map_err(EmseriesReadError::UUIDParseError)
+    }
+}
+
+impl fmt::Display for UniqueId {
+    /// Convert to a hyphenated string
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
+        write!(f, "{}", self.0.to_hyphenated())
+    }
+}
+
+/// Every record contains a unique ID and then the primary data, which itself must implement the
+/// Recordable trait.
+#[derive(Clone, Deserialize, Serialize)]
+pub struct Record<T: Clone + Recordable> {
+    pub id: UniqueId,
+    pub data: Option<T>,
+}
+
+impl<T> str::FromStr for Record<T>
+where
+    T: Clone + Recordable + DeserializeOwned + Serialize,
+{
+    type Err = EmseriesReadError;
+
+    fn from_str(line: &str) -> Result<Self, Self::Err> {
+        serde_json::from_str(&line).map_err(EmseriesReadError::JSONParseError)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    extern crate dimensioned;
+    extern crate serde_json;
+
+    use self::dimensioned::si::{Kilogram, KG};
+    use super::{Record, Recordable};
+    use chrono::TimeZone;
+    use chrono_tz::Etc::UTC;
+    use chrono_tz::US::Central;
+    use date_time_tz::DateTimeTz;
+
+    #[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
+    pub struct Weight(Kilogram<f64>);
+
+    #[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
+    pub struct WeightRecord {
+        pub date: DateTimeTz,
+        pub weight: Weight,
+    }
+
+    impl Recordable for WeightRecord {
+        fn timestamp(&self) -> DateTimeTz {
+            self.date.clone()
+        }
+
+        fn tags(&self) -> Vec<String> {
+            Vec::new()
+        }
+    }
+
+    const WEIGHT_ENTRY: &str = "{\"data\":{\"weight\":77.79109,\"date\":\"2003-11-10T06:00:00.000000000000Z\"},\"id\":\"3330c5b0-783f-4919-b2c4-8169c38f65ff\"}";
+
+    #[test]
+    pub fn legacy_deserialization() {
+        let rec: Record<WeightRecord> = WEIGHT_ENTRY
+            .parse()
+            .expect("should successfully parse the record");
+        assert_eq!(
+            rec.id,
+            "3330c5b0-783f-4919-b2c4-8169c38f65ff".parse().unwrap()
+        );
+        assert_eq!(
+            rec.data,
+            Some(WeightRecord {
+                date: DateTimeTz(UTC.ymd(2003, 11, 10).and_hms(6, 0, 0)),
+                weight: Weight(77.79109 * KG),
+            })
+        );
+    }
+
+    #[test]
+    pub fn serialization_output() {
+        let rec = WeightRecord {
+            date: DateTimeTz(UTC.ymd(2003, 11, 10).and_hms(6, 0, 0)),
+            weight: Weight(77.0 * KG),
+        };
+        assert_eq!(
+            serde_json::to_string(&rec).unwrap(),
+            "{\"date\":\"2003-11-10T06:00:00Z\",\"weight\":77.0}"
+        );
+
+        let rec2 = WeightRecord {
+            date: DateTimeTz(Central.ymd(2003, 11, 10).and_hms(0, 0, 0)),
+            weight: Weight(77.0 * KG),
+        };
+        assert_eq!(
+            serde_json::to_string(&rec2).unwrap(),
+            "{\"date\":\"2003-11-10T06:00:00Z US/Central\",\"weight\":77.0}"
+        );
+    }
+}
diff --git a/emseries/tests/test_io.rs b/emseries/tests/test_io.rs
new file mode 100644
index 0000000..34b0bce
--- /dev/null
+++ b/emseries/tests/test_io.rs
@@ -0,0 +1,401 @@
+#[macro_use]
+extern crate serde_derive;
+
+extern crate chrono;
+extern crate chrono_tz;
+extern crate dimensioned;
+extern crate emseries;
+extern crate tempfile;
+
+#[cfg(test)]
+mod test {
+    use chrono::prelude::*;
+    use chrono_tz::Etc::UTC;
+    use dimensioned::si::{Kilogram, Meter, Second, KG, M, S};
+
+    use emseries::*;
+
+    #[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
+    struct Distance(Meter<f64>);
+
+    #[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
+    struct Duration(Second<f64>);
+
+    #[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
+    struct BikeTrip {
+        datetime: DateTimeTz,
+        distance: Distance,
+        duration: Duration,
+        comments: String,
+    }
+
+    impl Recordable for BikeTrip {
+        fn timestamp(&self) -> DateTimeTz {
+            self.datetime.clone()
+        }
+        fn tags(&self) -> Vec<String> {
+            Vec::new()
+        }
+    }
+
+    fn mk_trips() -> [BikeTrip; 5] {
+        [
+            BikeTrip {
+                datetime: DateTimeTz(UTC.ymd(2011, 10, 29).and_hms(0, 0, 0)),
+                distance: Distance(58741.055 * M),
+                duration: Duration(11040.0 * S),
+                comments: String::from("long time ago"),
+            },
+            BikeTrip {
+                datetime: DateTimeTz(UTC.ymd(2011, 10, 31).and_hms(0, 0, 0)),
+                distance: Distance(17702.0 * M),
+                duration: Duration(2880.0 * S),
+                comments: String::from("day 2"),
+            },
+            BikeTrip {
+                datetime: DateTimeTz(UTC.ymd(2011, 11, 02).and_hms(0, 0, 0)),
+                distance: Distance(41842.945 * M),
+                duration: Duration(7020.0 * S),
+                comments: String::from("Do Some Distance!"),
+            },
+            BikeTrip {
+                datetime: DateTimeTz(UTC.ymd(2011, 11, 04).and_hms(0, 0, 0)),
+                distance: Distance(34600.895 * M),
+                duration: Duration(5580.0 * S),
+                comments: String::from("I did a lot of distance back then"),
+            },
+            BikeTrip {
+                datetime: DateTimeTz(UTC.ymd(2011, 11, 05).and_hms(0, 0, 0)),
+                distance: Distance(6437.376 * M),
+                duration: Duration(960.0 * S),
+                comments: String::from("day 5"),
+            },
+        ]
+    }
+
+    fn run_test<T>(test: T) -> ()
+    where
+        T: FnOnce(tempfile::TempPath),
+    {
+        let tmp_file = tempfile::NamedTempFile::new().expect("temporary path created");
+        let tmp_path = tmp_file.into_temp_path();
+        test(tmp_path);
+    }
+
+    fn run<T>(test: T) -> ()
+    where
+        T: FnOnce(Series<BikeTrip>),
+    {
+        let tmp_file = tempfile::NamedTempFile::new().expect("temporary path created");
+        let tmp_path = tmp_file.into_temp_path();
+        let ts: Series<BikeTrip> = Series::open(&tmp_path.to_string_lossy())
+            .expect("the time series should open correctly");
+        test(ts);
+    }
+
+    #[test]
+    pub fn can_add_and_retrieve_entries() {
+        run(|mut series| {
+            let trips = mk_trips();
+            let uuid = series
+                .put(trips[0].clone())
+                .expect("expect a successful put");
+            let record_res = series.get(&uuid);
+
+            for trip in &trips[1..=4] {
+                series.put(trip.clone()).expect("expect a successful put");
+            }
+
+            match record_res {
+                None => assert!(false, "There should have been a value here"),
+                Some(tr) => {
+                    assert_eq!(
+                        tr.timestamp(),
+                        DateTimeTz(UTC.ymd(2011, 10, 29).and_hms(0, 0, 0))
+                    );
+                    assert_eq!(tr.duration, Duration(11040.0 * S));
+                    assert_eq!(tr.comments, String::from("long time ago"));
+                    assert_eq!(tr, trips[0]);
+                }
+            }
+        })
+    }
+
+    #[test]
+    pub fn can_search_for_an_entry_with_exact_time() {
+        run_test(|path| {
+            let trips = mk_trips();
+            let mut ts: Series<BikeTrip> = Series::open(&path.to_string_lossy())
+                .expect("expect the time series to open correctly");
+
+            for trip in &trips[0..=4] {
+                ts.put(trip.clone()).expect("expect a successful put");
+            }
+
+            let v: Vec<(&UniqueId, &BikeTrip)> = ts
+                .search(exact_time(DateTimeTz(
+                    UTC.ymd(2011, 10, 31).and_hms(0, 0, 0),
+                )))
+                .collect();
+            assert_eq!(v.len(), 1);
+            assert_eq!(*v[0].1, trips[1]);
+        })
+    }
+
+    #[test]
+    pub fn can_get_entries_in_time_range() {
+        run_test(|path| {
+            let trips = mk_trips();
+            let mut ts: Series<BikeTrip> = Series::open(&path.to_string_lossy())
+                .expect("expect the time series to open correctly");
+
+            for trip in &trips[0..=4] {
+                ts.put(trip.clone()).expect("expect a successful put");
+            }
+
+            let v: Vec<(&UniqueId, &BikeTrip)> = ts.search_sorted(
+                time_range(
+                    DateTimeTz(UTC.ymd(2011, 10, 31).and_hms(0, 0, 0)),
+                    true,
+                    DateTimeTz(UTC.ymd(2011, 11, 04).and_hms(0, 0, 0)),
+                    true,
+                ),
+                |l, r| l.1.timestamp().cmp(&r.1.timestamp()),
+            );
+            assert_eq!(v.len(), 3);
+            assert_eq!(*v[0].1, trips[1]);
+            assert_eq!(*v[1].1, trips[2]);
+            assert_eq!(*v[2].1, trips[3]);
+        })
+    }
+
+    #[test]
+    pub fn persists_and_reads_an_entry() {
+        run_test(|path| {
+            let trips = mk_trips();
+
+            {
+                let mut ts: Series<BikeTrip> = Series::open(&path.to_string_lossy())
+                    .expect("expect the time series to open correctly");
+
+                for trip in &trips[0..=4] {
+                    ts.put(trip.clone()).expect("expect a successful put");
+                }
+            }
+
+            {
+                let ts: Series<BikeTrip> = Series::open(&path.to_string_lossy())
+                    .expect("expect the time series to open correctly");
+                let v: Vec<(&UniqueId, &BikeTrip)> = ts.search_sorted(
+                    time_range(
+                        DateTimeTz(UTC.ymd(2011, 10, 31).and_hms(0, 0, 0)),
+                        true,
+                        DateTimeTz(UTC.ymd(2011, 11, 04).and_hms(0, 0, 0)),
+                        true,
+                    ),
+                    |l, r| l.1.timestamp().cmp(&r.1.timestamp()),
+                );
+                assert_eq!(v.len(), 3);
+                assert_eq!(*v[0].1, trips[1]);
+                assert_eq!(*v[1].1, trips[2]);
+                assert_eq!(*v[2].1, trips[3]);
+            }
+        })
+    }
+
+    #[test]
+    pub fn can_write_to_existing_file() {
+        run_test(|path| {
+            let trips = mk_trips();
+
+            {
+                let mut ts: Series<BikeTrip> = Series::open(&path.to_string_lossy())
+                    .expect("expect the time series to open correctly");
+
+                for trip in &trips[0..=2] {
+                    ts.put(trip.clone()).expect("expect a successful put");
+                }
+            }
+
+            {
+                let mut ts: Series<BikeTrip> = Series::open(&path.to_string_lossy())
+                    .expect("expect the time series to open correctly");
+                let v: Vec<(&UniqueId, &BikeTrip)> = ts.search_sorted(
+                    time_range(
+                        DateTimeTz(UTC.ymd(2011, 10, 31).and_hms(0, 0, 0)),
+                        true,
+                        DateTimeTz(UTC.ymd(2011, 11, 04).and_hms(0, 0, 0)),
+                        true,
+                    ),
+                    |l, r| l.1.timestamp().cmp(&r.1.timestamp()),
+                );
+                assert_eq!(v.len(), 2);
+                assert_eq!(*v[0].1, trips[1]);
+                assert_eq!(*v[1].1, trips[2]);
+                ts.put(trips[3].clone()).expect("expect a successful put");
+                ts.put(trips[4].clone()).expect("expect a successful put");
+            }
+
+            {
+                let ts: Series<BikeTrip> = Series::open(&path.to_string_lossy())
+                    .expect("expect the time series to open correctly");
+                let v: Vec<(&UniqueId, &BikeTrip)> = ts.search_sorted(
+                    time_range(
+                        DateTimeTz(UTC.ymd(2011, 10, 31).and_hms(0, 0, 0)),
+                        true,
+                        DateTimeTz(UTC.ymd(2011, 11, 05).and_hms(0, 0, 0)),
+                        true,
+                    ),
+                    |l, r| l.1.timestamp().cmp(&r.1.timestamp()),
+                );
+                assert_eq!(v.len(), 4);
+                assert_eq!(*v[0].1, trips[1]);
+                assert_eq!(*v[1].1, trips[2]);
+                assert_eq!(*v[2].1, trips[3]);
+                assert_eq!(*v[3].1, trips[4]);
+            }
+        })
+    }
+
+    #[test]
+    pub fn can_overwrite_existing_entry() {
+        run_test(|path| {
+            let trips = mk_trips();
+
+            let mut ts: Series<BikeTrip> = Series::open(&path.to_string_lossy())
+                .expect("expect the time series to open correctly");
+
+            ts.put(trips[0].clone()).expect("expect a successful put");
+            ts.put(trips[1].clone()).expect("expect a successful put");
+            let trip_id = ts.put(trips[2].clone()).expect("expect a successful put");
+
+            match ts.get(&trip_id) {
+                None => assert!(false, "record not found"),
+                Some(mut trip) => {
+                    trip.distance = Distance(50000.0 * M);
+                    ts.update(trip_id.clone(), trip)
+                        .expect("expect record to update");
+                }
+            };
+
+            match ts.get(&trip_id) {
+                None => assert!(false, "record not found"),
+                Some(trip) => {
+                    assert_eq!(
+                        trip.datetime,
+                        DateTimeTz(UTC.ymd(2011, 11, 02).and_hms(0, 0, 0))
+                    );
+                    assert_eq!(trip.distance, Distance(50000.0 * M));
+                    assert_eq!(trip.duration, Duration(7020.0 * S));
+                    assert_eq!(trip.comments, String::from("Do Some Distance!"));
+                }
+            }
+        })
+    }
+
+    #[test]
+    pub fn record_overwrites_get_persisted() {
+        run_test(|path| {
+            let trips = mk_trips();
+
+            {
+                let mut ts: Series<BikeTrip> = Series::open(&path.to_string_lossy())
+                    .expect("expect the time series to open correctly");
+
+                ts.put(trips[0].clone()).expect("expect a successful put");
+                ts.put(trips[1].clone()).expect("expect a successful put");
+                let trip_id = ts.put(trips[2].clone()).expect("expect a successful put");
+
+                match ts.get(&trip_id) {
+                    None => assert!(false, "record not found"),
+                    Some(mut trip) => {
+                        trip.distance = Distance(50000.0 * M);
+                        ts.update(trip_id, trip).expect("expect record to update");
+                    }
+                };
+            }
+
+            {
+                let ts: Series<BikeTrip> = Series::open(&path.to_string_lossy())
+                    .expect("expect the time series to open correctly");
+
+                let trips: Vec<(&UniqueId, &BikeTrip)> = ts.records().collect();
+                assert_eq!(trips.len(), 3);
+
+                let trips: Vec<(&UniqueId, &BikeTrip)> = ts
+                    .search(exact_time(DateTimeTz(
+                        UTC.ymd(2011, 11, 02).and_hms(0, 0, 0),
+                    )))
+                    .collect();
+                assert_eq!(trips.len(), 1);
+                assert_eq!(
+                    trips[0].1.datetime,
+                    DateTimeTz(UTC.ymd(2011, 11, 02).and_hms(0, 0, 0))
+                );
+                assert_eq!(trips[0].1.distance, Distance(50000.0 * M));
+                assert_eq!(trips[0].1.duration, Duration(7020.0 * S));
+                assert_eq!(trips[0].1.comments, String::from("Do Some Distance!"));
+            }
+        })
+    }
+
+    #[test]
+    pub fn can_delete_an_entry() {
+        run_test(|path| {
+            let trips = mk_trips();
+
+            {
+                let mut ts: Series<BikeTrip> = Series::open(&path.to_string_lossy())
+                    .expect("expect the time series to open correctly");
+                let trip_id = ts.put(trips[0].clone()).expect("expect a successful put");
+                ts.put(trips[1].clone()).expect("expect a successful put");
+                ts.put(trips[2].clone()).expect("expect a successful put");
+
+                ts.delete(&trip_id).expect("successful delete");
+
+                let recs: Vec<(&UniqueId, &BikeTrip)> = ts.records().collect();
+                assert_eq!(recs.len(), 2);
+            }
+
+            {
+                let ts: Series<BikeTrip> = Series::open(&path.to_string_lossy())
+                    .expect("expect the time series to open correctly");
+                let recs: Vec<(&UniqueId, &BikeTrip)> = ts.records().collect();
+                assert_eq!(recs.len(), 2);
+            }
+        })
+    }
+
+    #[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
+    pub struct Weight(Kilogram<f64>);
+
+    #[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
+    pub struct WeightRecord {
+        pub date: DateTimeTz,
+        pub weight: Weight,
+    }
+
+    impl Recordable for WeightRecord {
+        fn timestamp(&self) -> DateTimeTz {
+            self.date.clone()
+        }
+
+        fn tags(&self) -> Vec<String> {
+            Vec::new()
+        }
+    }
+
+    #[test]
+    pub fn legacy_file_load() {
+        let ts: Series<WeightRecord> =
+            Series::open("fixtures/weight.json").expect("legacy series should open correctly");
+
+        let uid = "3330c5b0-783f-4919-b2c4-8169c38f65ff"
+            .parse()
+            .expect("something is wrong with this ID");
+        let rec = ts.get(&uid);
+        match rec {
+            None => assert!(false, "no record found"),
+            Some(rec) => assert_eq!(rec.weight, Weight(77.79109 * KG)),
+        }
+    }
+}