use chrono::{Datelike, NaiveDate, Timelike}; use chrono_tz::Tz; use fixed_decimal::FixedDecimal; use fluent::{bundle::FluentBundle, FluentResource}; use icu::{datetime::options::length, decimal::FixedDecimalFormatter, locid::Locale}; use icu_provider::DataLocale; use std::{fs::File, io::Read, ops::Deref}; use sys_locale::get_locale; use thiserror::Error; use unic_langid::LanguageIdentifierError; // Re-exports. I'm doing these so that clients of this library don't have to go tracking down // additional structures pub use fixed_decimal::FloatPrecision; pub use fluent::{FluentArgs, FluentValue}; #[derive(Debug)] pub enum NonEmptyListError { BuildFromEmptyContainer, } pub struct NonEmptyList<A>(Vec<A>); impl<A> NonEmptyList<A> { pub fn new(elem: A) -> Self { Self(vec![elem]) } pub fn from_iter( iter: impl IntoIterator<Item = A>, ) -> Result<NonEmptyList<A>, NonEmptyListError> { let lst = iter.into_iter().collect::<Vec<A>>(); if lst.len() > 0 { Ok(NonEmptyList(lst)) } else { Err(NonEmptyListError::BuildFromEmptyContainer) } } pub fn push(&mut self, item: A) { self.0.push(item); } pub fn find(&self, f: impl Fn(&A) -> bool) -> Option<&A> { self.0.iter().find(|item| f(*item)) } fn first(&self) -> &A { &self.0[0] } fn iter<'a>(&'a self) -> impl Iterator<Item = &'a A> { self.0.iter() } } impl<A> Deref for NonEmptyList<A> { type Target = Vec<A>; fn deref(&self) -> &Self::Target { &self.0 } } #[derive(Debug, Error)] pub enum L10NError { #[error("Unparsable Locale")] UnparsableLocale, } impl From<icu::locid::Error> for L10NError { fn from(_: icu::locid::Error) -> L10NError { L10NError::UnparsableLocale } } #[derive(Debug, Error)] pub enum FileLoadError { #[error("Unparsable Locale")] UnparsableLocale, #[error("Source string file not found")] FileNotFound, #[error("The Fluent file is malformed")] FluentParseError(String), #[error("An unknown IO error was found")] IOError(std::io::Error), } impl From<LanguageIdentifierError> for FileLoadError { fn from(_: LanguageIdentifierError) -> Self { Self::UnparsableLocale } } impl From<std::io::Error> for FileLoadError { fn from(err: std::io::Error) -> Self { Self::IOError(err) } } // Potential Message structure. // // Let's assume the application has an enumeration that implements Message. For each element of the // enumeration, there should be some boilerplate code that returns the message ID and the arguments // as a FluentArgs. // // Nobody wants to generate all of that code, though I have done so in the past, and manually // generating that code could be useful for illustration. I think I'm going to want to do code // generation from the source strings file, and then compile the enumeration into the code. // However, I have not found a mechanism in Fluent to identify all of the placeholders within a // message, so I'm not even sure that I can automate this code generation. pub trait Message { fn msgid(&self) -> &str; fn args(&self) -> Option<FluentArgs>; } pub struct L10N { messages_root: std::path::PathBuf, message_bundles: Vec<FluentBundle<FluentResource, intl_memoizer::concurrent::IntlLangMemoizer>>, locales: NonEmptyList<Locale>, zone: chrono_tz::Tz, } impl L10N { pub fn new(messages_root: std::path::PathBuf) -> Self { let english = "en-US".parse::<Locale>().unwrap(); let sys_locale = get_locale() .and_then(|locale_str| locale_str.parse::<Locale>().ok()) .unwrap_or(english.clone()); let locales = NonEmptyList::new(sys_locale.clone()); let zone = chrono_tz::UTC; /* let mut source_message_path = messages_root.clone(); source_message_path.push("en-US.ftl"); let english_phrases = FluentResource::try_new */ let mut s = Self { messages_root, message_bundles: vec![], locales, zone, }; s.load_messages_from_file("en-US".to_owned()).unwrap(); s } fn load_messages_from_file(&mut self, locale: String) -> Result<(), FileLoadError> { let langid: unic_langid::LanguageIdentifier = locale.parse()?; let mut path = self.messages_root.clone(); path.push(locale); path.set_extension("ftl"); println!("{:?}", path); let mut buffer = Vec::new(); let mut f = File::open(path)?; f.read_to_end(&mut buffer)?; let text = String::from_utf8(buffer).unwrap(); match FluentResource::try_new(text) { Ok(resource) => { let mut bundle = FluentBundle::new_concurrent(vec![langid]); let _ = bundle.add_resource(resource); self.message_bundles.push(bundle); Ok(()) } Err((_, errors)) => Err(FileLoadError::FluentParseError( errors .into_iter() .map(|err| err.to_string()) .collect::<Vec<String>>() .join("\n"), )), } } // Now, whenever the user changes the locales, the list of messages has to change. How do we // automatically set up the messages? Theoretically they all need to be reloaded, and I've // already split how the messages get loaded from how the locales are specified. // // But, FluentErgo does that, too. It already has the concept of being constructed with a list // of languages and then having each language bundle manually loaded afterwards. // // Problem: be able to change the preferred list of locales and automatically have a new // FluentBundle which has all relevant translations loaded. // // One solution is that all bundles get loaded at startup time, and the bundle list gets // changed any time the list of locales gets changed. Also, the system can just run through the // entire list of fallbacks. pub fn set_locales(&mut self, locales: NonEmptyList<&str>) -> Result<(), L10NError> { let locales = locales .iter() .map(|locale| Locale::try_from_bytes(locale.as_bytes())) .collect::<Result<Vec<Locale>, icu::locid::Error>>()?; for locale in locales.iter() { self.load_messages_from_file(locale.to_string()).unwrap(); } self.locales = NonEmptyList(locales); Ok(()) } pub fn set_timezone(&mut self, zone: Tz) { self.zone = zone; } // Need to take a message and turn it into a string in the current language. Except I don't // know yet what form the message should take. Forming an adapter around fluent_ergonomics or // even around fluent itself. I would want for the message to be statically typed, but then I // don't know what can be the data type that gets passed in here. // // Formatting a message requires identifying the message and passing it any relevant // parameters. In an ideal world, neither of these can be incorrect. Messages are all checked // at compile time, as are their parameters. That implies an enumeration, with one element per // message, and with each element knowing its parameters. // pub fn messages(&self) -> Vec<FluentBundle<FluentResource>> { // self.message_bundles.clone() // } pub fn tr(&self, message: impl Message) -> String { for bundle in self.message_bundles.iter().rev() { let msg = bundle .get_message(message.msgid()) .and_then(|msg| msg.value()); match msg { Some(msg) => { let mut errors = vec![]; return self.message_bundles[0] .format_pattern(msg, message.args().as_ref(), &mut errors) .to_string(); } None => continue, } } unreachable!("The message {} is missing", message.msgid()); } pub fn format_date_time_utc( &self, time: DateTime, date_style: length::Date, time_style: length::Time, ) -> String { let time: DateTime = time.with_timezone(&chrono_tz::UTC).into(); let options = length::Bag::from_date_time_style(date_style, time_style); let formatter = icu::datetime::DateTimeFormatter::try_new( &DataLocale::from(self.locales.first()), options.into(), ) .unwrap(); let icu_time: icu::calendar::DateTime<icu::calendar::Gregorian> = time.into(); formatter.format_to_string(&icu_time.to_any()).unwrap() } pub fn format_date_time_local( &self, time: DateTime, date_style: length::Date, time_style: length::Time, ) -> String { let time: DateTime = time.with_timezone(&self.zone).into(); let options = length::Bag::from_date_time_style(date_style, time_style); let formatter = icu::datetime::DateTimeFormatter::try_new( &DataLocale::from(self.locales.first()), options.into(), ) .unwrap(); let icu_time: icu::calendar::DateTime<icu::calendar::Gregorian> = time.into(); formatter.format_to_string(&icu_time.to_any()).unwrap() } /* * I have been unable to get from a chrono_tz::Tz to an ICU timezone. I have tried a variety of * parsers on the CustomTimeZone object. I have not researched the data provider to see what is * available there. The ZoneID for the reference date is US/Mountain, and the abbreviation is * MST. I'll want to get to a CustomTimeZone so that the formatter can render MST or Mountain * Standard Time or something similar. fn format_date_time_tz( &self, time: DateTime, date_style: length::Date, time_style: length::Time, ) -> String { let options = length::Bag::from_date_time_style(date_style, time_style); let formatter = icu::datetime::ZonedDateTimeFormatter::try_new( &DataLocale::from(&self.locale), options.into(), Default::default(), ) .unwrap(); let icu_time: icu::calendar::DateTime<icu::calendar::Gregorian> = time.into(); let any = icu_time.to_any(); println!("{:?}", time.offset()); let zone_id: String = time.offset().abbreviation().to_owned(); println!("{:?}", zone_id); let zone_id = icu::timezone::TimeZoneBcp47Id::from_str(&zone_id).unwrap(); let zone: CustomTimeZone = CustomTimeZone { gmt_offset: None, time_zone_id: Some(zone_id), /* icu::timezone::TimeZoneBcp47Id::from_str(time.offset().tz_id().parse().unwrap()) .unwrap(), */ metazone_id: None, zone_variant: None, }; formatter.format_to_string(&any, &zone).unwrap() } */ pub fn format_date(&self, date: NaiveDate, date_style: length::Date) -> String { let formatter = icu::datetime::DateFormatter::try_new_with_length( &DataLocale::from(self.locales.first()), date_style, ) .unwrap(); let icu_date: icu::calendar::Date<icu::calendar::Gregorian> = icu::calendar::Date::try_new_gregorian_date( date.year(), date.month().try_into().unwrap(), date.day().try_into().unwrap(), ) .unwrap(); formatter.format_to_string(&icu_date.to_any()).unwrap() } pub fn format_f64(&self, value: f64, precision: FloatPrecision) -> String { let fdf = FixedDecimalFormatter::try_new( &self.locales.first().clone().into(), Default::default(), ) .expect("locale should be present"); let number = FixedDecimal::try_from_f64(value, precision).unwrap(); fdf.format_to_string(&FixedDecimal::try_from_f64(value, precision).unwrap()) } } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct DateTime(chrono::DateTime<Tz>); impl Deref for DateTime { type Target = chrono::DateTime<Tz>; fn deref(&self) -> &Self::Target { &self.0 } } impl From<chrono::DateTime<Tz>> for DateTime { fn from(time: chrono::DateTime<Tz>) -> Self { Self(time) } } impl From<DateTime> for icu::calendar::DateTime<icu::calendar::Gregorian> { fn from(time: DateTime) -> Self { // SAFETY: these unwraps should be safe since chrono dates are already valid Gregorian // dates icu::calendar::DateTime::try_new_gregorian_datetime( time.year(), time.month().try_into().unwrap(), time.day().try_into().unwrap(), time.hour().try_into().unwrap(), time.minute().try_into().unwrap(), time.second().try_into().unwrap(), ) .unwrap() } } #[cfg(test)] mod tests { use super::*; use fluent::fluent_args; fn ref_l10n() -> L10N { let mut l10n = L10N::new(std::path::PathBuf::from("./test_files")); // Make sure we know the locale before the test begins. Some systems, such as my own, are // not actually in English. l10n.set_locales(NonEmptyList::from_iter(vec!["en-US"]).unwrap()); l10n.set_timezone(chrono_tz::US::Eastern); l10n } fn ref_date() -> NaiveDate { NaiveDate::from_ymd_opt(2006, 1, 2).unwrap() } fn ref_time() -> DateTime { NaiveDate::from_ymd_opt(2006, 1, 2) .unwrap() .and_hms_opt(3, 4, 5) .unwrap() .and_local_timezone(Tz::US__Mountain) .unwrap() .into() } #[test] fn it_formats_a_time_in_utc() { let mut l10n = ref_l10n(); let now = ref_time(); // 202f is the code-point for a narrow non-breaking space. Presumably this is used in // particular to ensure that the am/pm marker doesn't get split off from the time assert_eq!( l10n.format_date_time_utc(now.clone(), length::Date::Long, length::Time::Medium), "January 2, 2006, 10:04:05\u{202f}AM" ); l10n.set_locales(NonEmptyList::from_iter(vec!["eo-EO", "en-US"]).unwrap()); assert_eq!( l10n.format_date_time_utc(now.clone(), length::Date::Long, length::Time::Medium), "2006-Januaro-02 10:04:05" ); } #[test] fn it_formats_a_time_in_the_current_zone() { let mut l10n = ref_l10n(); let now = ref_time(); // 202f is the code-point for a narrow non-breaking space. Presumably this is used in // particular to ensure that the am/pm marker doesn't get split off from the time assert_eq!( l10n.format_date_time_local(now.clone(), length::Date::Long, length::Time::Medium), "January 2, 2006, 5:04:05\u{202f}AM" ); l10n.set_locales(NonEmptyList::from_iter(vec!["eo-EO", "en-US"]).unwrap()); assert_eq!( l10n.format_date_time_local(now.clone(), length::Date::Long, length::Time::Medium), "2006-Januaro-02 05:04:05" ); } #[test] fn it_formats_dates() { let mut l10n = ref_l10n(); let today = ref_date(); assert_eq!( l10n.format_date(today.clone(), length::Date::Long), "January 2, 2006" ); l10n.set_locales(NonEmptyList::from_iter(vec!["eo-EO", "en-US"]).unwrap()); assert_eq!( l10n.format_date(today.clone(), length::Date::Long), "2006-Januaro-02" ); } #[test] fn it_formats_a_number_according_to_locale() { let mut l10n = ref_l10n(); assert_eq!(l10n.format_f64(100.4, FloatPrecision::Floating), "100.4",); assert_eq!( l10n.format_f64(15000.4, FloatPrecision::Floating), "15,000.4", ); l10n.set_locales(NonEmptyList::from_iter(vec!["de-DE", "en-US"]).unwrap()); assert_eq!(l10n.format_f64(100.4, FloatPrecision::Floating), "100,4",); assert_eq!( l10n.format_f64(15000.4, FloatPrecision::Floating), "15.000,4", ); } #[test] fn it_can_load_message_files() { let mut l10n = ref_l10n(); let messages = l10n.messages(); let args = fluent_args![ "name" => "Savanni" ]; assert_eq!( messages.tr("welcome", Some(&args)).unwrap(), "Hello, Savanni" ); let args = fluent_args![ "count" => 1 ]; assert_eq!( messages.tr("games-in-database", Some(&args)).unwrap(), "There is one game in the database" ); let args = fluent_args![ "count" => 2 ]; assert_eq!( messages.tr("games-in-database", Some(&args)).unwrap(), "There are 2 games in the database" ); } /* #[test] fn it_can_change_languages_on_locale_change() { } #[test] fn phrases_can_be_translated() { } #[test] fn phrases_can_fall_back() { } */ /* Not really a unit test, more of a test to see what I could introspect within a fluent * message. I was hoping that attributes would give me placeholder names, but that doesn't seem * to be the case. #[test] fn messages() { let langid_en = "en-US".parse().expect("Parsing failed."); let resource = FluentResource::try_new(MESSAGES.to_owned()).unwrap(); let mut bundle = FluentBundle::new(vec![langid_en]); bundle.add_resource(&resource).unwrap(); let msg = bundle.get_message("welcome").expect("message should exist"); for attr in msg.attributes() { println!("attr: {:?}", attr); } assert!(false); } */ }