548 lines
18 KiB
Rust
548 lines
18 KiB
Rust
use chrono::{Datelike, NaiveDate, Timelike};
|
|
use chrono_tz::Tz;
|
|
use fixed_decimal::FixedDecimal;
|
|
use fluent::{bundle::FluentBundle, FluentResource};
|
|
use icu::{datetime::options::length, decimal::FixedDecimalFormatter, locid::Locale};
|
|
use icu_provider::DataLocale;
|
|
use std::{fs::File, io::Read, ops::Deref};
|
|
use sys_locale::get_locale;
|
|
use thiserror::Error;
|
|
use unic_langid::LanguageIdentifierError;
|
|
|
|
// Re-exports. I'm doing these so that clients of this library don't have to go tracking down
|
|
// additional structures
|
|
pub use fixed_decimal::FloatPrecision;
|
|
pub use fluent::{FluentArgs, FluentValue};
|
|
|
|
/// Error returned when a [`NonEmptyList`] cannot be constructed.
#[derive(Debug)]
pub enum NonEmptyListError {
    /// The source iterator yielded no elements.
    BuildFromEmptyContainer,
}

impl std::fmt::Display for NonEmptyListError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::BuildFromEmptyContainer => {
                f.write_str("cannot build a non-empty list from an empty container")
            }
        }
    }
}

impl std::error::Error for NonEmptyListError {}

/// A list that always holds at least one element.
///
/// The invariant is established by the constructors and cannot be broken afterwards: the only
/// mutating operation is [`NonEmptyList::push`], and `Deref` exposes the inner `Vec` read-only.
#[derive(Debug)]
pub struct NonEmptyList<A>(Vec<A>);

impl<A> NonEmptyList<A> {
    /// Creates a list containing exactly `elem`.
    pub fn new(elem: A) -> Self {
        Self(vec![elem])
    }

    /// Collects `iter` into a list.
    ///
    /// # Errors
    ///
    /// Returns [`NonEmptyListError::BuildFromEmptyContainer`] when `iter` yields nothing.
    pub fn from_iter(
        iter: impl IntoIterator<Item = A>,
    ) -> Result<NonEmptyList<A>, NonEmptyListError> {
        let items: Vec<A> = iter.into_iter().collect();
        if items.is_empty() {
            Err(NonEmptyListError::BuildFromEmptyContainer)
        } else {
            Ok(Self(items))
        }
    }

    /// Appends `item` to the end of the list.
    pub fn push(&mut self, item: A) {
        self.0.push(item);
    }

    /// Returns the first element for which `f` returns `true`, if any.
    ///
    /// Accepts any `FnMut`, so stateful predicates work as well as plain `Fn` closures.
    pub fn find(&self, mut f: impl FnMut(&A) -> bool) -> Option<&A> {
        self.0.iter().find(|&item| f(item))
    }

    /// Returns the first element. Never panics because the list is never empty.
    fn first(&self) -> &A {
        &self.0[0]
    }

    /// Iterates over the elements in insertion order.
    fn iter(&self) -> impl Iterator<Item = &A> {
        self.0.iter()
    }
}

impl<A> Deref for NonEmptyList<A> {
    type Target = Vec<A>;

    /// Gives read-only access to the underlying vector.
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}
|
|
|
|
#[derive(Debug, Error)]
|
|
pub enum L10NError {
|
|
#[error("Unparsable Locale")]
|
|
UnparsableLocale,
|
|
}
|
|
|
|
impl From<icu::locid::Error> for L10NError {
|
|
fn from(_: icu::locid::Error) -> L10NError {
|
|
L10NError::UnparsableLocale
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Error)]
|
|
pub enum FileLoadError {
|
|
#[error("Unparsable Locale")]
|
|
UnparsableLocale,
|
|
|
|
#[error("Source string file not found")]
|
|
FileNotFound,
|
|
|
|
#[error("The Fluent file is malformed")]
|
|
FluentParseError(String),
|
|
|
|
#[error("An unknown IO error was found")]
|
|
IOError(std::io::Error),
|
|
}
|
|
|
|
impl From<LanguageIdentifierError> for FileLoadError {
|
|
fn from(_: LanguageIdentifierError) -> Self {
|
|
Self::UnparsableLocale
|
|
}
|
|
}
|
|
|
|
impl From<std::io::Error> for FileLoadError {
|
|
fn from(err: std::io::Error) -> Self {
|
|
Self::IOError(err)
|
|
}
|
|
}
|
|
|
|
// Potential Message structure.
|
|
//
|
|
// Let's assume the application has an enumeration that implements Message. For each element of the
|
|
// enumeration, there should be some boilerplate code that returns the message ID and the arguments
|
|
// as a FluentArgs.
|
|
//
|
|
// Nobody wants to generate all of that code, though I have done so in the past, and manually
|
|
// generating that code could be useful for illustration. I think I'm going to want to do code
|
|
// generation from the source strings file, and then compile the enumeration into the code.
|
|
// However, I have not found a mechanism in Fluent to identify all of the placeholders within a
|
|
// message, so I'm not even sure that I can automate this code generation.
|
|
pub trait Message {
|
|
fn msgid(&self) -> &str;
|
|
fn args(&self) -> Option<FluentArgs>;
|
|
}
|
|
|
|
pub struct L10N {
|
|
messages_root: std::path::PathBuf,
|
|
message_bundles: Vec<FluentBundle<FluentResource, intl_memoizer::concurrent::IntlLangMemoizer>>,
|
|
|
|
locales: NonEmptyList<Locale>,
|
|
zone: chrono_tz::Tz,
|
|
}
|
|
|
|
impl L10N {
|
|
pub fn new(messages_root: std::path::PathBuf) -> Self {
|
|
let english = "en-US".parse::<Locale>().unwrap();
|
|
let sys_locale = get_locale()
|
|
.and_then(|locale_str| locale_str.parse::<Locale>().ok())
|
|
.unwrap_or(english.clone());
|
|
let locales = NonEmptyList::new(sys_locale.clone());
|
|
let zone = chrono_tz::UTC;
|
|
|
|
/*
|
|
let mut source_message_path = messages_root.clone();
|
|
source_message_path.push("en-US.ftl");
|
|
let english_phrases = FluentResource::try_new
|
|
*/
|
|
|
|
let mut s = Self {
|
|
messages_root,
|
|
message_bundles: vec![],
|
|
locales,
|
|
zone,
|
|
};
|
|
|
|
s.load_messages_from_file("en-US".to_owned()).unwrap();
|
|
|
|
s
|
|
}
|
|
|
|
fn load_messages_from_file(&mut self, locale: String) -> Result<(), FileLoadError> {
|
|
let langid: unic_langid::LanguageIdentifier = locale.parse()?;
|
|
|
|
let mut path = self.messages_root.clone();
|
|
path.push(locale);
|
|
path.set_extension("ftl");
|
|
println!("{:?}", path);
|
|
|
|
let mut buffer = Vec::new();
|
|
let mut f = File::open(path)?;
|
|
f.read_to_end(&mut buffer)?;
|
|
let text = String::from_utf8(buffer).unwrap();
|
|
match FluentResource::try_new(text) {
|
|
Ok(resource) => {
|
|
let mut bundle = FluentBundle::new_concurrent(vec![langid]);
|
|
let _ = bundle.add_resource(resource);
|
|
self.message_bundles.push(bundle);
|
|
Ok(())
|
|
}
|
|
Err((_, errors)) => Err(FileLoadError::FluentParseError(
|
|
errors
|
|
.into_iter()
|
|
.map(|err| err.to_string())
|
|
.collect::<Vec<String>>()
|
|
.join("\n"),
|
|
)),
|
|
}
|
|
}
|
|
|
|
// Now, whenever the user changes the locales, the list of messages has to change. How do we
|
|
// automatically set up the messages? Theoretically they all need to be reloaded, and I've
|
|
// already split how the messages get loaded from how the locales are specified.
|
|
//
|
|
// But, FluentErgo does that, too. It already has the concept of being constructed with a list
|
|
// of languages and then having each language bundle manually loaded afterwards.
|
|
//
|
|
// Problem: be able to change the preferred list of locales and automatically have a new
|
|
// FluentBundle which has all relevant translations loaded.
|
|
//
|
|
// One solution is that all bundles get loaded at startup time, and the bundle list gets
|
|
// changed any time the list of locales gets changed. Also, the system can just run through the
|
|
// entire list of fallbacks.
|
|
pub fn set_locales(&mut self, locales: NonEmptyList<&str>) -> Result<(), L10NError> {
|
|
let locales = locales
|
|
.iter()
|
|
.map(|locale| Locale::try_from_bytes(locale.as_bytes()))
|
|
.collect::<Result<Vec<Locale>, icu::locid::Error>>()?;
|
|
|
|
for locale in locales.iter() {
|
|
self.load_messages_from_file(locale.to_string()).unwrap();
|
|
}
|
|
|
|
self.locales = NonEmptyList(locales);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Sets the time zone used by `format_date_time_local`.
pub fn set_timezone(&mut self, zone: Tz) {
    self.zone = zone
}
|
|
|
|
// Need to take a message and turn it into a string in the current language. Except I don't
|
|
// know yet what form the message should take. Forming an adapter around fluent_ergonomics or
|
|
// even around fluent itself. I would want for the message to be statically typed, but then I
|
|
// don't know what can be the data type that gets passed in here.
|
|
//
|
|
// Formatting a message requires identifying the message and passing it any relevant
|
|
// parameters. In an ideal world, neither of these can be incorrect. Messages are all checked
|
|
// at compile time, as are their parameters. That implies an enumeration, with one element per
|
|
// message, and with each element knowing its parameters.
|
|
// pub fn messages(&self) -> Vec<FluentBundle<FluentResource>> {
|
|
// self.message_bundles.clone()
|
|
// }
|
|
|
|
pub fn tr(&self, message: impl Message) -> String {
|
|
for bundle in self.message_bundles.iter().rev() {
|
|
let msg = bundle
|
|
.get_message(message.msgid())
|
|
.and_then(|msg| msg.value());
|
|
match msg {
|
|
Some(msg) => {
|
|
let mut errors = vec![];
|
|
return self.message_bundles[0]
|
|
.format_pattern(msg, message.args().as_ref(), &mut errors)
|
|
.to_string();
|
|
}
|
|
None => continue,
|
|
}
|
|
}
|
|
unreachable!("The message {} is missing", message.msgid());
|
|
}
|
|
|
|
pub fn format_date_time_utc(
|
|
&self,
|
|
time: DateTime,
|
|
date_style: length::Date,
|
|
time_style: length::Time,
|
|
) -> String {
|
|
let time: DateTime = time.with_timezone(&chrono_tz::UTC).into();
|
|
let options = length::Bag::from_date_time_style(date_style, time_style);
|
|
let formatter = icu::datetime::DateTimeFormatter::try_new(
|
|
&DataLocale::from(self.locales.first()),
|
|
options.into(),
|
|
)
|
|
.unwrap();
|
|
let icu_time: icu::calendar::DateTime<icu::calendar::Gregorian> = time.into();
|
|
formatter.format_to_string(&icu_time.to_any()).unwrap()
|
|
}
|
|
|
|
pub fn format_date_time_local(
|
|
&self,
|
|
time: DateTime,
|
|
date_style: length::Date,
|
|
time_style: length::Time,
|
|
) -> String {
|
|
let time: DateTime = time.with_timezone(&self.zone).into();
|
|
let options = length::Bag::from_date_time_style(date_style, time_style);
|
|
let formatter = icu::datetime::DateTimeFormatter::try_new(
|
|
&DataLocale::from(self.locales.first()),
|
|
options.into(),
|
|
)
|
|
.unwrap();
|
|
let icu_time: icu::calendar::DateTime<icu::calendar::Gregorian> = time.into();
|
|
formatter.format_to_string(&icu_time.to_any()).unwrap()
|
|
}
|
|
|
|
/*
|
|
* I have been unable to get from a chrono_tz::Tz to an ICU timezone. I have tried a variety of
|
|
* parsers on the CustomTimeZone object. I have not researched the data provider to see what is
|
|
* available there. The ZoneID for the reference date is US/Mountain, and the abbreviation is
|
|
* MST. I'll want to get to a CustomTimeZone so that the formatter can render MST or Mountain
|
|
* Standard Time or something similar.
|
|
fn format_date_time_tz(
|
|
&self,
|
|
time: DateTime,
|
|
date_style: length::Date,
|
|
time_style: length::Time,
|
|
) -> String {
|
|
let options = length::Bag::from_date_time_style(date_style, time_style);
|
|
let formatter = icu::datetime::ZonedDateTimeFormatter::try_new(
|
|
&DataLocale::from(&self.locale),
|
|
options.into(),
|
|
Default::default(),
|
|
)
|
|
.unwrap();
|
|
let icu_time: icu::calendar::DateTime<icu::calendar::Gregorian> = time.into();
|
|
let any = icu_time.to_any();
|
|
|
|
println!("{:?}", time.offset());
|
|
|
|
let zone_id: String = time.offset().abbreviation().to_owned();
|
|
println!("{:?}", zone_id);
|
|
let zone_id = icu::timezone::TimeZoneBcp47Id::from_str(&zone_id).unwrap();
|
|
|
|
let zone: CustomTimeZone = CustomTimeZone {
|
|
gmt_offset: None,
|
|
time_zone_id: Some(zone_id),
|
|
/*
|
|
icu::timezone::TimeZoneBcp47Id::from_str(time.offset().tz_id().parse().unwrap())
|
|
.unwrap(),
|
|
*/
|
|
metazone_id: None,
|
|
zone_variant: None,
|
|
};
|
|
|
|
formatter.format_to_string(&any, &zone).unwrap()
|
|
}
|
|
*/
|
|
|
|
pub fn format_date(&self, date: NaiveDate, date_style: length::Date) -> String {
|
|
let formatter = icu::datetime::DateFormatter::try_new_with_length(
|
|
&DataLocale::from(self.locales.first()),
|
|
date_style,
|
|
)
|
|
.unwrap();
|
|
let icu_date: icu::calendar::Date<icu::calendar::Gregorian> =
|
|
icu::calendar::Date::try_new_gregorian_date(
|
|
date.year(),
|
|
date.month().try_into().unwrap(),
|
|
date.day().try_into().unwrap(),
|
|
)
|
|
.unwrap();
|
|
formatter.format_to_string(&icu_date.to_any()).unwrap()
|
|
}
|
|
|
|
pub fn format_f64(&self, value: f64, precision: FloatPrecision) -> String {
|
|
let fdf = FixedDecimalFormatter::try_new(
|
|
&self.locales.first().clone().into(),
|
|
Default::default(),
|
|
)
|
|
.expect("locale should be present");
|
|
|
|
let number = FixedDecimal::try_from_f64(value, precision).unwrap();
|
|
|
|
fdf.format_to_string(&FixedDecimal::try_from_f64(value, precision).unwrap())
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
|
pub struct DateTime(chrono::DateTime<Tz>);
|
|
|
|
impl Deref for DateTime {
|
|
type Target = chrono::DateTime<Tz>;
|
|
|
|
fn deref(&self) -> &Self::Target {
|
|
&self.0
|
|
}
|
|
}
|
|
|
|
impl From<chrono::DateTime<Tz>> for DateTime {
|
|
fn from(time: chrono::DateTime<Tz>) -> Self {
|
|
Self(time)
|
|
}
|
|
}
|
|
|
|
/// Converts the wall-clock fields of a [`DateTime`] into an ICU Gregorian date-time. Only year,
/// month, day, hour, minute and second are carried over.
impl From<DateTime> for icu::calendar::DateTime<icu::calendar::Gregorian> {
    fn from(time: DateTime) -> Self {
        // The unwraps are expected to hold because chrono dates are already valid Gregorian
        // dates, so every field fits the range ICU expects.
        let month = u8::try_from(time.month()).unwrap();
        let day = u8::try_from(time.day()).unwrap();
        let hour = u8::try_from(time.hour()).unwrap();
        let minute = u8::try_from(time.minute()).unwrap();
        let second = u8::try_from(time.second()).unwrap();

        icu::calendar::DateTime::try_new_gregorian_datetime(
            time.year(),
            month,
            day,
            hour,
            minute,
            second,
        )
        .unwrap()
    }
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use fluent::fluent_args;

    /// Messages used by the translation test. The ids and argument names are the ones the test
    /// has always looked up in `test_files/en-US.ftl`.
    enum TestMessage {
        Welcome { name: &'static str },
        GamesInDatabase { count: i64 },
    }

    impl Message for TestMessage {
        fn msgid(&self) -> &str {
            match self {
                Self::Welcome { .. } => "welcome",
                Self::GamesInDatabase { .. } => "games-in-database",
            }
        }

        fn args(&self) -> Option<FluentArgs> {
            Some(match self {
                Self::Welcome { name } => fluent_args!["name" => *name],
                Self::GamesInDatabase { count } => fluent_args!["count" => *count],
            })
        }
    }

    /// Builds a context pinned to `en-US` and US Eastern time so results do not depend on the
    /// machine running the tests.
    fn ref_l10n() -> L10N {
        let mut l10n = L10N::new(std::path::PathBuf::from("./test_files"));
        // Make sure we know the locale before the test begins. Some systems are not actually
        // in English.
        l10n.set_locales(NonEmptyList::from_iter(vec!["en-US"]).unwrap())
            .unwrap();
        l10n.set_timezone(chrono_tz::US::Eastern);
        l10n
    }

    fn ref_date() -> NaiveDate {
        NaiveDate::from_ymd_opt(2006, 1, 2).unwrap()
    }

    fn ref_time() -> DateTime {
        NaiveDate::from_ymd_opt(2006, 1, 2)
            .unwrap()
            .and_hms_opt(3, 4, 5)
            .unwrap()
            .and_local_timezone(Tz::US__Mountain)
            .unwrap()
            .into()
    }

    #[test]
    fn it_formats_a_time_in_utc() {
        let mut l10n = ref_l10n();
        let now = ref_time();

        // 202f is the code-point for a narrow non-breaking space. Presumably this is used in
        // particular to ensure that the am/pm marker doesn't get split off from the time
        assert_eq!(
            l10n.format_date_time_utc(now, length::Date::Long, length::Time::Medium),
            "January 2, 2006, 10:04:05\u{202f}AM"
        );

        l10n.set_locales(NonEmptyList::from_iter(vec!["eo-EO", "en-US"]).unwrap())
            .unwrap();
        assert_eq!(
            l10n.format_date_time_utc(now, length::Date::Long, length::Time::Medium),
            "2006-Januaro-02 10:04:05"
        );
    }

    #[test]
    fn it_formats_a_time_in_the_current_zone() {
        let mut l10n = ref_l10n();
        let now = ref_time();

        // 202f is the code-point for a narrow non-breaking space. Presumably this is used in
        // particular to ensure that the am/pm marker doesn't get split off from the time
        assert_eq!(
            l10n.format_date_time_local(now, length::Date::Long, length::Time::Medium),
            "January 2, 2006, 5:04:05\u{202f}AM"
        );

        l10n.set_locales(NonEmptyList::from_iter(vec!["eo-EO", "en-US"]).unwrap())
            .unwrap();
        assert_eq!(
            l10n.format_date_time_local(now, length::Date::Long, length::Time::Medium),
            "2006-Januaro-02 05:04:05"
        );
    }

    #[test]
    fn it_formats_dates() {
        let mut l10n = ref_l10n();
        let today = ref_date();

        assert_eq!(
            l10n.format_date(today, length::Date::Long),
            "January 2, 2006"
        );

        l10n.set_locales(NonEmptyList::from_iter(vec!["eo-EO", "en-US"]).unwrap())
            .unwrap();
        assert_eq!(
            l10n.format_date(today, length::Date::Long),
            "2006-Januaro-02"
        );
    }

    #[test]
    fn it_formats_a_number_according_to_locale() {
        let mut l10n = ref_l10n();

        assert_eq!(l10n.format_f64(100.4, FloatPrecision::Floating), "100.4");
        assert_eq!(
            l10n.format_f64(15000.4, FloatPrecision::Floating),
            "15,000.4"
        );

        l10n.set_locales(NonEmptyList::from_iter(vec!["de-DE", "en-US"]).unwrap())
            .unwrap();
        assert_eq!(l10n.format_f64(100.4, FloatPrecision::Floating), "100,4");
        assert_eq!(
            l10n.format_f64(15000.4, FloatPrecision::Floating),
            "15.000,4"
        );
    }

    #[test]
    fn it_can_load_message_files() {
        let l10n = ref_l10n();

        // NOTE(review): expected strings are carried over unchanged. Fluent wraps placeables in
        // bidi isolation marks unless `set_use_isolating(false)` is called on the bundle —
        // confirm these expectations against the actual output.
        assert_eq!(
            l10n.tr(TestMessage::Welcome { name: "Savanni" }),
            "Hello, Savanni"
        );
        assert_eq!(
            l10n.tr(TestMessage::GamesInDatabase { count: 1 }),
            "There is one game in the database"
        );
        assert_eq!(
            l10n.tr(TestMessage::GamesInDatabase { count: 2 }),
            "There are 2 games in the database"
        );
    }

    // Planned tests, not yet written:
    //   it_can_change_languages_on_locale_change
    //   phrases_can_be_translated
    //   phrases_can_fall_back

    /* Not really a unit test, more of a test to see what I could introspect within a fluent
     * message. I was hoping that attributes would give me placeholder names, but that doesn't
     * seem to be the case.
    #[test]
    fn messages() {
        let langid_en = "en-US".parse().expect("Parsing failed.");
        let resource = FluentResource::try_new(MESSAGES.to_owned()).unwrap();
        let mut bundle = FluentBundle::new(vec![langid_en]);
        bundle.add_resource(&resource).unwrap();

        let msg = bundle.get_message("welcome").expect("message should exist");
        for attr in msg.attributes() {
            println!("attr: {:?}", attr);
        }
        assert!(false);
    }
    */
}
|