monorepo/l10n/src/lib.rs

use chrono::{Datelike, NaiveDate, Timelike};
use chrono_tz::Tz;
use fixed_decimal::FixedDecimal;
use fluent::{bundle::FluentBundle, FluentResource};
use icu::{datetime::options::length, decimal::FixedDecimalFormatter, locid::Locale};
use icu_provider::DataLocale;
use std::{fs::File, io::Read, ops::Deref};
use sys_locale::get_locale;
use thiserror::Error;
use unic_langid::LanguageIdentifierError;

// Re-exports. I'm doing these so that clients of this library don't have to go tracking down
// additional structures
pub use fixed_decimal::FloatPrecision;
pub use fluent::{FluentArgs, FluentValue};

/// Errors that can occur while constructing a [`NonEmptyList`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum NonEmptyListError {
    /// The source iterator yielded no elements, so there is nothing to build the list from.
    BuildFromEmptyContainer,
}

impl std::fmt::Display for NonEmptyListError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::BuildFromEmptyContainer => {
                write!(f, "cannot build a non-empty list from an empty container")
            }
        }
    }
}

impl std::error::Error for NonEmptyListError {}

/// A list that always holds at least one element.
///
/// Both constructors guarantee a first element, and the only mutation offered is [`push`], so
/// the list can never become empty once it exists.
///
/// [`push`]: NonEmptyList::push
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct NonEmptyList<A>(Vec<A>);

impl<A> NonEmptyList<A> {
    /// Creates a list containing exactly `elem`.
    pub fn new(elem: A) -> Self {
        Self(vec![elem])
    }

    /// Collects `iter` into a list.
    ///
    /// # Errors
    ///
    /// Returns [`NonEmptyListError::BuildFromEmptyContainer`] when `iter` yields no elements.
    // This cannot be the `FromIterator` trait method because construction is fallible; the name
    // is kept for existing callers.
    #[allow(clippy::should_implement_trait)]
    pub fn from_iter(
        iter: impl IntoIterator<Item = A>,
    ) -> Result<NonEmptyList<A>, NonEmptyListError> {
        let lst: Vec<A> = iter.into_iter().collect();
        if lst.is_empty() {
            Err(NonEmptyListError::BuildFromEmptyContainer)
        } else {
            Ok(Self(lst))
        }
    }

    /// Appends `item` to the end of the list.
    pub fn push(&mut self, item: A) {
        self.0.push(item);
    }

    /// Returns the first element for which `f` returns `true`, if there is one.
    pub fn find(&self, f: impl Fn(&A) -> bool) -> Option<&A> {
        self.0.iter().find(|item| f(item))
    }

    /// Returns the first element. Infallible because the list is never empty.
    fn first(&self) -> &A {
        &self.0[0]
    }

    /// Iterates over the elements in insertion order.
    fn iter(&self) -> impl Iterator<Item = &A> {
        self.0.iter()
    }
}

/// Read-only access to the underlying `Vec`. There is deliberately no `DerefMut`, so the
/// elements cannot be removed through this impl.
impl<A> Deref for NonEmptyList<A> {
    type Target = Vec<A>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

/// Errors reported by [`L10N`] operations.
#[derive(Debug, Error)]
pub enum L10NError {
    /// A locale string could not be parsed into an ICU locale.
    #[error("Unparsable Locale")]
    UnparsableLocale,
}

impl From<icu::locid::Error> for L10NError {
    fn from(_: icu::locid::Error) -> L10NError {
        L10NError::UnparsableLocale
    }
}

/// Errors that can occur while loading a Fluent message file from disk.
#[derive(Debug, Error)]
pub enum FileLoadError {
    /// The locale name could not be parsed into a language identifier.
    #[error("Unparsable Locale")]
    UnparsableLocale,

    /// The source strings file could not be found.
    #[error("Source string file not found")]
    FileNotFound,

    /// Fluent rejected the file contents. The payload holds the parser's error messages; note
    /// that the `Display` text above does not include it.
    #[error("The Fluent file is malformed")]
    FluentParseError(String),

    /// Any other I/O failure, carrying the underlying error.
    #[error("An unknown IO error was found")]
    IOError(std::io::Error),
}

impl From<LanguageIdentifierError> for FileLoadError {
    fn from(_: LanguageIdentifierError) -> Self {
        Self::UnparsableLocale
    }
}

impl From<std::io::Error> for FileLoadError {
    fn from(err: std::io::Error) -> Self {
        Self::IOError(err)
    }
}

// Potential Message structure.
//
// Let's assume the application has an enumeration that implements Message. For each element of the
// enumeration, there should be some boilerplate code that returns the message ID and the arguments
// as a FluentArgs.
//
// Nobody wants to generate all of that code, though I have done so in the past, and manually
// generating that code could be useful for illustration. I think I'm going to want to do code
// generation from the source strings file, and then compile the enumeration into the code.
// However, I have not found a mechanism in Fluent to identify all of the placeholders within a
// message, so I'm not even sure that I can automate this code generation.
/// A translatable message: a value that can name its Fluent message ID and supply the arguments
/// used when that message is formatted.
pub trait Message {
    /// The ID used to look the message up in the loaded Fluent bundles.
    fn msgid(&self) -> &str;

    /// The arguments handed to Fluent when the message is formatted. Whatever is returned here,
    /// including `None`, is passed straight through to the formatter.
    fn args(&self) -> Option<FluentArgs>;
}

/// Localization context: the loaded Fluent message bundles, the preferred locales, and the time
/// zone used for local-time formatting.
pub struct L10N {
    // Directory in which `<locale>.ftl` message files are looked up.
    messages_root: std::path::PathBuf,
    // One bundle per successfully loaded message file, in load order.
    message_bundles: Vec<FluentBundle<FluentResource, intl_memoizer::concurrent::IntlLangMemoizer>>,

    // Preferred locales; the first entry is the one used for date and number formatting.
    locales: NonEmptyList<Locale>,
    // Time zone applied by `format_date_time_local`.
    zone: chrono_tz::Tz,
}

impl L10N {
    /// Creates a localization context whose message files live under `messages_root`.
    ///
    /// The initial locale is the system locale reported by `sys_locale`, falling back to `en-US`
    /// when the system locale is unavailable or cannot be parsed. The time zone starts out as
    /// UTC. The `en-US` message file is loaded immediately so that there is always a bundle to
    /// translate from.
    ///
    /// # Panics
    ///
    /// Panics if `<messages_root>/en-US.ftl` cannot be read or is not a valid Fluent file.
    pub fn new(messages_root: std::path::PathBuf) -> Self {
        // The fallback locale is only parsed when it is actually needed.
        let sys_locale = get_locale()
            .and_then(|locale_str| locale_str.parse::<Locale>().ok())
            .unwrap_or_else(|| {
                "en-US"
                    .parse::<Locale>()
                    .expect("en-US is a well-formed locale")
            });

        let mut l10n = Self {
            messages_root,
            message_bundles: vec![],
            locales: NonEmptyList::new(sys_locale),
            zone: chrono_tz::UTC,
        };

        l10n.load_messages_from_file("en-US".to_owned())
            .expect("the en-US message file should be present and well-formed");

        l10n
    }

    /// Loads `<messages_root>/<locale>.ftl` and appends it to the list of message bundles.
    ///
    /// # Errors
    ///
    /// * [`FileLoadError::UnparsableLocale`] if `locale` is not a valid language identifier.
    /// * [`FileLoadError::FileNotFound`] if the message file does not exist.
    /// * [`FileLoadError::IOError`] for any other I/O failure, including file contents that are
    ///   not valid UTF-8.
    /// * [`FileLoadError::FluentParseError`] if Fluent cannot parse the file; the payload is the
    ///   parser's error messages joined with newlines.
    fn load_messages_from_file(&mut self, locale: String) -> Result<(), FileLoadError> {
        // Parsing first also guarantees that `locale` is a plain identifier before it is used as
        // a file name.
        let langid: unic_langid::LanguageIdentifier = locale.parse()?;

        let mut path = self.messages_root.clone();
        path.push(locale);
        path.set_extension("ftl");

        let mut file = File::open(&path).map_err(|err| match err.kind() {
            std::io::ErrorKind::NotFound => FileLoadError::FileNotFound,
            _ => FileLoadError::IOError(err),
        })?;
        // `read_to_string` reports invalid UTF-8 as an I/O error instead of panicking.
        let mut text = String::new();
        file.read_to_string(&mut text)?;

        let resource = FluentResource::try_new(text).map_err(|(_, errors)| {
            FileLoadError::FluentParseError(
                errors
                    .into_iter()
                    .map(|err| err.to_string())
                    .collect::<Vec<String>>()
                    .join("\n"),
            )
        })?;

        let mut bundle = FluentBundle::new_concurrent(vec![langid]);
        // NOTE(review): the result of `add_resource` is deliberately ignored, as before.
        // Presumably it only reports entries that collide within the file; confirm that
        // dropping those errors is intended.
        let _ = bundle.add_resource(resource);
        self.message_bundles.push(bundle);
        Ok(())
    }

    // Now, whenever the user changes the locales, the list of messages has to change. How do we
    // automatically set up the messages? Theoretically they all need to be reloaded, and I've
    // already split how the messages get loaded from how the locales are specified.
    //
    // But, FluentErgo does that, too. It already has the concept of being constructed with a list
    // of languages and then having each language bundle manually loaded afterwards.
    //
    // Problem: be able to change the preferred list of locales and automatically have a new
    // FluentBundle which has all relevant translations loaded.
    //
    // One solution is that all bundles get loaded at startup time, and the bundle list gets
    // changed any time the list of locales gets changed. Also, the system can just run through the
    // entire list of fallbacks.
    pub fn set_locales(&mut self, locales: NonEmptyList<&str>) -> Result<(), L10NError> {
        let locales = locales
            .iter()
            .map(|locale| Locale::try_from_bytes(locale.as_bytes()))
            .collect::<Result<Vec<Locale>, icu::locid::Error>>()?;

        for locale in locales.iter() {
            self.load_messages_from_file(locale.to_string()).unwrap();
        }

        self.locales = NonEmptyList(locales);

        Ok(())
    }

    /// Sets the time zone that `format_date_time_local` converts times into before formatting.
    pub fn set_timezone(&mut self, zone: Tz) {
        self.zone = zone;
    }

    // Need to take a message and turn it into a string in the current language. Except I don't
    // know yet what form the message should take. Forming an adapter around fluent_ergonomics or
    // even around fluent itself. I would want for the message to be statically typed, but then I
    // don't know what can be the data type that gets passed in here.
    //
    // Formatting a message requires identifying the message and passing it any relevant
    // parameters. In an ideal world, neither of these can be incorrect. Messages are all checked
    // at compile time, as are their parameters. That implies an enumeration, with one element per
    // message, and with each element knowing its parameters.
    // pub fn messages(&self) -> Vec<FluentBundle<FluentResource>> {
    //     self.message_bundles.clone()
    // }

    /// Translates `message` into a string.
    ///
    /// The loaded bundles are searched from the most recently loaded to the oldest, and the
    /// first bundle that has a value for the message ID is used. The message is formatted by
    /// that same bundle, so references and language-specific rules are resolved in the language
    /// the text comes from. Formatting errors reported by Fluent are discarded.
    ///
    /// # Panics
    ///
    /// Panics if no loaded bundle contains the message.
    pub fn tr(&self, message: impl Message) -> String {
        let msgid = message.msgid();

        self.message_bundles
            .iter()
            .rev()
            .find_map(|bundle| {
                let pattern = bundle.get_message(msgid)?.value()?;
                let args = message.args();
                let mut errors = vec![];
                Some(
                    bundle
                        .format_pattern(pattern, args.as_ref(), &mut errors)
                        .to_string(),
                )
            })
            .unwrap_or_else(|| panic!("The message {} is missing", msgid))
    }

    pub fn format_date_time_utc(
        &self,
        time: DateTime,
        date_style: length::Date,
        time_style: length::Time,
    ) -> String {
        let time: DateTime = time.with_timezone(&chrono_tz::UTC).into();
        let options = length::Bag::from_date_time_style(date_style, time_style);
        let formatter = icu::datetime::DateTimeFormatter::try_new(
            &DataLocale::from(self.locales.first()),
            options.into(),
        )
        .unwrap();
        let icu_time: icu::calendar::DateTime<icu::calendar::Gregorian> = time.into();
        formatter.format_to_string(&icu_time.to_any()).unwrap()
    }

    pub fn format_date_time_local(
        &self,
        time: DateTime,
        date_style: length::Date,
        time_style: length::Time,
    ) -> String {
        let time: DateTime = time.with_timezone(&self.zone).into();
        let options = length::Bag::from_date_time_style(date_style, time_style);
        let formatter = icu::datetime::DateTimeFormatter::try_new(
            &DataLocale::from(self.locales.first()),
            options.into(),
        )
        .unwrap();
        let icu_time: icu::calendar::DateTime<icu::calendar::Gregorian> = time.into();
        formatter.format_to_string(&icu_time.to_any()).unwrap()
    }

    /*
     * I have been unable to get from a chrono_tz::Tz to an ICU timezone. I have tried a variety of
     * parsers on the CustomTimeZone object. I have not researched the data provider to see what is
     * available there. The ZoneID for the reference date is US/Mountain, and the abbreviation is
     * MST. I'll want to get to a CustomTimeZone so that the formatter can render MST or Mountain
     * Standard Time or something similar.
        fn format_date_time_tz(
            &self,
            time: DateTime,
            date_style: length::Date,
            time_style: length::Time,
        ) -> String {
            let options = length::Bag::from_date_time_style(date_style, time_style);
            let formatter = icu::datetime::ZonedDateTimeFormatter::try_new(
                &DataLocale::from(&self.locale),
                options.into(),
                Default::default(),
            )
            .unwrap();
            let icu_time: icu::calendar::DateTime<icu::calendar::Gregorian> = time.into();
            let any = icu_time.to_any();

            println!("{:?}", time.offset());

            let zone_id: String = time.offset().abbreviation().to_owned();
            println!("{:?}", zone_id);
            let zone_id = icu::timezone::TimeZoneBcp47Id::from_str(&zone_id).unwrap();

            let zone: CustomTimeZone = CustomTimeZone {
                gmt_offset: None,
                time_zone_id: Some(zone_id),
                /*
                icu::timezone::TimeZoneBcp47Id::from_str(time.offset().tz_id().parse().unwrap())
                    .unwrap(),
                    */
                metazone_id: None,
                zone_variant: None,
            };

            formatter.format_to_string(&any, &zone).unwrap()
        }
    */

    /// Formats a calendar date in the primary locale using the requested length style.
    ///
    /// # Panics
    ///
    /// Panics if ICU cannot build a formatter for the primary locale, rejects the date, or
    /// fails to format it.
    pub fn format_date(&self, date: NaiveDate, date_style: length::Date) -> String {
        let locale = DataLocale::from(self.locales.first());
        let formatter =
            icu::datetime::DateFormatter::try_new_with_length(&locale, date_style).unwrap();

        // chrono reports month and day as `u32`; ICU wants them as `u8`.
        let year = date.year();
        let month = u8::try_from(date.month()).unwrap();
        let day = u8::try_from(date.day()).unwrap();
        let icu_date: icu::calendar::Date<icu::calendar::Gregorian> =
            icu::calendar::Date::try_new_gregorian_date(year, month, day).unwrap();

        formatter.format_to_string(&icu_date.to_any()).unwrap()
    }

    pub fn format_f64(&self, value: f64, precision: FloatPrecision) -> String {
        let fdf = FixedDecimalFormatter::try_new(
            &self.locales.first().clone().into(),
            Default::default(),
        )
        .expect("locale should be present");

        let number = FixedDecimal::try_from_f64(value, precision).unwrap();

        fdf.format_to_string(&FixedDecimal::try_from_f64(value, precision).unwrap())
    }
}

/// A timezone-aware timestamp: a thin wrapper around `chrono::DateTime<Tz>` that can be
/// converted into an ICU Gregorian date-time for formatting.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct DateTime(chrono::DateTime<Tz>);

impl Deref for DateTime {
    type Target = chrono::DateTime<Tz>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl From<chrono::DateTime<Tz>> for DateTime {
    fn from(time: chrono::DateTime<Tz>) -> Self {
        Self(time)
    }
}

/// Copies the wall-clock fields (year through second) of a `DateTime` into an ICU Gregorian
/// date-time. The time zone itself is not carried over.
impl From<DateTime> for icu::calendar::DateTime<icu::calendar::Gregorian> {
    fn from(time: DateTime) -> Self {
        // None of these conversions is expected to fail: chrono values are already valid
        // Gregorian dates, so every field should fit the `u8` that ICU asks for.
        let year = time.year();
        let month = u8::try_from(time.month()).unwrap();
        let day = u8::try_from(time.day()).unwrap();
        let hour = u8::try_from(time.hour()).unwrap();
        let minute = u8::try_from(time.minute()).unwrap();
        let second = u8::try_from(time.second()).unwrap();

        icu::calendar::DateTime::try_new_gregorian_datetime(year, month, day, hour, minute, second)
            .unwrap()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use fluent::fluent_args;

    /// Builds the fixture shared by the tests: messages from `./test_files`, the `en-US`
    /// locale, and the US/Eastern time zone.
    fn ref_l10n() -> L10N {
        let mut l10n = L10N::new(std::path::PathBuf::from("./test_files"));
        // Make sure we know the locale before the test begins. Some systems, such as my own, are
        // not actually in English.
        l10n.set_locales(NonEmptyList::new("en-US"))
            .expect("en-US should be accepted as a locale");
        l10n.set_timezone(chrono_tz::US::Eastern);
        l10n
    }

    /// The reference date used by the date-formatting tests: 2006-01-02.
    fn ref_date() -> NaiveDate {
        NaiveDate::from_ymd_opt(2006, 1, 2).unwrap()
    }

    fn ref_time() -> DateTime {
        NaiveDate::from_ymd_opt(2006, 1, 2)
            .unwrap()
            .and_hms_opt(3, 4, 5)
            .unwrap()
            .and_local_timezone(Tz::US__Mountain)
            .unwrap()
            .into()
    }

    #[test]
    fn it_formats_a_time_in_utc() {
        let mut l10n = ref_l10n();
        let now = ref_time();

        // 202f is the code-point for a narrow non-breaking space. Presumably this is used in
        // particular to ensure that the am/pm marker doesn't get split off from the time
        assert_eq!(
            l10n.format_date_time_utc(now, length::Date::Long, length::Time::Medium),
            "January 2, 2006, 10:04:05\u{202f}AM"
        );

        // A rejected locale list must fail the test instead of being silently ignored.
        l10n.set_locales(NonEmptyList::from_iter(vec!["eo-EO", "en-US"]).unwrap())
            .unwrap();
        assert_eq!(
            l10n.format_date_time_utc(now, length::Date::Long, length::Time::Medium),
            "2006-Januaro-02 10:04:05"
        );
    }

    #[test]
    fn it_formats_a_time_in_the_current_zone() {
        let mut l10n = ref_l10n();
        let now = ref_time();

        // 202f is the code-point for a narrow non-breaking space. Presumably this is used in
        // particular to ensure that the am/pm marker doesn't get split off from the time
        assert_eq!(
            l10n.format_date_time_local(now, length::Date::Long, length::Time::Medium),
            "January 2, 2006, 5:04:05\u{202f}AM"
        );

        // A rejected locale list must fail the test instead of being silently ignored.
        l10n.set_locales(NonEmptyList::from_iter(vec!["eo-EO", "en-US"]).unwrap())
            .unwrap();
        assert_eq!(
            l10n.format_date_time_local(now, length::Date::Long, length::Time::Medium),
            "2006-Januaro-02 05:04:05"
        );
    }

    #[test]
    fn it_formats_dates() {
        let mut l10n = ref_l10n();
        let today = ref_date();

        assert_eq!(
            l10n.format_date(today, length::Date::Long),
            "January 2, 2006"
        );

        // A rejected locale list must fail the test instead of being silently ignored.
        l10n.set_locales(NonEmptyList::from_iter(vec!["eo-EO", "en-US"]).unwrap())
            .unwrap();
        assert_eq!(
            l10n.format_date(today, length::Date::Long),
            "2006-Januaro-02"
        );
    }

    #[test]
    fn it_formats_a_number_according_to_locale() {
        let mut l10n = ref_l10n();

        assert_eq!(l10n.format_f64(100.4, FloatPrecision::Floating), "100.4",);
        assert_eq!(
            l10n.format_f64(15000.4, FloatPrecision::Floating),
            "15,000.4",
        );

        // A rejected locale list must fail the test instead of being silently ignored.
        l10n.set_locales(NonEmptyList::from_iter(vec!["de-DE", "en-US"]).unwrap())
            .unwrap();
        assert_eq!(l10n.format_f64(100.4, FloatPrecision::Floating), "100,4",);
        assert_eq!(
            l10n.format_f64(15000.4, FloatPrecision::Floating),
            "15.000,4",
        );
    }

    #[test]
    fn it_can_load_message_files() {
        // `L10N::tr` takes a `Message`, so the messages under test are modelled as a small
        // enumeration with one variant per message ID.
        enum TestMessage {
            Welcome(&'static str),
            GamesInDatabase(i32),
        }

        impl Message for TestMessage {
            fn msgid(&self) -> &str {
                match self {
                    Self::Welcome(_) => "welcome",
                    Self::GamesInDatabase(_) => "games-in-database",
                }
            }

            fn args(&self) -> Option<FluentArgs> {
                Some(match self {
                    Self::Welcome(name) => fluent_args!["name" => *name],
                    Self::GamesInDatabase(count) => fluent_args!["count" => *count],
                })
            }
        }

        let l10n = ref_l10n();

        // NOTE(review): Fluent bundles wrap placeables in Unicode isolation marks
        // (U+2068/U+2069) by default, and nothing in this crate turns that off. Confirm the
        // expected strings below against the contents of the test message file.
        assert_eq!(l10n.tr(TestMessage::Welcome("Savanni")), "Hello, Savanni");

        assert_eq!(
            l10n.tr(TestMessage::GamesInDatabase(1)),
            "There is one game in the database"
        );

        assert_eq!(
            l10n.tr(TestMessage::GamesInDatabase(2)),
            "There are 2 games in the database"
        );
    }

    /*
    #[test]
    fn it_can_change_languages_on_locale_change() {
    }

    #[test]
    fn phrases_can_be_translated() {
    }

    #[test]
    fn phrases_can_fall_back() {
    }
    */

    /* Not really a unit test, more of a test to see what I could introspect within a fluent
     * message. I was hoping that attributes would give me placeholder names, but that doesn't seem
     * to be the case.
    #[test]
    fn messages() {
        let langid_en = "en-US".parse().expect("Parsing failed.");
        let resource = FluentResource::try_new(MESSAGES.to_owned()).unwrap();
        let mut bundle = FluentBundle::new(vec![langid_en]);
        bundle.add_resource(&resource).unwrap();

        let msg = bundle.get_message("welcome").expect("message should exist");
        for attr in msg.attributes() {
            println!("attr: {:?}", attr);
        }
        assert!(false);
    }
    */
}
No results found.