use nom::{ branch::alt, bytes::complete::{escaped_transform, tag}, character::complete::{alpha1, digit1, multispace0, multispace1, none_of}, combinator::{opt, value}, multi::{many0, many1, separated_list1}, sequence::delimited, IResult, }; use std::num::ParseIntError; #[derive(Debug)] pub enum ParseSizeError { ParseIntError(ParseIntError), InsufficientArguments, } impl From for ParseSizeError { fn from(e: ParseIntError) -> Self { Self::ParseIntError(e) } } #[derive(Clone, Debug, PartialEq)] pub struct Size { pub width: i32, pub height: i32, } impl TryFrom<&str> for Size { type Error = ParseSizeError; fn try_from(s: &str) -> Result { let parts = s .split(':') .map(|v| v.parse::()) .collect::, ParseIntError>>()?; match parts[..] { [width, height, ..] => Ok(Size { width, height }), [dim] => Ok(Size { width: dim, height: dim, }), [] => Err(ParseSizeError::InsufficientArguments), } } } #[derive(Debug, PartialEq)] pub struct Tree { pub sequence: Vec, pub sub_sequences: Vec, } impl ToString for Tree { fn to_string(&self) -> String { let sequence = self .sequence .iter() .map(|node| node.to_string()) .collect::(); let subsequences = self .sub_sequences .iter() .map(|seq| seq.to_string()) .collect::(); format!("({}{})", sequence, subsequences) } } #[derive(Debug, PartialEq)] pub struct Node { pub properties: Vec, } impl ToString for Node { fn to_string(&self) -> String { let props = self .properties .iter() .map(|prop| prop.to_string()) .collect::(); format!(";{}", props) } } impl Node { pub fn find_prop(&self, ident: &str) -> Option { self.properties .iter() .find(|prop| prop.ident == ident) .cloned() } } #[derive(Clone, Debug, PartialEq)] pub struct Property { pub ident: String, pub values: Vec, } impl ToString for Property { fn to_string(&self) -> String { let values = self .values .iter() .map(|val| format!("[{}]", val)) .collect::(); format!("{}{}", self.ident, values) } } pub fn parse_collection<'a, E: nom::error::ParseError<&'a str>>( input: &'a str, ) -> IResult<&'a str, Vec, E> { separated_list1(multispace1, parse_tree)(input) } // note: must preserve unknown properties // note: must fix or preserve illegally formatted game-info properties // note: must correct or delete illegally foramtted properties, but display a warning fn parse_tree<'a, E: nom::error::ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, Tree, E> { let (input, _) = multispace0(input)?; delimited(tag("("), parse_sequence, tag(")"))(input) } fn parse_sequence<'a, E: nom::error::ParseError<&'a str>>( input: &'a str, ) -> IResult<&'a str, Tree, E> { let (input, _) = multispace0(input)?; let (input, nodes) = many1(parse_node)(input)?; let (input, _) = multispace0(input)?; let (input, sub_sequences) = many0(parse_tree)(input)?; let (input, _) = multispace0(input)?; Ok(( input, Tree { sequence: nodes, sub_sequences, }, )) } fn parse_node<'a, E: nom::error::ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, Node, E> { let (input, _) = multispace0(input)?; let (input, _) = tag(";")(input)?; let (input, properties) = many1(parse_property)(input)?; Ok((input, Node { properties })) } fn parse_property<'a, E: nom::error::ParseError<&'a str>>( input: &'a str, ) -> IResult<&'a str, Property, E> { let (input, _) = multispace0(input)?; let (input, ident) = alpha1(input)?; let (input, values) = many1(parse_propval)(input)?; let (input, _) = multispace0(input)?; let values = values .into_iter() .map(|v| v.to_owned()) .collect::>(); Ok(( input, Property { ident: ident.to_owned(), values, }, )) } fn parse_propval<'a, E: nom::error::ParseError<&'a str>>( input: &'a str, ) -> IResult<&'a str, String, E> { let (input, _) = multispace0(input)?; let (input, _) = tag("[")(input)?; let (input, value) = parse_propval_text(input)?; let (input, _) = tag("]")(input)?; Ok((input, value.unwrap_or(String::new()))) } fn parse_propval_text<'a, E: nom::error::ParseError<&'a str>>( input: &'a str, ) -> IResult<&'a str, Option, E> { let (input, value) = opt(escaped_transform( none_of("\\]"), '\\', alt(( value("]", tag("]")), value("\\", tag("\\")), value("", tag("\n")), )), ))(input)?; Ok((input, value.map(|v| v.to_owned()))) } pub fn parse_size<'a, E: nom::error::ParseError<&'a str>>( input: &'a str, ) -> IResult<&'a str, Size, E> { let (input, dimensions) = separated_list1(tag(":"), digit1)(input)?; let (width, height) = match dimensions.as_slice() { [width] => (width.parse::().unwrap(), width.parse::().unwrap()), [width, height] => ( width.parse::().unwrap(), height.parse::().unwrap(), ), _ => (19, 19), }; Ok((input, Size { width, height })) } #[cfg(test)] mod test { use std::{fs::File, io::Read}; use super::*; const EXAMPLE: &'static str = "(;FF[4]C[root](;C[a];C[b](;C[c]) (;C[d];C[e])) (;C[f](;C[g];C[h];C[i]) (;C[j])))"; #[test] fn it_can_parse_properties() { let (_, prop) = parse_property::>("C[a]").unwrap(); assert_eq!( prop, Property { ident: "C".to_owned(), values: vec!["a".to_owned()] } ); let (_, prop) = parse_property::>("C[a][b][c]").unwrap(); assert_eq!( prop, Property { ident: "C".to_owned(), values: vec!["a".to_owned(), "b".to_owned(), "c".to_owned()] } ); } #[test] fn it_can_parse_a_standalone_node() { let (_, node) = parse_node::>(";B[ab]").unwrap(); assert_eq!( node, Node { properties: vec![Property { ident: "B".to_owned(), values: vec!["ab".to_owned()] }] } ); let (_, node) = parse_node::>(";B[ab];W[dp];B[pq]C[some comments]") .unwrap(); assert_eq!( node, Node { properties: vec![Property { ident: "B".to_owned(), values: vec!["ab".to_owned()] }] } ); } #[test] fn it_can_parse_a_simple_sequence() { let (_, sequence) = parse_tree::>("(;B[ab];W[dp];B[pq]C[some comments])") .unwrap(); assert_eq!( sequence, Tree { sequence: vec![ Node { properties: vec![Property { ident: "B".to_owned(), values: vec!["ab".to_owned()] }] }, Node { properties: vec![Property { ident: "W".to_owned(), values: vec!["dp".to_owned()] }] }, Node { properties: vec![ Property { ident: "B".to_owned(), values: vec!["pq".to_owned()] }, Property { ident: "C".to_owned(), values: vec!["some comments".to_owned()] } ] } ], sub_sequences: vec![], } ); } #[test] fn it_can_parse_a_sequence_with_subsequences() { let text = "(;C[a];C[b](;C[c])(;C[d];C[e]))"; let (_, sequence) = parse_tree::>(text).unwrap(); let main_sequence = vec![ Node { properties: vec![Property { ident: "C".to_owned(), values: vec!["a".to_owned()], }], }, Node { properties: vec![Property { ident: "C".to_owned(), values: vec!["b".to_owned()], }], }, ]; let subsequence_1 = Tree { sequence: vec![Node { properties: vec![Property { ident: "C".to_owned(), values: vec!["c".to_owned()], }], }], sub_sequences: vec![], }; let subsequence_2 = Tree { sequence: vec![ Node { properties: vec![Property { ident: "C".to_owned(), values: vec!["d".to_owned()], }], }, Node { properties: vec![Property { ident: "C".to_owned(), values: vec!["e".to_owned()], }], }, ], sub_sequences: vec![], }; assert_eq!( sequence, Tree { sequence: main_sequence, sub_sequences: vec![subsequence_1, subsequence_2], } ); } #[test] fn it_can_parse_example_1() { let (_, ex_tree) = parse_tree::>(EXAMPLE).unwrap(); assert_eq!(ex_tree.sequence.len(), 1); assert_eq!(ex_tree.sequence[0].properties.len(), 2); assert_eq!( ex_tree.sequence[0].properties[0], Property { ident: "FF".to_owned(), values: vec!["4".to_owned()] } ); assert_eq!(ex_tree.sub_sequences.len(), 2); assert_eq!(ex_tree.sub_sequences[0].sequence.len(), 2); assert_eq!( ex_tree.sub_sequences[0].sequence, vec![ Node { properties: vec![Property { ident: "C".to_owned(), values: vec!["a".to_owned()] }] }, Node { properties: vec![Property { ident: "C".to_owned(), values: vec!["b".to_owned()] }] }, ] ); assert_eq!(ex_tree.sub_sequences[0].sub_sequences.len(), 2); } #[test] fn it_can_regenerate_the_tree() { let (_, tree1) = parse_tree::>(EXAMPLE).unwrap(); assert_eq!( tree1.to_string(), "(;FF[4]C[root](;C[a];C[b](;C[c])(;C[d];C[e]))(;C[f](;C[g];C[h];C[i])(;C[j])))" ); let (_, tree2) = parse_tree::>(&tree1.to_string()).unwrap(); assert_eq!(tree1, tree2); } #[test] fn it_parses_propvals() { let (_, propval) = parse_propval::>("[]").unwrap(); assert_eq!(propval, "".to_owned()); let (_, propval) = parse_propval::>("[normal propval]").unwrap(); assert_eq!(propval, "normal propval".to_owned()); let (_, propval) = parse_propval::>(r"[need an [escape\] in the propval]") .unwrap(); assert_eq!(propval, "need an [escape] in the propval".to_owned()); } #[test] fn it_parses_propvals_with_hard_linebreaks() { let (_, propval) = parse_propval_text::>( "There are hard linebreaks & soft linebreaks. Soft linebreaks...", ) .unwrap(); assert_eq!( propval, Some( "There are hard linebreaks & soft linebreaks. Soft linebreaks..." .to_owned() ) ); } #[test] fn it_parses_propvals_with_escaped_closing_brackets() { let (_, propval) = parse_propval_text::>(r"escaped closing \] bracket") .unwrap(); assert_eq!( propval, Some(r"escaped closing ] bracket".to_owned()).to_owned() ); } #[test] fn it_parses_propvals_with_soft_linebreaks() { let (_, propval) = parse_propval_text::>( r"Soft linebreaks are linebreaks preceeded by '\\' like this one >o\ k<. Hard line breaks are all other linebreaks.", ) .unwrap(); assert_eq!( propval, Some("Soft linebreaks are linebreaks preceeded by '\\' like this one >ok<. Hard line breaks are all other linebreaks.".to_owned()) .to_owned() ); } #[test] fn it_parses_sgf_with_newline_in_sequence() { let data = String::from( "(;FF[4]C[root](;C[a];C[b](;C[c])(;C[d];C[e] ))(;C[f](;C[g];C[h];C[i])(;C[j])))", ); parse_tree::>(&data).unwrap(); } #[test] fn it_parses_sgf_with_newline_between_two_sequence_closings() { let data = String::from( "(;FF[4]C[root](;C[a];C[b](;C[c])(;C[d];C[e]) )(;C[f](;C[g];C[h];C[i])(;C[j])))", ); parse_tree::>(&data).unwrap(); } }