From 69e4605d716356e5a18e1eb796f19aafb8435f0a Mon Sep 17 00:00:00 2001 From: Savanni D'Gerinel Date: Thu, 14 Sep 2023 00:26:28 -0400 Subject: [PATCH] Hugely overhaul the tree property parsing --- sgf/src/tree.rs | 365 ++++++++++++++++++++++++++++-------------------- 1 file changed, 212 insertions(+), 153 deletions(-) diff --git a/sgf/src/tree.rs b/sgf/src/tree.rs index 9bd4f19..234ffe1 100644 --- a/sgf/src/tree.rs +++ b/sgf/src/tree.rs @@ -1,11 +1,12 @@ use crate::{Color, Error, GameResult}; use nom::{ branch::alt, - bytes::complete::{escaped_transform, tag}, - character::complete::{alpha1, multispace0, multispace1, none_of}, + bytes::complete::{escaped_transform, tag, take_until1}, + character::complete::{alpha1, digit1, multispace0, multispace1, none_of}, combinator::{opt, value}, + error::ParseError, multi::{many0, many1, separated_list1}, - IResult, + IResult, Parser, }; use std::num::ParseIntError; @@ -27,6 +28,12 @@ impl From for ParseSizeError { } } +#[derive(Clone, Debug, PartialEq)] +pub enum Double { + Normal, + Emphasized, +} + #[derive(Clone, Debug, PartialEq)] pub enum GameType { Go, @@ -293,10 +300,10 @@ impl Node { #[derive(Clone, Debug, PartialEq)] pub enum Property { // B, W - Move((Color, Position)), + Move((Color, String)), // C - Comment(Vec), + Comment(String), // BM BadMove, @@ -317,7 +324,7 @@ pub enum Property { Charset(String), // FF - FileFormat(u8), + FileFormat(i32), // GM GameType(GameType), @@ -427,7 +434,7 @@ pub enum Property { #[derive(Clone, Debug, PartialEq)] pub struct UnknownProperty { ident: String, - values: Vec, + value: String, } /* @@ -442,15 +449,9 @@ impl ToString for Property { fn to_string(&self) -> String { match self { Property::Move((color, position)) => { - format!("{}[{}]", color.abbreviation(), position.0) + format!("{}[{}]", color.abbreviation(), position) } - Property::Comment(values) => format!( - "C{}", - values - .iter() - .map(|v| format!("[{}]", v)) - .collect::() - ), + Property::Comment(value) => format!("C[{}]", value), Property::BadMove => "BM[]".to_owned(), Property::DoubtfulMove => "DO[]".to_owned(), Property::InterestingMove => "IT[]".to_owned(), @@ -504,12 +505,8 @@ impl ToString for Property { Property::User(value) => format!("US[{}]", value), Property::WhiteRank(value) => format!("WR[{}]", value), Property::WhiteTeam(value) => format!("WT[{}]", value), - Property::Unknown(UnknownProperty { ident, values }) => { - let values = values - .iter() - .map(|val| format!("[{}]", val)) - .collect::(); - format!("{}{}", ident, values) + Property::Unknown(UnknownProperty { ident, value }) => { + format!("{}[{}]", ident, value) } } /* @@ -563,87 +560,154 @@ fn parse_property<'a, E: nom::error::ParseError<&'a str>>( ) -> IResult<&'a str, Property, E> { let (input, _) = multispace0(input)?; let (input, ident) = alpha1(input)?; - let (input, values) = many1(parse_propval)(input)?; - let (input, _) = multispace0(input)?; - let values = values - .into_iter() - .map(|v| v.to_owned()) - .collect::>(); - - let prop = match ident { - "W" => Property::Move((Color::White, Position(values[0].clone()))), - "B" => Property::Move((Color::Black, Position(values[0].clone()))), - "C" => Property::Comment(values), - "BM" => Property::BadMove, - "DO" => Property::DoubtfulMove, - "IT" => Property::InterestingMove, - "TE" => Property::Tesuji, - "AP" => Property::Application(values.join(",")), - "CA" => Property::Charset(values.join(",")), - "FF" => Property::FileFormat(values.join("").parse::().unwrap()), - "GM" => Property::GameType(GameType::from(values.join("").as_ref())), + let (input, prop) = match ident { + "W" => parse_propval(parse_move(Color::White))(input)?, + "B" => parse_propval(parse_move(Color::Black))(input)?, + "C" => parse_propval(parse_comment())(input)?, + "BM" => discard_propval().map(|_| Property::BadMove).parse(input)?, + "DO" => discard_propval() + .map(|_| Property::DoubtfulMove) + .parse(input)?, + "IT" => discard_propval() + .map(|_| Property::InterestingMove) + .parse(input)?, + "TE" => discard_propval().map(|_| Property::Tesuji).parse(input)?, + "AP" => parse_propval(parse_simple_text().map(Property::Application))(input)?, + "CA" => parse_propval(parse_simple_text().map(Property::Charset))(input)?, + "FF" => parse_propval(parse_number().map(Property::FileFormat))(input)?, + "GM" => unimplemented!(), "ST" => unimplemented!(), - "SZ" => Property::BoardSize(Size::try_from(values.join("").as_ref()).unwrap()), - "DM" => Property::EvenResult, - "GB" => Property::GoodForBlack, - "GW" => Property::GoodForWhite, - "UC" => Property::UnclearResult, - "V" => Property::Value(values.join("").parse::().unwrap()), - "AN" => Property::Annotator(values.join("")), - "BR" => Property::BlackRank(values.join("")), - "BT" => Property::BlackTeam(values.join("")), - "CP" => Property::Copyright(values.join("")), + "SZ" => unimplemented!(), + "DM" => discard_propval() + .map(|_| Property::EvenResult) + .parse(input)?, + "GB" => discard_propval() + .map(|_| Property::GoodForBlack) + .parse(input)?, + "GW" => discard_propval() + .map(|_| Property::GoodForWhite) + .parse(input)?, + "UC" => discard_propval() + .map(|_| Property::UnclearResult) + .parse(input)?, + "V" => unimplemented!(), + "AN" => parse_propval(parse_simple_text().map(Property::Annotator))(input)?, + "BR" => parse_propval(parse_simple_text().map(Property::BlackRank))(input)?, + "BT" => parse_propval(parse_simple_text().map(Property::BlackTeam))(input)?, + "CP" => parse_propval(parse_simple_text().map(Property::Copyright))(input)?, "DT" => unimplemented!(), - "EV" => Property::EventName(values.join("")), - "GN" => Property::GameName(values.join("")), - "GC" => Property::ExtraGameInformation(values.join("")), - "ON" => Property::GameOpening(values.join("")), - "OT" => Property::Overtime(values.join("")), - "PB" => Property::BlackPlayer(values.join("")), - "PC" => Property::GameLocation(values.join("")), - "PW" => Property::WhitePlayer(values.join("")), - "RE" => Property::Result(GameResult::try_from(values.join("").as_ref()).unwrap()), - "RO" => Property::Round(values.join("")), - "RU" => Property::Ruleset(values.join("")), - "SO" => Property::Source(values.join("")), + "EV" => parse_propval(parse_simple_text().map(Property::EventName))(input)?, + "GN" => parse_propval(parse_simple_text().map(Property::GameName))(input)?, + "GC" => parse_propval(parse_simple_text().map(Property::ExtraGameInformation))(input)?, + "ON" => parse_propval(parse_simple_text().map(Property::GameOpening))(input)?, + "OT" => parse_propval(parse_simple_text().map(Property::Overtime))(input)?, + "PB" => parse_propval(parse_simple_text().map(Property::BlackPlayer))(input)?, + "PC" => parse_propval(parse_simple_text().map(Property::GameLocation))(input)?, + "PW" => parse_propval(parse_simple_text().map(Property::WhitePlayer))(input)?, + "RE" => unimplemented!(), + "RO" => parse_propval(parse_simple_text().map(Property::Round))(input)?, + "RU" => parse_propval(parse_simple_text().map(Property::Ruleset))(input)?, + "SO" => parse_propval(parse_simple_text().map(Property::Source))(input)?, "TM" => unimplemented!(), - "US" => Property::User(values.join("")), - "WR" => Property::WhiteRank(values.join("")), - "WT" => Property::WhiteTeam(values.join("")), - _ => Property::Unknown(UnknownProperty { - ident: ident.to_owned(), - values, - }), + "US" => parse_propval(parse_simple_text().map(Property::User))(input)?, + "WR" => parse_propval(parse_simple_text().map(Property::WhiteRank))(input)?, + "WT" => parse_propval(parse_simple_text().map(Property::WhiteTeam))(input)?, + _ => parse_propval(parse_simple_text().map(|value| { + Property::Unknown(UnknownProperty { + ident: ident.to_owned(), + value, + }) + }))(input)?, }; Ok((input, prop)) } -fn parse_propval<'a, E: nom::error::ParseError<&'a str>>( - input: &'a str, -) -> IResult<&'a str, String, E> { - let (input, _) = multispace0(input)?; - let (input, _) = tag("[")(input)?; - let (input, value) = parse_propval_text(input)?; - let (input, _) = tag("]")(input)?; - - Ok((input, value.unwrap_or(String::new()))) +fn parse_comment<'a, E: nom::error::ParseError<&'a str>>() -> impl Parser<&'a str, Property, E> { + parse_text().map(|text| Property::Comment(text)) } -fn parse_propval_text<'a, E: nom::error::ParseError<&'a str>>( - input: &'a str, -) -> IResult<&'a str, Option, E> { - let (input, value) = opt(escaped_transform( - none_of("\\]"), - '\\', - alt(( - value("]", tag("]")), - value("\\", tag("\\")), - value("", tag("\n")), - )), - ))(input)?; - Ok((input, value.map(|v| v.to_owned()))) +fn parse_move<'a, E: nom::error::ParseError<&'a str>>( + color: Color, +) -> impl FnMut(&'a str) -> IResult<&'a str, Property, E> { + { + let color = color.clone(); + move |input: &'a str| { + take_until1("]") + .map(|text: &'a str| Property::Move((color.clone(), text.to_owned()))) + .parse(input) + } + } +} + +fn parse_propvals<'a, E: nom::error::ParseError<&'a str>>( + parser: impl Parser<&'a str, Property, E>, +) -> impl FnMut(&'a str) -> IResult<&'a str, Vec, E> { + many1(parse_propval(parser)) +} + +fn parse_propval<'a, E: nom::error::ParseError<&'a str>>( + mut parser: impl Parser<&'a str, Property, E>, +) -> impl FnMut(&'a str) -> IResult<&'a str, Property, E> { + move |input| { + let (input, _) = multispace0(input)?; + let (input, _) = tag("[")(input)?; + let (input, value) = parser.parse(input)?; + let (input, _) = tag("]")(input)?; + + Ok((input, value)) + } +} + +fn discard_propvals<'a, E: nom::error::ParseError<&'a str>>() -> impl Parser<&'a str, (), E> { + many1(discard_propval()).map(|_| ()) +} + +fn discard_propval<'a, E: nom::error::ParseError<&'a str>>() -> impl Parser<&'a str, (), E> { + |input| { + let (input, _) = multispace0(input)?; + let (input, _) = tag("[")(input)?; + let (input, _) = parse_text().parse(input)?; + let (input, _) = tag("]")(input)?; + Ok((input, ())) + } +} + +fn parse_number<'a, E: ParseError<&'a str>>() -> impl Parser<&'a str, i32, E> { + |input| { + let (input, sign) = opt(alt((tag("+"), tag("-"))))(input)?; + let (input, value) = digit1(input)?; + let mult = if sign == Some("-") { -1 } else { 1 }; + Ok((input, value.parse::().unwrap() * mult)) + } +} + +fn parse_real<'a, E: ParseError<&'a str>>() -> impl Parser<&'a str, f32, E> { + |input| unimplemented!() +} + +fn parse_double<'a, E: ParseError<&'a str>>() -> impl Parser<&'a str, Double, E> { + |input| unimplemented!() +} + +fn parse_simple_text<'a, E: ParseError<&'a str>>() -> impl Parser<&'a str, String, E> { + |input| unimplemented!() +} + +fn parse_text<'a, E: ParseError<&'a str>>() -> impl Parser<&'a str, String, E> { + |input| { + let (input, value) = opt(escaped_transform( + none_of("\\]"), + '\\', + alt(( + value("]", tag("]")), + value("\\", tag("\\")), + value("", tag("\n")), + )), + ))(input)?; + Ok((input, value.unwrap_or("".to_owned()))) + } } #[cfg(test)] @@ -658,13 +722,7 @@ mod test { #[test] fn it_can_parse_properties() { let (_, prop) = parse_property::>("C[a]").unwrap(); - assert_eq!(prop, Property::Comment(vec!["a".to_owned()])); - - let (_, prop) = parse_property::>("C[a][b][c]").unwrap(); - assert_eq!( - prop, - Property::Comment(vec!["a".to_owned(), "b".to_owned(), "c".to_owned()]) - ); + assert_eq!(prop, Property::Comment("a".to_owned())); } #[test] @@ -674,7 +732,7 @@ mod test { assert_eq!( node, Node { - properties: vec![Property::Move((Color::Black, Position("ab".to_owned())))], + properties: vec![Property::Move((Color::Black, "ab".to_owned()))], next: vec![] } ); @@ -686,13 +744,13 @@ mod test { assert_eq!( node, Node { - properties: vec![Property::Move((Color::Black, Position("ab".to_owned())))], + properties: vec![Property::Move((Color::Black, "ab".to_owned()))], next: vec![Node { - properties: vec![Property::Move((Color::White, Position("dp".to_owned())))], + properties: vec![Property::Move((Color::White, "dp".to_owned()))], next: vec![Node { properties: vec![ - Property::Move((Color::Black, Position("pq".to_owned()))), - Property::Comment(vec!["some comments".to_owned()]) + Property::Move((Color::Black, "pq".to_owned())), + Property::Comment("some comments".to_owned()) ], next: vec![], }] @@ -710,13 +768,13 @@ mod test { assert_eq!( sequence, Node { - properties: vec![Property::Move((Color::Black, Position("ab".to_owned())))], + properties: vec![Property::Move((Color::Black, "ab".to_owned()))], next: vec![Node { - properties: vec![Property::Move((Color::White, Position("dp".to_owned())))], + properties: vec![Property::Move((Color::White, "dp".to_owned()))], next: vec![Node { properties: vec![ - Property::Move((Color::Black, Position("pq".to_owned()))), - Property::Comment(vec!["some comments".to_owned()]) + Property::Move((Color::Black, "pq".to_owned())), + Property::Comment("some comments".to_owned()) ], next: vec![], }] @@ -731,18 +789,18 @@ mod test { let (_, tree) = parse_tree::>(text).unwrap(); let expected = Node { - properties: vec![Property::Comment(vec!["a".to_owned()])], + properties: vec![Property::Comment("a".to_owned())], next: vec![Node { - properties: vec![Property::Comment(vec!["b".to_owned()])], + properties: vec![Property::Comment("b".to_owned())], next: vec![ Node { - properties: vec![Property::Comment(vec!["c".to_owned()])], + properties: vec![Property::Comment("c".to_owned())], next: vec![], }, Node { - properties: vec![Property::Comment(vec!["d".to_owned()])], + properties: vec![Property::Comment("d".to_owned())], next: vec![Node { - properties: vec![Property::Comment(vec!["e".to_owned()])], + properties: vec![Property::Comment("e".to_owned())], next: vec![], }], }, @@ -758,49 +816,49 @@ mod test { let (_, tree) = parse_tree::>(EXAMPLE).unwrap(); let j = Node { - properties: vec![Property::Comment(vec!["j".to_owned()])], + properties: vec![Property::Comment("j".to_owned())], next: vec![], }; let i = Node { - properties: vec![Property::Comment(vec!["i".to_owned()])], + properties: vec![Property::Comment("i".to_owned())], next: vec![], }; let h = Node { - properties: vec![Property::Comment(vec!["h".to_owned()])], + properties: vec![Property::Comment("h".to_owned())], next: vec![i], }; let g = Node { - properties: vec![Property::Comment(vec!["g".to_owned()])], + properties: vec![Property::Comment("g".to_owned())], next: vec![h], }; let f = Node { - properties: vec![Property::Comment(vec!["f".to_owned()])], + properties: vec![Property::Comment("f".to_owned())], next: vec![g, j], }; let e = Node { - properties: vec![Property::Comment(vec!["e".to_owned()])], + properties: vec![Property::Comment("e".to_owned())], next: vec![], }; let d = Node { - properties: vec![Property::Comment(vec!["d".to_owned()])], + properties: vec![Property::Comment("d".to_owned())], next: vec![e], }; let c = Node { - properties: vec![Property::Comment(vec!["c".to_owned()])], + properties: vec![Property::Comment("c".to_owned())], next: vec![], }; let b = Node { - properties: vec![Property::Comment(vec!["b".to_owned()])], + properties: vec![Property::Comment("b".to_owned())], next: vec![c, d], }; let a = Node { - properties: vec![Property::Comment(vec!["a".to_owned()])], + properties: vec![Property::Comment("a".to_owned())], next: vec![b], }; let expected = Node { properties: vec![ Property::FileFormat(4), - Property::Comment(vec!["root".to_owned()]), + Property::Comment("root".to_owned()), ], next: vec![a, f], }; @@ -822,58 +880,59 @@ mod test { #[test] fn it_parses_propvals() { - let (_, propval) = parse_propval::>("[]").unwrap(); - assert_eq!(propval, "".to_owned()); + let (_, propval) = parse_propval::>(parse_comment()) + .parse("[]") + .unwrap(); + assert_eq!(propval, Property::Comment("".to_owned())); - let (_, propval) = - parse_propval::>("[normal propval]").unwrap(); - assert_eq!(propval, "normal propval".to_owned()); + let (_, propval) = parse_propval::>(parse_comment()) + .parse("[normal propval]") + .unwrap(); + assert_eq!(propval, Property::Comment("normal propval".to_owned())); - let (_, propval) = - parse_propval::>(r"[need an [escape\] in the propval]") - .unwrap(); - assert_eq!(propval, "need an [escape] in the propval".to_owned()); + let (_, propval) = parse_propval::>(parse_comment()) + .parse(r"[need an [escape\] in the propval]") + .unwrap(); + assert_eq!( + propval, + Property::Comment("need an [escape] in the propval".to_owned()) + ); } #[test] fn it_parses_propvals_with_hard_linebreaks() { - let (_, propval) = parse_propval_text::>( - "There are hard linebreaks & soft linebreaks. + let (_, propval) = parse_text::>() + .parse( + "There are hard linebreaks & soft linebreaks. Soft linebreaks...", - ) - .unwrap(); + ) + .unwrap(); assert_eq!( propval, - Some( - "There are hard linebreaks & soft linebreaks. + "There are hard linebreaks & soft linebreaks. Soft linebreaks..." - .to_owned() - ) ); } #[test] fn it_parses_propvals_with_escaped_closing_brackets() { - let (_, propval) = - parse_propval_text::>(r"escaped closing \] bracket") - .unwrap(); - assert_eq!( - propval, - Some(r"escaped closing ] bracket".to_owned()).to_owned() - ); + let (_, propval) = parse_text::>() + .parse(r"escaped closing \] bracket") + .unwrap(); + assert_eq!(propval, r"escaped closing ] bracket".to_owned()); } #[test] fn it_parses_propvals_with_soft_linebreaks() { - let (_, propval) = parse_propval_text::>( - r"Soft linebreaks are linebreaks preceeded by '\\' like this one >o\ + let (_, propval) = parse_text::>() + .parse( + r"Soft linebreaks are linebreaks preceeded by '\\' like this one >o\ k<. Hard line breaks are all other linebreaks.", - ) - .unwrap(); + ) + .unwrap(); assert_eq!( propval, - Some("Soft linebreaks are linebreaks preceeded by '\\' like this one >ok<. Hard line breaks are all other linebreaks.".to_owned()) - .to_owned() + "Soft linebreaks are linebreaks preceeded by '\\' like this one >ok<. Hard line breaks are all other linebreaks." ); }