From 109d635d7a08c472b58ae85e186da0268bff5820 Mon Sep 17 00:00:00 2001 From: Savanni D'Gerinel Date: Fri, 23 Jun 2023 09:46:05 -0400 Subject: [PATCH] Add more test cases and try to handle linebreaks --- go-sgf/src/lib.rs | 182 ++++++++++++++++++++++----- go-sgf/test_data/ff4_a.sgf | 118 +++++++++++++++++ go-sgf/test_data/ff4_b.sgf | 47 +++++++ go-sgf/test_data/linebreak_tests.sgf | 18 +++ 4 files changed, 333 insertions(+), 32 deletions(-) create mode 100644 go-sgf/test_data/ff4_a.sgf create mode 100644 go-sgf/test_data/ff4_b.sgf create mode 100644 go-sgf/test_data/linebreak_tests.sgf diff --git a/go-sgf/src/lib.rs b/go-sgf/src/lib.rs index d3fc4bb..614126d 100644 --- a/go-sgf/src/lib.rs +++ b/go-sgf/src/lib.rs @@ -68,12 +68,13 @@ // VW use nom::{ - bytes::complete::{tag, take_until}, - character::complete::{alpha1, anychar, digit1, multispace0}, - combinator::eof, - multi::{many0, many1, many_till, separated_list1}, - sequence::{delimited, terminated}, - Finish, IResult, Parser, + branch::alt, + bytes::complete::{escaped_transform, is_not, tag}, + character::complete::{alpha1, digit1, multispace0, multispace1}, + combinator::{opt, value}, + multi::{many0, many1, separated_list1}, + sequence::delimited, + Finish, IResult, }; use thiserror::Error; @@ -82,15 +83,39 @@ pub enum Warning {} #[derive(Debug, PartialEq, Error)] pub enum ParseError { #[error("An unknown error was found")] - UnknownError, + NomError(nom::error::Error), } impl From> for ParseError { - fn from(_: nom::error::Error<&str>) -> Self { - Self::UnknownError + fn from(err: nom::error::Error<&str>) -> Self { + Self::NomError(nom::error::Error { + input: err.input.to_owned(), + code: err.code.clone(), + }) } } +/* +impl From<(&str, VerboseErrorKind)> for + +impl From> for ParseError { + fn from(err: nom::error::VerboseError<&str>) -> Self { + Self::NomErrors( + err.errors + .into_iter() + .map(|err| ParseError::from(err)) + .collect(), + ) + /* + Self::NomError(nom::error::Error { + input: err.input.to_owned(), + code: err.code.clone(), + }) + */ + } +} +*/ + // todo: support ST root node #[derive(Debug)] pub struct GameTree { @@ -159,7 +184,7 @@ pub enum GameType { Unsupported, } -struct Sequence(Node); +// struct Sequence(Node); /* struct Node { @@ -188,7 +213,7 @@ enum PropValue { } pub fn parse_sgf(input: &str) -> Result, ParseError> { - let (_, trees) = parse_collection(input).finish()?; + let (_, trees) = parse_collection::>(input).finish()?; trees .into_iter() @@ -202,7 +227,8 @@ pub fn parse_sgf(input: &str) -> Result, ParseError> { .map(|prop| prop.values[0].clone()); let board_size = match tree.sequence[0].find_prop("SZ") { Some(prop) => { - let (_, size) = parse_size(prop.values[0].as_str()).finish()?; + let (_, size) = + parse_size::>(prop.values[0].as_str()).finish()?; size } None => Size { @@ -287,21 +313,25 @@ impl ToString for Property { } } -fn parse_collection(input: &str) -> IResult<&str, Vec> { - separated_list1(multispace0, parse_tree)(input) +fn parse_collection<'a, E: nom::error::ParseError<&'a str>>( + input: &'a str, +) -> IResult<&'a str, Vec, E> { + separated_list1(multispace1, parse_tree)(input) } // note: must preserve unknown properties // note: must fix or preserve illegally formatted game-info properties // note: must correct or delete illegally foramtted properties, but display a warning -fn parse_tree(input: &str) -> IResult<&str, Tree> { - println!(":: parse_tree: {}", input); +fn parse_tree<'a, E: nom::error::ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, Tree, E> { + println!("::: parse_tree: {}", input); let (input, _) = multispace0(input)?; delimited(tag("("), parse_sequence, tag(")"))(input) } -fn parse_sequence(input: &str) -> IResult<&str, Tree> { - println!("parse_sequence: {}", input); +fn parse_sequence<'a, E: nom::error::ParseError<&'a str>>( + input: &'a str, +) -> IResult<&'a str, Tree, E> { + println!("::: parse_sequence: {}", input); let (input, _) = multispace0(input)?; let (input, nodes) = many1(parse_node)(input)?; let (input, sub_sequences) = many0(parse_tree)(input)?; @@ -315,19 +345,21 @@ fn parse_sequence(input: &str) -> IResult<&str, Tree> { )) } -fn parse_node(input: &str) -> IResult<&str, Node> { - println!(":: parse_node: {}", input); +fn parse_node<'a, E: nom::error::ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, Node, E> { + println!("::: parse_node: {}", input); let (input, _) = multispace0(input)?; let (input, _) = tag(";")(input)?; let (input, properties) = many1(parse_property)(input)?; Ok((input, Node { properties })) } -fn parse_property(input: &str) -> IResult<&str, Property> { +fn parse_property<'a, E: nom::error::ParseError<&'a str>>( + input: &'a str, +) -> IResult<&'a str, Property, E> { println!(":: parse_property: {}", input); let (input, _) = multispace0(input)?; let (input, ident) = alpha1(input)?; - let (input, values) = many1(delimited(tag("["), take_until("]"), tag("]")))(input)?; + let (input, values) = many1(parse_propval)(input)?; let values = values .into_iter() @@ -342,7 +374,25 @@ fn parse_property(input: &str) -> IResult<&str, Property> { )) } -fn parse_size(input: &str) -> IResult<&str, Size> { +fn parse_propval<'a, E: nom::error::ParseError<&'a str>>( + input: &'a str, +) -> IResult<&'a str, String, E> { + let (input, _) = multispace0(input)?; + println!("- {}", input); + let (input, _) = tag("[")(input)?; + println!("-- {}", input); + let (input, value) = opt(escaped_transform( + is_not(r"\]"), + '\\', + alt((value("]", tag("\\]")), value("", tag("\\\n")))), + ))(input)?; + println!("--- {}", input); + let (input, _) = tag("]")(input)?; + + Ok((input, value.map(|v| v.to_owned()).unwrap_or(String::new()))) +} + +fn parse_size<'a, E: nom::error::ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, Size, E> { let (input, dimensions) = separated_list1(tag(":"), digit1)(input)?; let (width, height) = match dimensions.as_slice() { [width] => (width.parse::().unwrap(), width.parse::().unwrap()), @@ -368,7 +418,7 @@ mod tests { #[test] fn it_can_parse_properties() { - let (_, prop) = parse_property("C[a]").unwrap(); + let (_, prop) = parse_property::>("C[a]").unwrap(); assert_eq!( prop, Property { @@ -377,7 +427,7 @@ mod tests { } ); - let (_, prop) = parse_property("C[a][b][c]").unwrap(); + let (_, prop) = parse_property::>("C[a][b][c]").unwrap(); assert_eq!( prop, Property { @@ -389,7 +439,7 @@ mod tests { #[test] fn it_can_parse_a_standalone_node() { - let (_, node) = parse_node(";B[ab]").unwrap(); + let (_, node) = parse_node::>(";B[ab]").unwrap(); assert_eq!( node, @@ -401,7 +451,9 @@ mod tests { } ); - let (_, node) = parse_node(";B[ab];W[dp];B[pq]C[some comments]").unwrap(); + let (_, node) = + parse_node::>(";B[ab];W[dp];B[pq]C[some comments]") + .unwrap(); assert_eq!( node, @@ -416,7 +468,9 @@ mod tests { #[test] fn it_can_parse_a_simple_sequence() { - let (_, sequence) = parse_tree("(;B[ab];W[dp];B[pq]C[some comments])").unwrap(); + let (_, sequence) = + parse_tree::>("(;B[ab];W[dp];B[pq]C[some comments])") + .unwrap(); assert_eq!( sequence, @@ -455,7 +509,7 @@ mod tests { #[test] fn it_can_parse_a_sequence_with_subsequences() { let text = "(;C[a];C[b](;C[c])(;C[d];C[e]))"; - let (_, sequence) = parse_tree(text).unwrap(); + let (_, sequence) = parse_tree::>(text).unwrap(); let main_sequence = vec![ Node { @@ -509,7 +563,7 @@ mod tests { #[test] fn it_can_parse_example_1() { - let (_, ex_tree) = parse_tree(EXAMPLE).unwrap(); + let (_, ex_tree) = parse_tree::>(EXAMPLE).unwrap(); assert_eq!(ex_tree.sequence.len(), 1); assert_eq!(ex_tree.sequence[0].properties.len(), 2); @@ -545,15 +599,59 @@ mod tests { #[test] fn it_can_regenerate_the_tree() { - let (_, tree1) = parse_tree(EXAMPLE).unwrap(); + let (_, tree1) = parse_tree::>(EXAMPLE).unwrap(); assert_eq!( tree1.to_string(), "(;FF[4]C[root](;C[a];C[b](;C[c])(;C[d];C[e]))(;C[f](;C[g];C[h];C[i])(;C[j])))" ); - let (_, tree2) = parse_tree(&tree1.to_string()).unwrap(); + let (_, tree2) = parse_tree::>(&tree1.to_string()).unwrap(); assert_eq!(tree1, tree2); } + #[test] + fn it_parses_propvals() { + let (_, propval) = parse_propval::>("[]").unwrap(); + assert_eq!(propval, "".to_owned()); + + let (_, propval) = + parse_propval::>("[normal propval]").unwrap(); + assert_eq!(propval, "normal propval".to_owned()); + + let (_, propval) = + parse_propval::>(r"[need an [escape\] in the propval]") + .unwrap(); + assert_eq!(propval, "need an [escape] in the propval".to_owned()); + } + + #[test] + fn it_parses_propvals_with_hard_linebreaks() { + let (_, propval) = parse_propval::>( + "[There are hard linebreaks & soft linebreaks. +Soft linebreaks...]", + ) + .unwrap(); + assert_eq!( + propval, + "There are hard linebreaks & soft linebreaks. +Soft linebreaks..." + .to_owned() + ); + } + + #[test] + fn it_parses_propvals_with_soft_linebreaks() { + let (_, propval) = parse_propval::>( + r"[Soft linebreaks are linebreaks preceeded by '\\' like this one >o\ +k<. Hard line breaks are all other linebreaks.]", + ) + .unwrap(); + assert_eq!( + propval, + r"Soft linebreaks are linebreaks preceeded by '\\' like this one >ok<. Hard line breaks are all other linebreaks." + .to_owned() + ); + } + fn with_text(text: &str, f: impl FnOnce(Vec)) { f(parse_sgf(text).unwrap()); } @@ -599,8 +697,28 @@ mod tests { }); } + /* + #[test] + fn it_parses_linebreaks() { + with_file( + std::path::Path::new("test_data/linebreak_tests.sgf"), + |tree| {}, + ); + } + + #[test] + fn it_parses_ff4_a() { + with_file(std::path::Path::new("test_data/ff4_a.sgf"), |tree| {}); + } + + #[test] + fn it_parses_ff4_b() { + with_file(std::path::Path::new("test_data/ff4_b.sgf"), |tree| {}); + } + #[test] fn it_parses_ff4_ex() { with_file(std::path::Path::new("test_data/ff4_ex.sgf"), |tree| {}); } + */ } diff --git a/go-sgf/test_data/ff4_a.sgf b/go-sgf/test_data/ff4_a.sgf new file mode 100644 index 0000000..107d6db --- /dev/null +++ b/go-sgf/test_data/ff4_a.sgf @@ -0,0 +1,118 @@ +(;FF[4]AP[Primiview:3.1]GM[1]SZ[19]GN[Gametree 1: properties]US[Arno Hollosi] + +(;B[pd]N[Moves, comments, annotations] +C[Nodename set to: "Moves, comments, annotations"];W[dp]GW[1] +C[Marked as "Good for White"];B[pp]GB[2] +C[Marked as "Very good for Black"];W[dc]GW[2] +C[Marked as "Very good for White"];B[pj]DM[1] +C[Marked as "Even position"];W[ci]UC[1] +C[Marked as "Unclear position"];B[jd]TE[1] +C[Marked as "Tesuji" or "Good move"];W[jp]BM[2] +C[Marked as "Very bad move"];B[gd]DO[] +C[Marked as "Doubtful move"];W[de]IT[] +C[Marked as "Interesting move"];B[jj]; +C[White "Pass" move]W[]; +C[Black "Pass" move]B[tt]) + +(;AB[dd][de][df][dg][do:gq] + AW[jd][je][jf][jg][kn:lq][pn:pq] +N[Setup]C[Black & white stones at the top are added as single stones. + +Black & white stones at the bottom are added using compressed point lists.] +;AE[ep][fp][kn][lo][lq][pn:pq] +C[AddEmpty + +Black stones & stones of left white group are erased in FF[3\] way. + +White stones at bottom right were erased using compressed point list.] +;AB[pd]AW[pp]PL[B]C[Added two stones. + +Node marked with "Black to play".];PL[W] +C[Node marked with "White to play"]) + +(;AB[dd][de][df][dg][dh][di][dj][nj][ni][nh][nf][ne][nd][ij][ii][ih][hq] +[gq][fq][eq][dr][ds][dq][dp][cp][bp][ap][iq][ir][is][bo][bn][an][ms][mr] +AW[pd][pe][pf][pg][ph][pi][pj][fd][fe][ff][fh][fi][fj][kh][ki][kj][os][or] +[oq][op][pp][qp][rp][sp][ro][rn][sn][nq][mq][lq][kq][kr][ks][fs][gs][gr] +[er]N[Markup]C[Position set up without compressed point lists.] + +;TR[dd][de][df][ed][ee][ef][fd:ff] + MA[dh][di][dj][ej][ei][eh][fh:fj] + CR[nd][ne][nf][od][oe][of][pd:pf] + SQ[nh][ni][nj][oh][oi][oj][ph:pj] + SL[ih][ii][ij][jj][ji][jh][kh:kj] + TW[pq:ss][so][lr:ns] + TB[aq:cs][er:hs][ao] +C[Markup at top partially using compressed point lists (for markup on white stones); listed clockwise, starting at upper left: +- TR (triangle) +- CR (circle) +- SQ (square) +- SL (selected points) +- MA ('X') + +Markup at bottom: black & white territory (using compressed point lists)] +;LB[dc:1][fc:2][nc:3][pc:4][dj:a][fj:b][nj:c] +[pj:d][gs:ABCDEFGH][gr:ABCDEFG][gq:ABCDEF][gp:ABCDE][go:ABCD][gn:ABC][gm:AB] +[mm:12][mn:123][mo:1234][mp:12345][mq:123456][mr:1234567][ms:12345678] +C[Label (LB property) + +Top: 8 single char labels (1-4, a-d) + +Bottom: Labels up to 8 char length.] + +;DD[kq:os][dq:hs] +AR[aa:sc][sa:ac][aa:sa][aa:ac][cd:cj] + [gd:md][fh:ij][kj:nh] +LN[pj:pd][nf:ff][ih:fj][kh:nj] +C[Arrows, lines and dimmed points.]) + +(;B[qd]N[Style & text type] +C[There are hard linebreaks & soft linebreaks. +Soft linebreaks are linebreaks preceeded by '\\' like this one >o\ +k<. Hard line breaks are all other linebreaks. +Soft linebreaks are converted to >nothing<, i.e. removed. + +Note that linebreaks are coded differently on different systems. + +Examples (>ok< shouldn't be split): + +linebreak 1 "\\n": >o\ +k< +linebreak 2 "\\n\\r": >o\ + k< +linebreak 3 "\\r\\n": >o\ +k< +linebreak 4 "\\r": >o\ k<] + +(;W[dd]N[W d16]C[Variation C is better.](;B[pp]N[B q4]) +(;B[dp]N[B d4]) +(;B[pq]N[B q3]) +(;B[oq]N[B p3]) +) +(;W[dp]N[W d4]) +(;W[pp]N[W q4]) +(;W[cc]N[W c17]) +(;W[cq]N[W c3]) +(;W[qq]N[W r3]) +) + +(;B[qr]N[Time limits, captures & move numbers] +BL[120.0]C[Black time left: 120 sec];W[rr] +WL[300]C[White time left: 300 sec];B[rq] +BL[105.6]OB[10]C[Black time left: 105.6 sec +Black stones left (in this byo-yomi period): 10];W[qq] +WL[200]OW[2]C[White time left: 200 sec +White stones left: 2];B[sr] +BL[87.00]OB[9]C[Black time left: 87 sec +Black stones left: 9];W[qs] +WL[13.20]OW[1]C[White time left: 13.2 sec +White stones left: 1];B[rs] +C[One white stone at s2 captured];W[ps];B[pr];W[or] +MN[2]C[Set move number to 2];B[os] +C[Two white stones captured +(at q1 & r1)] +;MN[112]W[pq]C[Set move number to 112];B[sq];W[rp];B[ps] +;W[ns];B[ss];W[nr] +;B[rr];W[sp];B[qs]C[Suicide move +(all B stones get captured)]) +) diff --git a/go-sgf/test_data/ff4_b.sgf b/go-sgf/test_data/ff4_b.sgf new file mode 100644 index 0000000..d18e991 --- /dev/null +++ b/go-sgf/test_data/ff4_b.sgf @@ -0,0 +1,47 @@ +(;FF[4]AP[Primiview:3.1]GM[1]SZ[19]C[Gametree 2: game-info + +Game-info properties are usually stored in the root node. +If games are merged into a single game-tree, they are stored in the node\ + where the game first becomes distinguishable from all other games in\ + the tree.] +;B[pd] +(;PW[W. Hite]WR[6d]RO[2]RE[W+3.5] +PB[B. Lack]BR[5d]PC[London]EV[Go Congress]W[dp] +C[Game-info: +Black: B. Lack, 5d +White: W. Hite, 6d +Place: London +Event: Go Congress +Round: 2 +Result: White wins by 3.5]) +(;PW[T. Suji]WR[7d]RO[1]RE[W+Resign] +PB[B. Lack]BR[5d]PC[London]EV[Go Congress]W[cp] +C[Game-info: +Black: B. Lack, 5d +White: T. Suji, 7d +Place: London +Event: Go Congress +Round: 1 +Result: White wins by resignation]) +(;W[ep];B[pp] +(;PW[S. Abaki]WR[1d]RO[3]RE[B+63.5] +PB[B. Lack]BR[5d]PC[London]EV[Go Congress]W[ed] +C[Game-info: +Black: B. Lack, 5d +White: S. Abaki, 1d +Place: London +Event: Go Congress +Round: 3 +Result: Balck wins by 63.5]) +(;PW[A. Tari]WR[12k]KM[-59.5]RO[4]RE[B+R] +PB[B. Lack]BR[5d]PC[London]EV[Go Congress]W[cd] +C[Game-info: +Black: B. Lack, 5d +White: A. Tari, 12k +Place: London +Event: Go Congress +Round: 4 +Komi: -59.5 points +Result: Black wins by resignation]) +)) + diff --git a/go-sgf/test_data/linebreak_tests.sgf b/go-sgf/test_data/linebreak_tests.sgf new file mode 100644 index 0000000..023ae86 --- /dev/null +++ b/go-sgf/test_data/linebreak_tests.sgf @@ -0,0 +1,18 @@ +(;FF[4]AP[Primiview:3.1]GM[1]SZ[19]GN[Gametree 1: properties]US[Arno Hollosi] +C[There are hard linebreaks & soft linebreaks. +Soft linebreaks are linebreaks preceeded by '\\' like this one >o\ +k<. Hard line breaks are all other linebreaks. +Soft linebreaks are converted to >nothing<, i.e. removed. + +Note that linebreaks are coded differently on different systems. + +Examples (>ok< shouldn't be split): + +linebreak 1 "\\n": >o\ +k< +linebreak 2 "\\n\\r": >o\ + k< +linebreak 3 "\\r\\n": >o\ +k< +linebreak 4 "\\r": >o\ k<] +)