Add more test cases and try to handle linebreaks

This commit is contained in:
Savanni D'Gerinel 2023-06-23 09:46:05 -04:00
parent 4fd07b240e
commit 3cca3a7f89
4 changed files with 333 additions and 32 deletions

View File

@ -68,12 +68,13 @@
// VW
use nom::{
bytes::complete::{tag, take_until},
character::complete::{alpha1, anychar, digit1, multispace0},
combinator::eof,
multi::{many0, many1, many_till, separated_list1},
sequence::{delimited, terminated},
Finish, IResult, Parser,
branch::alt,
bytes::complete::{escaped_transform, is_not, tag},
character::complete::{alpha1, digit1, multispace0, multispace1},
combinator::{opt, value},
multi::{many0, many1, separated_list1},
sequence::delimited,
Finish, IResult,
};
use thiserror::Error;
@ -82,15 +83,39 @@ pub enum Warning {}
#[derive(Debug, PartialEq, Error)]
pub enum ParseError {
#[error("An unknown error was found")]
UnknownError,
NomError(nom::error::Error<String>),
}
impl From<nom::error::Error<&str>> for ParseError {
fn from(_: nom::error::Error<&str>) -> Self {
Self::UnknownError
fn from(err: nom::error::Error<&str>) -> Self {
Self::NomError(nom::error::Error {
input: err.input.to_owned(),
code: err.code.clone(),
})
}
}
/*
impl From<(&str, VerboseErrorKind)> for
impl From<nom::error::VerboseError<&str>> for ParseError {
fn from(err: nom::error::VerboseError<&str>) -> Self {
Self::NomErrors(
err.errors
.into_iter()
.map(|err| ParseError::from(err))
.collect(),
)
/*
Self::NomError(nom::error::Error {
input: err.input.to_owned(),
code: err.code.clone(),
})
*/
}
}
*/
// todo: support ST root node
#[derive(Debug)]
pub struct GameTree {
@ -159,7 +184,7 @@ pub enum GameType {
Unsupported,
}
struct Sequence(Node);
// struct Sequence(Node);
/*
struct Node {
@ -188,7 +213,7 @@ enum PropValue {
}
pub fn parse_sgf(input: &str) -> Result<Vec<GameTree>, ParseError> {
let (_, trees) = parse_collection(input).finish()?;
let (_, trees) = parse_collection::<nom::error::Error<&str>>(input).finish()?;
trees
.into_iter()
@ -202,7 +227,8 @@ pub fn parse_sgf(input: &str) -> Result<Vec<GameTree>, ParseError> {
.map(|prop| prop.values[0].clone());
let board_size = match tree.sequence[0].find_prop("SZ") {
Some(prop) => {
let (_, size) = parse_size(prop.values[0].as_str()).finish()?;
let (_, size) =
parse_size::<nom::error::Error<&str>>(prop.values[0].as_str()).finish()?;
size
}
None => Size {
@ -287,21 +313,25 @@ impl ToString for Property {
}
}
fn parse_collection(input: &str) -> IResult<&str, Vec<Tree>> {
separated_list1(multispace0, parse_tree)(input)
/// Parses a collection of game trees.
///
/// At least one tree must parse successfully. Further trees are accepted as
/// long as each one is separated from the previous tree by one or more
/// whitespace characters (`multispace1`).
///
/// Returns the remaining input together with the parsed trees.
fn parse_collection<'a, E: nom::error::ParseError<&'a str>>(
    input: &'a str,
) -> IResult<&'a str, Vec<Tree>, E> {
    let mut trees = separated_list1(multispace1, parse_tree);
    trees(input)
}
// note: must preserve unknown properties
// note: must fix or preserve illegally formatted game-info properties
// note: must correct or delete illegally formatted properties, but display a warning
fn parse_tree(input: &str) -> IResult<&str, Tree> {
println!(":: parse_tree: {}", input);
/// Parses a single game tree: a sequence wrapped in parentheses.
///
/// Leading whitespace is skipped before the opening `(`. The contents are
/// handed to `parse_sequence`, and a closing `)` must follow.
///
/// Returns the remaining input together with the parsed tree.
fn parse_tree<'a, E: nom::error::ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, Tree, E> {
    // No debug printing here: this parser is re-entered for every sub-tree, so
    // echoing the remaining input on each call floods stdout.
    let (input, _) = multispace0(input)?;
    delimited(tag("("), parse_sequence, tag(")"))(input)
}
fn parse_sequence(input: &str) -> IResult<&str, Tree> {
println!("parse_sequence: {}", input);
fn parse_sequence<'a, E: nom::error::ParseError<&'a str>>(
input: &'a str,
) -> IResult<&'a str, Tree, E> {
println!("::: parse_sequence: {}", input);
let (input, _) = multispace0(input)?;
let (input, nodes) = many1(parse_node)(input)?;
let (input, sub_sequences) = many0(parse_tree)(input)?;
@ -315,19 +345,21 @@ fn parse_sequence(input: &str) -> IResult<&str, Tree> {
))
}
fn parse_node(input: &str) -> IResult<&str, Node> {
println!(":: parse_node: {}", input);
/// Parses a single node: a `;` followed by one or more properties.
///
/// Leading whitespace is skipped before the `;`. Parsing stops after the last
/// property that belongs to this node; anything following (for example the
/// next `;`) is returned as remaining input.
fn parse_node<'a, E: nom::error::ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, Node, E> {
    // No debug printing here: the parser runs once per node, so echoing the
    // remaining input on each call floods stdout.
    let (input, _) = multispace0(input)?;
    let (input, _) = tag(";")(input)?;
    let (input, properties) = many1(parse_property)(input)?;
    Ok((input, Node { properties }))
}
fn parse_property(input: &str) -> IResult<&str, Property> {
fn parse_property<'a, E: nom::error::ParseError<&'a str>>(
input: &'a str,
) -> IResult<&'a str, Property, E> {
println!(":: parse_property: {}", input);
let (input, _) = multispace0(input)?;
let (input, ident) = alpha1(input)?;
let (input, values) = many1(delimited(tag("["), take_until("]"), tag("]")))(input)?;
let (input, values) = many1(parse_propval)(input)?;
let values = values
.into_iter()
@ -342,7 +374,25 @@ fn parse_property(input: &str) -> IResult<&str, Property> {
))
}
fn parse_size(input: &str) -> IResult<&str, Size> {
/// Parses one bracketed property value, e.g. `[ab]`, into an owned `String`.
///
/// Leading whitespace is skipped before the opening `[`. Inside the brackets:
///
/// * `\]` is decoded to a literal `]`, so it does not terminate the value.
/// * A backslash immediately followed by `\n` (a soft line break) is removed.
/// * Every other character, including unescaped line breaks, is kept as-is.
///
/// An empty value (`[]`) yields an empty string.
///
/// NOTE(review): any other escape sequence (for example `\\`, or a backslash
/// followed by `\r\n`) is not recognised yet and makes the value fail to
/// parse — confirm the intended handling before relying on it.
fn parse_propval<'a, E: nom::error::ParseError<&'a str>>(
    input: &'a str,
) -> IResult<&'a str, String, E> {
    let (input, _) = multispace0(input)?;
    let (input, _) = tag("[")(input)?;
    // `escaped_transform` consumes the backslash itself before it runs the
    // transform parser, so the alternatives must match only the character
    // that follows the backslash. `opt` covers the empty value `[]`, where
    // there is nothing for `escaped_transform` to consume.
    let (input, text) = opt(escaped_transform(
        is_not(r"\]"),
        '\\',
        alt((value("]", tag("]")), value("", tag("\n")))),
    ))(input)?;
    let (input, _) = tag("]")(input)?;
    Ok((input, text.unwrap_or_default()))
}
fn parse_size<'a, E: nom::error::ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, Size, E> {
let (input, dimensions) = separated_list1(tag(":"), digit1)(input)?;
let (width, height) = match dimensions.as_slice() {
[width] => (width.parse::<i32>().unwrap(), width.parse::<i32>().unwrap()),
@ -368,7 +418,7 @@ mod tests {
#[test]
fn it_can_parse_properties() {
let (_, prop) = parse_property("C[a]").unwrap();
let (_, prop) = parse_property::<nom::error::VerboseError<&str>>("C[a]").unwrap();
assert_eq!(
prop,
Property {
@ -377,7 +427,7 @@ mod tests {
}
);
let (_, prop) = parse_property("C[a][b][c]").unwrap();
let (_, prop) = parse_property::<nom::error::VerboseError<&str>>("C[a][b][c]").unwrap();
assert_eq!(
prop,
Property {
@ -389,7 +439,7 @@ mod tests {
#[test]
fn it_can_parse_a_standalone_node() {
let (_, node) = parse_node(";B[ab]").unwrap();
let (_, node) = parse_node::<nom::error::VerboseError<&str>>(";B[ab]").unwrap();
assert_eq!(
node,
@ -401,7 +451,9 @@ mod tests {
}
);
let (_, node) = parse_node(";B[ab];W[dp];B[pq]C[some comments]").unwrap();
let (_, node) =
parse_node::<nom::error::VerboseError<&str>>(";B[ab];W[dp];B[pq]C[some comments]")
.unwrap();
assert_eq!(
node,
@ -416,7 +468,9 @@ mod tests {
#[test]
fn it_can_parse_a_simple_sequence() {
let (_, sequence) = parse_tree("(;B[ab];W[dp];B[pq]C[some comments])").unwrap();
let (_, sequence) =
parse_tree::<nom::error::VerboseError<&str>>("(;B[ab];W[dp];B[pq]C[some comments])")
.unwrap();
assert_eq!(
sequence,
@ -455,7 +509,7 @@ mod tests {
#[test]
fn it_can_parse_a_sequence_with_subsequences() {
let text = "(;C[a];C[b](;C[c])(;C[d];C[e]))";
let (_, sequence) = parse_tree(text).unwrap();
let (_, sequence) = parse_tree::<nom::error::VerboseError<&str>>(text).unwrap();
let main_sequence = vec![
Node {
@ -509,7 +563,7 @@ mod tests {
#[test]
fn it_can_parse_example_1() {
let (_, ex_tree) = parse_tree(EXAMPLE).unwrap();
let (_, ex_tree) = parse_tree::<nom::error::VerboseError<&str>>(EXAMPLE).unwrap();
assert_eq!(ex_tree.sequence.len(), 1);
assert_eq!(ex_tree.sequence[0].properties.len(), 2);
@ -545,15 +599,59 @@ mod tests {
#[test]
fn it_can_regenerate_the_tree() {
let (_, tree1) = parse_tree(EXAMPLE).unwrap();
let (_, tree1) = parse_tree::<nom::error::VerboseError<&str>>(EXAMPLE).unwrap();
assert_eq!(
tree1.to_string(),
"(;FF[4]C[root](;C[a];C[b](;C[c])(;C[d];C[e]))(;C[f](;C[g];C[h];C[i])(;C[j])))"
);
let (_, tree2) = parse_tree(&tree1.to_string()).unwrap();
let (_, tree2) = parse_tree::<nom::error::VerboseError<&str>>(&tree1.to_string()).unwrap();
assert_eq!(tree1, tree2);
}
/// Checks `parse_propval` on an empty value, a plain value, and a value that
/// contains an escaped closing bracket.
#[test]
fn it_parses_propvals() {
    // (raw input, expected decoded value), checked in this order.
    let cases = [
        ("[]", ""),
        ("[normal propval]", "normal propval"),
        (
            r"[need an [escape\] in the propval]",
            "need an [escape] in the propval",
        ),
    ];

    for &(raw, expected) in cases.iter() {
        let (_, propval) = parse_propval::<nom::error::VerboseError<&str>>(raw).unwrap();
        assert_eq!(propval, expected.to_owned());
    }
}
// Checks that a line break with no preceding backslash (a "hard" line break)
// inside a property value is kept in the parsed string: the expected value
// contains the same line break as the input.
#[test]
fn it_parses_propvals_with_hard_linebreaks() {
let (_, propval) = parse_propval::<nom::error::VerboseError<&str>>(
"[There are hard linebreaks & soft linebreaks.
Soft linebreaks...]",
)
.unwrap();
assert_eq!(
propval,
"There are hard linebreaks & soft linebreaks.
Soft linebreaks..."
.to_owned()
);
}
// Checks that a backslash immediately followed by a line break (a "soft" line
// break) is dropped: the input splits ">ok<" across two lines after the
// backslash, and the expected value reads ">ok<" on one line.
//
// Both literals are raw strings, so every backslash in them is a literal
// backslash character.
// NOTE(review): the expected value keeps the doubled backslash inside the
// quotes as two characters — confirm whether an escaped backslash should
// instead be decoded to a single one.
#[test]
fn it_parses_propvals_with_soft_linebreaks() {
let (_, propval) = parse_propval::<nom::error::VerboseError<&str>>(
r"[Soft linebreaks are linebreaks preceeded by '\\' like this one >o\
k<. Hard line breaks are all other linebreaks.]",
)
.unwrap();
assert_eq!(
propval,
r"Soft linebreaks are linebreaks preceeded by '\\' like this one >ok<. Hard line breaks are all other linebreaks."
.to_owned()
);
}
/// Test helper: parses `text` with `parse_sgf` and passes the resulting game
/// trees to `f`.
///
/// Panics if `parse_sgf` returns an error.
fn with_text(text: &str, f: impl FnOnce(Vec<GameTree>)) {
    let trees = parse_sgf(text).unwrap();
    f(trees);
}
@ -599,8 +697,28 @@ mod tests {
});
}
/*
#[test]
fn it_parses_linebreaks() {
with_file(
std::path::Path::new("test_data/linebreak_tests.sgf"),
|tree| {},
);
}
#[test]
fn it_parses_ff4_a() {
with_file(std::path::Path::new("test_data/ff4_a.sgf"), |tree| {});
}
#[test]
fn it_parses_ff4_b() {
with_file(std::path::Path::new("test_data/ff4_b.sgf"), |tree| {});
}
#[test]
fn it_parses_ff4_ex() {
with_file(std::path::Path::new("test_data/ff4_ex.sgf"), |tree| {});
}
*/
}

118
go-sgf/test_data/ff4_a.sgf Normal file
View File

@ -0,0 +1,118 @@
(;FF[4]AP[Primiview:3.1]GM[1]SZ[19]GN[Gametree 1: properties]US[Arno Hollosi]
(;B[pd]N[Moves, comments, annotations]
C[Nodename set to: "Moves, comments, annotations"];W[dp]GW[1]
C[Marked as "Good for White"];B[pp]GB[2]
C[Marked as "Very good for Black"];W[dc]GW[2]
C[Marked as "Very good for White"];B[pj]DM[1]
C[Marked as "Even position"];W[ci]UC[1]
C[Marked as "Unclear position"];B[jd]TE[1]
C[Marked as "Tesuji" or "Good move"];W[jp]BM[2]
C[Marked as "Very bad move"];B[gd]DO[]
C[Marked as "Doubtful move"];W[de]IT[]
C[Marked as "Interesting move"];B[jj];
C[White "Pass" move]W[];
C[Black "Pass" move]B[tt])
(;AB[dd][de][df][dg][do:gq]
AW[jd][je][jf][jg][kn:lq][pn:pq]
N[Setup]C[Black & white stones at the top are added as single stones.
Black & white stones at the bottom are added using compressed point lists.]
;AE[ep][fp][kn][lo][lq][pn:pq]
C[AddEmpty
Black stones & stones of left white group are erased in FF[3\] way.
White stones at bottom right were erased using compressed point list.]
;AB[pd]AW[pp]PL[B]C[Added two stones.
Node marked with "Black to play".];PL[W]
C[Node marked with "White to play"])
(;AB[dd][de][df][dg][dh][di][dj][nj][ni][nh][nf][ne][nd][ij][ii][ih][hq]
[gq][fq][eq][dr][ds][dq][dp][cp][bp][ap][iq][ir][is][bo][bn][an][ms][mr]
AW[pd][pe][pf][pg][ph][pi][pj][fd][fe][ff][fh][fi][fj][kh][ki][kj][os][or]
[oq][op][pp][qp][rp][sp][ro][rn][sn][nq][mq][lq][kq][kr][ks][fs][gs][gr]
[er]N[Markup]C[Position set up without compressed point lists.]
;TR[dd][de][df][ed][ee][ef][fd:ff]
MA[dh][di][dj][ej][ei][eh][fh:fj]
CR[nd][ne][nf][od][oe][of][pd:pf]
SQ[nh][ni][nj][oh][oi][oj][ph:pj]
SL[ih][ii][ij][jj][ji][jh][kh:kj]
TW[pq:ss][so][lr:ns]
TB[aq:cs][er:hs][ao]
C[Markup at top partially using compressed point lists (for markup on white stones); listed clockwise, starting at upper left:
- TR (triangle)
- CR (circle)
- SQ (square)
- SL (selected points)
- MA ('X')
Markup at bottom: black & white territory (using compressed point lists)]
;LB[dc:1][fc:2][nc:3][pc:4][dj:a][fj:b][nj:c]
[pj:d][gs:ABCDEFGH][gr:ABCDEFG][gq:ABCDEF][gp:ABCDE][go:ABCD][gn:ABC][gm:AB]
[mm:12][mn:123][mo:1234][mp:12345][mq:123456][mr:1234567][ms:12345678]
C[Label (LB property)
Top: 8 single char labels (1-4, a-d)
Bottom: Labels up to 8 char length.]
;DD[kq:os][dq:hs]
AR[aa:sc][sa:ac][aa:sa][aa:ac][cd:cj]
[gd:md][fh:ij][kj:nh]
LN[pj:pd][nf:ff][ih:fj][kh:nj]
C[Arrows, lines and dimmed points.])
(;B[qd]N[Style & text type]
C[There are hard linebreaks & soft linebreaks.
Soft linebreaks are linebreaks preceeded by '\\' like this one >o\
k<. Hard line breaks are all other linebreaks.
Soft linebreaks are converted to >nothing<, i.e. removed.
Note that linebreaks are coded differently on different systems.
Examples (>ok< shouldn't be split):
linebreak 1 "\\n": >o\
k<
linebreak 2 "\\n\\r": >o\
k<
linebreak 3 "\\r\\n": >o\
k<
linebreak 4 "\\r": >o\ k<]
(;W[dd]N[W d16]C[Variation C is better.](;B[pp]N[B q4])
(;B[dp]N[B d4])
(;B[pq]N[B q3])
(;B[oq]N[B p3])
)
(;W[dp]N[W d4])
(;W[pp]N[W q4])
(;W[cc]N[W c17])
(;W[cq]N[W c3])
(;W[qq]N[W r3])
)
(;B[qr]N[Time limits, captures & move numbers]
BL[120.0]C[Black time left: 120 sec];W[rr]
WL[300]C[White time left: 300 sec];B[rq]
BL[105.6]OB[10]C[Black time left: 105.6 sec
Black stones left (in this byo-yomi period): 10];W[qq]
WL[200]OW[2]C[White time left: 200 sec
White stones left: 2];B[sr]
BL[87.00]OB[9]C[Black time left: 87 sec
Black stones left: 9];W[qs]
WL[13.20]OW[1]C[White time left: 13.2 sec
White stones left: 1];B[rs]
C[One white stone at s2 captured];W[ps];B[pr];W[or]
MN[2]C[Set move number to 2];B[os]
C[Two white stones captured
(at q1 & r1)]
;MN[112]W[pq]C[Set move number to 112];B[sq];W[rp];B[ps]
;W[ns];B[ss];W[nr]
;B[rr];W[sp];B[qs]C[Suicide move
(all B stones get captured)])
)

View File

@ -0,0 +1,47 @@
(;FF[4]AP[Primiview:3.1]GM[1]SZ[19]C[Gametree 2: game-info
Game-info properties are usually stored in the root node.
If games are merged into a single game-tree, they are stored in the node\
where the game first becomes distinguishable from all other games in\
the tree.]
;B[pd]
(;PW[W. Hite]WR[6d]RO[2]RE[W+3.5]
PB[B. Lack]BR[5d]PC[London]EV[Go Congress]W[dp]
C[Game-info:
Black: B. Lack, 5d
White: W. Hite, 6d
Place: London
Event: Go Congress
Round: 2
Result: White wins by 3.5])
(;PW[T. Suji]WR[7d]RO[1]RE[W+Resign]
PB[B. Lack]BR[5d]PC[London]EV[Go Congress]W[cp]
C[Game-info:
Black: B. Lack, 5d
White: T. Suji, 7d
Place: London
Event: Go Congress
Round: 1
Result: White wins by resignation])
(;W[ep];B[pp]
(;PW[S. Abaki]WR[1d]RO[3]RE[B+63.5]
PB[B. Lack]BR[5d]PC[London]EV[Go Congress]W[ed]
C[Game-info:
Black: B. Lack, 5d
White: S. Abaki, 1d
Place: London
Event: Go Congress
Round: 3
Result: Balck wins by 63.5])
(;PW[A. Tari]WR[12k]KM[-59.5]RO[4]RE[B+R]
PB[B. Lack]BR[5d]PC[London]EV[Go Congress]W[cd]
C[Game-info:
Black: B. Lack, 5d
White: A. Tari, 12k
Place: London
Event: Go Congress
Round: 4
Komi: -59.5 points
Result: Black wins by resignation])
))

View File

@ -0,0 +1,18 @@
(;FF[4]AP[Primiview:3.1]GM[1]SZ[19]GN[Gametree 1: properties]US[Arno Hollosi]
C[There are hard linebreaks & soft linebreaks.
Soft linebreaks are linebreaks preceeded by '\\' like this one >o\
k<. Hard line breaks are all other linebreaks.
Soft linebreaks are converted to >nothing<, i.e. removed.
Note that linebreaks are coded differently on different systems.
Examples (>ok< shouldn't be split):
linebreak 1 "\\n": >o\
k<
linebreak 2 "\\n\\r": >o\
k<
linebreak 3 "\\r\\n": >o\
k<
linebreak 4 "\\r": >o\ k<]
)