monorepo/sgf/src/tree.rs

498 lines
14 KiB
Rust
Raw Normal View History

2023-07-26 23:33:50 +00:00
use crate::Error;
use nom::{
branch::alt,
bytes::complete::{escaped_transform, tag},
character::complete::{alpha1, digit1, multispace0, multispace1, none_of},
combinator::{opt, value},
multi::{many0, many1, separated_list1},
sequence::delimited,
IResult,
};
2023-07-21 03:22:00 +00:00
use std::num::ParseIntError;
impl From<ParseSizeError> for Error {
fn from(_: ParseSizeError) -> Self {
Self::InvalidBoardSize
}
}
#[derive(Debug)]
pub enum ParseSizeError {
ParseIntError(ParseIntError),
InsufficientArguments,
}
impl From<ParseIntError> for ParseSizeError {
fn from(e: ParseIntError) -> Self {
Self::ParseIntError(e)
}
}
2023-07-21 03:22:00 +00:00
#[derive(Clone, Debug, PartialEq)]
pub struct Size {
pub width: i32,
pub height: i32,
}
impl TryFrom<&str> for Size {
type Error = ParseSizeError;
fn try_from(s: &str) -> Result<Self, Self::Error> {
let parts = s
.split(':')
.map(|v| v.parse::<i32>())
.collect::<Result<Vec<i32>, ParseIntError>>()?;
match parts[..] {
[width, height, ..] => Ok(Size { width, height }),
[dim] => Ok(Size {
width: dim,
height: dim,
}),
[] => Err(ParseSizeError::InsufficientArguments),
}
}
}
#[derive(Clone, Debug, PartialEq)]
pub struct Tree {
pub sequence: Vec<Node>,
pub sub_sequences: Vec<Tree>,
}
impl ToString for Tree {
fn to_string(&self) -> String {
let sequence = self
.sequence
.iter()
.map(|node| node.to_string())
.collect::<String>();
let subsequences = self
.sub_sequences
.iter()
.map(|seq| seq.to_string())
.collect::<String>();
format!("({}{})", sequence, subsequences)
}
}
#[derive(Clone, Debug, PartialEq)]
pub struct Node {
pub properties: Vec<Property>,
}
impl ToString for Node {
fn to_string(&self) -> String {
let props = self
.properties
.iter()
.map(|prop| prop.to_string())
.collect::<String>();
format!(";{}", props)
}
}
impl Node {
pub fn find_prop(&self, ident: &str) -> Option<Property> {
self.properties
.iter()
.find(|prop| prop.ident == ident)
.cloned()
}
}
#[derive(Clone, Debug, PartialEq)]
pub struct Property {
pub ident: String,
pub values: Vec<String>,
}
impl ToString for Property {
fn to_string(&self) -> String {
let values = self
.values
.iter()
.map(|val| format!("[{}]", val))
.collect::<String>();
format!("{}{}", self.ident, values)
}
}
pub fn parse_collection<'a, E: nom::error::ParseError<&'a str>>(
input: &'a str,
) -> IResult<&'a str, Vec<Tree>, E> {
separated_list1(multispace1, parse_tree)(input)
}
// note: must preserve unknown properties
// note: must fix or preserve illegally formatted game-info properties
// note: must correct or delete illegally foramtted properties, but display a warning
fn parse_tree<'a, E: nom::error::ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, Tree, E> {
let (input, _) = multispace0(input)?;
delimited(tag("("), parse_sequence, tag(")"))(input)
}
fn parse_sequence<'a, E: nom::error::ParseError<&'a str>>(
input: &'a str,
) -> IResult<&'a str, Tree, E> {
let (input, _) = multispace0(input)?;
let (input, nodes) = many1(parse_node)(input)?;
let (input, _) = multispace0(input)?;
let (input, sub_sequences) = many0(parse_tree)(input)?;
let (input, _) = multispace0(input)?;
Ok((
input,
Tree {
sequence: nodes,
sub_sequences,
},
))
}
fn parse_node<'a, E: nom::error::ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, Node, E> {
let (input, _) = multispace0(input)?;
let (input, _) = tag(";")(input)?;
let (input, properties) = many1(parse_property)(input)?;
Ok((input, Node { properties }))
}
fn parse_property<'a, E: nom::error::ParseError<&'a str>>(
input: &'a str,
) -> IResult<&'a str, Property, E> {
let (input, _) = multispace0(input)?;
let (input, ident) = alpha1(input)?;
let (input, values) = many1(parse_propval)(input)?;
let (input, _) = multispace0(input)?;
let values = values
.into_iter()
.map(|v| v.to_owned())
.collect::<Vec<String>>();
Ok((
input,
Property {
ident: ident.to_owned(),
values,
},
))
}
fn parse_propval<'a, E: nom::error::ParseError<&'a str>>(
input: &'a str,
) -> IResult<&'a str, String, E> {
let (input, _) = multispace0(input)?;
let (input, _) = tag("[")(input)?;
let (input, value) = parse_propval_text(input)?;
let (input, _) = tag("]")(input)?;
Ok((input, value.unwrap_or(String::new())))
}
fn parse_propval_text<'a, E: nom::error::ParseError<&'a str>>(
input: &'a str,
) -> IResult<&'a str, Option<String>, E> {
let (input, value) = opt(escaped_transform(
none_of("\\]"),
'\\',
alt((
value("]", tag("]")),
value("\\", tag("\\")),
value("", tag("\n")),
)),
))(input)?;
Ok((input, value.map(|v| v.to_owned())))
}
pub fn parse_size<'a, E: nom::error::ParseError<&'a str>>(
input: &'a str,
) -> IResult<&'a str, Size, E> {
let (input, dimensions) = separated_list1(tag(":"), digit1)(input)?;
let (width, height) = match dimensions.as_slice() {
[width] => (width.parse::<i32>().unwrap(), width.parse::<i32>().unwrap()),
[width, height] => (
width.parse::<i32>().unwrap(),
height.parse::<i32>().unwrap(),
),
_ => (19, 19),
};
Ok((input, Size { width, height }))
}
#[cfg(test)]
mod test {
use std::{fs::File, io::Read};
use super::*;
const EXAMPLE: &'static str = "(;FF[4]C[root](;C[a];C[b](;C[c])
(;C[d];C[e]))
(;C[f](;C[g];C[h];C[i])
(;C[j])))";
#[test]
fn it_can_parse_properties() {
let (_, prop) = parse_property::<nom::error::VerboseError<&str>>("C[a]").unwrap();
assert_eq!(
prop,
Property {
ident: "C".to_owned(),
values: vec!["a".to_owned()]
}
);
let (_, prop) = parse_property::<nom::error::VerboseError<&str>>("C[a][b][c]").unwrap();
assert_eq!(
prop,
Property {
ident: "C".to_owned(),
values: vec!["a".to_owned(), "b".to_owned(), "c".to_owned()]
}
);
}
#[test]
fn it_can_parse_a_standalone_node() {
let (_, node) = parse_node::<nom::error::VerboseError<&str>>(";B[ab]").unwrap();
assert_eq!(
node,
Node {
properties: vec![Property {
ident: "B".to_owned(),
values: vec!["ab".to_owned()]
}]
}
);
let (_, node) =
parse_node::<nom::error::VerboseError<&str>>(";B[ab];W[dp];B[pq]C[some comments]")
.unwrap();
assert_eq!(
node,
Node {
properties: vec![Property {
ident: "B".to_owned(),
values: vec!["ab".to_owned()]
}]
}
);
}
#[test]
fn it_can_parse_a_simple_sequence() {
let (_, sequence) =
parse_tree::<nom::error::VerboseError<&str>>("(;B[ab];W[dp];B[pq]C[some comments])")
.unwrap();
assert_eq!(
sequence,
Tree {
sequence: vec![
Node {
properties: vec![Property {
ident: "B".to_owned(),
values: vec!["ab".to_owned()]
}]
},
Node {
properties: vec![Property {
ident: "W".to_owned(),
values: vec!["dp".to_owned()]
}]
},
Node {
properties: vec![
Property {
ident: "B".to_owned(),
values: vec!["pq".to_owned()]
},
Property {
ident: "C".to_owned(),
values: vec!["some comments".to_owned()]
}
]
}
],
sub_sequences: vec![],
}
);
}
#[test]
fn it_can_parse_a_sequence_with_subsequences() {
let text = "(;C[a];C[b](;C[c])(;C[d];C[e]))";
let (_, sequence) = parse_tree::<nom::error::VerboseError<&str>>(text).unwrap();
let main_sequence = vec![
Node {
properties: vec![Property {
ident: "C".to_owned(),
values: vec!["a".to_owned()],
}],
},
Node {
properties: vec![Property {
ident: "C".to_owned(),
values: vec!["b".to_owned()],
}],
},
];
let subsequence_1 = Tree {
sequence: vec![Node {
properties: vec![Property {
ident: "C".to_owned(),
values: vec!["c".to_owned()],
}],
}],
sub_sequences: vec![],
};
let subsequence_2 = Tree {
sequence: vec![
Node {
properties: vec![Property {
ident: "C".to_owned(),
values: vec!["d".to_owned()],
}],
},
Node {
properties: vec![Property {
ident: "C".to_owned(),
values: vec!["e".to_owned()],
}],
},
],
sub_sequences: vec![],
};
assert_eq!(
sequence,
Tree {
sequence: main_sequence,
sub_sequences: vec![subsequence_1, subsequence_2],
}
);
}
#[test]
fn it_can_parse_example_1() {
let (_, ex_tree) = parse_tree::<nom::error::VerboseError<&str>>(EXAMPLE).unwrap();
assert_eq!(ex_tree.sequence.len(), 1);
assert_eq!(ex_tree.sequence[0].properties.len(), 2);
assert_eq!(
ex_tree.sequence[0].properties[0],
Property {
ident: "FF".to_owned(),
values: vec!["4".to_owned()]
}
);
assert_eq!(ex_tree.sub_sequences.len(), 2);
assert_eq!(ex_tree.sub_sequences[0].sequence.len(), 2);
assert_eq!(
ex_tree.sub_sequences[0].sequence,
vec![
Node {
properties: vec![Property {
ident: "C".to_owned(),
values: vec!["a".to_owned()]
}]
},
Node {
properties: vec![Property {
ident: "C".to_owned(),
values: vec!["b".to_owned()]
}]
},
]
);
assert_eq!(ex_tree.sub_sequences[0].sub_sequences.len(), 2);
}
#[test]
fn it_can_regenerate_the_tree() {
let (_, tree1) = parse_tree::<nom::error::VerboseError<&str>>(EXAMPLE).unwrap();
assert_eq!(
tree1.to_string(),
"(;FF[4]C[root](;C[a];C[b](;C[c])(;C[d];C[e]))(;C[f](;C[g];C[h];C[i])(;C[j])))"
);
let (_, tree2) = parse_tree::<nom::error::VerboseError<&str>>(&tree1.to_string()).unwrap();
assert_eq!(tree1, tree2);
}
#[test]
fn it_parses_propvals() {
let (_, propval) = parse_propval::<nom::error::VerboseError<&str>>("[]").unwrap();
assert_eq!(propval, "".to_owned());
let (_, propval) =
parse_propval::<nom::error::VerboseError<&str>>("[normal propval]").unwrap();
assert_eq!(propval, "normal propval".to_owned());
let (_, propval) =
parse_propval::<nom::error::VerboseError<&str>>(r"[need an [escape\] in the propval]")
.unwrap();
assert_eq!(propval, "need an [escape] in the propval".to_owned());
}
#[test]
fn it_parses_propvals_with_hard_linebreaks() {
let (_, propval) = parse_propval_text::<nom::error::VerboseError<&str>>(
"There are hard linebreaks & soft linebreaks.
Soft linebreaks...",
)
.unwrap();
assert_eq!(
propval,
Some(
"There are hard linebreaks & soft linebreaks.
Soft linebreaks..."
.to_owned()
)
);
}
#[test]
fn it_parses_propvals_with_escaped_closing_brackets() {
let (_, propval) =
parse_propval_text::<nom::error::VerboseError<&str>>(r"escaped closing \] bracket")
.unwrap();
assert_eq!(
propval,
Some(r"escaped closing ] bracket".to_owned()).to_owned()
);
}
#[test]
fn it_parses_propvals_with_soft_linebreaks() {
let (_, propval) = parse_propval_text::<nom::error::VerboseError<&str>>(
r"Soft linebreaks are linebreaks preceeded by '\\' like this one >o\
k<. Hard line breaks are all other linebreaks.",
)
.unwrap();
assert_eq!(
propval,
Some("Soft linebreaks are linebreaks preceeded by '\\' like this one >ok<. Hard line breaks are all other linebreaks.".to_owned())
.to_owned()
);
}
#[test]
fn it_parses_sgf_with_newline_in_sequence() {
let data = String::from(
"(;FF[4]C[root](;C[a];C[b](;C[c])(;C[d];C[e]
))(;C[f](;C[g];C[h];C[i])(;C[j])))",
);
parse_tree::<nom::error::VerboseError<&str>>(&data).unwrap();
}
#[test]
fn it_parses_sgf_with_newline_between_two_sequence_closings() {
let data = String::from(
"(;FF[4]C[root](;C[a];C[b](;C[c])(;C[d];C[e])
)(;C[f](;C[g];C[h];C[i])(;C[j])))",
);
parse_tree::<nom::error::VerboseError<&str>>(&data).unwrap();
}
}