Import the new level-one parser

This is the parser that does a raw parse of the SGF file, interpreting components but not enforcing node types.
This commit is contained in:
Savanni D'Gerinel 2023-10-19 03:02:37 -04:00 committed by savanni
parent 942e91009e
commit e461cb9908
5 changed files with 1395 additions and 553 deletions

View File

@ -43,10 +43,11 @@ impl GamePreviewElement {
Win::Time => "Timeout".to_owned(), Win::Time => "Timeout".to_owned(),
Win::Forfeit => "Forfeit".to_owned(), Win::Forfeit => "Forfeit".to_owned(),
Win::Score(score) => format!("{:.1}", score), Win::Score(score) => format!("{:.1}", score),
Win::Unknown => "Unknown".to_owned(),
}; };
let result = match game.info.result { let result = match game.info.result {
Some(GameResult::Annulled) => "Annulled".to_owned(), Some(GameResult::Void) => "Annulled".to_owned(),
Some(GameResult::Draw) => "Draw".to_owned(), Some(GameResult::Draw) => "Draw".to_owned(),
Some(GameResult::Black(ref win)) => format!("Black by {}", format_win(win)), Some(GameResult::Black(ref win)) => format!("Black by {}", format_win(win)),
Some(GameResult::White(ref win)) => format!("White by {}", format_win(win)), Some(GameResult::White(ref win)) => format!("White by {}", format_win(win)),

View File

@ -1,8 +1,8 @@
mod date; mod date;
pub use date::Date; pub use date::Date;
mod tree; mod parser;
pub use tree::parse_collection; pub use parser::parse_collection;
use thiserror::Error; use thiserror::Error;

1276
sgf/src/parser.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,547 +0,0 @@
use crate::Error;
use nom::{
branch::alt,
bytes::complete::{escaped_transform, tag},
character::complete::{alpha1, multispace0, multispace1, none_of},
combinator::{opt, value},
multi::{many0, many1, separated_list1},
IResult,
};
use std::num::ParseIntError;
impl From<ParseSizeError> for Error {
fn from(_: ParseSizeError) -> Self {
Self::InvalidBoardSize
}
}
/// Reasons a board-size string could not be turned into a `Size`.
#[derive(Debug)]
pub enum ParseSizeError {
    /// One of the `:`-separated components was not a valid integer.
    ParseIntError(ParseIntError),
    /// No dimension was available to build a size from.
    InsufficientArguments,
}

impl std::fmt::Display for ParseSizeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::ParseIntError(err) => write!(f, "invalid board dimension: {}", err),
            Self::InsufficientArguments => f.write_str("no board dimensions were provided"),
        }
    }
}

impl std::error::Error for ParseSizeError {
    /// Exposes the underlying integer parsing error, when there is one.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::ParseIntError(err) => Some(err),
            Self::InsufficientArguments => None,
        }
    }
}

/// Allows `?` to lift an integer parsing failure into a [`ParseSizeError`].
impl From<ParseIntError> for ParseSizeError {
    fn from(e: ParseIntError) -> Self {
        Self::ParseIntError(e)
    }
}
#[derive(Clone, Debug, PartialEq)]
pub struct Size {
pub width: i32,
pub height: i32,
}
impl TryFrom<&str> for Size {
type Error = ParseSizeError;
fn try_from(s: &str) -> Result<Self, Self::Error> {
let parts = s
.split(':')
.map(|v| v.parse::<i32>())
.collect::<Result<Vec<i32>, ParseIntError>>()?;
match parts[..] {
[width, height, ..] => Ok(Size { width, height }),
[dim] => Ok(Size {
width: dim,
height: dim,
}),
[] => Err(ParseSizeError::InsufficientArguments),
}
}
}
/// A single game tree, identified by its root node.
#[derive(Clone, Debug, PartialEq)]
pub struct Tree {
    pub root: Node,
}

/// Renders the tree as its root node's text wrapped in one pair of parentheses.
///
/// Implementing `Display` (rather than `ToString` directly) keeps `to_string()` available
/// through the standard blanket impl and also lets the tree be used in `format!`-style macros.
impl std::fmt::Display for Tree {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `to_string()` is the only rendering API this block relies on `Node` providing.
        write!(f, "({})", self.root.to_string())
    }
}
/// One node of a game tree: its properties plus the nodes that follow it.
#[derive(Clone, Debug, PartialEq)]
pub struct Node {
    /// Properties stored on this node, in order.
    pub properties: Vec<Property>,
    /// Successor nodes. Exactly one entry is written as a plain continuation; any other
    /// count is written as parenthesised variations.
    pub next: Vec<Node>,
}

/// Renders the node as `;` followed by its properties and then its successors.
///
/// Implementing `Display` (rather than `ToString` directly) keeps `to_string()` available
/// through the standard blanket impl.
impl std::fmt::Display for Node {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(";")?;
        for prop in &self.properties {
            // `to_string()` is the only rendering API this block relies on `Property` providing.
            f.write_str(&prop.to_string())?;
        }
        match self.next.as_slice() {
            // A single successor continues the current sequence without parentheses.
            [only] => write!(f, "{}", only),
            // Zero successors writes nothing; several are each wrapped as a variation.
            branches => branches
                .iter()
                .try_for_each(|branch| write!(f, "({})", branch)),
        }
    }
}

impl Node {
    /// Returns a clone of the first property whose identifier equals `ident`, if any.
    pub fn find_prop(&self, ident: &str) -> Option<Property> {
        self.properties
            .iter()
            .find(|prop| prop.ident == ident)
            .cloned()
    }

    /// Returns the first successor node, if there is one.
    pub fn next(&self) -> Option<&Node> {
        self.next.first()
    }
}
/// A property: an identifier followed by one or more bracketed values.
#[derive(Clone, Debug, PartialEq)]
pub struct Property {
    /// The property identifier, e.g. `C` or `FF`.
    pub ident: String,
    /// The property values, stored unescaped.
    pub values: Vec<String>,
}

/// Renders the property as `IDENT[value][value]...`.
///
/// `\` and `]` inside a value are written with a leading `\`. The parser in this file strips
/// that escape when reading, so without re-escaping here a value containing `]` would
/// serialize to text that no longer parses back to the same value.
///
/// Implementing `Display` (rather than `ToString` directly) keeps `to_string()` available
/// through the standard blanket impl.
impl std::fmt::Display for Property {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use std::fmt::Write as _;

        f.write_str(&self.ident)?;
        for value in &self.values {
            f.write_char('[')?;
            for ch in value.chars() {
                if ch == '\\' || ch == ']' {
                    f.write_char('\\')?;
                }
                f.write_char(ch)?;
            }
            f.write_char(']')?;
        }
        Ok(())
    }
}
/// Parses one or more game trees separated by whitespace.
///
/// Returns the unconsumed input together with one [`Tree`] per parsed root node. At least one
/// tree must be present, and consecutive trees must have whitespace between them.
pub fn parse_collection<'a, E: nom::error::ParseError<&'a str>>(
    input: &'a str,
) -> IResult<&'a str, Vec<Tree>, E> {
    separated_list1(multispace1, parse_tree)(input).map(|(remaining, roots)| {
        let trees: Vec<Tree> = roots.into_iter().map(|root| Tree { root }).collect();
        (remaining, trees)
    })
}
// note: must preserve unknown properties
// note: must fix or preserve illegally formatted game-info properties
// note: must correct or delete illegally formatted properties, but display a warning
/// Parses one parenthesised game tree and returns its first node.
///
/// Consumed in order: optional leading whitespace, `(`, a node (which recursively pulls in
/// everything that follows it), optional whitespace, and the closing `)`.
fn parse_tree<'a, E: nom::error::ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, Node, E> {
    let (rest, _) = multispace0(input)?;
    let (rest, _) = tag("(")(rest)?;
    let (rest, root) = parse_node(rest)?;
    let (rest, _) = multispace0(rest)?;
    let (rest, _) = tag(")")(rest)?;
    Ok((rest, root))
}
/// Parses a node together with everything that follows it.
///
/// A node is an optional `;` followed by at least one property. After the properties the
/// parser first tries a directly following node (the continuation of the sequence) and then
/// any number of parenthesised subtrees; both kinds end up in `next`, continuation first.
fn parse_node<'a, E: nom::error::ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, Node, E> {
    let (rest, _) = multispace0(input)?;
    let (rest, _) = opt(tag(";"))(rest)?;
    let (rest, properties) = many1(parse_property)(rest)?;
    let (rest, successor) = opt(parse_node)(rest)?;
    let (rest, variations) = many0(parse_tree)(rest)?;

    // The optional continuation node always precedes the variations.
    let next: Vec<Node> = successor.into_iter().chain(variations).collect();
    Ok((rest, Node { properties, next }))
}
/// Parses one property: an alphabetic identifier followed by one or more bracketed values.
///
/// Whitespace before the identifier and after the last value is consumed. The values returned
/// by `parse_propval` are already owned `String`s, so they are moved into the `Property`
/// as-is rather than being cloned a second time.
fn parse_property<'a, E: nom::error::ParseError<&'a str>>(
    input: &'a str,
) -> IResult<&'a str, Property, E> {
    let (input, _) = multispace0(input)?;
    let (input, ident) = alpha1(input)?;
    let (input, values) = many1(parse_propval)(input)?;
    let (input, _) = multispace0(input)?;

    Ok((
        input,
        Property {
            ident: ident.to_owned(),
            values,
        },
    ))
}
/// Parses one bracketed property value, e.g. `[ab]`, and returns its unescaped text.
///
/// Leading whitespace is skipped. An empty pair of brackets yields an empty string.
fn parse_propval<'a, E: nom::error::ParseError<&'a str>>(
    input: &'a str,
) -> IResult<&'a str, String, E> {
    let (rest, _) = multispace0(input)?;
    let (rest, _) = tag("[")(rest)?;
    let (rest, text) = parse_propval_text(rest)?;
    let (rest, _) = tag("]")(rest)?;
    Ok((rest, text.unwrap_or_default()))
}
/// Parses the text inside a property value, up to (not including) the closing `]`.
///
/// `\` is the escape character:
/// * `\` followed by a line break (LF, CR, CRLF or LFCR) is a soft line break and is removed;
/// * `\` followed by any other character inserts that character verbatim, so `\]` and `\\`
///   yield `]` and `\`, and escapes such as `\:` no longer make the whole value fail.
///
/// Returns `None` when no text could be parsed (for example an empty value).
fn parse_propval_text<'a, E: nom::error::ParseError<&'a str>>(
    input: &'a str,
) -> IResult<&'a str, Option<String>, E> {
    // `escaped_transform` already builds an owned `String`, so the result is returned directly.
    opt(escaped_transform(
        none_of("\\]"),
        '\\',
        alt((
            // Two-character line breaks must be tried before the single-character ones.
            value("", tag("\r\n")),
            value("", tag("\n\r")),
            value("", tag("\n")),
            value("", tag("\r")),
            // Any other escaped character is kept as-is.
            nom::bytes::complete::take(1usize),
        )),
    ))(input)
}
#[cfg(test)]
mod test {
    use super::*;

    // Continuation lines of the multi-line literals in this module start at column 0 on
    // purpose: any indentation would become part of the string.
    const EXAMPLE: &str = "(;FF[4]C[root](;C[a];C[b](;C[c])
(;C[d];C[e]))
(;C[f](;C[g];C[h];C[i])
(;C[j])))";

    /// Builds a property from borrowed parts.
    fn prop(ident: &str, values: &[&str]) -> Property {
        Property {
            ident: ident.to_owned(),
            values: values.iter().map(|v| (*v).to_owned()).collect(),
        }
    }

    /// Builds a node from its properties and successors.
    fn node(properties: Vec<Property>, next: Vec<Node>) -> Node {
        Node { properties, next }
    }

    /// Builds a node holding a single `C[...]` property.
    fn comment(text: &str, next: Vec<Node>) -> Node {
        node(vec![prop("C", &[text])], next)
    }

    /// Expected result for `;B[ab];W[dp];B[pq]C[some comments]`.
    fn three_move_sequence() -> Node {
        node(
            vec![prop("B", &["ab"])],
            vec![node(
                vec![prop("W", &["dp"])],
                vec![node(
                    vec![prop("B", &["pq"]), prop("C", &["some comments"])],
                    vec![],
                )],
            )],
        )
    }

    #[test]
    fn it_can_parse_properties() {
        let (_, single) = parse_property::<nom::error::VerboseError<&str>>("C[a]").unwrap();
        assert_eq!(single, prop("C", &["a"]));

        let (_, multiple) =
            parse_property::<nom::error::VerboseError<&str>>("C[a][b][c]").unwrap();
        assert_eq!(multiple, prop("C", &["a", "b", "c"]));
    }

    #[test]
    fn it_can_parse_a_standalone_node() {
        let (_, lone) = parse_node::<nom::error::VerboseError<&str>>(";B[ab]").unwrap();
        assert_eq!(lone, node(vec![prop("B", &["ab"])], vec![]));

        let (_, chained) =
            parse_node::<nom::error::VerboseError<&str>>(";B[ab];W[dp];B[pq]C[some comments]")
                .unwrap();
        assert_eq!(chained, three_move_sequence());
    }

    #[test]
    fn it_can_parse_a_simple_sequence() {
        let (_, sequence) =
            parse_tree::<nom::error::VerboseError<&str>>("(;B[ab];W[dp];B[pq]C[some comments])")
                .unwrap();
        assert_eq!(sequence, three_move_sequence());
    }

    #[test]
    fn it_can_parse_a_branching_sequence() {
        let text = "(;C[a];C[b](;C[c])(;C[d];C[e]))";
        let (_, tree) = parse_tree::<nom::error::VerboseError<&str>>(text).unwrap();

        let expected = comment(
            "a",
            vec![comment(
                "b",
                vec![
                    comment("c", vec![]),
                    comment("d", vec![comment("e", vec![])]),
                ],
            )],
        );
        assert_eq!(tree, expected);
    }

    #[test]
    fn it_can_parse_example_1() {
        let (_, tree) = parse_tree::<nom::error::VerboseError<&str>>(EXAMPLE).unwrap();

        // First variation: a -> b -> (c | d -> e)
        let c = comment("c", vec![]);
        let d = comment("d", vec![comment("e", vec![])]);
        let a = comment("a", vec![comment("b", vec![c, d])]);

        // Second variation: f -> (g -> h -> i | j)
        let g = comment("g", vec![comment("h", vec![comment("i", vec![])])]);
        let j = comment("j", vec![]);
        let f = comment("f", vec![g, j]);

        let expected = node(vec![prop("FF", &["4"]), prop("C", &["root"])], vec![a, f]);
        assert_eq!(tree, expected);
    }

    #[test]
    fn it_can_regenerate_the_tree() {
        let (_, root) = parse_tree::<nom::error::VerboseError<&str>>(EXAMPLE).unwrap();
        let first = Tree { root };
        let rendered = first.to_string();
        assert_eq!(
            rendered,
            "(;FF[4]C[root](;C[a];C[b](;C[c])(;C[d];C[e]))(;C[f](;C[g];C[h];C[i])(;C[j])))"
        );

        let (_, reparsed) = parse_tree::<nom::error::VerboseError<&str>>(&rendered).unwrap();
        assert_eq!(first, Tree { root: reparsed });
    }

    #[test]
    fn it_parses_propvals() {
        let (_, empty) = parse_propval::<nom::error::VerboseError<&str>>("[]").unwrap();
        assert_eq!(empty, "".to_owned());

        let (_, plain) =
            parse_propval::<nom::error::VerboseError<&str>>("[normal propval]").unwrap();
        assert_eq!(plain, "normal propval".to_owned());

        let (_, escaped) =
            parse_propval::<nom::error::VerboseError<&str>>(r"[need an [escape\] in the propval]")
                .unwrap();
        assert_eq!(escaped, "need an [escape] in the propval".to_owned());
    }

    #[test]
    fn it_parses_propvals_with_hard_linebreaks() {
        let (_, propval) = parse_propval_text::<nom::error::VerboseError<&str>>(
            "There are hard linebreaks & soft linebreaks.
Soft linebreaks...",
        )
        .unwrap();

        let expected = "There are hard linebreaks & soft linebreaks.
Soft linebreaks..."
            .to_owned();
        assert_eq!(propval, Some(expected));
    }

    #[test]
    fn it_parses_propvals_with_escaped_closing_brackets() {
        let (_, propval) =
            parse_propval_text::<nom::error::VerboseError<&str>>(r"escaped closing \] bracket")
                .unwrap();
        assert_eq!(propval, Some(r"escaped closing ] bracket".to_owned()));
    }

    #[test]
    fn it_parses_propvals_with_soft_linebreaks() {
        let (_, propval) = parse_propval_text::<nom::error::VerboseError<&str>>(
            r"Soft linebreaks are linebreaks preceeded by '\\' like this one >o\
k<. Hard line breaks are all other linebreaks.",
        )
        .unwrap();

        let expected = "Soft linebreaks are linebreaks preceeded by '\\' like this one >ok<. Hard line breaks are all other linebreaks.";
        assert_eq!(propval, Some(expected.to_owned()));
    }

    #[test]
    fn it_parses_sgf_with_newline_in_sequence() {
        let data = "(;FF[4]C[root](;C[a];C[b](;C[c])(;C[d];C[e]
))(;C[f](;C[g];C[h];C[i])(;C[j])))";
        parse_tree::<nom::error::VerboseError<&str>>(data).unwrap();
    }

    #[test]
    fn it_parses_sgf_with_newline_between_two_sequence_closings() {
        let data = "(;FF[4]C[root](;C[a];C[b](;C[c])(;C[d];C[e])
)(;C[f](;C[g];C[h];C[i])(;C[j])))";
        parse_tree::<nom::error::VerboseError<&str>>(data).unwrap();
    }
}

View File

@ -1,5 +1,7 @@
use crate::date::Date; use crate::date::Date;
use thiserror::Error;
/// This is a placeholder structure. It is not meant to represent a game, only to provide a mock /// This is a placeholder structure. It is not meant to represent a game, only to provide a mock
/// interface for code already written that expects a Game data type to exist. /// interface for code already written that expects a Game data type to exist.
#[derive(Debug, Default)] #[derive(Debug, Default)]
@ -19,11 +21,120 @@ pub struct GameInfo {
} }
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
/// A named game, with `Other` as a free-form fallback carrying a raw string.
///
/// NOTE(review): the variants look like the game list of the SGF `GM` property, in the same
/// order (Go first, Kropki last) — confirm against the code that maps `GM` values to this
/// type before relying on the ordering.
pub enum GameType {
Go,
Othello,
Chess,
GomokuRenju,
NineMensMorris,
Backgammon,
ChineseChess,
Shogi,
LinesOfAction,
Ataxx,
Hex,
Jungle,
Neutron,
PhilosophersFootball,
Quadrature,
Trax,
Tantrix,
Amazons,
Octi,
Gess,
Twixt,
Zertz,
Plateau,
Yinsh,
Punct,
Gobblet,
Hive,
Exxit,
Hnefatal,
Kuba,
Tripples,
Chase,
TumblingDown,
Sahara,
Byte,
Focus,
Dvonn,
Tamsk,
Gipf,
Kropki,
/// Any game not covered by the named variants, identified by a string.
Other(String),
}
/// Errors produced when a nom parse result is converted into this module's error type.
#[derive(Debug)]
pub enum Error {
// InvalidField,
// InvalidBoardSize,
/// The parser reported `nom::Err::Incomplete`.
Incomplete,
/// The parser reported an error or a failure; the payload holds nom's verbose error trace
/// with owned copies of the input fragments.
InvalidSgf(VerboseNomError),
}
#[derive(Debug)]
pub struct VerboseNomError(nom::error::VerboseError<String>);
impl From<nom::error::VerboseError<&str>> for VerboseNomError {
fn from(err: nom::error::VerboseError<&str>) -> Self {
VerboseNomError(nom::error::VerboseError {
errors: err
.errors
.into_iter()
.map(|err| (err.0.to_owned(), err.1))
.collect(),
})
}
}
impl From<nom::Err<nom::error::VerboseError<&str>>> for Error {
fn from(err: nom::Err<nom::error::VerboseError<&str>>) -> Self {
match err {
nom::Err::Incomplete(_) => Error::Incomplete,
nom::Err::Error(e) => Error::InvalidSgf(VerboseNomError::from(e)),
nom::Err::Failure(e) => Error::InvalidSgf(VerboseNomError::from(e)),
}
}
}
/// Parse error wrapping a plain (non-verbose) nom error with an owned input string.
#[derive(Debug, PartialEq, Error)]
pub enum ParseError {
/// A nom error; its display text is the fixed message given in the `#[error]` attribute
/// below and does not include the wrapped error's details.
#[error("An unknown error was found")]
NomError(nom::error::Error<String>),
}
impl From<nom::error::Error<&str>> for ParseError {
fn from(err: nom::error::Error<&str>) -> Self {
Self::NomError(nom::error::Error {
input: err.input.to_owned(),
code: err.code,
})
}
}
/// The two player colors.
#[derive(Clone, Debug, PartialEq)]
pub enum Color {
    Black,
    White,
}

impl Color {
    /// Returns the one-letter abbreviation: `"B"` for black, `"W"` for white.
    pub fn abbreviation(&self) -> String {
        let letter = match *self {
            Color::Black => 'B',
            Color::White => 'W',
        };
        letter.to_string()
    }
}
#[derive(Debug, Clone, PartialEq)]
pub enum GameResult { pub enum GameResult {
Annulled,
Draw, Draw,
Black(Win), Black(Win),
White(Win), White(Win),
Void,
Unknown(String), Unknown(String),
} }
@ -33,13 +144,13 @@ impl TryFrom<&str> for GameResult {
if s == "0" { if s == "0" {
Ok(GameResult::Draw) Ok(GameResult::Draw)
} else if s == "Void" { } else if s == "Void" {
Ok(GameResult::Annulled) Ok(GameResult::Void)
} else { } else {
let parts = s.split('+').collect::<Vec<&str>>(); let parts = s.split('+').collect::<Vec<&str>>();
let res = match parts[0].to_ascii_lowercase().as_str() { let res = match parts[0].to_ascii_lowercase().as_str() {
"b" => GameResult::Black, "b" => GameResult::Black,
"w" => GameResult::White, "w" => GameResult::White,
_ => return Ok(GameResult::Unknown(parts[0].to_owned())), res => return Ok(GameResult::Unknown(res.to_owned())),
}; };
match parts[1].to_ascii_lowercase().as_str() { match parts[1].to_ascii_lowercase().as_str() {
"r" | "resign" => Ok(res(Win::Resignation)), "r" | "resign" => Ok(res(Win::Resignation)),
@ -60,4 +171,5 @@ pub enum Win {
Resignation, Resignation,
Forfeit, Forfeit,
Time, Time,
Unknown,
} }