Merge pull request #117 from Kerollmops/tokenizer-support-parentheses

Make the tokenizer support parentheses
This commit is contained in:
Clément Renault 2019-02-22 19:36:15 +01:00 committed by GitHub
commit 12a352ae2f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -75,7 +75,7 @@ impl Separator {
fn detect_separator(c: char) -> Option<Separator> { fn detect_separator(c: char) -> Option<Separator> {
match c { match c {
'.' | ';' | ',' | '!' | '?' | '-' => Some(Long), '.' | ';' | ',' | '!' | '?' | '-' | '(' | ')' => Some(Long),
' ' | '\'' | '"' => Some(Short), ' ' | '\'' | '"' => Some(Short),
_ => None, _ => None,
} }
@@ -150,11 +150,12 @@ mod tests {
#[test] #[test]
fn hard() { fn hard() {
let mut tokenizer = Tokenizer::new(" .? yo lolo. aïe"); let mut tokenizer = Tokenizer::new(" .? yo lolo. aïe (ouch)");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 4 })); assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 4 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 1, char_index: 7 })); assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 1, char_index: 7 }));
assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 9, char_index: 13 })); assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 9, char_index: 13 }));
assert_eq!(tokenizer.next(), Some(Token { word: "ouch", word_index: 17, char_index: 18 }));
assert_eq!(tokenizer.next(), None); assert_eq!(tokenizer.next(), None);
let mut tokenizer = Tokenizer::new("yo ! lolo ? wtf - lol . aïe ,"); let mut tokenizer = Tokenizer::new("yo ! lolo ? wtf - lol . aïe ,");