Skip to content

Commit 9069997

Browse files
Merge pull request #74 from nipunsadvilkar/npn-newline-fix
2 parents 92362f7 + 60983a2 commit 9069997

File tree

4 files changed

+19
-2
lines changed

4 files changed

+19
-2
lines changed

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
# v0.3.1
2+
- 🚑 ✅ Handle Newline character & update tests
3+
4+
# v0.3.0
5+
- ✨ 💫 Support Multiple languages - \#2
6+
- 🏎⚡️💯 Benchmark across Segmentation Tools, Libraries and Algorithms
7+
- 🎨 ♻️ Update sentence char_span logic
8+
- ⚡️ Performance improvements - \#41
9+
- ♻️🐛 Refactor AbbreviationReplacer
10+
1 11
# v0.3.0rc
2 12
- ✨ 💫 sent `char_span` through with spaCy & regex approach - \#63
3 13
- ♻️ Refactoring to support multiple languages

pysbd/about.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
2 2
# https://python-packaging-user-guide.readthedocs.org/en/latest/single_source_version/
3 3

4 4
__title__ = "pysbd"
5-
__version__ = "0.3.0"
5+
__version__ = "0.3.1"
6 6
__summary__ = "pysbd (Python Sentence Boundary Disambiguation) is a rule-based sentence boundary detection that works out-of-the-box across many languages."
7 7
__uri__ = "http://nipunsadvilkar.github.io/"
8 8
__author__ = "Nipun Sadvilkar"

pysbd/processor.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ def __init__(self, text, lang, char_span=False):
28 28
def process(self):
29 29
if not self.text:
30 30
return self.text
31+
self.text = self.text.replace('\n', '\r')
31 32
li = ListItemReplacer(self.text)
32 33
self.text = li.add_line_break()
33 34
self.replace_abbreviations()

tests/regression/test_issues.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,13 @@
58 58
('#55', "She turned to him, \"This is great.\" She held the book out to show him.",
59 59
[
60 60
('She turned to him, "This is great." ', 0, 36), ('She held the book out to show him.', 36, 70)
61-
])
61+
]),
62+
('#56',
63+
"""This eBook is for the use of anyone anywhere at no cost
64+
you may copy it, give it away or re-use it under the terms of the this license
65+
""",
66+
[('This eBook is for the use of anyone anywhere at no cost\n', 0, 56),
67+
('you may copy it, give it away or re-use it under the terms of the this license\n', 56, 135)])
62 68

63 69
]
64 70

0 commit comments

Comments
 (0)