From ef7bcfdf7f019820fd036cef5008862b453b53de Mon Sep 17 00:00:00 2001 From: Jonathan Neuhauser Date: Wed, 26 May 2021 18:00:20 +0200 Subject: [PATCH 1/2] rewrite text split extension --- inkex/properties.py | 2 +- ..._--id__t1__--id__t3__--splittype__word.out | 41 -- ...plit__5478757cb04224bacfa3c641fea954ba.out | 28 ++ ...plit__74947d6e9e9ee89a80bbec0bfacc6dfb.out | 28 ++ ...plit__897ab8b3c516acacaf68391a51eb8836.out | 27 ++ ...plit__ad318803d098b3903561d44825a36ea0.out | 45 ++ ...plit__c242adcac78228419d3c065a99727bfb.out | 44 ++ ...plit__d8b155ac58a7424471f416b64f21669f.out | 27 ++ ...plit__dd77d3cc0134ad62833e05ca73e38896.out | 27 ++ tests/data/svg/text_types.svg | 256 +++++++++++ tests/test_text_split.py | 17 +- text_split.inx | 9 +- text_split.py | 398 +++++++++++------- 13 files changed, 753 insertions(+), 196 deletions(-) delete mode 100644 tests/data/refs/text_split__--id__t1__--id__t3__--splittype__word.out create mode 100644 tests/data/refs/text_split__5478757cb04224bacfa3c641fea954ba.out create mode 100644 tests/data/refs/text_split__74947d6e9e9ee89a80bbec0bfacc6dfb.out create mode 100644 tests/data/refs/text_split__897ab8b3c516acacaf68391a51eb8836.out create mode 100644 tests/data/refs/text_split__ad318803d098b3903561d44825a36ea0.out create mode 100644 tests/data/refs/text_split__c242adcac78228419d3c065a99727bfb.out create mode 100644 tests/data/refs/text_split__d8b155ac58a7424471f416b64f21669f.out create mode 100644 tests/data/refs/text_split__dd77d3cc0134ad62833e05ca73e38896.out create mode 100644 tests/data/svg/text_types.svg diff --git a/inkex/properties.py b/inkex/properties.py index 57fd22e0..10834329 100644 --- a/inkex/properties.py +++ b/inkex/properties.py @@ -479,7 +479,7 @@ all_properties: Dict[str, Tuple[Type[BaseStyleValue], str, bool, bool, Union[Lis "image-rendering": (EnumValue, "auto", True, True, ["auto", "optimizeQuality", "optimizeSpeed"]), "letter-spacing": (BaseStyleValue, "normal", True, True, None), "lighting-color": (ColorValue, "normal", True, False, None), - "line-height": (BaseStyleValue, "normal", False, False, None), + "line-height": (BaseStyleValue, "normal", False, True, None), "marker" : (MarkerShorthandValue, "", True, True, None), "marker-end": (URLNoneValue, "none", True, True, None), "marker-mid": (URLNoneValue, "none", True, True, None), diff --git a/tests/data/refs/text_split__--id__t1__--id__t3__--splittype__word.out b/tests/data/refs/text_split__--id__t1__--id__t3__--splittype__word.out deleted file mode 100644 index 0c0e4fd9..00000000 --- a/tests/data/refs/text_split__--id__t1__--id__t3__--splittype__word.out +++ /dev/null @@ -1,41 +0,0 @@ - - - - - - - - - - - - format: png -dpi: 96 -layout-disposition: bg-el-norepeat -layout-position-anchor: tl - - - - - - - - - - - - - Hello World - flow text which wraps UPPER - Multi linetextFOO - - Grouped - text - - - - - HelloWorldMultilinetextFOO - - \ No newline at end of file diff --git a/tests/data/refs/text_split__5478757cb04224bacfa3c641fea954ba.out b/tests/data/refs/text_split__5478757cb04224bacfa3c641fea954ba.out new file mode 100644 index 00000000..1e395528 --- /dev/null +++ b/tests/data/refs/text_split__5478757cb04224bacfa3c641fea954ba.out @@ -0,0 +1,28 @@ + + + + + + + This is a regular textWith a line break.This is regular textwith a line breakand some transforms.This is an SVG2 flowed text. (inline size) +This is a line break inside that text.This is a text with kerns adjusted in tspans(as Inkscape does it)This is an SVG1.2 flowed text (flowroot)And this is a linebreak inside.inside.linebreakaisthisAnd(flowroot)textflowedSVG1.2anisThis + This is an SVG1.2 flowed textWith absolute line-heightline-heightabsoluteWithtextflowedSVG1.2anisThis + This is an SVG1.2 flowed textwithout line height.height.linewithouttextflowedSVG1.2anisThis + 0.50.10.10.10.34.20.54.30.14.5This is a text with direction:rtl This is an SVG2 flowed text (shape inside)This is a test document containing different transformed text elements that were created using different methods.The containing layer has a transform applied as well. + break.lineaWithtextregularaisThis + + transforms.someandbreaklineawithtextregularisThis + + text.thatinsidebreaklineaisThise) +size(inlintext.flowedSVG2anisThis + + it)doesInkscape(astspansinadjustedkernswithtextaisThis + + 4.50.14.30.54.20.30.10.10.10.5 + + direction:rtlwithtextaisThis + + inside)(shapetextflowedSVG2anisThis + + + \ No newline at end of file diff --git a/tests/data/refs/text_split__74947d6e9e9ee89a80bbec0bfacc6dfb.out b/tests/data/refs/text_split__74947d6e9e9ee89a80bbec0bfacc6dfb.out new file mode 100644 index 00000000..a74d0520 --- /dev/null +++ b/tests/data/refs/text_split__74947d6e9e9ee89a80bbec0bfacc6dfb.out @@ -0,0 +1,28 @@ + + + + + + + This is a regular textWith a line break.This is regular textwith a line breakand some transforms.This is an SVG2 flowed text. (inline size) +This is a line break inside that text.This is a text with kerns adjusted in tspans(as Inkscape does it)This is an SVG1.2 flowed text (flowroot)And this is a linebreak inside..edisni kaerbenil a si siht dnA)toorwolf( txet dewolf 2.1GVS na si sihT + This is an SVG1.2 flowed textWith absolute line-heightthgieh-enil etulosba htiWtxet dewolf 2.1GVS na si sihT + This is an SVG1.2 flowed textwithout line height..thgieh enil tuohtiwtxet dewolf 2.1GVS na si sihT + 0.50.10.10.10.34.20.54.30.14.5This is a text with direction:rtl This is an SVG2 flowed text (shape inside)This is a test document containing different transformed text elements that were created using different methods.The containing layer has a transform applied as well. + .kaerb enil a htiWtxet raluger a si sihT + + .smrofsnart emos dnakaerb enil a htiwtxet raluger si sihT + + .txet taht edisni kaerb enil a si sihT +)ezis enilni( .txet dewolf 2GVS na si sihT + + )ti seod epacsknI sa(snapst ni detsujda snrek htiw txet a si sihT + + 5.41.03.45.02.43.01.01.01.05.0 + + ltr:noitcerid htiw txet a si sihT + + )edisni epahs( txet dewolf 2GVS na si sihT + + + \ No newline at end of file diff --git a/tests/data/refs/text_split__897ab8b3c516acacaf68391a51eb8836.out b/tests/data/refs/text_split__897ab8b3c516acacaf68391a51eb8836.out new file mode 100644 index 00000000..c12794ec --- /dev/null +++ b/tests/data/refs/text_split__897ab8b3c516acacaf68391a51eb8836.out @@ -0,0 +1,27 @@ + + + + + + + inside.linebreakaisthisAnd(flowroot)textflowedSVG1.2anisThis + line-heightabsoluteWithtextflowedSVG1.2anisThis + height.linewithouttextflowedSVG1.2anisThis + This is a test document containing different transformed text elements that were created using different methods.The containing layer has a transform applied as well. + break.lineaWithtextregularaisThis + + transforms.someandbreaklineawithtextregularisThis + + text.thatinsidebreaklineaisThise) +size(inlintext.flowedSVG2anisThis + + it)doesInkscape(astspansinadjustedkernswithtextaisThis + + .541.03.45.02.43.01.01.01.05.0 + + direction:rtlwithtextaisThis + + inside)(shapetextflowedSVG2anisThis + + + \ No newline at end of file diff --git a/tests/data/refs/text_split__ad318803d098b3903561d44825a36ea0.out b/tests/data/refs/text_split__ad318803d098b3903561d44825a36ea0.out new file mode 100644 index 00000000..ccd31f3f --- /dev/null +++ b/tests/data/refs/text_split__ad318803d098b3903561d44825a36ea0.out @@ -0,0 +1,45 @@ + + + + + + + This is a regular textWith a line break.This is regular textwith a line breakand some transforms.This is an SVG2 flowed text. (inline size) +This is a line break inside that text.This is a text with kerns adjusted in tspans(as Inkscape does it)This is an SVG1.2 flowed text (flowroot)And this is a linebreak inside.And this is a linebreak inside.This is an SVG1.2 flowed text (flowroot) + + This is an SVG1.2 flowed textWith absolute line-heightWith absolute line-heightThis is an SVG1.2 flowed text + + This is an SVG1.2 flowed textwithout line height.without line height.This is an SVG1.2 flowed text + + 0.50.10.10.10.34.20.54.30.14.5This is a text with direction:rtl This is an SVG2 flowed text (shape inside)This is a test document containing different transformed text elements that were created using different methods.The containing layer has a transform applied as well. + With a line break.This is a regular text + + + + and some transforms.with a line breakThis is regular text + + + + text.break inside that This is a line (inline size) +flowed text. This is an SVG2 + + + + (as Inkscape does it)This is a text with kerns adjusted in tspans + + + + 0.50.10.10.10.34.20.54.30.14.5 + + + + This is a text with direction:rtl + + + + inside)flowed text (shape This is an SVG2 + + + + + \ No newline at end of file diff --git a/tests/data/refs/text_split__c242adcac78228419d3c065a99727bfb.out b/tests/data/refs/text_split__c242adcac78228419d3c065a99727bfb.out new file mode 100644 index 00000000..f356a085 --- /dev/null +++ b/tests/data/refs/text_split__c242adcac78228419d3c065a99727bfb.out @@ -0,0 +1,44 @@ + + + + + + + And this is a linebreak inside.This is an SVG1.2 flowed text (flowroot) + + With absolute line-heightThis is an SVG1.2 flowed text + + without line height.This is an SVG1.2 flowed text + + This is a test document containing different transformed text elements that were created using different methods.The containing layer has a transform applied as well. + With a line break.This is a regular text + + + + and some transforms.with a line breakThis is regular text + + + + text.break inside that This is a line (inline size) +flowed text. This is an SVG2 + + + + (as Inkscape does it)This is a text with kerns adjusted in tspans + + + + 0.50.10.10.10.34.20.54.30.14.5 + + + + This is a text with direction:rtl + + + + inside)flowed text (shape This is an SVG2 + + + + + \ No newline at end of file diff --git a/tests/data/refs/text_split__d8b155ac58a7424471f416b64f21669f.out b/tests/data/refs/text_split__d8b155ac58a7424471f416b64f21669f.out new file mode 100644 index 00000000..0508d54c --- /dev/null +++ b/tests/data/refs/text_split__d8b155ac58a7424471f416b64f21669f.out @@ -0,0 +1,27 @@ + + + + + + + inside.linebreakaisthisAnd(flowroot)textflowedSVG1.2anisThis + line-heightabsoluteWithtextflowedSVG1.2anisThis + height.linewithouttextflowedSVG1.2anisThis + This is a test document containing different transformed text elements that were created using different methods.The containing layer has a transform applied as well. + break.lineaWithtextregularaisThis + + transforms.someandbreaklineawithtextregularisThis + + text.thatinsidebreaklineaisThise) +size(inlintext.flowedSVG2anisThis + + it)doesInkscape(astspansinadjustedkernswithtextaisThis + + 4.50.14.30.54.20.30.10.10.10.5 + + direction:rtlwithtextaisThis + + inside)(shapetextflowedSVG2anisThis + + + \ No newline at end of file diff --git a/tests/data/refs/text_split__dd77d3cc0134ad62833e05ca73e38896.out b/tests/data/refs/text_split__dd77d3cc0134ad62833e05ca73e38896.out new file mode 100644 index 00000000..8eb90d36 --- /dev/null +++ b/tests/data/refs/text_split__dd77d3cc0134ad62833e05ca73e38896.out @@ -0,0 +1,27 @@ + + + + + + + .edisni kaerbenil a si siht dnA)toorwolf( txet dewolf 2.1GVS na si sihT + thgieh-enil etulosba htiWtxet dewolf 2.1GVS na si sihT + .thgieh enil tuohtiwtxet dewolf 2.1GVS na si sihT + This is a test document containing different transformed text elements that were created using different methods.The containing layer has a transform applied as well. + .kaerb enil a htiWtxet raluger a si sihT + + .smrofsnart emos dnakaerb enil a htiwtxet raluger si sihT + + .txet taht edisni kaerb enil a si sihT +)ezis enilni( .txet dewolf 2GVS na si sihT + + )ti seod epacsknI sa(snapst ni detsujda snrek htiw txet a si sihT + + 5.41.03.45.02.43.01.01.01.05.0 + + ltr:noitcerid htiw txet a si sihT + + )edisni epahs( txet dewolf 2GVS na si sihT + + + \ No newline at end of file diff --git a/tests/data/svg/text_types.svg b/tests/data/svg/text_types.svg new file mode 100644 index 00000000..191623f5 --- /dev/null +++ b/tests/data/svg/text_types.svg @@ -0,0 +1,256 @@ + + + + + + + + + + This is a regular textWith a line break. + This is regular textwith a line breakand some transforms. + This is an SVG2 flowed text. (inline size) +This is a line break inside that text. + This is a text with kerns adjusted in tspans(as Inkscape does it) + This is an SVG1.2 flowed text (flowroot)And this is a linebreak inside. + This is an SVG1.2 flowed textWith absolute line-height + This is an SVG1.2 flowed textwithout line height. + 0.50.10.10.10.34.20.54.30.14.5 + This is a text with direction:rtl + This is an SVG2 flowed text (shape inside) + This is a test document containing different transformed text elements that were created using different methods.The containing layer has a transform applied as well. + + diff --git a/tests/test_text_split.py b/tests/test_text_split.py index 1e19ed52..fa6bd757 100644 --- a/tests/test_text_split.py +++ b/tests/test_text_split.py @@ -1,8 +1,23 @@ # coding=utf-8 +from inkex.tester.filters import CompareWithoutIds from text_split import TextSplit from inkex.tester import ComparisonMixin, TestCase +from inkex.tester.filters import CompareWithoutIds class TestSplitBasic(ComparisonMixin, TestCase): """Test split effect""" effect_class = TextSplit - comparisons = [('--id=t1', '--id=t3', '--splittype=word')] + compare_filters = [CompareWithoutIds()] + compare_file = "svg/text_types.svg" + all_shapes = ('--id=regular', '--id=regular-transform', '--id=inline-size', '--id=kerning', + '--id=flowroot', '--id=flowroot-abs-lineheight', '--id=flowroot-no-lineheight', + '--id=manual-kerns', '--id=rtl', '--id=shape-inside') + comparisons = [all_shapes + ('--splittype=line', '--preserve=True'), #ad3188 + all_shapes + ('--splittype=line', '--preserve=False'), #c242ad + all_shapes + ('--splittype=word', '--preserve=True'), #547875 + all_shapes + ('--splittype=word', '--preserve=False'), #d8b155 + all_shapes + ('--splittype=word', '--preserve=False', '--separation=0.0'), #897ab8 + all_shapes + ('--splittype=letter', '--preserve=True'), #74947d + all_shapes + ('--splittype=letter', '--preserve=False') #dd77d3 + ] + print("test") diff --git a/text_split.inx b/text_split.inx index 35c3cba1..5fafe9af 100644 --- a/text_split.inx +++ b/text_split.inx @@ -1,7 +1,7 @@ Split text - com.nerdson.text_split + org.inkscape.text_split @@ -9,10 +9,17 @@ + 1.0 true + + + diff --git a/text_split.py b/text_split.py index 23a5f4db..91f3e614 100755 --- a/text_split.py +++ b/text_split.py @@ -2,6 +2,7 @@ # coding=utf-8 # # Copyright (C) 2009 Karlisson Bezerra, contato@nerdson.com +# 2021 Jonathan Neuhauser, jonathan.neuhauser@outlook.com # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,152 +18,53 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # +"""Splits a text element into lines, words, chars. +Supports all text elements that Inkscape can create, such as normal text, shape-inside (SVG2), +flowroot (SVG1.2), inline-size, manual kerns, and nested tspans (with possibly different kerns) + +Possible future improvements: + - The position of words and character is currently simply determined by multiplying the number + of characters with a constant and the font size. The size of the current text fragment could be + computed using the font size, kerning, ... and content to give a better preservation of + coordinates. But this would require inkex to have actual text handling. + - Automatic line breaks in flowroot texts are not handled, since this would require actual + computation of text sizes as well. + - writing-mode:tb.* doesn't crash, but the result isn't correctly aligned. + +The code is structured as followed. For each selected text element: + - preprocess_text_element duplicates the element, converts flowroots to plain text elements, + and simplifies manual kerning if requested (only for split_words and split_chars, for all, + possibly nested, children) using simplify_nested_tspans. + - if split lines: split_lines copies all top-level tspans from the previous step into their own + text element, which is otherwise a duplicate of the original text element (thus preserving + style and transforms), see append_splitted_element + - if split words or chars: split_words_or_chars: the text is recursively processed. For each tspan, + the content and tail is split (words: at spaces, chars: after each character) into their own + tspan, again using append_splitted_element. The method keeps track of the horizontal and vertical + coordinate, incrementing it with the number of characters and a multiple of font size. +""" + +import re as regex +from typing import Union, Callable import inkex -from inkex import ( - TextElement, FlowRoot, FlowPara, Tspan, TextPath, Rectangle -) +from inkex import TextElement, FlowRoot, FlowPara, Tspan, Rectangle +from inkex.units import parse_unit +from inkex.localization import inkex_gettext as _ + +TextLike = Union[FlowRoot, TextElement] class TextSplit(inkex.EffectExtension): """Split text up.""" def add_arguments(self, pars): - pars.add_argument("--tab", help="The selected UI-tab when OK was pressed") - pars.add_argument("-s", "--splittype", default="line", help="type of split") - pars.add_argument("-p", "--preserve", type=inkex.Boolean, default=True,\ - help="Preserve original") - - def split_lines(self, node): - """Returns a list of lines""" - lines = [] - count = 1 - - for elem in node: - if isinstance(elem, TextPath): - inkex.errormsg("Text on path isn't supported. First remove text from path.") - break - elif not isinstance(elem, (FlowPara, Tspan)): - continue - - text = TextElement(**node.attrib) - - # handling flowed text nodes - if isinstance(node, FlowRoot): - fontsize = node.style.get("font-size", "12px") - fs = self.svg.unittouu(fontsize) - - # selects the flowRegion's child (svg:rect) to get @X and @Y - flowref = node.findone('svg:flowRegion')[0] - - if isinstance(flowref, Rectangle): - text.set("x", flowref.get("x")) - text.set("y", str(float(flowref.get("y")) + fs * count)) - count += 1 - else: - inkex.debug("This type of text element isn't supported. First unflow text.") - break - - # now let's convert flowPara into tspan - tspan = Tspan() - tspan.set("sodipodi:role", "line") - tspan.text = elem.text - text.append(tspan) - - else: - from copy import copy - x = elem.get("x") or node.get("x") - y = elem.get("y") or node.get("y") - - text.set("x", x) - text.set("y", y) - text.append(copy(elem)) - - lines.append(text) - - return lines - - def split_words(self, node): - """Returns a list of words""" - words = [] - - # Function to recursively extract text - def plain_str(elem): - words = [] - if elem.text: - words.append(elem.text) - for n in elem: - words.extend(plain_str(n)) - if n.tail: - words.append(n.tail) - return words - - # if text has more than one line, iterates through elements - lines = self.split_lines(node) - if not lines: - return words - - for line in lines: - # gets the position of text node - x = float(line.get("x")) - y = line.get("y") - - # gets the font size. if element doesn't have a style attribute, it assumes font-size = 12px - fontsize = line.style.get("font-size", "12px") - fs = self.svg.unittouu(fontsize) - - # extract and returns a list of words - words_list = "".join(plain_str(line)).split() - prev_len = 0 - - # creates new text nodes for each string in words_list - for word in words_list: - tspan = Tspan() - tspan.text = word - - text = TextElement(**line.attrib) - tspan.set('sodipodi:role', "line") - - # positioning new text elements - x = x + prev_len * fs - prev_len = len(word) - text.set("x", str(x)) - text.set("y", str(y)) - - text.append(tspan) - words.append(text) - - return words - - def split_letters(self, node): - """Returns a list of letters""" - - letters = [] - - words = self.split_words(node) - if not words: - return letters - - for word in words: - - x = float(word.get("x")) - y = word.get("y") - - # gets the font size. If element doesn't have a style attribute, it assumes font-size = 12px - fontsize = word.style.get("font-size", "12px") - fs = self.svg.unittouu(fontsize) - - # for each letter in element string - for letter in word[0].text: - tspan = Tspan() - tspan.text = letter - - text = TextElement(**node.attrib) - text.set("x", str(x)) - text.set("y", str(y)) - x += fs - - text.append(tspan) - letters.append(text) - return letters + pars.add_argument("--tab", help="The selected UI tab when OK was pressed") + pars.add_argument("-t", "--splittype", default="line", choices=["letter", "word", "line"], + help="type of split") + pars.add_argument("-p", "--preserve", type=inkex.Boolean, default=True, + help="Preserve original") + pars.add_argument("-s", "--separation", type=float, default=1, + help="Threshold for separating text with manual kerns in multiples of" + "font-size") def effect(self): """Applies the effect""" @@ -171,21 +73,213 @@ class TextSplit(inkex.EffectExtension): preserve = self.options.preserve # checks if the selected elements are text nodes - for elem in self.svg.selection.get(TextElement, FlowRoot): + for elem in self.svg.selection.filter_nonzero(TextElement, FlowRoot): if split_type == "line": - nodes = self.split_lines(elem) + node = split_lines(elem) elif split_type == "word": - nodes = self.split_words(elem) - elif split_type == "letter": - nodes = self.split_letters(elem) - - for child in nodes: - elem.getparent().append(child) + node = split_words_or_chars(elem, separation=self.options.separation) + else: + node = split_words_or_chars(elem, process_plain_chars, separation=0) + + node.getparent().remove(node) + + if not preserve and node is not None: + elem.getparent().remove(elem) + +def get_font_size(element): + try: + return element.unittouu(element.specified_style()("font-size")) + except ValueError: #unable to parse font size, e.g. font-size:normal + return element.unittouu("12pt") + +def get_line_height(element): + font_size = get_font_size(element) + line_height = element.specified_style()("line-height") + parsed = parse_unit(line_height) + if parsed is None: + return font_size * 1.2 + if parsed[1] == "%": + return font_size * parsed[0] * 0.01 + return element.unittouu(line_height) + +def simplify_child_tspans(element: TextElement, separation: int = 3): + """Checks all child tspans if they have manual kerns. + If it does, try to find words (characters with a distance > separation * font-size). + Then concatenate the words with spaces, set this string as a new text and """ + for child in list(element): + # process manual kerns + if not isinstance(child, Tspan): + continue + xvals = list(map(float, filter(len, regex.split(r"[,\s]", child.get("x") or "")))) + content = child.text + if content not in [None, ""] and len(xvals) >= 2: + fsize = get_font_size(child) + separation = separation*fsize + current_word_start = 0 + for i in range(1, max(len(content), len(xvals))): + if i >= len(content) -1 or i >= len(xvals) -1: + # consume the entire remaining string + i = len(content) + if i == len(content) or abs(xvals[i] - xvals[i-1]) > separation: + wordspan = Tspan(x=str(xvals[current_word_start])) + wordspan.text = content[current_word_start:i] + child.add(wordspan) + current_word_start = i + child.pop("x") + child.text = None + # process child elements + simplify_child_tspans(child) + +def preprocess_text_element(element: TextElement, process_kerns=True, separation=3): + """Processes a text element and returns an element containing tspans with x and y coordinate, + possibly nested (for Inkscape-type kerning), so that the actual splitting can work as if the + text was a simple text. Manual kerns (one x value per letter) are converted to spaces + if requested (not necessary for "split characters")""" + + oldelement = element + if isinstance(element, FlowRoot): + element = TextElement() + oldelement.addnext(element) + element.style = oldelement.style + element.transform = oldelement.transform + flowref = oldelement.findone('svg:flowRegion')[0] + if isinstance(flowref, Rectangle): + flowx = element.unittouu(flowref.get("x")) + flowy = element.unittouu(float(flowref.get("y"))) + first = True + else: + inkex.errormsg(_("Element {} uses a flow region that is not a rectangle." + "First unflow text.".format(element.get_id()))) + return element + for child in oldelement: + if isinstance(child, FlowPara): + # convert the flowpara "line" (note: no automatic wrapping) + # to a tspan and set the y coordinate. + # future FlowRoot improvements could add a better conversion. + newchild = Tspan() + element.append(newchild) + newchild.text = child.text + newchild.style = child.style + newchild.transform = child.transform + newchild.set("x", flowx) + if first: + flowy += get_font_size(child) * 1.25 + first = False + else: + flowy += get_line_height(child) + newchild.set("y", str(flowy)) + + else: + element = oldelement.duplicate() + oldelement.getparent().append(element) + + element.style.pop("shape-inside", None) + + # Real support for RTL text is missing, but we can emulate it by just removing the + # attribute. However, line breaks will be misaligned. + element.style.pop("direction", None) + for child in element: + child.style.pop("direction", None) + + if process_kerns: + simplify_child_tspans(element, separation) + return element + +def append_splitted_element(element: TextElement, text, x=0, y=0, prototype=None): + """Creates a new text element, sibling to element, at x,y, with content text. + + element: the existing text element that the new element will be a sibling of + text: either a Tspan that should be moved to a new text element - in this case, text is + a direct child of element; or a string + x, y: coordinates; ignored if text is a tspan + prototype: if text is a string, style and transform will be taken from prototype""" + + if isinstance(text, Tspan) and text.getparent() == element: + # we just move the tspan to a new text element. + elem = element.duplicate() + elem.remove_all(Tspan) + elem.append(text) + elem.set("x", text.get("x")) + elem.set("y", text.get("y")) + else: + elem = TextElement(x=str(x), y=str(y)) + # transfer the style from all parents, including the text element (if there's a style to the + # text element's parent applied, it will be duplicated, but that doesn't really matter) + elem.style = prototype.specified_style() + # the element will be appended to the parent of element, but there might be nested + # tspans between the prototype and the element. The next line says + # "compose transforms until you reach the parent of element" + elem.transform = (- element.getparent().transform) * prototype.composed_transform() + tsp = Tspan(x=str(x), y=str(y)) + tsp.text = text + elem.add(tsp) + element.addnext(elem) + + +def split_lines(element: TextLike) -> TextElement: + """Splits a text into its lines""" + preprocessed = preprocess_text_element(element, process_kerns=False) + + # Now we only have to copy each tspan into its own text element. + for child in list(preprocessed): + append_splitted_element(preprocessed, child) + + return preprocessed + +def process_plain_text(root, element, splitted, current_x, current_y, fontsize, multiplier) \ + -> float: + """Appends new text elements to as sibling root for each element of splitted, starting at + current_x, current_y, incrementing those and returing current_x, with prototype element (that + styles and transforms will be taken from) """ + if splitted is None: + return current_x + for word in splitted: + if word != "": + append_splitted_element(root, word, current_x, current_y, element) + current_x += fontsize * (len(word) + 1) * multiplier # +1 since for words, we lost a space + return current_x + +def process_plain_words(root, element, text, current_x, current_y, fontsize) -> float: + """Calls process_plain_text for splitting words""" + if text is not None: + return process_plain_text(root, element, text.split(" "), current_x, current_y, fontsize, + 0.4) + return current_x + +def process_plain_chars(root, element, text, current_x, current_y, fontsize): + """Calls process_plain_text for splitting characters""" + return process_plain_text(root, element, text, current_x, current_y, fontsize, 0.25) + + +def split_words_or_chars(element: TextLike, mode: Callable = process_plain_words, + separation: float = 3) -> TextElement: + """Splits a text into its lines""" + preprocessed = preprocess_text_element(element, process_kerns=True, separation=separation) + + def process_element(element, process_plain, current_x=0, current_y=0, root=None) -> float: + if root is None: + root = element + + elem_coords = {i: element.root.unittouu(element.get(i)) + if element.get(i) is not None else None for i in "xy"} + if elem_coords["x"] is not None: + current_x = elem_coords["x"] + if elem_coords["y"] is not None: + current_y = elem_coords["y"] + parent_font_size = get_font_size(element) + current_x = process_plain(root, element, element.text, current_x, current_y, + parent_font_size) + + for elem in element: + if isinstance(elem, Tspan): + current_x = process_element(elem, process_plain, current_x, current_y, root) + current_x = process_plain(root, element, elem.tail, current_x, current_y, + parent_font_size) + return current_x + + process_element(preprocessed, mode) + return preprocessed - # preserve original element - if not preserve and nodes: - parent = elem.getparent() - parent.remove(elem) if __name__ == '__main__': TextSplit().run() -- GitLab From aa8edee827cfb4a85ddeb10471d9780c9373b048 Mon Sep 17 00:00:00 2001 From: Jonathan Neuhauser Date: Wed, 26 May 2021 23:09:33 +0200 Subject: [PATCH 2/2] review comments --- inkex/elements/_base.py | 14 +- inkex/properties.py | 15 +- text_split.inx | 3 +- text_split.py | 424 ++++++++++++++++++++-------------------- 4 files changed, 239 insertions(+), 217 deletions(-) diff --git a/inkex/elements/_base.py b/inkex/elements/_base.py index f8282247..9d018848 100644 --- a/inkex/elements/_base.py +++ b/inkex/elements/_base.py @@ -34,7 +34,7 @@ from ..paths import Path from ..styles import Style, Classes from ..transforms import Transform, BoundingBox from ..utils import FragmentError -from ..units import convert_unit, render_unit +from ..units import convert_unit, render_unit, parse_unit from ._utils import ChildToProperty, NSS, addNS, removeNS, splitNS from ..properties import all_properties @@ -570,3 +570,15 @@ class ShapeElement(BaseElement): if not float(self.style.get('opacity', 1.0)): return False return True + + def get_line_height_uu(self): + """Returns the specified value of line-height, in user units""" + style = self.specified_style() + font_size = style("font-size") # already in uu + line_height = style("line-height") + parsed = parse_unit(line_height) + if parsed is None: + return font_size * 1.2 + if parsed[1] == "%": + return font_size * parsed[0] * 0.01 + return self.unittouu(line_height) diff --git a/inkex/properties.py b/inkex/properties.py index 10834329..c7538c53 100644 --- a/inkex/properties.py +++ b/inkex/properties.py @@ -426,6 +426,17 @@ class MarkerShorthandValue(ShorthandValue, URLNoneValue): return "" return super()._parse_value(value, element) +class FontSizeValue(BaseStyleValue): + """ Logic for the font-size property""" + def _parse_value(self, value: str, element=None): + if element is None: + return value #no additional logic in this case + try: + return element.unittouu(value) + except ValueError: #unable to parse font size, e.g. font-size:normal + return element.unittouu("12pt") + + # keys: attributes, right side: # - Subclass of BaseStyleValue used for instantiating # - default value @@ -464,7 +475,7 @@ all_properties: Dict[str, Tuple[Type[BaseStyleValue], str, bool, bool, Union[Lis "flood-opacity": (AlphaValue, "1", True, False, None), "font": (FontValue, "", True, False, None), "font-family": (BaseStyleValue, "sans-serif", True, True, None), - "font-size": (BaseStyleValue, "medium", True, True, None), + "font-size": (FontSizeValue, "medium", True, True, None), "font-size-adjust": (BaseStyleValue, "none", True, True, None), "font-stretch": (EnumValue, "normal", True, True, ["normal", "ultra-condensed", "extra-condensed", "condensed", "semi-condensed", "semi-expanded", "expanded", "extra-expanded", @@ -515,7 +526,7 @@ all_properties: Dict[str, Tuple[Type[BaseStyleValue], str, bool, bool, Union[Lis "white-space": (EnumValue, "normal", True, True, ["normal", "pre", "nowrap", "pre-wrap", "break-spaces", "pre-line"]), "word-spacing": (BaseStyleValue, "normal", True, True, None), # including obsolete SVG 1.1 values - "writing-mode": (EnumValue, "visible", True, True, ["horizontal-tb", "vertical-rl", "vertical-lr", "lr", "lr-tb", "rl", "rl-tb", "tb", "tb-rl"]), + "writing-mode": (EnumValue, "horizontal-tb", True, True, ["horizontal-tb", "vertical-rl", "vertical-lr", "lr", "lr-tb", "rl", "rl-tb", "tb", "tb-rl"]), "-inkscape-font-specification": (BaseStyleValue, "sans-serif", False, False, None) } # pylint: enable=line-too-long diff --git a/text_split.inx b/text_split.inx index 5fafe9af..e6dc9429 100644 --- a/text_split.inx +++ b/text_split.inx @@ -19,7 +19,8 @@ you know what you're doing! + Text with different writing mode (e.g. tb-rl) is processed, but will be misaligned. + Automatic line breaks in legacy flowtext (flowroot elements) are ignored. diff --git a/text_split.py b/text_split.py index 91f3e614..3d7a1854 100755 --- a/text_split.py +++ b/text_split.py @@ -22,15 +22,6 @@ Supports all text elements that Inkscape can create, such as normal text, shape-inside (SVG2), flowroot (SVG1.2), inline-size, manual kerns, and nested tspans (with possibly different kerns) -Possible future improvements: - - The position of words and character is currently simply determined by multiplying the number - of characters with a constant and the font size. The size of the current text fragment could be - computed using the font size, kerning, ... and content to give a better preservation of - coordinates. But this would require inkex to have actual text handling. - - Automatic line breaks in flowroot texts are not handled, since this would require actual - computation of text sizes as well. - - writing-mode:tb.* doesn't crash, but the result isn't correctly aligned. - The code is structured as followed. For each selected text element: - preprocess_text_element duplicates the element, converts flowroots to plain text elements, and simplifies manual kerning if requested (only for split_words and split_chars, for all, @@ -48,7 +39,7 @@ import re as regex from typing import Union, Callable import inkex -from inkex import TextElement, FlowRoot, FlowPara, Tspan, Rectangle +from inkex import TextElement, FlowRoot, FlowPara, Tspan, Rectangle, ShapeElement from inkex.units import parse_unit from inkex.localization import inkex_gettext as _ @@ -56,6 +47,17 @@ TextLike = Union[FlowRoot, TextElement] class TextSplit(inkex.EffectExtension): """Split text up.""" + def __init__(self): + """Initialize State machine""" + super().__init__() + self.mode: Callable + self.separation: float = 1 + self.fs_multiplier: float = 0.25 + self.current_x: float = 0 + self.current_y: float = 0 + self.process_kerns: bool = True + self.current_root: TextLike + self.current_fontsize: float = 0 def add_arguments(self, pars): pars.add_argument("--tab", help="The selected UI tab when OK was pressed") pars.add_argument("-t", "--splittype", default="line", choices=["letter", "word", "line"], @@ -74,211 +76,207 @@ class TextSplit(inkex.EffectExtension): # checks if the selected elements are text nodes for elem in self.svg.selection.filter_nonzero(TextElement, FlowRoot): - if split_type == "line": - node = split_lines(elem) - elif split_type == "word": - node = split_words_or_chars(elem, separation=self.options.separation) + try: + self.separation = self.options.separation + if split_type == "line": + node = self.split_lines(elem) + elif split_type == "word": + self.mode = self.process_plain_words + node = self.split_words_or_chars(elem) + else: + self.separation = 0 + self.mode = self.process_plain_chars + node = self.split_words_or_chars(elem) + + node.getparent().remove(node) + + if not preserve and node is not None: + elem.getparent().remove(elem) + except TypeError as err: + inkex.errormsg(err) # if an element can not be processed + + @staticmethod + def get_font_size(element): + """get the font size of an element""" + return element.specified_style()("font-size") + + @staticmethod + def get_line_height(element: ShapeElement): + """ get the line height of an element""" + return element.get_line_height_uu() + + def simplify_child_tspans(self, element: TextElement): + """Checks all child tspans if they have manual kerns. + If it does, try to find words (characters with a distance > separation * font-size). + Then concatenate the words with spaces, set this string as a new text and """ + for child in list(element): + # process manual kerns + if not isinstance(child, Tspan): + continue + xvals = list(map(float, filter(len, regex.split(r"[,\s]", child.get("x") or "")))) + content = child.text + if content not in [None, ""] and len(xvals) >= 2: + fsize = self.get_font_size(child) + separation = self.separation*fsize + current_word_start = 0 + for i in range(1, max(len(content), len(xvals))): + if i >= len(content) -1 or i >= len(xvals) -1: + # consume the entire remaining string + i = len(content) + if i == len(content) or abs(xvals[i] - xvals[i-1]) > separation: + wordspan = Tspan(x=str(xvals[current_word_start])) + wordspan.text = content[current_word_start:i] + child.add(wordspan) + current_word_start = i + child.pop("x") + child.text = None + # process child elements + self.simplify_child_tspans(child) + + def preprocess_text_element(self, element: TextElement): + """Processes a text element and returns an element containing tspans with x and y coordinate, + possibly nested (for Inkscape-type kerning), so that the actual splitting can work as if the + text was a simple text. Manual kerns (one x value per letter) are converted to spaces + if requested (not necessary for "split characters")""" + + oldelement = element + if isinstance(element, FlowRoot): + element = TextElement() + oldelement.addnext(element) + element.style = oldelement.style + element.transform = oldelement.transform + flowref = oldelement.findone('svg:flowRegion')[0] + if isinstance(flowref, Rectangle): + flowx = element.unittouu(flowref.get("x")) + flowy = element.unittouu(float(flowref.get("y"))) + first = True else: - node = split_words_or_chars(elem, process_plain_chars, separation=0) - - node.getparent().remove(node) - - if not preserve and node is not None: - elem.getparent().remove(elem) - -def get_font_size(element): - try: - return element.unittouu(element.specified_style()("font-size")) - except ValueError: #unable to parse font size, e.g. font-size:normal - return element.unittouu("12pt") - -def get_line_height(element): - font_size = get_font_size(element) - line_height = element.specified_style()("line-height") - parsed = parse_unit(line_height) - if parsed is None: - return font_size * 1.2 - if parsed[1] == "%": - return font_size * parsed[0] * 0.01 - return element.unittouu(line_height) - -def simplify_child_tspans(element: TextElement, separation: int = 3): - """Checks all child tspans if they have manual kerns. - If it does, try to find words (characters with a distance > separation * font-size). - Then concatenate the words with spaces, set this string as a new text and """ - for child in list(element): - # process manual kerns - if not isinstance(child, Tspan): - continue - xvals = list(map(float, filter(len, regex.split(r"[,\s]", child.get("x") or "")))) - content = child.text - if content not in [None, ""] and len(xvals) >= 2: - fsize = get_font_size(child) - separation = separation*fsize - current_word_start = 0 - for i in range(1, max(len(content), len(xvals))): - if i >= len(content) -1 or i >= len(xvals) -1: - # consume the entire remaining string - i = len(content) - if i == len(content) or abs(xvals[i] - xvals[i-1]) > separation: - wordspan = Tspan(x=str(xvals[current_word_start])) - wordspan.text = content[current_word_start:i] - child.add(wordspan) - current_word_start = i - child.pop("x") - child.text = None - # process child elements - simplify_child_tspans(child) - -def preprocess_text_element(element: TextElement, process_kerns=True, separation=3): - """Processes a text element and returns an element containing tspans with x and y coordinate, - possibly nested (for Inkscape-type kerning), so that the actual splitting can work as if the - text was a simple text. Manual kerns (one x value per letter) are converted to spaces - if requested (not necessary for "split characters")""" - - oldelement = element - if isinstance(element, FlowRoot): - element = TextElement() - oldelement.addnext(element) - element.style = oldelement.style - element.transform = oldelement.transform - flowref = oldelement.findone('svg:flowRegion')[0] - if isinstance(flowref, Rectangle): - flowx = element.unittouu(flowref.get("x")) - flowy = element.unittouu(float(flowref.get("y"))) - first = True + raise TypeError(_("Element {} uses a flow region that is not a rectangle. " + "First unflow text.".format(element.get_id()))) + for child in oldelement: + if isinstance(child, FlowPara): + # convert the flowpara "line" (note: no automatic wrapping) + # to a tspan and set the y coordinate. + # future FlowRoot improvements could add a better conversion. + newchild = Tspan() + element.append(newchild) + newchild.text = child.text + newchild.style = child.style + newchild.transform = child.transform + newchild.set("x", flowx) + if first: + flowy += self.get_font_size(child) * 1.25 + first = False + else: + flowy += self.get_line_height(child) + newchild.set("y", str(flowy)) + else: - inkex.errormsg(_("Element {} uses a flow region that is not a rectangle." - "First unflow text.".format(element.get_id()))) - return element - for child in oldelement: - if isinstance(child, FlowPara): - # convert the flowpara "line" (note: no automatic wrapping) - # to a tspan and set the y coordinate. - # future FlowRoot improvements could add a better conversion. - newchild = Tspan() - element.append(newchild) - newchild.text = child.text - newchild.style = child.style - newchild.transform = child.transform - newchild.set("x", flowx) - if first: - flowy += get_font_size(child) * 1.25 - first = False - else: - flowy += get_line_height(child) - newchild.set("y", str(flowy)) - - else: - element = oldelement.duplicate() - oldelement.getparent().append(element) - - element.style.pop("shape-inside", None) - - # Real support for RTL text is missing, but we can emulate it by just removing the - # attribute. However, line breaks will be misaligned. - element.style.pop("direction", None) - for child in element: - child.style.pop("direction", None) - - if process_kerns: - simplify_child_tspans(element, separation) - return element - -def append_splitted_element(element: TextElement, text, x=0, y=0, prototype=None): - """Creates a new text element, sibling to element, at x,y, with content text. - - element: the existing text element that the new element will be a sibling of - text: either a Tspan that should be moved to a new text element - in this case, text is - a direct child of element; or a string - x, y: coordinates; ignored if text is a tspan - prototype: if text is a string, style and transform will be taken from prototype""" - - if isinstance(text, Tspan) and text.getparent() == element: - # we just move the tspan to a new text element. - elem = element.duplicate() - elem.remove_all(Tspan) - elem.append(text) - elem.set("x", text.get("x")) - elem.set("y", text.get("y")) - else: - elem = TextElement(x=str(x), y=str(y)) - # transfer the style from all parents, including the text element (if there's a style to the - # text element's parent applied, it will be duplicated, but that doesn't really matter) - elem.style = prototype.specified_style() - # the element will be appended to the parent of element, but there might be nested - # tspans between the prototype and the element. The next line says - # "compose transforms until you reach the parent of element" - elem.transform = (- element.getparent().transform) * prototype.composed_transform() - tsp = Tspan(x=str(x), y=str(y)) - tsp.text = text - elem.add(tsp) - element.addnext(elem) - - -def split_lines(element: TextLike) -> TextElement: - """Splits a text into its lines""" - preprocessed = preprocess_text_element(element, process_kerns=False) - - # Now we only have to copy each tspan into its own text element. - for child in list(preprocessed): - append_splitted_element(preprocessed, child) - - return preprocessed - -def process_plain_text(root, element, splitted, current_x, current_y, fontsize, multiplier) \ - -> float: - """Appends new text elements to as sibling root for each element of splitted, starting at - current_x, current_y, incrementing those and returing current_x, with prototype element (that - styles and transforms will be taken from) """ - if splitted is None: - return current_x - for word in splitted: - if word != "": - append_splitted_element(root, word, current_x, current_y, element) - current_x += fontsize * (len(word) + 1) * multiplier # +1 since for words, we lost a space - return current_x - -def process_plain_words(root, element, text, current_x, current_y, fontsize) -> float: - """Calls process_plain_text for splitting words""" - if text is not None: - return process_plain_text(root, element, text.split(" "), current_x, current_y, fontsize, - 0.4) - return current_x - -def process_plain_chars(root, element, text, current_x, current_y, fontsize): - """Calls process_plain_text for splitting characters""" - return process_plain_text(root, element, text, current_x, current_y, fontsize, 0.25) - - -def split_words_or_chars(element: TextLike, mode: Callable = process_plain_words, - separation: float = 3) -> TextElement: - """Splits a text into its lines""" - preprocessed = preprocess_text_element(element, process_kerns=True, separation=separation) - - def process_element(element, process_plain, current_x=0, current_y=0, root=None) -> float: - if root is None: - root = element - - elem_coords = {i: element.root.unittouu(element.get(i)) - if element.get(i) is not None else None for i in "xy"} - if elem_coords["x"] is not None: - current_x = elem_coords["x"] - if elem_coords["y"] is not None: - current_y = elem_coords["y"] - parent_font_size = get_font_size(element) - current_x = process_plain(root, element, element.text, current_x, current_y, - parent_font_size) - - for elem in element: - if isinstance(elem, Tspan): - current_x = process_element(elem, process_plain, current_x, current_y, root) - current_x = process_plain(root, element, elem.tail, current_x, current_y, - parent_font_size) - return current_x - - process_element(preprocessed, mode) - return preprocessed + element = oldelement.duplicate() + oldelement.getparent().append(element) + + element.style.pop("shape-inside", None) + + # Real support for RTL text is missing, but we can emulate it by just removing the + # attribute. However, line breaks will be misaligned. + element.style.pop("direction", None) + for child in element: + child.style.pop("direction", None) + + if self.process_kerns: + self.simplify_child_tspans(element) + return element + + def append_splitted_element(self, text, prototype=None): + """Creates a new text element, sibling to self.current_root, at (self.current_x, + self.current_y) with content text. + + text: either a Tspan that should be moved to a new text element - in this case, text is + a direct child of element; or a string + prototype: if text is a string, style and transform will be taken from prototype""" + + if isinstance(text, Tspan) and text.getparent() == self.current_root: + # we just move the tspan to a new text element. + elem = self.current_root.duplicate() + elem.remove_all(Tspan) + elem.append(text) + elem.set("x", text.get("x")) + elem.set("y", text.get("y")) + else: + elem = TextElement(x=str(self.current_x), y=str(self.current_y)) + # transfer the style from all parents, including the text element (if there's a style to + # the text element's parent applied, it will be duplicated, but that doesn't really + # matter) + elem.style = prototype.specified_style() + # the element will be appended to the parent of element, but there might be nested + # tspans between the prototype and the element. The next line says + # "compose transforms until you reach the parent of element" + elem.transform = (- self.current_root.getparent().transform) \ + * prototype.composed_transform() + tsp = Tspan(x=str(self.current_x), y=str(self.current_y)) + tsp.text = text + elem.add(tsp) + self.current_root.addnext(elem) + + + def split_lines(self, element: TextLike) -> TextElement: + """Splits a text into its lines""" + self.process_kerns = False + preprocessed = self.preprocess_text_element(element) + self.current_root = preprocessed + # Now we only have to copy each tspan into its own text element. + for child in list(preprocessed): + self.append_splitted_element(child) + + return preprocessed + + def process_plain_text(self, element, splitted): + """Appends new text elements to as sibling root for each element of splitted, starting at + self.current_x, self.current_y, incrementing those, with prototype element (that + styles and transforms will be taken from) """ + if splitted is None: + return + for word in splitted: + if word != "": + self.append_splitted_element(word, element) + # +1 since for words, we lost a space + self.current_x += self.current_fontsize * (len(word) + 1) * self.fs_multiplier + + def process_plain_words(self, element, text): + """Calls process_plain_text for splitting words""" + self.fs_multiplier = 0.4 + if text is not None: + self.process_plain_text(element, text.split(" ")) + + def process_plain_chars(self, element, text): + """Calls process_plain_text for splitting characters""" + self.fs_multiplier = 0.25 + self.process_plain_text(element, text) + + + def split_words_or_chars(self, element: TextLike) -> TextElement: + """Splits a text into its lines""" + self.process_kerns = True + preprocessed = self.preprocess_text_element(element) + def process_element(element) -> float: + elem_coords = {i: element.root.unittouu(element.get(i)) + if element.get(i) is not None else None for i in "xy"} + if elem_coords["x"] is not None: + self.current_x = elem_coords["x"] + if elem_coords["y"] is not None: + self.current_y = elem_coords["y"] + self.current_fontsize = self.get_font_size(element) + current_x = self.mode(element, element.text) + + for elem in element: + if isinstance(elem, Tspan): + current_x = process_element(elem) + current_x = self.mode(element, elem.tail) + return current_x + + self.current_root = preprocessed + process_element(preprocessed) + return preprocessed if __name__ == '__main__': -- GitLab