diff --git a/inkex/elements/_base.py b/inkex/elements/_base.py
index f8282247f5f686f4d665fd915ebdb975cc2e2ee2..9d01884870385185f20a836fb518c394ae4a3b9d 100644
--- a/inkex/elements/_base.py
+++ b/inkex/elements/_base.py
@@ -34,7 +34,7 @@ from ..paths import Path
from ..styles import Style, Classes
from ..transforms import Transform, BoundingBox
from ..utils import FragmentError
-from ..units import convert_unit, render_unit
+from ..units import convert_unit, render_unit, parse_unit
from ._utils import ChildToProperty, NSS, addNS, removeNS, splitNS
from ..properties import all_properties
@@ -570,3 +570,15 @@ class ShapeElement(BaseElement):
if not float(self.style.get('opacity', 1.0)):
return False
return True
+
+    def get_line_height_uu(self):
+        """Return the specified ``line-height`` of this element in user units.
+
+        When ``parse_unit`` returns None for the value, 1.2 times the font size
+        is used. A percentage is applied to the font size. Any other value is
+        converted with ``unittouu``.
+        """
+        specified = self.specified_style()
+        size = specified("font-size")  # already in user units
+        raw_height = specified("line-height")
+        number_and_unit = parse_unit(raw_height)
+        if number_and_unit is None:
+            return size * 1.2
+        if number_and_unit[1] != "%":
+            # NOTE(review): a unitless CSS multiplier (e.g. "1.25") presumably
+            # ends up here as well and is read as a length - confirm.
+            return self.unittouu(raw_height)
+        return size * number_and_unit[0] * 0.01
diff --git a/inkex/properties.py b/inkex/properties.py
index 57fd22e040cdda38cda132c6d95dff8c04b8030d..c7538c53a2e6162f8813122a372dfc91d7e1d64d 100644
--- a/inkex/properties.py
+++ b/inkex/properties.py
@@ -426,6 +426,17 @@ class MarkerShorthandValue(ShorthandValue, URLNoneValue):
return ""
return super()._parse_value(value, element)
+class FontSizeValue(BaseStyleValue):
+    """Style value for the ``font-size`` property.
+
+    Without an element the raw string is kept unchanged; with an element the
+    value is converted through the element's ``unittouu``.
+    """
+    def _parse_value(self, value: str, element=None):
+        if element is not None:
+            try:
+                return element.unittouu(value)
+            except ValueError:
+                # The size could not be parsed (e.g. font-size:normal): use 12pt.
+                return element.unittouu("12pt")
+        # No element available, so no additional logic is applied.
+        return value
+
+
# keys: attributes, right side:
# - Subclass of BaseStyleValue used for instantiating
# - default value
@@ -464,7 +475,7 @@ all_properties: Dict[str, Tuple[Type[BaseStyleValue], str, bool, bool, Union[Lis
"flood-opacity": (AlphaValue, "1", True, False, None),
"font": (FontValue, "", True, False, None),
"font-family": (BaseStyleValue, "sans-serif", True, True, None),
- "font-size": (BaseStyleValue, "medium", True, True, None),
+ "font-size": (FontSizeValue, "medium", True, True, None),
"font-size-adjust": (BaseStyleValue, "none", True, True, None),
"font-stretch": (EnumValue, "normal", True, True, ["normal", "ultra-condensed", "extra-condensed", "condensed",
"semi-condensed", "semi-expanded", "expanded", "extra-expanded",
@@ -479,7 +490,7 @@ all_properties: Dict[str, Tuple[Type[BaseStyleValue], str, bool, bool, Union[Lis
"image-rendering": (EnumValue, "auto", True, True, ["auto", "optimizeQuality", "optimizeSpeed"]),
"letter-spacing": (BaseStyleValue, "normal", True, True, None),
"lighting-color": (ColorValue, "normal", True, False, None),
- "line-height": (BaseStyleValue, "normal", False, False, None),
+ "line-height": (BaseStyleValue, "normal", False, True, None),
"marker" : (MarkerShorthandValue, "", True, True, None),
"marker-end": (URLNoneValue, "none", True, True, None),
"marker-mid": (URLNoneValue, "none", True, True, None),
@@ -515,7 +526,7 @@ all_properties: Dict[str, Tuple[Type[BaseStyleValue], str, bool, bool, Union[Lis
"white-space": (EnumValue, "normal", True, True, ["normal", "pre", "nowrap", "pre-wrap", "break-spaces", "pre-line"]),
"word-spacing": (BaseStyleValue, "normal", True, True, None),
# including obsolete SVG 1.1 values
- "writing-mode": (EnumValue, "visible", True, True, ["horizontal-tb", "vertical-rl", "vertical-lr", "lr", "lr-tb", "rl", "rl-tb", "tb", "tb-rl"]),
+ "writing-mode": (EnumValue, "horizontal-tb", True, True, ["horizontal-tb", "vertical-rl", "vertical-lr", "lr", "lr-tb", "rl", "rl-tb", "tb", "tb-rl"]),
"-inkscape-font-specification": (BaseStyleValue, "sans-serif", False, False, None)
}
# pylint: enable=line-too-long
diff --git a/tests/data/refs/text_split__--id__t1__--id__t3__--splittype__word.out b/tests/data/refs/text_split__--id__t1__--id__t3__--splittype__word.out
deleted file mode 100644
index 0c0e4fd96f6cacc3b8adfef72aa9122c67f43f38..0000000000000000000000000000000000000000
--- a/tests/data/refs/text_split__--id__t1__--id__t3__--splittype__word.out
+++ /dev/null
@@ -1,41 +0,0 @@
-
\ No newline at end of file
diff --git a/tests/data/refs/text_split__5478757cb04224bacfa3c641fea954ba.out b/tests/data/refs/text_split__5478757cb04224bacfa3c641fea954ba.out
new file mode 100644
index 0000000000000000000000000000000000000000..1e3955284d42806d7951afa4305169c7bcef5eeb
--- /dev/null
+++ b/tests/data/refs/text_split__5478757cb04224bacfa3c641fea954ba.out
@@ -0,0 +1,28 @@
+
\ No newline at end of file
diff --git a/tests/data/refs/text_split__74947d6e9e9ee89a80bbec0bfacc6dfb.out b/tests/data/refs/text_split__74947d6e9e9ee89a80bbec0bfacc6dfb.out
new file mode 100644
index 0000000000000000000000000000000000000000..a74d052026ff4956314704f34ca1b94ddd68d088
--- /dev/null
+++ b/tests/data/refs/text_split__74947d6e9e9ee89a80bbec0bfacc6dfb.out
@@ -0,0 +1,28 @@
+
\ No newline at end of file
diff --git a/tests/data/refs/text_split__897ab8b3c516acacaf68391a51eb8836.out b/tests/data/refs/text_split__897ab8b3c516acacaf68391a51eb8836.out
new file mode 100644
index 0000000000000000000000000000000000000000..c12794ecb9f5410567bf8356ee9816c97d320335
--- /dev/null
+++ b/tests/data/refs/text_split__897ab8b3c516acacaf68391a51eb8836.out
@@ -0,0 +1,27 @@
+
\ No newline at end of file
diff --git a/tests/data/refs/text_split__ad318803d098b3903561d44825a36ea0.out b/tests/data/refs/text_split__ad318803d098b3903561d44825a36ea0.out
new file mode 100644
index 0000000000000000000000000000000000000000..ccd31f3fea3bb07dd4e327ef533f06903b8f10f9
--- /dev/null
+++ b/tests/data/refs/text_split__ad318803d098b3903561d44825a36ea0.out
@@ -0,0 +1,45 @@
+
\ No newline at end of file
diff --git a/tests/data/refs/text_split__c242adcac78228419d3c065a99727bfb.out b/tests/data/refs/text_split__c242adcac78228419d3c065a99727bfb.out
new file mode 100644
index 0000000000000000000000000000000000000000..f356a0855f2c06d9dff49b73ee7b4bd1d009dfcf
--- /dev/null
+++ b/tests/data/refs/text_split__c242adcac78228419d3c065a99727bfb.out
@@ -0,0 +1,44 @@
+
\ No newline at end of file
diff --git a/tests/data/refs/text_split__d8b155ac58a7424471f416b64f21669f.out b/tests/data/refs/text_split__d8b155ac58a7424471f416b64f21669f.out
new file mode 100644
index 0000000000000000000000000000000000000000..0508d54cd34721108d5aa0e8dab3ba8bbc6e3dc7
--- /dev/null
+++ b/tests/data/refs/text_split__d8b155ac58a7424471f416b64f21669f.out
@@ -0,0 +1,27 @@
+
\ No newline at end of file
diff --git a/tests/data/refs/text_split__dd77d3cc0134ad62833e05ca73e38896.out b/tests/data/refs/text_split__dd77d3cc0134ad62833e05ca73e38896.out
new file mode 100644
index 0000000000000000000000000000000000000000..8eb90d3675da3b45454c4b6026db79e3d1f32856
--- /dev/null
+++ b/tests/data/refs/text_split__dd77d3cc0134ad62833e05ca73e38896.out
@@ -0,0 +1,27 @@
+
\ No newline at end of file
diff --git a/tests/data/svg/text_types.svg b/tests/data/svg/text_types.svg
new file mode 100644
index 0000000000000000000000000000000000000000..191623f5c2776b7bf5ade294b5b8b548334c91a6
--- /dev/null
+++ b/tests/data/svg/text_types.svg
@@ -0,0 +1,256 @@
+
+
+
+
diff --git a/tests/test_text_split.py b/tests/test_text_split.py
index 1e19ed52ef1753766e74cf0ca04096ba3117b652..fa6bd757454a9942dca1a09a3955b49657ad0501 100644
--- a/tests/test_text_split.py
+++ b/tests/test_text_split.py
@@ -1,8 +1,23 @@
# coding=utf-8
+from inkex.tester.filters import CompareWithoutIds
from text_split import TextSplit
from inkex.tester import ComparisonMixin, TestCase
+from inkex.tester.filters import CompareWithoutIds
class TestSplitBasic(ComparisonMixin, TestCase):
"""Test split effect"""
effect_class = TextSplit
- comparisons = [('--id=t1', '--id=t3', '--splittype=word')]
+    compare_filters = [CompareWithoutIds()]
+    compare_file = "svg/text_types.svg"
+    # Element ids that are selected in every comparison run below.
+    all_shapes = ('--id=regular', '--id=regular-transform', '--id=inline-size', '--id=kerning',
+                  '--id=flowroot', '--id=flowroot-abs-lineheight', '--id=flowroot-no-lineheight',
+                  '--id=manual-kerns', '--id=rtl', '--id=shape-inside')
+    # The trailing comments give the hash prefix of the matching reference file.
+    comparisons = [all_shapes + ('--splittype=line', '--preserve=True'), #ad3188
+                   all_shapes + ('--splittype=line', '--preserve=False'), #c242ad
+                   all_shapes + ('--splittype=word', '--preserve=True'), #547875
+                   all_shapes + ('--splittype=word', '--preserve=False'), #d8b155
+                   all_shapes + ('--splittype=word', '--preserve=False', '--separation=0.0'), #897ab8
+                   all_shapes + ('--splittype=letter', '--preserve=True'), #74947d
+                   all_shapes + ('--splittype=letter', '--preserve=False') #dd77d3
+                  ]
diff --git a/text_split.inx b/text_split.inx
index 35c3cba11b860d62591afdec5d6ffcc8477f6ae2..e6dc942972b0ca738737373f2826daaf1d8c4c1e 100644
--- a/text_split.inx
+++ b/text_split.inx
@@ -1,7 +1,7 @@
Split text
- com.nerdson.text_split
+ org.inkscape.text_split
@@ -9,10 +9,18 @@
+ 1.0
true
+
+
+
diff --git a/text_split.py b/text_split.py
index 23a5f4dbf0ac9d8f9166699f1d6620f340946950..3d7a1854f4c3e98528af41b8e3fbdc99daab6510 100755
--- a/text_split.py
+++ b/text_split.py
@@ -2,6 +2,7 @@
# coding=utf-8
#
# Copyright (C) 2009 Karlisson Bezerra, contato@nerdson.com
+# 2021 Jonathan Neuhauser, jonathan.neuhauser@outlook.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -17,152 +18,55 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
+"""Splits a text element into lines, words, chars.
+Supports all text elements that Inkscape can create, such as normal text, shape-inside (SVG2),
+flowroot (SVG1.2), inline-size, manual kerns, and nested tspans (with possibly different kerns)
+
+The code is structured as follows. For each selected text element:
+ - preprocess_text_element duplicates the element, converts flowroots to plain text elements,
+ and simplifies manual kerning if requested (only for split_words and split_chars, for all,
+ possibly nested, children) using simplify_child_tspans.
+ - if split lines: split_lines copies all top-level tspans from the previous step into their own
+ text element, which is otherwise a duplicate of the original text element (thus preserving
+ style and transforms), see append_splitted_element
+ - if split words or chars: split_words_or_chars: the text is recursively processed. For each tspan,
+ the content and tail is split (words: at spaces, chars: after each character) into their own
+ tspan, again using append_splitted_element. The method keeps track of the horizontal and vertical
+ coordinate, incrementing it with the number of characters and a multiple of font size.
+"""
+
+import re as regex
+from typing import Union, Callable
import inkex
-from inkex import (
- TextElement, FlowRoot, FlowPara, Tspan, TextPath, Rectangle
-)
+from inkex import TextElement, FlowRoot, FlowPara, Tspan, Rectangle, ShapeElement
+from inkex.units import parse_unit
+from inkex.localization import inkex_gettext as _
+
+TextLike = Union[FlowRoot, TextElement]
class TextSplit(inkex.EffectExtension):
"""Split text up."""
+    def __init__(self):
+        """Set up the state shared by the splitting helpers."""
+        super().__init__()
+        # Callback applied to every text and tail; assigned in effect().
+        self.mode: Callable
+        # Text element that split-off elements are inserted after.
+        self.current_root: TextLike
+        # Running position and font size used for the next split-off element.
+        self.current_x: float = 0
+        self.current_y: float = 0
+        self.current_fontsize: float = 0
+        # Horizontal advance per character, as a fraction of the font size.
+        self.fs_multiplier: float = 0.25
+        # Kern gap, in multiples of the font size, that separates two words.
+        self.separation: float = 1
+        # Whether preprocessing simplifies manual kerns.
+        self.process_kerns: bool = True
def add_arguments(self, pars):
- pars.add_argument("--tab", help="The selected UI-tab when OK was pressed")
- pars.add_argument("-s", "--splittype", default="line", help="type of split")
- pars.add_argument("-p", "--preserve", type=inkex.Boolean, default=True,\
- help="Preserve original")
-
- def split_lines(self, node):
- """Returns a list of lines"""
- lines = []
- count = 1
-
- for elem in node:
- if isinstance(elem, TextPath):
- inkex.errormsg("Text on path isn't supported. First remove text from path.")
- break
- elif not isinstance(elem, (FlowPara, Tspan)):
- continue
-
- text = TextElement(**node.attrib)
-
- # handling flowed text nodes
- if isinstance(node, FlowRoot):
- fontsize = node.style.get("font-size", "12px")
- fs = self.svg.unittouu(fontsize)
-
- # selects the flowRegion's child (svg:rect) to get @X and @Y
- flowref = node.findone('svg:flowRegion')[0]
-
- if isinstance(flowref, Rectangle):
- text.set("x", flowref.get("x"))
- text.set("y", str(float(flowref.get("y")) + fs * count))
- count += 1
- else:
- inkex.debug("This type of text element isn't supported. First unflow text.")
- break
-
- # now let's convert flowPara into tspan
- tspan = Tspan()
- tspan.set("sodipodi:role", "line")
- tspan.text = elem.text
- text.append(tspan)
-
- else:
- from copy import copy
- x = elem.get("x") or node.get("x")
- y = elem.get("y") or node.get("y")
-
- text.set("x", x)
- text.set("y", y)
- text.append(copy(elem))
-
- lines.append(text)
-
- return lines
-
- def split_words(self, node):
- """Returns a list of words"""
- words = []
-
- # Function to recursively extract text
- def plain_str(elem):
- words = []
- if elem.text:
- words.append(elem.text)
- for n in elem:
- words.extend(plain_str(n))
- if n.tail:
- words.append(n.tail)
- return words
-
- # if text has more than one line, iterates through elements
- lines = self.split_lines(node)
- if not lines:
- return words
-
- for line in lines:
- # gets the position of text node
- x = float(line.get("x"))
- y = line.get("y")
-
- # gets the font size. if element doesn't have a style attribute, it assumes font-size = 12px
- fontsize = line.style.get("font-size", "12px")
- fs = self.svg.unittouu(fontsize)
-
- # extract and returns a list of words
- words_list = "".join(plain_str(line)).split()
- prev_len = 0
-
- # creates new text nodes for each string in words_list
- for word in words_list:
- tspan = Tspan()
- tspan.text = word
-
- text = TextElement(**line.attrib)
- tspan.set('sodipodi:role', "line")
-
- # positioning new text elements
- x = x + prev_len * fs
- prev_len = len(word)
- text.set("x", str(x))
- text.set("y", str(y))
-
- text.append(tspan)
- words.append(text)
-
- return words
-
- def split_letters(self, node):
- """Returns a list of letters"""
-
- letters = []
-
- words = self.split_words(node)
- if not words:
- return letters
-
- for word in words:
-
- x = float(word.get("x"))
- y = word.get("y")
-
- # gets the font size. If element doesn't have a style attribute, it assumes font-size = 12px
- fontsize = word.style.get("font-size", "12px")
- fs = self.svg.unittouu(fontsize)
-
- # for each letter in element string
- for letter in word[0].text:
- tspan = Tspan()
- tspan.text = letter
-
- text = TextElement(**node.attrib)
- text.set("x", str(x))
- text.set("y", str(y))
- x += fs
-
- text.append(tspan)
- letters.append(text)
- return letters
+        pars.add_argument("--tab", help="The selected UI tab when OK was pressed")
+        pars.add_argument("-t", "--splittype", default="line", choices=["letter", "word", "line"],
+                          help="type of split")
+        pars.add_argument("-p", "--preserve", type=inkex.Boolean, default=True,
+                          help="Preserve original")
+        # Adjacent string literals are joined verbatim, so the first part needs
+        # its own trailing space.
+        pars.add_argument("-s", "--separation", type=float, default=1,
+                          help="Threshold for separating text with manual kerns in "
+                               "multiples of font-size")
def effect(self):
"""Applies the effect"""
@@ -171,21 +75,209 @@ class TextSplit(inkex.EffectExtension):
preserve = self.options.preserve
# checks if the selected elements are text nodes
- for elem in self.svg.selection.get(TextElement, FlowRoot):
- if split_type == "line":
- nodes = self.split_lines(elem)
- elif split_type == "word":
- nodes = self.split_words(elem)
- elif split_type == "letter":
- nodes = self.split_letters(elem)
-
- for child in nodes:
- elem.getparent().append(child)
-
- # preserve original element
- if not preserve and nodes:
- parent = elem.getparent()
- parent.remove(elem)
+ for elem in self.svg.selection.filter_nonzero(TextElement, FlowRoot):
+ try:
+ self.separation = self.options.separation
+ if split_type == "line":
+ node = self.split_lines(elem)
+ elif split_type == "word":
+ self.mode = self.process_plain_words
+ node = self.split_words_or_chars(elem)
+ else:
+ self.separation = 0
+ self.mode = self.process_plain_chars
+ node = self.split_words_or_chars(elem)
+
+ node.getparent().remove(node)
+
+ if not preserve and node is not None:
+ elem.getparent().remove(elem)
+ except TypeError as err:
+ inkex.errormsg(err) # if an element can not be processed
+
+    @staticmethod
+    def get_font_size(element):
+        """Return the ``font-size`` entry of the element's specified style."""
+        specified = element.specified_style()
+        return specified("font-size")
+
+    @staticmethod
+    def get_line_height(element: ShapeElement):
+        """Return the element's line height as reported by ``get_line_height_uu``."""
+        line_height = element.get_line_height_uu()
+        return line_height
+
+    def simplify_child_tspans(self, element: TextElement):
+        """Replace manual kerns in all (nested) child tspans by one tspan per word.
+
+        A child tspan is treated as manually kerned when its ``x`` attribute lists at
+        least two values and it has text. The text is cut wherever two consecutive x
+        values are further apart than ``self.separation`` times the font size. Each
+        piece is added to the child as a new tspan positioned at the x value of its
+        first character; the child's own ``x`` attribute and text are then cleared.
+
+        element: text element or tspan whose children are processed in place.
+        """
+        for child in list(element):
+            if not isinstance(child, Tspan):
+                continue
+            # process manual kerns
+            xvals = [float(val) for val in regex.split(r"[,\s]", child.get("x") or "") if val]
+            content = child.text
+            if content and len(xvals) >= 2:
+                threshold = self.separation * self.get_font_size(child)
+                # Only positions backed by both a character and an x value can be
+                # compared. The last such position never starts a new word: everything
+                # from the current word start onwards forms the final word.
+                last = min(len(content), len(xvals)) - 1
+                breaks = [i for i in range(1, last) if abs(xvals[i] - xvals[i - 1]) > threshold]
+                # Exactly one closing slice is emitted, so differing lengths of text and
+                # x list can neither index past xvals nor add empty tspans.
+                bounds = [0] + breaks + [len(content)]
+                for start, end in zip(bounds, bounds[1:]):
+                    wordspan = Tspan(x=str(xvals[start]))
+                    wordspan.text = content[start:end]
+                    child.add(wordspan)
+                child.pop("x")
+                child.text = None
+            # process child elements
+            self.simplify_child_tspans(child)
+
+    def preprocess_text_element(self, element: TextElement):
+        """Create a working copy of ``element`` made of positioned tspans and return it.
+
+        A FlowRoot is converted into a new TextElement placed after it: every FlowPara
+        becomes a tspan at the x of the flow region rectangle, with y advanced by
+        1.25 times the font size for the first paragraph and by the line height for
+        each following one (no automatic wrapping is performed). Any other element is
+        duplicated and the duplicate is appended to the parent of the original.
+
+        ``shape-inside`` and ``direction`` are removed from the copy's style, and
+        manual kerns are simplified when ``self.process_kerns`` is set.
+
+        element: the selected TextElement or FlowRoot; it is not modified.
+        Returns the working copy.
+        Raises TypeError if the flow region of a FlowRoot is not a rectangle.
+        """
+        oldelement = element
+        if isinstance(oldelement, FlowRoot):
+            flowref = oldelement.findone('svg:flowRegion')[0]
+            if not isinstance(flowref, Rectangle):
+                # Checked before anything is inserted, so a rejected element leaves no
+                # empty text behind. The template is translated first and filled in
+                # afterwards, with the id of the element the user selected.
+                raise TypeError(_("Element {} uses a flow region that is not a rectangle. "
+                                  "First unflow text.").format(oldelement.get_id()))
+            element = TextElement()
+            oldelement.addnext(element)
+            element.style = oldelement.style
+            element.transform = oldelement.transform
+            flowx = element.unittouu(flowref.get("x"))
+            # y is converted exactly like x; no float() first, which would reject units.
+            flowy = element.unittouu(flowref.get("y"))
+            first = True
+            for child in oldelement:
+                if not isinstance(child, FlowPara):
+                    continue
+                # convert the flowpara "line" (note: no automatic wrapping)
+                # to a tspan and set the y coordinate.
+                # future FlowRoot improvements could add a better conversion.
+                newchild = Tspan()
+                element.append(newchild)
+                newchild.text = child.text
+                newchild.style = child.style
+                newchild.transform = child.transform
+                newchild.set("x", str(flowx))
+                if first:
+                    flowy += self.get_font_size(child) * 1.25
+                    first = False
+                else:
+                    flowy += self.get_line_height(child)
+                newchild.set("y", str(flowy))
+        else:
+            element = oldelement.duplicate()
+            oldelement.getparent().append(element)
+
+        element.style.pop("shape-inside", None)
+
+        # Real support for RTL text is missing, but we can emulate it by just removing the
+        # attribute. However, line breaks will be misaligned.
+        element.style.pop("direction", None)
+        for child in element:
+            child.style.pop("direction", None)
+
+        if self.process_kerns:
+            self.simplify_child_tspans(element)
+        return element
+
+    def append_splitted_element(self, text, prototype=None):
+        """Insert a new text element holding ``text`` right after ``self.current_root``.
+
+        text: either a Tspan that is a direct child of ``self.current_root`` (it is moved
+            into a duplicate of the root from which all tspans were removed), or a string
+            (placed at ``self.current_x`` / ``self.current_y``).
+        prototype: element that style and transform are taken from when ``text`` is a
+            string.
+        """
+        moves_tspan = isinstance(text, Tspan) and text.getparent() == self.current_root
+        if not moves_tspan:
+            position = {"x": str(self.current_x), "y": str(self.current_y)}
+            elem = TextElement(**position)
+            # The style of all parents, including the text element, is transferred. A
+            # style on the text element's parent gets duplicated this way, which does
+            # not really matter.
+            elem.style = prototype.specified_style()
+            # There may be nested tspans between the prototype and the root, so the
+            # transforms are composed up to the parent the new element is added to.
+            elem.transform = (- self.current_root.getparent().transform) \
+                * prototype.composed_transform()
+            tsp = Tspan(**position)
+            tsp.text = text
+            elem.add(tsp)
+        else:
+            # The tspan itself is moved into an otherwise identical text element.
+            elem = self.current_root.duplicate()
+            elem.remove_all(Tspan)
+            elem.append(text)
+            for axis in "xy":
+                elem.set(axis, text.get(axis))
+        self.current_root.addnext(elem)
+
+
+    def split_lines(self, element: TextLike) -> TextElement:
+        """Split a text into one text element per top-level tspan.
+
+        element: the selected TextElement or FlowRoot.
+        Returns the preprocessed working copy the tspans were taken from.
+        """
+        self.process_kerns = False
+        preprocessed = self.preprocess_text_element(element)
+        self.current_root = preprocessed
+        # Now we only have to move each tspan into its own text element.
+        for child in list(preprocessed):
+            # Anything that is not a tspan would be handled as a plain string by
+            # append_splitted_element, which needs a prototype and would fail without.
+            if isinstance(child, Tspan):
+                self.append_splitted_element(child)
+
+        return preprocessed
+
+ def process_plain_text(self, element, splitted):
+ """Creates a new text element for each non-empty entry of splitted.
+
+ Every non-empty entry is handed to append_splitted_element together with element,
+ which serves as prototype there. self.current_x is advanced by
+ self.current_fontsize * (len(entry) + 1) * self.fs_multiplier.
+
+ element: prototype passed through to append_splitted_element
+ splitted: iterable of strings (a list of words, or a string that is iterated
+ character by character); None is ignored """
+ if splitted is None:
+ return
+ for word in splitted:
+ if word != "":
+ self.append_splitted_element(word, element)
+ # NOTE(review): indentation is not visible in this view; presumably the advance
+ # below runs for every entry, including empty ones - confirm.
+ # +1 since for words, we lost a space
+ self.current_x += self.current_fontsize * (len(word) + 1) * self.fs_multiplier
+
+    def process_plain_words(self, element, text):
+        """Split ``text`` at single spaces and pass the pieces to process_plain_text."""
+        self.fs_multiplier = 0.4
+        if text is None:
+            return
+        words = text.split(" ")
+        self.process_plain_text(element, words)
+
+    def process_plain_chars(self, element, text):
+        """Pass ``text`` to process_plain_text, which iterates it character by character."""
+        self.fs_multiplier = 0.25
+        # text may be None here; process_plain_text returns early in that case.
+        characters = text
+        self.process_plain_text(element, characters)
+
+
+    def split_words_or_chars(self, element: TextLike) -> TextElement:
+        """Split a text into words or characters, depending on ``self.mode``.
+
+        The preprocessed copy of ``element`` is walked recursively. For every node,
+        ``self.current_x`` / ``self.current_y`` are updated from its ``x`` / ``y``
+        attributes (when present) and ``self.mode`` is called for its text and for the
+        tail of each of its children.
+
+        element: the selected TextElement or FlowRoot.
+        Returns the preprocessed working copy.
+        """
+        self.process_kerns = True
+        preprocessed = self.preprocess_text_element(element)
+
+        def process_element(node) -> None:
+            """Emit the text of node, then its nested tspans and the tails after them."""
+            raw_x, raw_y = node.get("x"), node.get("y")
+            if raw_x is not None:
+                self.current_x = node.root.unittouu(raw_x)
+            if raw_y is not None:
+                self.current_y = node.root.unittouu(raw_y)
+            self.current_fontsize = self.get_font_size(node)
+            self.mode(node, node.text)
+
+            for child in node:
+                if isinstance(child, Tspan):
+                    process_element(child)
+                # The tail follows the child but is styled by node, hence node is the
+                # prototype here.
+                # NOTE(review): self.current_fontsize still holds the child's value at
+                # this point - confirm whether the tail should advance with node's size.
+                self.mode(node, child.tail)
+
+        self.current_root = preprocessed
+        process_element(preprocessed)
+        return preprocessed
+
if __name__ == '__main__':
TextSplit().run()