3333import textwrap
3434import traceback
3535from glob import glob
36+ from itertools import takewhile
3637from locale import getpreferredencoding
3738from urlparse import urljoin
3839from xmlrpclib import ServerProxy , Error as XMLRPCError
@@ -274,7 +275,6 @@ def __init__(self, interp, idle=None):
274275 self .matches_iter = MatchesIterator ()
275276 self .argspec = None
276277 self .current_func = None
277- self .inside_string = False
278278 self .highlighted_paren = None
279279 self .list_win_visible = False
280280 self ._C = {}
@@ -334,23 +334,34 @@ def _callable_postfix(self, value, word):
334334 word += '('
335335 return word
336336
def current_string(self, concatenate=False):
    """Return the current string.

    Returns the contents of the string literal the end of the current
    line is inside, or '' when the line does not end inside an open
    string.  With `concatenate=True`, the contents of adjacent
    (implicitly concatenated) literals already closed on the line are
    kept and joined in; by default only the still-open literal counts.
    """
    # Tokenize the line and take the trailing run of string/whitespace
    # tokens, scanning backwards from the end of the line.
    tokens = self.tokenize(self.current_line())
    string_tokens = list(takewhile(token_is_any_of([Token.String,
                                                    Token.Text]),
                                   reversed(tokens)))
    if not string_tokens:
        return ''
    # string_tokens is in reversed (end-of-line-first) order, so pop()
    # yields the earliest token of the run: the opening quote of the
    # first literal.  (Pygments emits quotes and string contents as
    # separate tokens — assumption based on the handling below.)
    opening = string_tokens.pop()[1]
    string = list()
    # Walk the remaining tokens in source order.  `opening` holds the
    # quote of the currently open literal, or None between literals.
    for (token, value) in reversed(string_tokens):
        if token is Token.Text:
            continue
        elif opening is None:
            # No literal open: this token opens the next one.
            opening = value
        elif token is Token.String.Doc:
            # A complete triple-quoted literal in one token; strip the
            # three-character quotes from both ends.
            string.append(value[3:-3])
            opening = None
        elif value == opening:
            # Closing quote: the literal ended before the cursor.
            opening = None
            if not concatenate:
                # Discard closed literals unless concatenating.
                string = list()
        else:
            # Ordinary string content inside the open literal.
            string.append(value)

    if opening is None:
        # Last literal was closed — we are not inside a string.
        return ''
    return ''.join(string)
354365
355366 def get_object (self , name ):
356367 if name in self .interp .locals :
@@ -602,8 +613,6 @@ def push(self, s, insert_into_history=True):
602613 if insert_into_history :
603614 self .rl_history .append (s )
604615
605- self .inside_string = next_token_inside_string (s , self .inside_string )
606-
607616 try :
608617 more = self .interp .runsource ('\n ' .join (self .buffer ))
609618 except SystemExit :
@@ -707,36 +716,45 @@ def close(self):
707716
708717 def tokenize (self , s , newline = False ):
709718 """Tokenize a line of code."""
710- if self .inside_string :
711- # A string started in another line is continued in this
712- # line
713- tokens = PythonLexer ().get_tokens (self .inside_string + s )
714- token , value = tokens .next ()
715- if token is Token .String .Doc :
716- tokens = [(Token .String , value [3 :])] + list (tokens )
717- else :
718- tokens = list (PythonLexer ().get_tokens (s ))
719719
720720 source = '\n ' .join (self .buffer + [s ])
721721 cursor = len (source ) - self .cpos
722722 if self .cpos :
723723 cursor += 1
724724 stack = list ()
725725 all_tokens = list (PythonLexer ().get_tokens (source ))
726- i = line = 0
727- pos = 0
726+ # Unfortunately, Pygments adds a trailing newline and strings with
727+ # no size, so strip them
728+ while not all_tokens [- 1 ][1 ]:
729+ all_tokens .pop ()
730+ all_tokens [- 1 ] = (all_tokens [- 1 ][0 ], all_tokens [- 1 ][1 ].rstrip ('\n ' ))
731+ line = pos = 0
728732 parens = dict (zip ('{([' , '})]' ))
729- for (token , value ) in all_tokens :
733+ line_tokens = list ()
734+ saved_tokens = list ()
735+ search_for_paren = True
736+ for (token , value ) in split_lines (all_tokens ):
730737 pos += len (value )
738+ if token is Token .Text and value == '\n ' :
739+ line += 1
740+ # Remove trailing newline
741+ line_tokens = list ()
742+ saved_tokens = list ()
743+ continue
744+ line_tokens .append ((token , value ))
745+ saved_tokens .append ((token , value ))
746+ if not search_for_paren :
747+ continue
731748 under_cursor = (pos == cursor )
732749 if token is Token .Punctuation :
733750 if value in parens :
734751 if under_cursor :
735- tokens [ i ] = (Parenthesis .UnderCursor , value )
752+ line_tokens [ - 1 ] = (Parenthesis .UnderCursor , value )
736753 # Push marker on the stack
737754 stack .append ((Parenthesis , value ))
738755 else :
739- stack .append ((line , i , value ))
756+ stack .append ((line , len (line_tokens ) - 1 ,
757+ line_tokens , value ))
740758 elif value in parens .itervalues ():
741759 saved_stack = list (stack )
742760 try :
@@ -747,58 +765,45 @@ def tokenize(self, s, newline=False):
747765 except IndexError :
748766 # SyntaxError.. more closed parentheses than
749767 # opened or a wrong closing paren
768+ opening = None
750769 if not saved_stack :
751- break
770+ search_for_paren = False
752771 else :
753- opening = None
754772 stack = saved_stack
755773 if opening and opening [0 ] is Parenthesis :
756774 # Marker found
757- tokens [ i ] = (Parenthesis , value )
758- break
775+ line_tokens [ - 1 ] = (Parenthesis , value )
776+ search_for_paren = False
759777 elif opening and under_cursor and not newline :
760778 if self .cpos :
761- tokens [ i ] = (Parenthesis .UnderCursor , value )
779+ line_tokens [ - 1 ] = (Parenthesis .UnderCursor , value )
762780 else :
763781 # The cursor is at the end of line and next to
764782 # the paren, so it doesn't reverse the paren.
765783 # Therefore, we insert the Parenthesis token
766784 # here instead of the Parenthesis.UnderCursor
767785 # token.
768- if i < len (tokens ):
769- # XXX This is a bug in our index
770- # calculation (happens with multiline
771- # strings)
772- tokens [i ] = (Parenthesis , value )
773- (lineno , i , opening ) = opening
786+ line_tokens [- 1 ] = (Parenthesis , value )
787+ (lineno , i , tokens , opening ) = opening
774788 if lineno == len (self .buffer ):
775- self .highlighted_paren = lineno
776- tokens [i ] = (Parenthesis , opening )
789+ self .highlighted_paren = ( lineno , saved_tokens )
790+ line_tokens [i ] = (Parenthesis , opening )
777791 else :
778- line = self .buffer [lineno ]
779- self .highlighted_paren = lineno
792+ self .highlighted_paren = (lineno , list (tokens ))
780793 # We need to redraw a line
781- line_tokens = list (PythonLexer ().get_tokens (line ))
782- line_tokens [i ] = (Parenthesis , opening )
783- self .reprint_line (lineno , line_tokens )
784- break
794+ tokens [i ] = (Parenthesis , opening )
795+ self .reprint_line (lineno , tokens )
796+ search_for_paren = False
785797 elif under_cursor :
786- break
787- elif token is Token .Text and value == '\n ' :
788- line += 1
789- if line == len (self .buffer ):
790- i = - 1
791- elif under_cursor :
792- break
793- i += 1
794- return tokens
798+ search_for_paren = False
799+ if line != len (self .buffer ):
800+ return list ()
801+ return line_tokens
795802
def clear_current_line(self):
    """This is used as the exception callback for the Interpreter instance.
    It prevents autoindentation from occurring after a traceback."""
    # NOTE(review): body intentionally empty — the former
    # `self.inside_string = False` reset was removed along with the
    # inside_string tracking; presumably registering the callback alone
    # (or an override elsewhere) does the clearing — confirm.
802807
803808def next_indentantion (line ):
804809 """Given a code line, return the indentation of the next line."""
@@ -821,3 +826,36 @@ def next_token_inside_string(s, inside_string):
821826 elif value == inside_string :
822827 inside_string = False
823828 return inside_string
829+
830+
def split_lines(tokens):
    """Re-emit a (token, value) stream with values split at newlines.

    Every newline character is yielded on its own as a Token.Text pair,
    and zero-length values are dropped from the stream.
    """
    for (kind, text) in tokens:
        pieces = text.split('\n')
        # All pieces but the last were each followed by a newline.
        for piece in pieces[:-1]:
            yield (kind, piece)
            yield (Token.Text, '\n')
        # The final piece had no trailing newline; skip it when empty
        # (also covers an entirely empty value, where pieces == ['']).
        if pieces[-1]:
            yield (kind, pieces[-1])
840+
def token_is(token_type):
    """Build a predicate testing whether a (token, value) pair has the
    token type `token_type`, either directly or via its parent chain."""
    def token_is_type(token):
        """Return True when the pair's type is `token_type` or one of
        its ancestors is."""
        node = token[0]
        while True:
            if node is token_type:
                return True
            if not node.parent:
                return False
            node = node.parent

    return token_is_type
852+
def token_is_any_of(token_types):
    """Return a callable object that returns whether a token is any of the
    given types `token_types`.

    The returned predicate takes a (token, value) pair and may safely be
    called any number of times.
    """
    # Materialize the per-type predicates as a list.  The previous
    # map(token_is, token_types) is fine on Python 2 (map returns a
    # list) but is a latent bug on Python 3, where the one-shot map
    # iterator would be exhausted by the first call and every later call
    # would silently answer False.
    is_token_types = [token_is(token_type) for token_type in token_types]

    def token_is_any_of(token):
        return any(check(token) for check in is_token_types)

    return token_is_any_of
0 commit comments