Highlight string literals green with treesitter

2023-11-17 19:11:06 +07:00 · 2023-11-17 19:11:06 +07:00 · 56f3048d2d
commit 56f3048d2d
parent 039b671973
3 changed files with 99 additions and 40 deletions
--- a/fide.py
+++ b/fide.py
@ -98,6 +98,7 @@ else:
 from config import *
 from editing import *
 from keybindings import *
+from treesitter import *
 def main():
    running = True
    frame_count = 0
@ -154,28 +155,66 @@ def main():
                    #     print('error evaluating line: ' + src[3:])
                #     console.runsource(src[3:])

+        cur_col = 0
        for i,curr_line in enumerate(current_buffer.lines):
+            line_txt = curr_line.piece.get_text()
+            ts_nodes = get_ts_nodes(curr_line.piece.get_text())
+            color_ranges = [(0, len(line_txt), 'black')]
+            # color_ranges = []
+            for node in ts_nodes:
+                if node.grammar_name == 'string_literal':
+                    start_col, end_col = node.start_point[1], node.end_point[1]
+                    print(node.start_point, node.end_point, node.text)
+                    prev_start,prev_end,prev_col = color_ranges[-1]
+                    color = 'green'
+                    if start_col > 0:
+                        color_ranges[-1] = (prev_start, start_col - 1,prev_col)
+                        color_ranges.append((start_col, end_col, color))
+                        if end_col < len(line_txt):
+                            color_ranges.append((end_col + 1, end_col, color))
+                    else:
+                        color_ranges[0] = (start_col, end_col, color)
+
+            char_width = 8
+            for start,end,color in color_ranges:
+                render = text_renderer.render(line_txt[start:end], True, color)
+                screen.blit(render, (start * char_width, i * line_height))
+
+            continue
+            # We need to iterate over the treesitter tree. I think that for now
+            # it can be as easy as going until you find the 'string' grammar name.
+            # So we need to check three conditions to decide when to draw:
+            # 1: Did we hit a newline?
+            # 2: Did we hit EOF?
+            # 3: Did we run into a string?
+
+            # We need to figure out a way to ask treesitter if some
+            # tokens are located at the current line that you're
+            # checking, since we're operating line by line. Since we
+            # don't iterate character by character, we can't check if
+            # we ran into a single or double quote, which I guess we
+            # could then use treesitter to parse. It doesn't make sense
+            # to do that anyway because while that might solve this
+            # string task, ideally it's generalized so that as we walk
+            # the lines we're rendering, we're also checking the
+            # treesitter grammar. But this is tricky because
+            # treesitter doesn't do anything line by line.
+
+            # So is what we're doing compatible then? Maybe we should
+            # consider iterating over the lines in a different way, and
+            # our current data structure won't scale at all. Maybe we
+            # should just use the piece table wholesale after all and
+            # not use insert mode to generate a temp buffer? That
+            # sounds like it'll complicate things a lot.
+
+            # Another thing would be to collect all the needed tokens?
+
            line_syntax = syntax_highlights.get_line(i)
            for grammar in line_syntax:
                if grammar.name == 'string':
                    color = 'green'
                else:
                    color = 'black'
-                # Does this even make sense? Do we even need a piece
-                # table anymore? Why can't we just use a parsed
-                # grammar and iterate that and print out the text?
-
-                # I think it makes sense to just grab the string from
-                # the treesitter grammar What we need to do is iterate
-                # over each child, keeping track of our position As we
-                # iterate, we start at row 0, col 0, add any
-                # whitespace needed so get the first token. If we were
-                # to do it that way, then we have to generate the
-                # whitespace and draw it as well.
-
-                # The thing I'm not clear on is what is the best way
-                # to identify a token, because we need to tell when to
-                # actually write the thing out
                string = curr_line.piece.string_at(grammar.start, grammar.end - grammar.start)
                render = text_renderer.render(string, True, color)
                screen.blit(render, (0, i * line_height))
--- a/fide.todo
+++ b/fide.todo
@ -11,11 +11,12 @@
 * DONE Pass the file you want to edit as a command line argument
 * DONE Save buffer function with a keybinding
 * DONE Install, Import and play around with Treesitter
-* TODO Change the color of string literals
+* DONE Highlight string literals with treesitter
 * TODO Incorporate color regions into the buffer/line objects
 * TODO Create a simple color map based on treesitter keywords
 * TODO Walk the AST to generate the color regions
 * TODO Update treesitter AST as the user edits the file
+* TODO Add scrolling based on cursor position
 * TODO Add reverse parsing of keymaps to show users what they're typing


--- a/treesitter.py
+++ b/treesitter.py
@ -1,4 +1,4 @@
-from tree_sitter import Language, Parser, Node
+from tree_sitter import Language, Parser, Node, TreeCursor, Tree
 from typing import Tuple, List
 from dataclasses import dataclass

@ -17,30 +17,46 @@ from dataclasses import dataclass
 #   ]
 # )
 C_LANGUAGE = Language('build/compiled-languages.so', 'c')
-CPP_LANGUAGE = Language('build/compiled-languages.so', 'cpp')
-PYTHON_LANGUAGE = Language('build/compiled-languages.so', 'python')
+# CPP_LANGUAGE = Language('build/compiled-languages.so', 'cpp')
+# PYTHON_LANGUAGE = Language('build/compiled-languages.so', 'python')
 # PY_LANGUAGE = Language('build/my-languages.so', 'python')

-cpp_parser = Parser()
-cpp_parser.set_language(CPP_LANGUAGE)
-python_parser = Parser()
-python_parser.set_language(PYTHON_LANGUAGE)
+c_parser: Parser = Parser()
+c_parser.set_language(C_LANGUAGE)
+# cpp_parser = Parser()
+# cpp_parser.set_language(CPP_LANGUAGE)
+# python_parser = Parser()
+# python_parser.set_language(PYTHON_LANGUAGE)

-with open('fide.py', 'r') as f:
-    text = f.read()
-    # tree = cpp_parser.parse(bytes(text, 'utf-8'))
-    tree = python_parser.parse(bytes(text, 'utf-8'))
-    root: Node = tree.root_node
+def traverse_tree(tree: Tree):
+    cursor = tree.walk()

-    def dfs(node: Node, indent_level):
-        indent = "".join([' ' * (indent_level * 4)])
-        # if node.grammar_name != node.type:
-        #     print(node)
-        print(indent, '-', f"({node.type})", # node.range,
-              node.start_point, node.end_point, node.text)
-        for n in node.children:
-            dfs(n, indent_level + 1)
-    dfs(root, 0)
+    reached_root = False
+    while reached_root == False:
+        yield cursor.node
+        if cursor.goto_first_child():
+            continue
+        if cursor.goto_next_sibling():
+            continue
+
+        retracing = True
+        while retracing:
+            if not cursor.goto_parent():
+                retracing = False
+                reached_root = True
+            if cursor.goto_next_sibling():
+                retracing = False
+
+# with open('treesitter.py', 'r') as f:
+    # def dfs(node: Node, indent_level):
+    #     indent = "".join([' ' * (indent_level * 4)])
+    #     # if node.grammar_name != node.type:
+    #     #     print(node)
+    #     print(indent, '-', f"({node.type})", # node.range,
+    #           node.start_point, node.end_point, node.text)
+    #     for n in node.children:
+    #         dfs(n, indent_level + 1)
+    # dfs(root, 0)

@dataclass
 class ParsedRange:
@ -48,9 +64,12 @@ class ParsedRange:
    line_num: int
    crange: Tuple[int, int]
    
-def parse_file(text: str) -> List[ParsedRange]:
-    lines = [] * len()
-
+# def get_ts_nodes(text: str) -> List[ParsedRange]:
+def get_ts_nodes(text: str) -> List[Node]:
+    tree = c_parser.parse(bytes(text, 'utf-8'))
+    # root: Node = tree.root_node
+    # query = C_LANGUAGE.query(root.sexp())

+    return [n for n in traverse_tree(tree)]