Highlight string literals green with treesitter

2023-11-17 19:11:06 +07:00 · 2023-11-17 19:11:06 +07:00 · 56f3048d2d
commit 56f3048d2d
parent 039b671973
3 changed files with 99 additions and 40 deletions
--- a/fide.py
+++ b/fide.py
@ -98,6 +98,7 @@ else:
 from config import *
 from editing import *
 from keybindings import *
 from treesitter import *
 def main():
    running = True
    frame_count = 0
@ -154,28 +155,66 @@ def main():
                    #     print('error evaluating line: ' + src[3:])
                #     console.runsource(src[3:])
        cur_col = 0
        for i,curr_line in enumerate(current_buffer.lines):
            line_txt = curr_line.piece.get_text()
            ts_nodes = get_ts_nodes(curr_line.piece.get_text())
            color_ranges = [(0, len(line_txt), 'black')]
            # color_ranges = []
            for node in ts_nodes:
                if node.grammar_name == 'string_literal':
                    start_col, end_col = node.start_point[1], node.end_point[1]
                    print(node.start_point, node.end_point, node.text)
                    prev_start,prev_end,prev_col = color_ranges[-1]
                    color = 'green'
                    if start_col > 0:
                        color_ranges[-1] = (prev_start, start_col - 1,prev_col)
                        color_ranges.append((start_col, end_col, color))
                        if end_col < len(line_txt):
                            color_ranges.append((end_col + 1, end_col, color))
                    else:
                        color_ranges[0] = (start_col, end_col, color)
            char_width = 8
            for start,end,color in color_ranges:
                render = text_renderer.render(line_txt[start:end], True, color)
                screen.blit(render, (start * char_width, i * line_height))
            continue
            # We need to iterate over the treesitter tree. I think that for now
            # it can be as easy as going until you find the 'string' grammar name.
            # So we need to check three conditions to decide when to draw:
            # 1: Did we hit a newline?
            # 2: Did we hit EOF?
            # 3: Did we run into a string?
            # We need to figure out a way to ask treesitter if some
            # tokens are located at the current line that you're
            # checking. Since we're operating line by line. Since we
            # don't iterate character by character, we can't check if
            # we ran into a single or double quote which I guess we
            # could then use treesitter to parse. It doesn't make sense
            # to do that anyway because while that might solve this
            # string task, ideally it's generalized so that as we walk
            # the lines we're rendering, we're also checking
            # treesitter grammar. But this is tricky because
            # treesitter doesn't do anything line by line.
            # So is what we're doing compatible then? Maybe we should
            # consider iterating over the lines in a different way and
            # our current data structure won't scale at all. Maybe we
            # should just use the piece table wholesale after all and
            # not use insert mode to generate a temp buffer? That
            # sounds like it'll complicate things a lot.
            # Another thing would be to collect all the needed tokens?
            line_syntax = syntax_highlights.get_line(i)
            for grammar in line_syntax:
                if grammar.name == 'string':
                    color = 'green'
                else:
                    color = 'black'
                # Does this even make sense? Do we even need a piece
                # table anymore? Why can't we just use a parsed
                # grammar and iterate that and print out the text?
                # I think it makes sense to just grab the string from
                # the treesitter grammar. What we need to do is iterate
                # over each child, keeping track of our position. As we
                # iterate, we start at row 0, col 0, add any
                # whitespace needed to get the first token. If we were
                # to do it that way, then we have to generate the
                # whitespace and draw it as well.
                # The thing I'm not clear on is what is the best way
                # to identify a token, because we need to tell when to
                # actually write the thing out
                string = curr_line.piece.string_at(grammar.start, grammar.end - grammar.start)
                render = text_renderer.render(string, True, color)
                screen.blit(render, (0, i * line_height))
--- a/fide.todo
+++ b/fide.todo
@ -11,11 +11,12 @@
 * DONE Pass the file you want to edit as a command line argument
 * DONE Save buffer function with a keybinding
 * DONE Install, Import and play around with Treesitter
-* TODO Change the color of string literals
+* DONE Highlight string literals with treesitter
 * TODO Incorporate color regions into the buffer/line objects
 * TODO Create a simple color map based on treesitter keywords
 * TODO Walk the AST to generate the color regions
 * TODO Update treesitter AST as the user edits the file
 * TODO Add scrolling based on cursor position
 * TODO Add reverse parsing of keymaps to show users what they're typing
--- a/treesitter.py
+++ b/treesitter.py
@ -1,4 +1,4 @@
-from tree_sitter import Language, Parser, Node
+from tree_sitter import Language, Parser, Node, TreeCursor, Tree
 from typing import Tuple, List
 from dataclasses import dataclass
@ -17,30 +17,46 @@ from dataclasses import dataclass
 #   ]
 # )
 C_LANGUAGE = Language('build/compiled-languages.so', 'c')
-CPP_LANGUAGE = Language('build/compiled-languages.so', 'cpp')
+# CPP_LANGUAGE = Language('build/compiled-languages.so', 'cpp')
-PYTHON_LANGUAGE = Language('build/compiled-languages.so', 'python')
+# PYTHON_LANGUAGE = Language('build/compiled-languages.so', 'python')
 # PY_LANGUAGE = Language('build/my-languages.so', 'python')
-cpp_parser = Parser()
+c_parser: Parser = Parser()
-cpp_parser.set_language(CPP_LANGUAGE)
+c_parser.set_language(C_LANGUAGE)
-python_parser = Parser()
+# cpp_parser = Parser()
-python_parser.set_language(PYTHON_LANGUAGE)
+# cpp_parser.set_language(CPP_LANGUAGE)
 # python_parser = Parser()
 # python_parser.set_language(PYTHON_LANGUAGE)
-with open('fide.py', 'r') as f:
+def traverse_tree(tree: Tree):
-    text = f.read()
+    cursor = tree.walk()
    # tree = cpp_parser.parse(bytes(text, 'utf-8'))
    tree = python_parser.parse(bytes(text, 'utf-8'))
    root: Node = tree.root_node
-    def dfs(node: Node, indent_level):
+    reached_root = False
-        indent = "".join([' ' * (indent_level * 4)])
+    while reached_root == False:
-        # if node.grammar_name != node.type:
+        yield cursor.node
-        #     print(node)
+        if cursor.goto_first_child():
-        print(indent, '-', f"({node.type})", # node.range,
+            continue
-              node.start_point, node.end_point, node.text)
+        if cursor.goto_next_sibling():
-        for n in node.children:
+            continue
-            dfs(n, indent_level + 1)
+
-    dfs(root, 0)
+        retracing = True
        while retracing:
            if not cursor.goto_parent():
                retracing = False
                reached_root = True
            if cursor.goto_next_sibling():
                retracing = False
 # with open('treesitter.py', 'r') as f:
    # def dfs(node: Node, indent_level):
    #     indent = "".join([' ' * (indent_level * 4)])
    #     # if node.grammar_name != node.type:
    #     #     print(node)
    #     print(indent, '-', f"({node.type})", # node.range,
    #           node.start_point, node.end_point, node.text)
    #     for n in node.children:
    #         dfs(n, indent_level + 1)
    # dfs(root, 0)
@dataclass
 class ParsedRange:
@ -48,9 +64,12 @@ class ParsedRange:
    line_num: int
    crange: Tuple[int, int]
-def parse_file(text: str) -> List[ParsedRange]:
+# def get_ts_nodes(text: str) -> List[ParsedRange]:
-    lines = [] * len()
+def get_ts_nodes(text: str) -> List[Node]:
-
+    tree = c_parser.parse(bytes(text, 'utf-8'))
-
+    # root: Node = tree.root_node
    # query = C_LANGUAGE.query(root.sexp())
    return [n for n in traverse_tree(tree)]