From 56f3048d2d46163affd3ceb289e7ba3d2d3f13d3 Mon Sep 17 00:00:00 2001 From: Joseph Ferano Date: Fri, 17 Nov 2023 19:11:06 +0700 Subject: [PATCH] Highlight string literals green with treesitter --- fide.py | 69 ++++++++++++++++++++++++++++++++++++++++----------- fide.todo | 3 ++- treesitter.py | 67 +++++++++++++++++++++++++++++++------------------ 3 files changed, 99 insertions(+), 40 deletions(-) diff --git a/fide.py b/fide.py index 59f90b5..0d0a2c7 100755 --- a/fide.py +++ b/fide.py @@ -98,6 +98,7 @@ else: from config import * from editing import * from keybindings import * +from treesitter import * def main(): running = True frame_count = 0 @@ -154,28 +155,66 @@ def main(): # print('error evaluating line: ' + src[3:]) # console.runsource(src[3:]) + cur_col = 0 for i,curr_line in enumerate(current_buffer.lines): + line_txt = curr_line.piece.get_text() + ts_nodes = get_ts_nodes(curr_line.piece.get_text()) + color_ranges = [(0, len(line_txt), 'black')] + # color_ranges = [] + for node in ts_nodes: + if node.grammar_name == 'string_literal': + start_col, end_col = node.start_point[1], node.end_point[1] + print(node.start_point, node.end_point, node.text) + prev_start,prev_end,prev_col = color_ranges[-1] + color = 'green' + if start_col > 0: + color_ranges[-1] = (prev_start, start_col - 1,prev_col) + color_ranges.append((start_col, end_col, color)) + if end_col < len(line_txt): + color_ranges.append((end_col + 1, end_col, color)) + else: + color_ranges[0] = (start_col, end_col, color) + + char_width = 8 + for start,end,color in color_ranges: + render = text_renderer.render(line_txt[start:end], True, color) + screen.blit(render, (start * char_width, i * line_height)) + + continue + # We need to iterate over the treesitter tree, I think that for now + # it can be as easy as going until you find the 'string' grammar name + # So we need to check three conditions to decide when to draw + # 1: Did we hit a newline? + # 2: Did we hit EOF? 
+ # 3: Did we run into a string + + # We need to figure out a way to ask treesitter if some + # tokens are located at the current line that you're + # checking. Since we're operating line by line. Since we + # don't iterate character by character, we can't check if + # we ran into a single or double quote which I guess we + # could then use treesitter to parse. It doesn't make sense + # to do that anyway because while that might solve this + # string task, ideally it's generalized so that as we walk + # the lines we're rendering, we're also checking + # treesitter grammar. But this is tricky because + # treesitter doesn't do anything line by line. + + # So is what we're doing compatible then? Maybe we should + # consider iterating over the lines in a different way and + # our current data structure won't scale at all. Maybe we + # should just use the piece table wholesale after all and + # not use insert mode to generate a temp buffer? That + # sounds like it'll complicate things a lot. + + # Another thing would be to collect all the needed tokens? + line_syntax = syntax_highlights.get_line(i) for grammar in line_syntax: if grammar.name == 'string': color = 'green' else: color = 'black' - # Does this even make sense? Do we even need a piece - # table anymore? Why can't we just use a parsed - # grammar and iterate that and print out the text? - - # I think it makes sense to just grab the string from - # the treesitter grammar What we need to do is iterate - # over each child, keeping track of our position As we - # iterate, we start at row 0, col 0, add any - # whitespace needed so get the first token. If we were - # to do it that way, then we have to generate the - # whitespace and draw it as well. 
- - # The thing I'm not clear on is what is the best way - # to identify a token, because we need to tell when to - # actually write the thing out string = curr_line.piece.string_at(grammar.start, grammar.end - grammar.start) render = text_renderer.render(string, True, color) screen.blit(render, (0, i * line_height)) diff --git a/fide.todo b/fide.todo index 09b5135..1bf7ceb 100644 --- a/fide.todo +++ b/fide.todo @@ -11,11 +11,12 @@ * DONE Pass the file you want to edit as a command line argument * DONE Save buffer function with a keybinding * DONE Install, Import and play around with Treesitter -* TODO Change the color of string literals +* DONE Highlight string literals with treesitter * TODO Incorporate color regions into the buffer/line objects * TODO Create a simple color map based on treesitter keywords * TODO Walk the AST to generate the color regions * TODO Update treesitter AST as the user edits the file +* TODO Add scrolling based on cursor position * TODO Add reverse parsing of keymaps to show users what they're typing diff --git a/treesitter.py b/treesitter.py index 73e2aa6..d0ddf20 100644 --- a/treesitter.py +++ b/treesitter.py @@ -1,4 +1,4 @@ -from tree_sitter import Language, Parser, Node +from tree_sitter import Language, Parser, Node, TreeCursor, Tree from typing import Tuple, List from dataclasses import dataclass @@ -17,30 +17,46 @@ from dataclasses import dataclass # ] # ) C_LANGUAGE = Language('build/compiled-languages.so', 'c') -CPP_LANGUAGE = Language('build/compiled-languages.so', 'cpp') -PYTHON_LANGUAGE = Language('build/compiled-languages.so', 'python') +# CPP_LANGUAGE = Language('build/compiled-languages.so', 'cpp') +# PYTHON_LANGUAGE = Language('build/compiled-languages.so', 'python') # PY_LANGUAGE = Language('build/my-languages.so', 'python') -cpp_parser = Parser() -cpp_parser.set_language(CPP_LANGUAGE) -python_parser = Parser() -python_parser.set_language(PYTHON_LANGUAGE) +c_parser: Parser = Parser() +c_parser.set_language(C_LANGUAGE) 
+# cpp_parser = Parser() +# cpp_parser.set_language(CPP_LANGUAGE) +# python_parser = Parser() +# python_parser.set_language(PYTHON_LANGUAGE) -with open('fide.py', 'r') as f: - text = f.read() - # tree = cpp_parser.parse(bytes(text, 'utf-8')) - tree = python_parser.parse(bytes(text, 'utf-8')) - root: Node = tree.root_node +def traverse_tree(tree: Tree): + cursor = tree.walk() - def dfs(node: Node, indent_level): - indent = "".join([' ' * (indent_level * 4)]) - # if node.grammar_name != node.type: - # print(node) - print(indent, '-', f"({node.type})", # node.range, - node.start_point, node.end_point, node.text) - for n in node.children: - dfs(n, indent_level + 1) - dfs(root, 0) + reached_root = False + while reached_root == False: + yield cursor.node + if cursor.goto_first_child(): + continue + if cursor.goto_next_sibling(): + continue + + retracing = True + while retracing: + if not cursor.goto_parent(): + retracing = False + reached_root = True + if cursor.goto_next_sibling(): + retracing = False + +# with open('treesitter.py', 'r') as f: + # def dfs(node: Node, indent_level): + # indent = "".join([' ' * (indent_level * 4)]) + # # if node.grammar_name != node.type: + # # print(node) + # print(indent, '-', f"({node.type})", # node.range, + # node.start_point, node.end_point, node.text) + # for n in node.children: + # dfs(n, indent_level + 1) + # dfs(root, 0) @dataclass class ParsedRange: @@ -48,9 +64,12 @@ class ParsedRange: line_num: int crange: Tuple[int, int] -def parse_file(text: str) -> List[ParsedRange]: - lines = [] * len() - +# def get_ts_nodes(text: str) -> List[ParsedRange]: +def get_ts_nodes(text: str) -> List[Node]: + tree = c_parser.parse(bytes(text, 'utf-8')) + # root: Node = tree.root_node + # query = C_LANGUAGE.query(root.sexp()) + return [n for n in traverse_tree(tree)]