From 039b671973536a7f6f146f0842e58a956c601c59 Mon Sep 17 00:00:00 2001 From: Joseph Ferano Date: Sat, 11 Nov 2023 15:15:47 +0700 Subject: [PATCH] Playing around with treesitter --- .gitignore | 1 + fide.py | 26 ++++++++++++++++++++++-- fide.todo | 7 ++++++- treesitter.py | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 87 insertions(+), 3 deletions(-) create mode 100644 treesitter.py diff --git a/.gitignore b/.gitignore index c9e0e9c..3ded613 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /cosmic-text/ /__pycache__/ /nim-version/fide +/build/ diff --git a/fide.py b/fide.py index 5529287..59f90b5 100755 --- a/fide.py +++ b/fide.py @@ -155,8 +155,30 @@ def main(): # console.runsource(src[3:]) for i,curr_line in enumerate(current_buffer.lines): - render = text_renderer.render(curr_line.piece.get_text(), True, 'black') - screen.blit(render, (0, i * line_height)) + line_syntax = syntax_highlights.get_line(i) + for grammar in line_syntax: + if grammar.name == 'string': + color = 'green' + else: + color = 'black' + # Does this even make sense? Do we even need a piece + # table anymore? Why can't we just use a parsed + # grammar and iterate that and print out the text? + + # I think it makes sense to just grab the string from + # the treesitter grammar. What we need to do is iterate + # over each child, keeping track of our position. As we + # iterate, we start at row 0, col 0, add any + # whitespace needed to get the first token. If we were + # to do it that way, then we have to generate the + # whitespace and draw it as well. 
+ + # The thing I'm not clear on is what is the best way + # to identify a token, because we need to tell when to + # actually write the thing out + string = curr_line.piece.string_at(grammar.start, grammar.end - grammar.start) + render = text_renderer.render(string, True, color) + screen.blit(render, (0, i * line_height)) if cursor_on and cursor_flash < 0.5: cursor_draw(cursor) diff --git a/fide.todo b/fide.todo index 15e4b42..09b5135 100644 --- a/fide.todo +++ b/fide.todo @@ -10,7 +10,12 @@ * DONE Keymap fallthrough/precedence stacking * DONE Pass the file you want to edit as a command line argument * DONE Save buffer function with a keybinding -* TODO Add reverse parsing of keymaps to show users what they're typing +* DONE Install, Import and play around with Treesitter +* TODO Change the color of string literals +* TODO Incorporate color regions into the buffer/line objects +* TODO Create a simple color map based on treesitter keywords +* TODO Walk the AST to generate the color regions +* TODO Update treesitter AST as the user edits the file * TODO Add reverse parsing of keymaps to show users what they're typing diff --git a/treesitter.py b/treesitter.py new file mode 100644 index 0000000..73e2aa6 --- /dev/null +++ b/treesitter.py @@ -0,0 +1,56 @@ +from tree_sitter import Language, Parser, Node +from typing import Tuple, List +from dataclasses import dataclass + +# Language.build_library( +# # Store the library in the `build` directory +# 'build/compiled-languages.so', + +# # Include one or more languages +# [ +# '/home/joe/Repositories/tree-sitter/languages/tree-sitter-cpp', +# '/home/joe/Repositories/tree-sitter/languages/tree-sitter-c', +# '/home/joe/Repositories/tree-sitter/languages/tree-sitter-rust', +# '/home/joe/Repositories/tree-sitter/languages/tree-sitter-python', +# # 'vendor/tree-sitter-javascript', +# # 'vendor/tree-sitter-python' +# ] +# ) +C_LANGUAGE = Language('build/compiled-languages.so', 'c') +CPP_LANGUAGE = 
Language('build/compiled-languages.so', 'cpp') +PYTHON_LANGUAGE = Language('build/compiled-languages.so', 'python') +# PY_LANGUAGE = Language('build/my-languages.so', 'python') + +cpp_parser = Parser() +cpp_parser.set_language(CPP_LANGUAGE) +python_parser = Parser() +python_parser.set_language(PYTHON_LANGUAGE) + +with open('fide.py', 'r') as f: + text = f.read() + # tree = cpp_parser.parse(bytes(text, 'utf-8')) + tree = python_parser.parse(bytes(text, 'utf-8')) + root: Node = tree.root_node + + def dfs(node: Node, indent_level): + indent = "".join([' ' * (indent_level * 4)]) + # if node.grammar_name != node.type: + # print(node) + print(indent, '-', f"({node.type})", # node.range, + node.start_point, node.end_point, node.text) + for n in node.children: + dfs(n, indent_level + 1) + dfs(root, 0) + +@dataclass +class ParsedRange: + name: str + line_num: int + crange: Tuple[int, int] + +def parse_file(text: str) -> List[ParsedRange]: + lines = [] * len() + + + +