Highlight string literals green with treesitter

This commit is contained in:
Joseph Ferano 2023-11-17 19:11:06 +07:00
parent 039b671973
commit 56f3048d2d
3 changed files with 99 additions and 40 deletions

69
fide.py
View File

@ -98,6 +98,7 @@ else:
from config import * from config import *
from editing import * from editing import *
from keybindings import * from keybindings import *
from treesitter import *
def main(): def main():
running = True running = True
frame_count = 0 frame_count = 0
@ -154,28 +155,66 @@ def main():
# print('error evaluating line: ' + src[3:]) # print('error evaluating line: ' + src[3:])
# console.runsource(src[3:]) # console.runsource(src[3:])
cur_col = 0
for i,curr_line in enumerate(current_buffer.lines): for i,curr_line in enumerate(current_buffer.lines):
line_txt = curr_line.piece.get_text()
ts_nodes = get_ts_nodes(curr_line.piece.get_text())
color_ranges = [(0, len(line_txt), 'black')]
# color_ranges = []
for node in ts_nodes:
if node.grammar_name == 'string_literal':
start_col, end_col = node.start_point[1], node.end_point[1]
print(node.start_point, node.end_point, node.text)
prev_start,prev_end,prev_col = color_ranges[-1]
color = 'green'
if start_col > 0:
color_ranges[-1] = (prev_start, start_col - 1,prev_col)
color_ranges.append((start_col, end_col, color))
if end_col < len(line_txt):
color_ranges.append((end_col + 1, end_col, color))
else:
color_ranges[0] = (start_col, end_col, color)
char_width = 8
for start,end,color in color_ranges:
render = text_renderer.render(line_txt[start:end], True, color)
screen.blit(render, (start * char_width, i * line_height))
continue
# We need to iterate over the treesitter tree, I think that for now
# it can be as easy as going until you find the 'string' grammar name
# So we need to check three conditions to decide when to draw
# 1: Did we hit a newline?
# 2: Did we hit EOF?
# 3: Did we run into a string
# We need to figure out a way to ask treesitter if some
# tokens are located at the current line that you're
# checking. Since we're operating line by line. Since we
# don't iterate character by character, we can't check if
# we ran into a single or double quote, which I guess we
# could then use treesitter to parse. It doesn't make sense
# to do that anyway because while that might solve this
# string task, ideally it's generalized so that as we walk
# the lines we're rendering, we're also checking
# treesitter grammar. But this is tricky because
# treesitter doesn't do anything line by line.
# So is what we're doing compatible then? maybe we should
# consider iterating over the lines in a different way and
# our current data structure won't scale at all. Maybe we
# should just use the piece table wholesale after all and
# not use insert mode to generate a temp buffer? That
# sounds like it'll complicate things a lot.
# Another thing would be to collect all the needed tokens?
line_syntax = syntax_highlights.get_line(i) line_syntax = syntax_highlights.get_line(i)
for grammar in line_syntax: for grammar in line_syntax:
if grammar.name == 'string': if grammar.name == 'string':
color = 'green' color = 'green'
else: else:
color = 'black' color = 'black'
# Does this even make sense? Do we even need a piece
# table anymore? Why can't we just use a parsed
# grammar and iterate that and print out the text?
# I think it makes sense to just grab the string from
# the treesitter grammar. What we need to do is iterate
# over each child, keeping track of our position. As we
# iterate, we start at row 0, col 0, add any
# whitespace needed so get the first token. If we were
# to do it that way, then we have to generate the
# whitespace and draw it as well.
# The thing I'm not clear on is what is the best way
# to identify a token, because we need to tell when to
# actually write the thing out
string = curr_line.piece.string_at(grammar.start, grammar.end - grammar.start) string = curr_line.piece.string_at(grammar.start, grammar.end - grammar.start)
render = text_renderer.render(string, True, color) render = text_renderer.render(string, True, color)
screen.blit(render, (0, i * line_height)) screen.blit(render, (0, i * line_height))

View File

@ -11,11 +11,12 @@
* DONE Pass the file you want to edit as a command line argument * DONE Pass the file you want to edit as a command line argument
* DONE Save buffer function with a keybinding * DONE Save buffer function with a keybinding
* DONE Install, Import and play around with Treesitter * DONE Install, Import and play around with Treesitter
* TODO Change the color of string literals * DONE Highlight string literals with treesitter
* TODO Incorporate color regions into the buffer/line objects * TODO Incorporate color regions into the buffer/line objects
* TODO Create a simple color map based on treesitter keywords * TODO Create a simple color map based on treesitter keywords
* TODO Walk the AST to generate the color regions * TODO Walk the AST to generate the color regions
* TODO Update treesitter AST as the user edits the file * TODO Update treesitter AST as the user edits the file
* TODO Add scrolling based on cursor position
* TODO Add reverse parsing of keymaps to show users what they're typing * TODO Add reverse parsing of keymaps to show users what they're typing

View File

@ -1,4 +1,4 @@
from tree_sitter import Language, Parser, Node from tree_sitter import Language, Parser, Node, TreeCursor, Tree
from typing import Tuple, List from typing import Tuple, List
from dataclasses import dataclass from dataclasses import dataclass
@ -17,30 +17,46 @@ from dataclasses import dataclass
# ] # ]
# ) # )
C_LANGUAGE = Language('build/compiled-languages.so', 'c') C_LANGUAGE = Language('build/compiled-languages.so', 'c')
CPP_LANGUAGE = Language('build/compiled-languages.so', 'cpp') # CPP_LANGUAGE = Language('build/compiled-languages.so', 'cpp')
PYTHON_LANGUAGE = Language('build/compiled-languages.so', 'python') # PYTHON_LANGUAGE = Language('build/compiled-languages.so', 'python')
# PY_LANGUAGE = Language('build/my-languages.so', 'python') # PY_LANGUAGE = Language('build/my-languages.so', 'python')
cpp_parser = Parser() c_parser: Parser = Parser()
cpp_parser.set_language(CPP_LANGUAGE) c_parser.set_language(C_LANGUAGE)
python_parser = Parser() # cpp_parser = Parser()
python_parser.set_language(PYTHON_LANGUAGE) # cpp_parser.set_language(CPP_LANGUAGE)
# python_parser = Parser()
# python_parser.set_language(PYTHON_LANGUAGE)
def traverse_tree(tree: Tree):
    """Yield every node of *tree* in depth-first, pre-order sequence.

    The walk is iterative: it drives a single cursor obtained from
    ``tree.walk()`` instead of recursing over ``node.children``.

    Args:
        tree: Object whose ``walk()`` returns a cursor exposing ``node``,
            ``goto_first_child()``, ``goto_next_sibling()`` and
            ``goto_parent()``, each ``goto_*`` returning a truthy value
            when the cursor actually moved.

    Yields:
        ``cursor.node`` for each position visited, parents before their
        children and siblings in cursor order.
    """
    cursor = tree.walk()

    while True:
        yield cursor.node

        # Descend first; failing that, step sideways.
        if cursor.goto_first_child() or cursor.goto_next_sibling():
            continue

        # Leaf with no sibling left: climb until some ancestor has an
        # unvisited sibling. Failing to climb means we are back at the
        # cursor's starting node and the traversal is complete.
        while True:
            if not cursor.goto_parent():
                return
            if cursor.goto_next_sibling():
                break
# with open('treesitter.py', 'r') as f:
# def dfs(node: Node, indent_level):
# indent = "".join([' ' * (indent_level * 4)])
# # if node.grammar_name != node.type:
# # print(node)
# print(indent, '-', f"({node.type})", # node.range,
# node.start_point, node.end_point, node.text)
# for n in node.children:
# dfs(n, indent_level + 1)
# dfs(root, 0)
@dataclass @dataclass
class ParsedRange: class ParsedRange:
@ -48,9 +64,12 @@ class ParsedRange:
line_num: int line_num: int
crange: Tuple[int, int] crange: Tuple[int, int]
def get_ts_nodes(text: str) -> List[Node]:
    """Parse *text* with the module-level C parser and list its syntax nodes.

    Args:
        text: Source text to parse; it is encoded as UTF-8 before being
            handed to ``c_parser``.

    Returns:
        Every node produced by ``traverse_tree`` for the parsed tree, in
        the order that generator yields them.
    """
    tree = c_parser.parse(text.encode('utf-8'))
    # Materialise the generator so callers can iterate the nodes repeatedly.
    return list(traverse_tree(tree))