From a9fa5104837f919f37c107abdaa4e2c10b62c832 Mon Sep 17 00:00:00 2001
From: omniscient <17525998+omnisci3nce@users.noreply.github.com>
Date: Tue, 9 Jul 2024 21:01:55 +1000
Subject: [PATCH] example parsing string to AST

---
 .ocamlformat    |  1 +
 bin/main.ml     |  9 +++++++--
 dune-project    |  3 ++-
 flan.opam       |  1 +
 lib/dune        |  5 +++++
 lib/example.ml  |  1 -
 lib/examples.ml |  4 ++++
 lib/oflan.mly   | 42 ++++++++++++++++++++++++++++++++++++++++++
 lib/olexer.mll  | 35 +++++++++++++++++++++++++++++++++++
 lib/omniflan.ml | 43 +++++++++++++++++++++++++------------------
 lib/parse.ml    | 31 +++++++++++++++++++++++++++++++
 11 files changed, 153 insertions(+), 22 deletions(-)
 create mode 100644 .ocamlformat
 delete mode 100644 lib/example.ml
 create mode 100644 lib/examples.ml
 create mode 100644 lib/oflan.mly
 create mode 100644 lib/olexer.mll
 create mode 100644 lib/parse.ml

diff --git a/.ocamlformat b/.ocamlformat
new file mode 100644
index 0000000..12f51cf
--- /dev/null
+++ b/.ocamlformat
@@ -0,0 +1 @@
+margin=100
\ No newline at end of file
diff --git a/bin/main.ml b/bin/main.ml
index 99d7c38..247911b 100644
--- a/bin/main.ml
+++ b/bin/main.ml
@@ -1,4 +1,9 @@
-open Flan
+open Flan.Parse
 
 let () =
-  Example.print_greeting "mate"
+  let source_str = Flan.Examples.let_bind_int in
+  let lexbuf = Lexing.from_string source_str in
+  Printf.printf "Convert source \"%s\" ->\n" source_str;
+  match parse_program lexbuf with
+  | Ok ast -> print_ast ast
+  | Error msg -> print_endline ("ERROR: \n" ^ msg)
diff --git a/dune-project b/dune-project
index 9cb37dd..eacd961 100644
--- a/dune-project
+++ b/dune-project
@@ -1,4 +1,5 @@
 (lang dune 3.15)
+(using menhir 3.0)
 
 (name flan)
 
@@ -19,7 +20,7 @@
  (name flan)
  (synopsis "A short synopsis")
  (description "A longer description")
- (depends ocaml dune)
+ (depends ocaml dune menhir)
  (tags
   (topics "to describe" your project)))
 
diff --git a/flan.opam b/flan.opam
index d500377..8507c27 100644
--- a/flan.opam
+++ b/flan.opam
@@ -12,6 +12,7 @@ bug-reports: "https://github.com/username/reponame/issues"
 depends: [
   "ocaml"
   "dune" {>= "3.15"}
+  "menhir"
   "odoc" {with-doc}
 ]
 build: [
diff --git a/lib/dune b/lib/dune
index ad39be8..082be58 100644
--- a/lib/dune
+++ b/lib/dune
@@ -1,2 +1,7 @@
 (library
  (name flan))
+
+(menhir
+ (modules oflan))
+
+(ocamllex olexer)
diff --git a/lib/example.ml b/lib/example.ml
deleted file mode 100644
index ce7c639..0000000
--- a/lib/example.ml
+++ /dev/null
@@ -1 +0,0 @@
-let print_greeting name = print_endline ("G'day, " ^ name)
\ No newline at end of file
diff --git a/lib/examples.ml b/lib/examples.ml
new file mode 100644
index 0000000..65c3a8c
--- /dev/null
+++ b/lib/examples.ml
@@ -0,0 +1,4 @@
+(** Examples of syntax / programs as strings that can be imported and tested *)
+
+let let_bind_int = "let x = 10"
+let let_bind_str = "let s = \"hello\" "
diff --git a/lib/oflan.mly b/lib/oflan.mly
new file mode 100644
index 0000000..f8f8a60
--- /dev/null
+++ b/lib/oflan.mly
@@ -0,0 +1,42 @@
+/* Declarations */
+
+%{
+  open Omniflan.Ast
+%}
+
+%token Eof
+%token Newline
+%token Let
+
+%token False
+%token True
+%token <string> Ident
+%token <int> Int
+%token <float> F32
+
+%token Equal
+%token LParen
+%token RParen
+
+%start <Omniflan.Ast.program> prog
+
+%%
+/* Grammar */
+
+expr:
+  | i = Int; { Int i }
+
+stmt:
+  | Let; var_name = Ident; Equal; bound_expr = expr
+    { Let {
+        loc = $startpos;
+        var_name = var_name;
+        bindee = bound_expr
+      }
+    }
+
+toplevel_item:
+  | stmt = stmt { Stmt stmt }
+
+prog:
+  | prog = separated_list(Newline, toplevel_item); Eof { prog }
\ No newline at end of file
diff --git a/lib/olexer.mll b/lib/olexer.mll
new file mode 100644
index 0000000..ba5eb0a
--- /dev/null
+++ b/lib/olexer.mll
@@ -0,0 +1,35 @@
+{
+  open Lexing
+  open Oflan
+
+  exception SyntaxError of string
+
+  let next_line lexbuf =
+    let pos = lexbuf.lex_curr_p in
+    lexbuf.lex_curr_p <-
+      { pos with pos_bol = pos.pos_cnum; (* absolute offset, same unit as pos_cnum *)
+                 pos_lnum = pos.pos_lnum + 1
+      }
+}
+
+let digit = ['0'-'9']
+let digits = digit+
+let alpha = ['a'-'z' 'A'-'Z']
+let ident = (alpha) (alpha|digit|'_')* (* regex for identifier *)
+let whitespace = [' ' '\t']+
+let newline = '\r' | '\n' | "\r\n"
+
+let int = digits
+
+rule read =
+  parse
+  | whitespace { read lexbuf }
+  | newline { next_line lexbuf; read lexbuf } (* NOTE(review): newlines are skipped here, so no Newline token ever reaches the parser *)
+  | int { match int_of_string_opt (Lexing.lexeme lexbuf) with Some n -> Int n | None -> raise (SyntaxError ("Integer literal out of range: " ^ Lexing.lexeme lexbuf)) }
+  | "let" { Let }
+  | ident { Ident (Lexing.lexeme lexbuf) }
+  | '=' { Equal }
+  | '(' { LParen }
+  | ')' { RParen }
+  | eof { Eof }
+  | _ { raise (SyntaxError ("Unexpected char: " ^ Lexing.lexeme lexbuf)) }
\ No newline at end of file
diff --git a/lib/omniflan.ml b/lib/omniflan.ml
index b79ddb4..13047dd 100644
--- a/lib/omniflan.ml
+++ b/lib/omniflan.ml
@@ -1,22 +1,29 @@
+(*
+Notes
 
-type binary_opt =
-  | Add
-  | Subtract
-  | Multiply
-  | Divide
+For now everything will be split into modules inside this one big file while prototyping.
+*)
 
-type expr =
-  | Let of { name: string; bindee: expr }
-  | Binary of { lhs: expr; rhs: expr; operator: binary_opt }
-  | IfElse of { condition: expr }
+module Ast = struct
+  type loc = Lexing.position
+  type unary_op = Negate
+  type binary_op = Add | Subtract | Multiply | Divide
+  type literal = Int of int
 
-type builtin_type =
-  | I32
-  | F32
-  | Bool
-  | Char
+  type expr = Int of int
+  (* | Literal of literal *)
+  (* | BinaryOp of { lhs: expr; rhs: expr; operator: binary_op } *)
+  (* | IfElse of { condition: expr; if_expr: expr; else_expr: expr } *)
 
-module Examples = struct
-  let let_bind_int = "let x = 10"
-  let let_bind_str = "let s = \"hello\" "
-end
\ No newline at end of file
+  and stmt =
+    | Let of { loc : loc; var_name : string; bindee : expr } (* Let binding "let x = 5" *)
+    | FuncDecl (* TODO: arguments *)
+
+  and toplevel_item = Stmt of stmt
+
+  type builtin_type = I32 | F32 | Bool | Char
+  type program = toplevel_item list
+end
+
+module Typer = struct end
+(** This module helps take an untyped AST and produce a typed AST *)
diff --git a/lib/parse.ml b/lib/parse.ml
new file mode 100644
index 0000000..4487362
--- /dev/null
+++ b/lib/parse.ml
@@ -0,0 +1,31 @@
+open Lexing
+
+exception SyntaxError = Olexer.SyntaxError (* same exception the lexer raises, so the handler below catches it *)
+
+(* Formats the line number and character position of the lexer's current position as a string. *)
+let print_error_position lexbuf =
+  let pos = lexbuf.lex_curr_p in
+  Printf.sprintf "Line:%d Position:%d" pos.pos_lnum (pos.pos_cnum - pos.pos_bol + 1)
+
+let parse_program lexbuf =
+  try Ok (Oflan.prog Olexer.read lexbuf) with
+  | SyntaxError msg ->
+    let error_msg = Printf.sprintf "%s: %s\n" (print_error_position lexbuf) msg in
+    Error error_msg
+  | Oflan.Error ->
+    let error_msg = Printf.sprintf "%s: syntax error\n" (print_error_position lexbuf) in
+    Error error_msg
+
+open Omniflan.Ast
+
+let string_of_expr expr = match expr with Int i -> "Int " ^ string_of_int i
+
+let string_of_stmt stmt =
+  match stmt with
+  | Let s -> Printf.sprintf "(%d) Let %s = %s" s.loc.pos_lnum s.var_name (string_of_expr s.bindee)
+  | FuncDecl -> failwith "TODO"
+
+let print_ast prog =
+  List.iter
+    (fun toplevel -> match toplevel with Stmt stmt -> print_endline (string_of_stmt stmt))
+    prog