1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
module ST = Source_token
open Trivia
include Lexer_lib

type source_token = ST.token * Lexing.position * Lexing.position

type parser_token = Parser.token * Lexing.position * Lexing.position

let first (t, _, _) = t

let opt_is_whitespace : 'a trivia option -> bool =
 fun x -> Option.fold ~none:false ~some:ST.is_whitespace x

let tokenizer (mode : Lexer_lib.mode) (lexbuf : Lexing.lexbuf) :
    (unit -> parser_token) * triv_table =
  let trivia_table : triv_table = PosHashtbl.create 1013 in
  let lookahead : source_token option ref = ref None in
  (* We keep the trailing whitespace of the previous token
     around so we can disambiguate operators *)
  let last_trailing : line_feed trivia list ref = ref [] in
  let next () : source_token =
    match !lookahead with
    | Some t ->
        lookahead := None;
        t
    | None ->
        let token = Source_lexer.token mode lexbuf in
        let start = Lexing.lexeme_start_p lexbuf in
        let end_ = Lexing.lexeme_end_p lexbuf in
        (token, start, end_)
  in
  let peek () : source_token =
    match !lookahead with
    | None ->
        let token = next () in
        lookahead := Some token;
        token
    | Some t -> t
  in
  let next_parser_token () : parser_token =
    let rec eat_leading acc =
      let token, start, end_ = next () in
      match ST.to_parser_token token with
      (* A semicolon immediately followed by a newline gets a special token for the REPL *)
      | Ok Parser.SEMICOLON when ST.is_line_feed (first (peek ())) ->
          (List.rev acc, (Parser.SEMICOLON_EOL, start, end_))
      (* >> can either close two nested type applications, or be a shift
         operator depending on whether it's prefixed with whitespace *)
      | Ok Parser.GT
        when opt_is_whitespace (Lib.List.hd_opt (acc @ List.rev !last_trailing))
             && first (peek ()) = ST.GT ->
          let _, _, end_ = next () in
          (acc, (Parser.SHROP, start, end_))
      | Ok t -> (List.rev acc, (t, start, end_))
      | Error t -> eat_leading (t :: acc)
    in
    let rec eat_trailing acc =
      match ST.is_lineless_trivia (first (peek ())) with
      | Some t ->
          ignore (next ());
          eat_trailing (t :: acc)
      | None -> List.rev acc
    in
    let leading_trivia, (token, start, end_) = eat_leading [] in
    let trailing_trivia = eat_trailing [] in
    let leading_ws () =
      opt_is_whitespace (Lib.List.last_opt (!last_trailing @ leading_trivia))
    in
    let trailing_ws () =
      opt_is_whitespace (Lib.List.hd_opt trailing_trivia)
      || (trailing_trivia = [] && ST.is_line_feed (first (peek ())))
    in
    (* Disambiguating operators based on whitespace *)
    let token =
      match token with
      | Parser.GT when leading_ws () && trailing_ws () -> Parser.GTOP
      | Parser.LT when leading_ws () && trailing_ws () -> Parser.LTOP
      | _ -> token
    in
    last_trailing := List.map (map_trivia absurd) trailing_trivia;
    PosHashtbl.add trivia_table (pos_of_lexpos start)
      { leading_trivia; trailing_trivia };
    (token, start, end_)
  in
  (next_parser_token, trivia_table)