567 lines
8.7 KiB
HTML
567 lines
8.7 KiB
HTML
|
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
|
<HTML><HEAD><TITLE>Man page of Stdlib.Lexing</TITLE>
|
|
</HEAD><BODY>
|
|
<H1>Stdlib.Lexing</H1>
|
|
Section: OCaml library (3o)<BR>Updated: 2020-01-30<BR><A HREF="#index">Index</A>
|
|
<A HREF="/cgi-bin/man/man2html">Return to Main Contents</A><HR>
|
|
|
|
<A NAME="lbAB"> </A>
|
|
<H2>NAME</H2>
|
|
|
|
Stdlib.Lexing - The run-time library for lexers generated by ocamllex.
|
|
<A NAME="lbAC"> </A>
|
|
<H2>Module</H2>
|
|
|
|
Module Stdlib.Lexing
|
|
<A NAME="lbAD"> </A>
|
|
<H2>Documentation</H2>
|
|
|
|
<P>
|
|
Module
|
|
<B>Lexing</B>
|
|
|
|
<BR> :
|
|
<B>(module Stdlib__lexing)</B>
|
|
|
|
<P>
|
|
<P>
|
|
<P>
|
|
<P>
|
|
<P>
|
|
<P>
|
|
<P>
|
|
<P>
|
|
<P>
|
|
|
|
<A NAME="lbAE"> </A>
|
|
<H3>Positions</H3>
|
|
|
|
<P>
|
|
<P>
|
|
|
|
<I>type position </I>
|
|
|
|
= {
|
|
<BR> pos_fname :
|
|
<B>string</B>
|
|
|
|
;
|
|
<BR> pos_lnum :
|
|
<B>int</B>
|
|
|
|
;
|
|
<BR> pos_bol :
|
|
<B>int</B>
|
|
|
|
;
|
|
<BR> pos_cnum :
|
|
<B>int</B>
|
|
|
|
;
|
|
<BR> }
|
|
<P>
|
|
<P>
|
|
A value of type
|
|
<B>position</B>
|
|
|
|
describes a point in a source file.
|
|
<B>pos_fname</B>
|
|
|
|
is the file name;
|
|
<B>pos_lnum</B>
|
|
|
|
is the line number;
|
|
<B>pos_bol</B>
|
|
|
|
is the offset of the beginning of the line (number
|
|
of characters between the beginning of the lexbuf and the beginning
|
|
of the line);
|
|
<B>pos_cnum</B>
|
|
|
|
is the offset of the position (number of
|
|
characters between the beginning of the lexbuf and the position).
|
|
The difference between
|
|
<B>pos_cnum</B>
|
|
|
|
and
|
|
<B>pos_bol</B>
|
|
|
|
is the character
|
|
offset within the line (i.e. the column number, assuming each
|
|
character is one column wide).
|
|
<P>
|
|
See the documentation of type
|
|
<B>lexbuf</B>
|
|
|
|
for information about
|
|
how the lexing engine will manage positions.
|
|
<P>
|
|
<P>
|
|
<P>
|
|
<I>val dummy_pos </I>
|
|
|
|
:
|
|
<B>position</B>
|
|
|
|
<P>
|
|
A value of type
|
|
<B>position</B>
|
|
|
|
, guaranteed to be different from any
|
|
valid position.
|
|
<P>
|
|
<P>
|
|
<P>
|
|
<P>
|
|
|
|
<A NAME="lbAF"> </A>
|
|
<H3>Lexer buffers</H3>
|
|
|
|
<P>
|
|
<P>
|
|
|
|
<I>type lexbuf </I>
|
|
|
|
= {
|
|
<BR> refill_buff :
|
|
<B>lexbuf -> unit</B>
|
|
|
|
;
|
|
<P>
|
|
<B>mutable </B>
|
|
|
|
lex_buffer :
|
|
<B>bytes</B>
|
|
|
|
;
|
|
<P>
|
|
<B>mutable </B>
|
|
|
|
lex_buffer_len :
|
|
<B>int</B>
|
|
|
|
;
|
|
<P>
|
|
<B>mutable </B>
|
|
|
|
lex_abs_pos :
|
|
<B>int</B>
|
|
|
|
;
|
|
<P>
|
|
<B>mutable </B>
|
|
|
|
lex_start_pos :
|
|
<B>int</B>
|
|
|
|
;
|
|
<P>
|
|
<B>mutable </B>
|
|
|
|
lex_curr_pos :
|
|
<B>int</B>
|
|
|
|
;
|
|
<P>
|
|
<B>mutable </B>
|
|
|
|
lex_last_pos :
|
|
<B>int</B>
|
|
|
|
;
|
|
<P>
|
|
<B>mutable </B>
|
|
|
|
lex_last_action :
|
|
<B>int</B>
|
|
|
|
;
|
|
<P>
|
|
<B>mutable </B>
|
|
|
|
lex_eof_reached :
|
|
<B>bool</B>
|
|
|
|
;
|
|
<P>
|
|
<B>mutable </B>
|
|
|
|
lex_mem :
|
|
<B>int array</B>
|
|
|
|
;
|
|
<P>
|
|
<B>mutable </B>
|
|
|
|
lex_start_p :
|
|
<B>position</B>
|
|
|
|
;
|
|
<P>
|
|
<B>mutable </B>
|
|
|
|
lex_curr_p :
|
|
<B>position</B>
|
|
|
|
;
|
|
<BR> }
|
|
<P>
|
|
<P>
|
|
The type of lexer buffers. A lexer buffer is the argument passed
|
|
to the scanning functions defined by the generated scanners.
|
|
The lexer buffer holds the current state of the scanner, plus
|
|
a function to refill the buffer from the input.
|
|
<P>
|
|
Lexers can optionally maintain the
|
|
<B>lex_curr_p</B>
|
|
|
|
and
|
|
<B>lex_start_p</B>
|
|
|
|
position fields. This "position tracking" mode is the default, and
|
|
it corresponds to passing
|
|
<B>~with_positions:true</B>
|
|
|
|
to functions that
|
|
create lexer buffers. In this mode, the lexing engine and lexer
|
|
actions are co-responsible for properly updating the position
|
|
fields, as described in the next paragraph. When the mode is
|
|
explicitly disabled (with
|
|
<B>~with_positions:false</B>
|
|
|
|
), the lexing
|
|
engine will not touch the position fields and the lexer actions
|
|
should be careful not to modify them either; the
|
|
<B>lex_curr_p</B>
|
|
|
|
and
|
|
<B>lex_start_p</B>
|
|
|
|
fields will then always hold the
|
|
<B>dummy_pos</B>
|
|
|
|
invalid
|
|
position. Not tracking positions avoids allocations and memory
|
|
writes and can significantly improve the performance of the lexer
|
|
in contexts where
|
|
<B>lex_start_p</B>
|
|
|
|
and
|
|
<B>lex_curr_p</B>
|
|
|
|
are not needed.
|
|
<P>
|
|
Position tracking mode works as follows. At each token, the lexing
|
|
engine will copy
|
|
<B>lex_curr_p</B>
|
|
|
|
to
|
|
<B>lex_start_p</B>
|
|
|
|
, then change the
|
|
<B>pos_cnum</B>
|
|
|
|
field of
|
|
<B>lex_curr_p</B>
|
|
|
|
by updating it with the number of
|
|
characters read since the start of the
|
|
<B>lexbuf</B>
|
|
|
|
. The other fields
|
|
are left unchanged by the lexing engine. In order to keep them
|
|
accurate, they must be initialised before the first use of the
|
|
lexbuf, and updated by the relevant lexer actions (i.e. at each end
|
|
of line -- see also
|
|
<B>new_line</B>
|
|
|
|
).
|
|
<P>
|
|
<P>
|
|
<P>
|
|
<I>val from_channel </I>
|
|
|
|
:
|
|
<B>?with_positions:bool -> in_channel -> lexbuf</B>
|
|
|
|
<P>
|
|
Create a lexer buffer on the given input channel.
|
|
<B>Lexing.from_channel inchan</B>
|
|
|
|
returns a lexer buffer which reads
|
|
from the input channel
|
|
<B>inchan</B>
|
|
|
|
, at the current reading position.
|
|
<P>
|
|
<P>
|
|
<P>
|
|
<I>val from_string </I>
|
|
|
|
:
|
|
<B>?with_positions:bool -> string -> lexbuf</B>
|
|
|
|
<P>
|
|
Create a lexer buffer which reads from
|
|
the given string. Reading starts from the first character in
|
|
the string. An end-of-input condition is generated when the
|
|
end of the string is reached.
|
|
<P>
|
|
<P>
|
|
<P>
|
|
<I>val from_function </I>
|
|
|
|
:
|
|
<B>?with_positions:bool -> (bytes -> int -> int) -> lexbuf</B>
|
|
|
|
<P>
|
|
Create a lexer buffer with the given function as its reading method.
|
|
When the scanner needs more characters, it will call the given
|
|
function, giving it a byte sequence
|
|
<B>s</B>
|
|
|
|
and a byte
|
|
count
|
|
<B>n</B>
|
|
|
|
. The function should put
|
|
<B>n</B>
|
|
|
|
bytes or fewer in
|
|
<B>s</B>
|
|
|
|
,
|
|
starting at index 0, and return the number of bytes
|
|
provided. A return value of 0 means end of input.
|
|
<P>
|
|
<P>
|
|
<P>
|
|
<I>val with_positions </I>
|
|
|
|
:
|
|
<B>lexbuf -> bool</B>
|
|
|
|
<P>
|
|
Tell whether the lexer buffer keeps track of position fields
|
|
<B>lex_curr_p</B>
|
|
|
|
/
|
|
<B>lex_start_p</B>
|
|
|
|
, as determined by the corresponding
|
|
optional argument for functions that create lexer buffers
|
|
(whose default value is
|
|
<B>true</B>
|
|
|
|
).
|
|
<P>
|
|
When
|
|
<B>with_positions</B>
|
|
|
|
is
|
|
<B>false</B>
|
|
|
|
, lexer actions should not
|
|
modify position fields. Doing so nevertheless could
|
|
re-enable the
|
|
<B>with_positions</B>
|
|
|
|
mode and degrade performance.
|
|
<P>
|
|
<P>
|
|
<P>
|
|
<P>
|
|
|
|
<A NAME="lbAG"> </A>
|
|
<H3>Functions for lexer semantic actions</H3>
|
|
|
|
<P>
|
|
<P>
|
|
|
|
<P>
|
|
<P>
|
|
|
|
The following functions can be called from the semantic actions
|
|
of lexer definitions (the ML code enclosed in braces that
|
|
computes the value returned by lexing functions). They give
|
|
access to the character string matched by the regular expression
|
|
associated with the semantic action. These functions must be
|
|
applied to the argument
|
|
<B>lexbuf</B>
|
|
|
|
, which, in the code generated by
|
|
<B>ocamllex</B>
|
|
|
|
, is bound to the lexer buffer passed to the parsing
|
|
function.
|
|
<P>
|
|
|
|
<P>
|
|
<I>val lexeme </I>
|
|
|
|
:
|
|
<B>lexbuf -> string</B>
|
|
|
|
<P>
|
|
<P>
|
|
<B>Lexing.lexeme lexbuf</B>
|
|
|
|
returns the string matched by
|
|
the regular expression.
|
|
<P>
|
|
<P>
|
|
<P>
|
|
<I>val lexeme_char </I>
|
|
|
|
:
|
|
<B>lexbuf -> int -> char</B>
|
|
|
|
<P>
|
|
<P>
|
|
<B>Lexing.lexeme_char lexbuf i</B>
|
|
|
|
returns character number
|
|
<B>i</B>
|
|
|
|
in
|
|
the matched string.
|
|
<P>
|
|
<P>
|
|
<P>
|
|
<I>val lexeme_start </I>
|
|
|
|
:
|
|
<B>lexbuf -> int</B>
|
|
|
|
<P>
|
|
<P>
|
|
<B>Lexing.lexeme_start lexbuf</B>
|
|
|
|
returns the offset in the
|
|
input stream of the first character of the matched string.
|
|
The first character of the stream has offset 0.
|
|
<P>
|
|
<P>
|
|
<P>
|
|
<I>val lexeme_end </I>
|
|
|
|
:
|
|
<B>lexbuf -> int</B>
|
|
|
|
<P>
|
|
<P>
|
|
<B>Lexing.lexeme_end lexbuf</B>
|
|
|
|
returns the offset in the input stream
|
|
of the character following the last character of the matched
|
|
string. The first character of the stream has offset 0.
|
|
<P>
|
|
<P>
|
|
<P>
|
|
<I>val lexeme_start_p </I>
|
|
|
|
:
|
|
<B>lexbuf -> position</B>
|
|
|
|
<P>
|
|
Like
|
|
<B>lexeme_start</B>
|
|
|
|
, but return a complete
|
|
<B>position</B>
|
|
|
|
instead
|
|
of an offset. When position tracking is disabled, the function
|
|
returns
|
|
<B>dummy_pos</B>
|
|
|
|
.
|
|
<P>
|
|
<P>
|
|
<P>
|
|
<I>val lexeme_end_p </I>
|
|
|
|
:
|
|
<B>lexbuf -> position</B>
|
|
|
|
<P>
|
|
Like
|
|
<B>lexeme_end</B>
|
|
|
|
, but return a complete
|
|
<B>position</B>
|
|
|
|
instead
|
|
of an offset. When position tracking is disabled, the function
|
|
returns
|
|
<B>dummy_pos</B>
|
|
|
|
.
|
|
<P>
|
|
<P>
|
|
<P>
|
|
<I>val new_line </I>
|
|
|
|
:
|
|
<B>lexbuf -> unit</B>
|
|
|
|
<P>
|
|
Update the
|
|
<B>lex_curr_p</B>
|
|
|
|
field of the lexbuf to reflect the start
|
|
of a new line. You can call this function in the semantic action
|
|
of the rule that matches the end-of-line character. The function
|
|
does nothing when position tracking is disabled.
|
|
<P>
|
|
<P>
|
|
<B>Since</B>
|
|
|
|
3.11.0
|
|
<P>
|
|
<P>
|
|
<P>
|
|
<P>
|
|
|
|
<A NAME="lbAH"> </A>
|
|
<H3>Miscellaneous functions</H3>
|
|
|
|
<P>
|
|
<P>
|
|
|
|
<P>
|
|
<I>val flush_input </I>
|
|
|
|
:
|
|
<B>lexbuf -> unit</B>
|
|
|
|
<P>
|
|
Discard the contents of the buffer and reset the current
|
|
position to 0. The next use of the lexbuf will trigger a
|
|
refill.
|
|
<P>
|
|
<P>
|
|
<P>
|
|
|
|
<HR>
|
|
<A NAME="index"> </A><H2>Index</H2>
|
|
<DL>
|
|
<DT id="1"><A HREF="#lbAB">NAME</A><DD>
|
|
<DT id="2"><A HREF="#lbAC">Module</A><DD>
|
|
<DT id="3"><A HREF="#lbAD">Documentation</A><DD>
|
|
<DL>
|
|
<DT id="4"><A HREF="#lbAE">Positions</A><DD>
|
|
<DT id="5"><A HREF="#lbAF">Lexer buffers</A><DD>
|
|
<DT id="6"><A HREF="#lbAG">Functions for lexer semantic actions</A><DD>
|
|
<DT id="7"><A HREF="#lbAH">Miscellaneous functions</A><DD>
|
|
</DL>
|
|
</DL>
|
|
<HR>
|
|
This document was created by
|
|
<A HREF="/cgi-bin/man/man2html">man2html</A>,
|
|
using the manual pages.<BR>
|
|
Time: 00:05:57 GMT, March 31, 2021
|
|
</BODY>
|
|
</HTML>
|