Fuzion • APIs • Standard APIs • io/buffered/reader.fz
io/buffered/reader.fz


# This file is part of the Fuzion language implementation.
#
# The Fuzion language implementation is free software: you can redistribute it
# and/or modify it under the terms of the GNU General Public License as published
# by the Free Software Foundation, version 3 of the License.
#
# The Fuzion language implementation is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
# License for more details.
#
# You should have received a copy of the GNU General Public License along with The
# Fuzion language implementation.  If not, see <https://www.gnu.org/licenses/>.


# -----------------------------------------------------------------------
#
#  Tokiwa Software GmbH, Germany
#
#  Source code of Fuzion standard library feature io.buffered.reader
#
# -----------------------------------------------------------------------


# buffered.reader effect allows buffered reading
# by using the given Read_Provider
#
#   rp       -- the provider the buffer is filled from
#   buf_size -- number of bytes requested from `rp` by the argument-less `read`
#   buffer   -- bytes obtained from `rp` that have not been discarded yet, or
#               `io.end_of_file` once `rp` reported the end of file
#
# note: anything in the buffer when effect is uninstalled will be discarded.
#
public reader(private rp Read_Provider, buf_size i32, buffer array u8 | io.end_of_file) : effect effect_mode.plain is


  # install this effect and execute 'f'. Wrap the result of 'f' into an
  # 'outcome' if 'f' returns normally, otherwise if 'f' is aborted early
  # via a call to 'raise' wrap the 'error' passed to 'raise' into the
  # resulting 'outcome'.
  #
  public with(R type, f ()->R) outcome R =>
    try reader R (() -> run f (() -> exit 1))


  # terminate immediately with the given error wrapped in 'outcome'.
  #
  raise(e error) =>
    (try reader).env.raise e


  # read returns the current buffer or end of file.
  # in case the buffer is empty it fills the buffer
  # with up to buf_size bytes before returning it.
  #
  public read => read buf_size


  # read returns the current buffer or end of file.
  # in case the buffer is empty it fills the buffer
  # with up to max_n bytes before returning it.
  #
  # reading does not consume anything, use `discard` or `discard_all`
  # to remove bytes from the buffer.
  #
  public read(max_n i32) array u8 | io.end_of_file
  post result ? io.end_of_file => true | a array => !a.is_empty
  =>
    match buffer
      b array =>
        if b.is_empty
          fill_buffer max_n
        buffer
      io.end_of_file => io.end_of_file



  # fill the currently empty buffer with up to max_n bytes
  #
  # the buffer becomes `io.end_of_file` if the provider reports end of file,
  # an error reported by the provider is passed on to `raise`.
  #
  private fill_buffer(max_n i32)
  pre match buffer
        a array => a.is_empty
        io.end_of_file => false
  is
    match rp.read max_n
      a array =>
        set buffer := a
        replace
      eof io.end_of_file =>
        set buffer := eof
        replace
      e error => raise e



  # discard n items from buffer
  #
  # if n exceeds the number of buffered items, the whole buffer is
  # discarded. Nothing happens once end of file has been reached.
  #
  public discard(n i32)
  pre n >= 0
  is
    match buffer
      b array =>
        # clamp to the buffer length, slicing beyond the end of `b` is not
        # permitted but callers may have accounted for bytes that were
        # already dropped by `discard_all`.
        k := if n < b.length then n else b.length
        set buffer := (b.slice k b.length).as_array
        replace
      io.end_of_file =>



  # discard complete buffer
  #
  # Nothing happens once end of file has been reached.
  #
  # NYI naming this feature discard leads to error: Duplicate feature declaration
  public discard_all
  is
    match buffer
      b array =>
        set buffer := []
        replace
      io.end_of_file =>



# convenience accessor for the `buffered.reader` instance that is
# currently installed in the effect environment
#
reader => io.buffered.reader.env



# read up to n bytes using the currently installed byte reader effect
#
# At most n bytes are returned and removed from the reader's buffer, any
# buffered bytes beyond that stay available for subsequent reads.
#
# if the returned sequence is empty or count is less than n, end of file has been reached.
#
public read_bytes(n i32) Sequence u8 ! reader =>

  mi : mutate is

  mi.go ()->

    res := (mutate.array u8).new mi

    # n_read counts the bytes collected so far, r is the number of bytes
    # consumed in the current iteration or -1 at end of file.
    for n_read := 0, n_read + r
    while n_read < n
      r := match reader.read
          io.end_of_file => -1
          a (array u8) =>
            # the buffer may hold more than what is still missing, so
            # consume only as much as needed to not exceed n
            missing := n - n_read
            used := if a.length < missing then a.length else missing
            reader.discard used
            for b in (a.slice 0 used) do
              res.add b
            used
    until r < 0
    res.as_array


# read string, up to n codepoints or until end of file
# requires `buffered.reader` effect to be installed.
#
# if end of file is encountered and no codepoint could be read, the error
# "-- end of file --" is raised via `reader.raise`.
#
public read_string(n i32) String ! reader
pre n >= 0
=>

  # turn the bytes in `a` into a string and return the leading codepoints
  # of it, as strings: at most `max` of them and only those in front of the
  # first decoding error. The `error` case in the `map` step is not expected
  # to occur since `take_while` stops at the first error.
  #
  # side effect: the number of bytes making up the returned codepoints is
  # discarded from the reader's buffer.
  #
  # NOTE(review): `a` may start with bytes carried over via `rest` that were
  # already removed by `discard_all`, so `bytes_used` can be larger than the
  # current buffer -- confirm `reader.discard` copes with that.
  #
  take_valid_codepoints(a Sequence u8, max i32) =>
    v := String.from_bytes a
      .codepoints_and_errors
      .take_while x->
        match x
          codepoint => true
          error => false
      .take max
      .map String x->
        match x
          c codepoint => c
          error => exit 1
      .as_array

    bytes_used := (v.map c->c.as_string.byte_length).fold i32.sum
    reader.discard bytes_used
    v

  for
    # is_eof:          does the reader report end of file?
    # next_bytes:      the reader's current buffer (empty at end of file), after
    #                  the first iteration prefixed by the carried over `rest`
    # next_codepoints: codepoints decoded from `next_bytes`, limited to what is
    #                  still missing to reach `n`
    is_eof                 := reader.read ? io.end_of_file => true | a array => false
    next_bytes Sequence u8 := (reader.read ? io.end_of_file => []   | a array => a), rest ++ (reader.read ? io.end_of_file => [] | a array => a)
    next_codepoints        := take_valid_codepoints next_bytes n, take_valid_codepoints next_bytes n-codepoint_count
    # if we did not use any bytes and `next_bytes` potentially does not contain enough bytes for a codepoint,
    # we trigger a `discard_all` and remember what we read so far via `rest`.
    # this is necessary e.g. for stdin where we read one byte at a time.
    rest Sequence u8       := if n>0 && next_codepoints.is_empty && next_bytes.count < 4 then reader.discard_all; next_bytes else []
    # codepoint_count: total number of codepoints collected so far
    # res:             all codepoints collected so far
    codepoint_count        := next_codepoints.count, codepoint_count+next_codepoints.count
    res Sequence String    := next_codepoints, res ++ next_codepoints
  while !is_eof && codepoint_count < n
  else
    # the loop ended: end of file was reached or `n` codepoints were collected
    if is_eof && res.is_empty
      # the trailing `""` presumably only gives this branch the type String
      reader.raise (error "-- end of file --"); ""
    else
      String.join res


# use the currently installed byte reader effect
# to read until a line feed occurs.
#
# returns the line without the line feed, the line feed itself is removed
# from the reader's buffer. If the reader is at end of file when this is
# called, `io.end_of_file` is returned instead. If end of file is reached
# after some bytes were read, the bytes read so far are returned.
#
public read_line String|io.end_of_file ! reader =>

  if reader.read ? io.end_of_file => true | * => false
    io.end_of_file
  else
    mi : mutate is

    mi.go String ()->
      # bytes of the line collected so far
      res := (mutate.array u8).new mi

      # the loop consists of a condition only: every evaluation handles one
      # buffer returned by `reader.read` and results in true as long as no
      # line feed and no end of file was found.
      while
        match reader.read
          io.end_of_file =>
            false
          a array =>

            # trailing carriage returns are dropped
            #
            # NOTE(review): this is applied to every chunk, also to a buffer
            # without line feed, so a carriage return that is the last byte of
            # a buffer is dropped even if no line feed follows -- confirm
            # this is intended.
            add_to_res(a Sequence u8) is
              if !a.is_empty
                a1 := if a.last = character_encodings.ascii.cr
                          (a.slice 0 a.count-1)
                        else
                          a
                for b in a1 do
                  res.add b

            match (container.searchable_sequence a).index_of character_encodings.ascii.lf
              # line feed found: take the bytes in front of it, remove those
              # bytes and the line feed from the buffer and stop
              idx i32 =>
                add_to_res (a.slice 0 idx)
                reader.discard idx+1
                false
              # no line feed in this buffer: take all of it and continue
              nil =>
                add_to_res a
                reader.discard_all
                true

      # result is a String whose utf8 bytes are the collected bytes
      ref : String
        utf8 Sequence u8 := res.as_array


# Consume the input of the currently installed `buffered.reader` in chunks
# of up to 8192 bytes until a chunk is empty, i.e., end_of_file is reached,
# and return everything that was read as one array of bytes.
#
public read_fully array u8 ! reader =>
  for
    acc Sequence u8 := [], acc ++ chunk
    chunk := io.buffered.read_bytes 8192
  while !chunk.is_empty
  else
    acc.as_array


# Read the complete input and return its lines: the input is split at
# linefeed (ASCII 10) characters and a carriage return (ASCII 13) at the
# end of a resulting string is removed.
#
public read_lines array String ! reader =>
  (String.from_bytes read_fully).split "\n"
    .map line->
      if line.ends_with "\r"
        line.substring 0 line.byte_length-1
      else
        line
    .as_array
last changed: 2024-03-07
next: io/dir.fz