"Fossies" - the Fresh Open Source Software Archive

Member "ponyc-0.33.0/packages/buffered/reader.pony" (1 Nov 2019, 19281 Bytes) of package /linux/misc/ponyc-0.33.0.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Pony source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here.

    1 use "collections"
    2 
    3 class Reader
    4   """
    5   Store network data and provide a parsing interface.
    6 
    7   `Reader` provides a way to extract typed data from a sequence of
    8   bytes. The `Reader` manages the underlying data structures to
    9   provide a read cursor over a contiguous sequence of bytes. It is
   10   useful for decoding data that is received over a network or stored
   11   in a file. Chunks of bytes are added to the `Reader` using the
   12   `append` method, and typed data is extracted using the getter
   13   methods.
   14 
   15   For example, suppose we have a UDP-based network data protocol where
   16   messages consist of the following:
   17 
   18   * `list_size` - the number of items in the following list of items
   19     as a big-endian 32-bit integer
   20   * zero or more items of the following data:
   21     * a big-endian 64-bit floating point number
   22     * a string that starts with a big-endian 32-bit integer that
   23       specifies the length of the string, followed by a number of
   24       bytes that represent the string
   25 
   26   A message would be something like this:
   27 
   28   ```
   29   [message_length][list_size][float1][string1][float2][string2]...
   30   ```
   31 
   32   The following program uses a `Reader` to decode a message of
   33   this type and print its items:
   34 
   35   ```pony
   36   use "buffered"
   37   use "collections"
   38 
   39   class Notify is InputNotify
   40     let _env: Env
   41     new create(env: Env) =>
   42       _env = env
   43     fun ref apply(data: Array[U8] iso) =>
   44       let rb = Reader
   45       rb.append(consume data)
   46       try
   47         while true do
   48           let len = rb.i32_be()?
   49           let items = rb.i32_be()?.usize()
   50           for range in Range(0, items) do
   51             let f = rb.f64_be()?
   52             let str_len = rb.i32_be()?.usize()
   53             let str = String.from_array(rb.block(str_len)?)
   54             _env.out.print("[(" + f.string() + "), (" + str + ")]")
   55           end
   56         end
   57       end
   58 
   59   actor Main
   60     new create(env: Env) =>
   61       env.input(recover Notify(env) end, 1024)
   62   ```
   63   """
   64   embed _chunks: List[(Array[U8] val, USize)] = _chunks.create()
   65   var _available: USize = 0 // unread bytes summed over all chunks
   66 
   67   fun size(): USize =>
   68     """
   69     Return the number of bytes that are available to be read.
   70     """
   71     _available
   72 
   73   fun ref clear() =>
   74     """
   75     Drop every buffered chunk and reset the available byte count to zero.
   76     """
   77     _available = 0
   78     _chunks.clear()
   79 
   80   fun ref append(data: ByteSeq) =>
   81     """
   82     Add a chunk of data. An empty chunk holds no bytes and is ignored.
   83     """
   84     let data_array =
   85       match data
   86       | let data': Array[U8] val => data'
   87       | let data': String => data'.array()
   88       end
   89     if data_array.size() == 0 then return end // `_byte` needs a byte at head
   90     _available = _available + data_array.size()
   91     _chunks.push((data_array, 0))
   92 
   93   fun ref skip(n: USize) ? =>
   94     """
   95     Advance the read cursor by `n` bytes, discarding them.
   96     Raise an error, consuming nothing, if fewer than `n` bytes are available.
   97     """
   98     if n > _available then
   99       error
  100     end
  101 
  102     _available = _available - n
  103     var remaining = n
  104 
  105     while remaining > 0 do
  106       let head = _chunks.head()?
  107       (let chunk, let start) = head()?
  108       let unread = chunk.size() - start
  109 
  110       if remaining < unread then
  111         head()? = (chunk, start + remaining)
  112         break
  113       end
  114 
  115       remaining = remaining - unread
  116       _chunks.shift()?
  117     end
  118 
  119   fun ref block(len: USize): Array[U8] iso^ ? =>
  120     """
  121     Copy the next `len` bytes into a single new array and consume them.
  122     Raise an error if the `Reader` currently holds fewer than `len` bytes;
  123     nothing is consumed in that case.
  124     """
  125     if len > _available then
  126       error
  127     end
  128 
  129     _available = _available - len
  130     var result = recover Array[U8] .> undefined(len) end
  131     var filled = USize(0)
  132 
  133     while filled < len do
  134       let head = _chunks.head()?
  135       (let chunk, let start) = head()?
  136 
  137       let unread = chunk.size() - start
  138       let missing = len - filled
  139       let count = missing.min(unread)
  140 
  141       result = recover
  142         let dest = consume ref result
  143         chunk.copy_to(dest, start, filled, count)
  144         consume dest
  145       end
  146 
  147       if missing < unread then
  148         head()? = (chunk, start + missing)
  149         break
  150       end
  151 
  152       filled = filled + count
  153       _chunks.shift()?
  154     end
  155 
  156     result
  157 
  158 
  159   fun ref read_until(separator: U8): Array[U8] iso^ ? =>
  160     """
  161     Return the bytes that precede the first occurrence of `separator`.
  162     The separator itself is consumed but is not part of the returned array.
  163     Raise an error if the separator is not found. To read a line of text,
  164     prefer line(), which handles both \n and \r\n.
  165     """
  166     let before = block(_distance_of(separator)? - 1)?
  167     u8()? // drop the separator itself
  168     before
  169 
  170   fun ref line(keep_line_breaks: Bool = false): String iso^ ? =>
  171     """
  172     Consume the next line, which ends with \n or \r\n, and return it as a
  173     string. The line break is stripped from the result unless
  174     `keep_line_breaks` is `true`. Raise an error if no \n is buffered yet.
  175     """
  176     let len = _search_length()?
  177 
  178     _available = _available - len
  179     var text = recover String(len) end
  180     var copied = USize(0)
  181 
  182     while copied < len do
  183       let head = _chunks.head()?
  184       (let chunk, let start) = head()?
  185 
  186       let unread = chunk.size() - start
  187       let missing = len - copied
  188       let count = missing.min(unread)
  189 
  190       text.append(chunk, start, count)
  191 
  192       if missing < unread then
  193         head()? = (chunk, start + missing)
  194         break
  195       end
  196 
  197       copied = copied + count
  198       _chunks.shift()?
  199     end
  200 
  201     let line_break_len: USize =
  202       if keep_line_breaks then
  203         0
  204       elseif (len >= 2) and (text.at_offset(-2)? == '\r') then
  205         2
  206       else
  207         1
  208       end
  209     text.truncate(len - line_break_len)
  210 
  211     consume text
  212 
  213   fun ref u8(): U8 ? =>
  214     """
  215     Consume one byte and return it. Raise an error if no data is available.
  216     """
  217     if _available == 0 then
  218       error
  219     else
  220       _byte()?
  221     end
  222 
  223   fun ref i8(): I8 ? =>
  224     """
  225     Get an I8. Errors if no byte is available.
  226     """
  227     u8()?.i8()
  228 
  229   fun ref u16_be(): U16 ? =>
  230     """
  231     Get a big-endian U16. Raise an error if fewer than 2 bytes are available.
  232     """
  233     let width = U16(0).bytewidth()
  234     if _available < width then
  235       error
  236     end
  237 
  238     let head = _chunks.head()?
  239     (var chunk, var pos) = head()?
  240     if (chunk.size() - pos) >= width then
  241       let value =
  242         ifdef bigendian then
  243           chunk.read_u16(pos)?
  244         else
  245           chunk.read_u16(pos)?.bswap()
  246         end
  247 
  248       pos = pos + width
  249       _available = _available - width
  250 
  251       if pos < chunk.size() then
  252         head()? = (chunk, pos)
  253       else
  254         _chunks.shift()?
  255       end
  256       value
  257     else
  258       // the head chunk holds only part of the value: read it byte by byte
  259       (u8()?.u16() << 8) or u8()?.u16()
  260     end
  261 
  262   fun ref u16_le(): U16 ? =>
  263     """
  264     Get a little-endian U16. Errors if fewer than 2 bytes are available.
  265     """
  266     let width = U16(0).bytewidth()
  267     if _available < width then
  268       error
  269     end
  270 
  271     let head = _chunks.head()?
  272     (var chunk, var pos) = head()?
  273     if (chunk.size() - pos) >= width then
  274       let value =
  275         ifdef littleendian then
  276           chunk.read_u16(pos)?
  277         else
  278           chunk.read_u16(pos)?.bswap()
  279         end
  280 
  281       pos = pos + width
  282       _available = _available - width
  283 
  284       if pos < chunk.size() then
  285         head()? = (chunk, pos)
  286       else
  287         _chunks.shift()?
  288       end
  289       value
  290     else
  291       // the head chunk holds only part of the value: read it byte by byte
  292       u8()?.u16() or (u8()?.u16() << 8)
  293     end
  294 
  295   fun ref i16_be(): I16 ? =>
  296     """
  297     Get a big-endian I16. Errors if fewer than 2 bytes are available.
  298     """
  299     u16_be()?.i16()
  300 
  301   fun ref i16_le(): I16 ? =>
  302     """
  303     Get a little-endian I16. Errors if fewer than 2 bytes are available.
  304     """
  305     u16_le()?.i16()
  306 
  307   fun ref u32_be(): U32 ? =>
  308     """
  309     Get a big-endian U32. Raise an error if fewer than 4 bytes are available.
  310     """
  311     let width = U32(0).bytewidth()
  312     if _available < width then
  313       error
  314     end
  315 
  316     let head = _chunks.head()?
  317     (var chunk, var pos) = head()?
  318     if (chunk.size() - pos) >= width then
  319       let value =
  320         ifdef bigendian then
  321           chunk.read_u32(pos)?
  322         else
  323           chunk.read_u32(pos)?.bswap()
  324         end
  325 
  326       pos = pos + width
  327       _available = _available - width
  328 
  329       if pos < chunk.size() then
  330         head()? = (chunk, pos)
  331       else
  332         _chunks.shift()?
  333       end
  334       value
  335     else
  336       // the head chunk holds only part of the value: read it byte by byte
  337       (u8()?.u32() << 24) or (u8()?.u32() << 16) or
  338         (u8()?.u32() << 8) or u8()?.u32()
  339     end
  340 
  341   fun ref u32_le(): U32 ? =>
  342     """
  343     Get a little-endian U32. Errors if fewer than 4 bytes are available.
  344     """
  345     let width = U32(0).bytewidth()
  346     if _available < width then
  347       error
  348     end
  349 
  350     let head = _chunks.head()?
  351     (var chunk, var pos) = head()?
  352     if (chunk.size() - pos) >= width then
  353       let value =
  354         ifdef littleendian then
  355           chunk.read_u32(pos)?
  356         else
  357           chunk.read_u32(pos)?.bswap()
  358         end
  359 
  360       pos = pos + width
  361       _available = _available - width
  362 
  363       if pos < chunk.size() then
  364         head()? = (chunk, pos)
  365       else
  366         _chunks.shift()?
  367       end
  368       value
  369     else
  370       // the head chunk holds only part of the value: read it byte by byte
  371       u8()?.u32() or (u8()?.u32() << 8) or
  372         (u8()?.u32() << 16) or (u8()?.u32() << 24)
  373     end
  374 
  375   fun ref i32_be(): I32 ? =>
  376     """
  377     Get a big-endian I32. Errors if fewer than 4 bytes are available.
  378     """
  379     u32_be()?.i32()
  380 
  381   fun ref i32_le(): I32 ? =>
  382     """
  383     Get a little-endian I32. Errors if fewer than 4 bytes are available.
  384     """
  385     u32_le()?.i32()
  386 
  387   fun ref u64_be(): U64 ? =>
  388     """
  389     Get a big-endian U64. Raise an error if fewer than 8 bytes are available.
  390     """
  391     let width = U64(0).bytewidth()
  392     if _available < width then
  393       error
  394     end
  395 
  396     let head = _chunks.head()?
  397     (var chunk, var pos) = head()?
  398     if (chunk.size() - pos) >= width then
  399       let value =
  400         ifdef bigendian then
  401           chunk.read_u64(pos)?
  402         else
  403           chunk.read_u64(pos)?.bswap()
  404         end
  405 
  406       pos = pos + width
  407       _available = _available - width
  408 
  409       if pos < chunk.size() then
  410         head()? = (chunk, pos)
  411       else
  412         _chunks.shift()?
  413       end
  414       value
  415     else
  416       // the head chunk holds only part of the value: read it byte by byte
  417       (u8()?.u64() << 56) or (u8()?.u64() << 48) or
  418         (u8()?.u64() << 40) or (u8()?.u64() << 32) or
  419         (u8()?.u64() << 24) or (u8()?.u64() << 16) or
  420         (u8()?.u64() << 8) or u8()?.u64()
  421     end
  422 
  423   fun ref u64_le(): U64 ? =>
  424     """
  425     Get a little-endian U64. Errors if fewer than 8 bytes are available.
  426     """
  427     let width = U64(0).bytewidth()
  428     if _available < width then
  429       error
  430     end
  431 
  432     let head = _chunks.head()?
  433     (var chunk, var pos) = head()?
  434     if (chunk.size() - pos) >= width then
  435       let value =
  436         ifdef littleendian then
  437           chunk.read_u64(pos)?
  438         else
  439           chunk.read_u64(pos)?.bswap()
  440         end
  441 
  442       pos = pos + width
  443       _available = _available - width
  444 
  445       if pos < chunk.size() then
  446         head()? = (chunk, pos)
  447       else
  448         _chunks.shift()?
  449       end
  450       value
  451     else
  452       // the head chunk holds only part of the value: read it byte by byte
  453       u8()?.u64() or (u8()?.u64() << 8) or
  454         (u8()?.u64() << 16) or (u8()?.u64() << 24) or
  455         (u8()?.u64() << 32) or (u8()?.u64() << 40) or
  456         (u8()?.u64() << 48) or (u8()?.u64() << 56)
  457     end
  458 
  459   fun ref i64_be(): I64 ? =>
  460     """
  461     Get a big-endian I64. Errors if fewer than 8 bytes are available.
  462     """
  463     u64_be()?.i64()
  464 
  465   fun ref i64_le(): I64 ? =>
  466     """
  467     Get a little-endian I64. Errors if fewer than 8 bytes are available.
  468     """
  469     u64_le()?.i64()
  470 
  471   fun ref u128_be(): U128 ? =>
  472     """
  473     Get a big-endian U128. Errors if fewer than 16 bytes are available.
  474     """
  475     let width = U128(0).bytewidth()
  476     if _available < width then
  477       error
  478     end
  479 
  480     let head = _chunks.head()?
  481     (var chunk, var pos) = head()?
  482     if (chunk.size() - pos) >= width then
  483       let value =
  484         ifdef bigendian then
  485           chunk.read_u128(pos)?
  486         else
  487           chunk.read_u128(pos)?.bswap()
  488         end
  489 
  490       pos = pos + width
  491       _available = _available - width
  492 
  493       if pos < chunk.size() then
  494         head()? = (chunk, pos)
  495       else
  496         _chunks.shift()?
  497       end
  498       value
  499     else
  500       // the head chunk holds only part of the value: read it byte by byte
  501       (u8()?.u128() << 120) or (u8()?.u128() << 112) or
  502         (u8()?.u128() << 104) or (u8()?.u128() << 96) or
  503         (u8()?.u128() << 88) or (u8()?.u128() << 80) or
  504         (u8()?.u128() << 72) or (u8()?.u128() << 64) or
  505         (u8()?.u128() << 56) or (u8()?.u128() << 48) or
  506         (u8()?.u128() << 40) or (u8()?.u128() << 32) or
  507         (u8()?.u128() << 24) or (u8()?.u128() << 16) or
  508         (u8()?.u128() << 8) or u8()?.u128()
  509     end
  510 
  511   fun ref u128_le(): U128 ? =>
  512     """
  513     Get a little-endian U128. Errors if fewer than 16 bytes are available.
  514     """
  515     let width = U128(0).bytewidth()
  516     if _available < width then
  517       error
  518     end
  519 
  520     let head = _chunks.head()?
  521     (var chunk, var pos) = head()?
  522     if (chunk.size() - pos) >= width then
  523       let value =
  524         ifdef littleendian then
  525           chunk.read_u128(pos)?
  526         else
  527           chunk.read_u128(pos)?.bswap()
  528         end
  529 
  530       pos = pos + width
  531       _available = _available - width
  532 
  533       if pos < chunk.size() then
  534         head()? = (chunk, pos)
  535       else
  536         _chunks.shift()?
  537       end
  538       value
  539     else
  540       // the head chunk holds only part of the value: read it byte by byte
  541       u8()?.u128() or (u8()?.u128() << 8) or
  542         (u8()?.u128() << 16) or (u8()?.u128() << 24) or
  543         (u8()?.u128() << 32) or (u8()?.u128() << 40) or
  544         (u8()?.u128() << 48) or (u8()?.u128() << 56) or
  545         (u8()?.u128() << 64) or (u8()?.u128() << 72) or
  546         (u8()?.u128() << 80) or (u8()?.u128() << 88) or
  547         (u8()?.u128() << 96) or (u8()?.u128() << 104) or
  548         (u8()?.u128() << 112) or (u8()?.u128() << 120)
  549     end
  550 
  551   fun ref i128_be(): I128 ? =>
  552     """
  553     Get a big-endian I128. Errors if fewer than 16 bytes are available.
  554     """
  555     u128_be()?.i128()
  556 
  557   fun ref i128_le(): I128 ? =>
  558     """
  559     Get a little-endian I128. Errors if fewer than 16 bytes are available.
  560     """
  561     u128_le()?.i128()
  562 
  563   fun ref f32_be(): F32 ? =>
  564     """
  565     Get a big-endian F32. Errors if fewer than 4 bytes are available.
  566     """
  567     F32.from_bits(u32_be()?)
  568 
  569   fun ref f32_le(): F32 ? =>
  570     """
  571     Get a little-endian F32. Errors if fewer than 4 bytes are available.
  572     """
  573     F32.from_bits(u32_le()?)
  574 
  575   fun ref f64_be(): F64 ? =>
  576     """
  577     Get a big-endian F64. Errors if fewer than 8 bytes are available.
  578     """
  579     F64.from_bits(u64_be()?)
  580 
  581   fun ref f64_le(): F64 ? =>
  582     """
  583     Get a little-endian F64. Errors if fewer than 8 bytes are available.
  584     """
  585     F64.from_bits(u64_le()?)
  586 
  587   fun ref _byte(): U8 ? =>
  588     """
  589     Consume and return the byte under the read cursor of the head chunk.
  590     """
  591     let head = _chunks.head()?
  592     (let chunk, let pos) = head()?
  593     let value = chunk(pos)?
  594 
  595     let next_pos = pos + 1
  596     _available = _available - 1
  597 
  598     if next_pos >= chunk.size() then
  599       _chunks.shift()?
  600     else
  601       head()? = (chunk, next_pos)
  602     end
  603     value
  604 
  605   fun box peek_u8(offset: USize = 0): U8 ? =>
  606     """
  607     Peek at the U8 `offset` bytes ahead of the read cursor without consuming
  608     it. Raise an error if that byte has not been appended yet.
  609     """
  610     _peek_byte(offset)?
  611 
  612   fun box peek_i8(offset: USize = 0): I8 ? =>
  613     """
  614     Peek at the I8 `offset` bytes ahead. Errors if unavailable.
  615     """
  616     peek_u8(offset)?.i8()
  617 
  618   fun box peek_u16_be(offset: USize = 0): U16 ? =>
  619     """
  620     Peek at the big-endian U16 `offset` bytes ahead. Errors if unavailable.
  621     """
  622     (peek_u8(offset)?.u16() << 8) or peek_u8(offset + 1)?.u16()
  623 
  624   fun box peek_u16_le(offset: USize = 0): U16 ? =>
  625     """
  626     Peek at the little-endian U16 `offset` bytes ahead. Errors if unavailable.
  627     """
  628     peek_u8(offset)?.u16() or (peek_u8(offset + 1)?.u16() << 8)
  629 
  630   fun box peek_i16_be(offset: USize = 0): I16 ? =>
  631     """
  632     Peek at the big-endian I16 `offset` bytes ahead. Errors if unavailable.
  633     """
  634     peek_u16_be(offset)?.i16()
  635 
  636   fun box peek_i16_le(offset: USize = 0): I16 ? =>
  637     """
  638     Peek at the little-endian I16 `offset` bytes ahead. Errors if unavailable.
  639     """
  640     peek_u16_le(offset)?.i16()
  641 
  642   fun box peek_u32_be(offset: USize = 0): U32 ? =>
  643     """
  644     Peek at the big-endian U32 `offset` bytes ahead. Errors if unavailable.
  645     """
  646     (peek_u16_be(offset)?.u32() << 16) or peek_u16_be(offset + 2)?.u32()
  647 
  648   fun box peek_u32_le(offset: USize = 0): U32 ? =>
  649     """
  650     Peek at the little-endian U32 `offset` bytes ahead. Errors if unavailable.
  651     """
  652     peek_u16_le(offset)?.u32() or (peek_u16_le(offset + 2)?.u32() << 16)
  653 
  654   fun box peek_i32_be(offset: USize = 0): I32 ? =>
  655     """
  656     Peek at the big-endian I32 `offset` bytes ahead. Errors if unavailable.
  657     """
  658     peek_u32_be(offset)?.i32()
  659 
  660   fun box peek_i32_le(offset: USize = 0): I32 ? =>
  661     """
  662     Peek at the little-endian I32 `offset` bytes ahead. Errors if unavailable.
  663     """
  664     peek_u32_le(offset)?.i32()
  665 
  666   fun box peek_u64_be(offset: USize = 0): U64 ? =>
  667     """
  668     Peek at the big-endian U64 `offset` bytes ahead. Errors if unavailable.
  669     """
  670     (peek_u32_be(offset)?.u64() << 32) or peek_u32_be(offset + 4)?.u64()
  671 
  672   fun box peek_u64_le(offset: USize = 0): U64 ? =>
  673     """
  674     Peek at the little-endian U64 `offset` bytes ahead. Errors if unavailable.
  675     """
  676     peek_u32_le(offset)?.u64() or (peek_u32_le(offset + 4)?.u64() << 32)
  677 
  678   fun box peek_i64_be(offset: USize = 0): I64 ? =>
  679     """
  680     Peek at the big-endian I64 `offset` bytes ahead. Errors if unavailable.
  681     """
  682     peek_u64_be(offset)?.i64()
  683 
  684   fun box peek_i64_le(offset: USize = 0): I64 ? =>
  685     """
  686     Peek at the little-endian I64 `offset` bytes ahead. Errors if unavailable.
  687     """
  688     peek_u64_le(offset)?.i64()
  689 
  690   fun box peek_u128_be(offset: USize = 0): U128 ? =>
  691     """
  692     Peek at the big-endian U128 `offset` bytes ahead. Errors if unavailable.
  693     """
  694     (peek_u64_be(offset)?.u128() << 64) or peek_u64_be(offset + 8)?.u128()
  695 
  696   fun box peek_u128_le(offset: USize = 0): U128 ? =>
  697     """
  698     Peek at the little-endian U128 `offset` bytes ahead. Errors if unavailable.
  699     """
  700     peek_u64_le(offset)?.u128() or (peek_u64_le(offset + 8)?.u128() << 64)
  701 
  702   fun box peek_i128_be(offset: USize = 0): I128 ? =>
  703     """
  704     Peek at the big-endian I128 `offset` bytes ahead. Errors if unavailable.
  705     """
  706     peek_u128_be(offset)?.i128()
  707 
  708   fun box peek_i128_le(offset: USize = 0): I128 ? =>
  709     """
  710     Peek at the little-endian I128 `offset` bytes ahead. Errors if unavailable.
  711     """
  712     peek_u128_le(offset)?.i128()
  713 
  714   fun box peek_f32_be(offset: USize = 0): F32 ? =>
  715     """
  716     Peek at the big-endian F32 `offset` bytes ahead. Errors if unavailable.
  717     """
  718     F32.from_bits(peek_u32_be(offset)?)
  719 
  720   fun box peek_f32_le(offset: USize = 0): F32 ? =>
  721     """
  722     Peek at the little-endian F32 `offset` bytes ahead. Errors if unavailable.
  723     """
  724     F32.from_bits(peek_u32_le(offset)?)
  725 
  726   fun box peek_f64_be(offset: USize = 0): F64 ? =>
  727     """
  728     Peek at the big-endian F64 `offset` bytes ahead. Errors if unavailable.
  729     """
  730     F64.from_bits(peek_u64_be(offset)?)
  731 
  732   fun box peek_f64_le(offset: USize = 0): F64 ? =>
  733     """
  734     Peek at the little-endian F64 `offset` bytes ahead. Errors if unavailable.
  735     """
  736     F64.from_bits(peek_u64_le(offset)?)
  737 
  738   fun box _peek_byte(offset: USize = 0): U8 ? =>
  739     """
  740     Return the byte `offset` positions past the read cursor, leaving the
  741     cursor where it is. Raise an error if that position is not buffered yet.
  742     """
  743     var index = offset
  744     let nodes = _chunks.nodes()
  745 
  746     while true do
  747       let node = nodes.next()?
  748       (let chunk, let start) = node()?
  749       index = index + start
  750 
  751       let chunk_len = chunk.size()
  752       if index < chunk_len then
  753         return chunk(index)?
  754       end
  755 
  756       index = index - chunk_len
  757     end
  758 
  759     error
  760 
  761   fun ref _distance_of(byte: U8): USize ? =>
  762     """
  763     Count the bytes from the cursor through the first `byte`; error if absent.
  764     """
  765     if _chunks.size() == 0 then
  766       error
  767     end
  768 
  769     var node = _chunks.head()?
  770     var scanned: USize = 0
  771 
  772     while true do
  773       (let chunk, let start) = node()?
  774 
  775       try
  776         // `find` yields an index into `chunk`; rebase it on the cursor and
  777         // add 1 so that the distance covers the byte itself.
  778         return (scanned + chunk.find(byte, start)? + 1) - start
  779       end
  780 
  781       scanned = scanned + (chunk.size() - start)
  782 
  783       if not node.has_next() then
  784         break
  785       end
  786 
  787       node = node.next() as ListNode[(Array[U8] val, USize)]
  788     end
  789 
  790     error
  791 
  792   fun ref _search_length(): USize ? =>
  793     """
  794     Get the length of the pending line, counted up to and including its \n.
  795     Raise an error if there is no pending line.
  796     """
  797     _distance_of('\n')?