"Fossies" - the Fresh Open Source Software Archive

Member "seed7/lib/utf8.s7i" (6 Jun 2020, 9525 Bytes) of package /linux/misc/seed7_05_20210223.tgz:


As a special service, "Fossies" has tried to format the requested text file into HTML format (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here.

    1 
    2 (********************************************************************)
    3 (*                                                                  *)
    4 (*  utf8.s7i      File implementation type for UTF-8 files          *)
    5 (*  Copyright (C) 2005  Thomas Mertes                               *)
    6 (*                                                                  *)
    7 (*  This file is part of the Seed7 Runtime Library.                 *)
    8 (*                                                                  *)
    9 (*  The Seed7 Runtime Library is free software; you can             *)
   10 (*  redistribute it and/or modify it under the terms of the GNU     *)
   11 (*  Lesser General Public License as published by the Free Software *)
   12 (*  Foundation; either version 2.1 of the License, or (at your      *)
   13 (*  option) any later version.                                      *)
   14 (*                                                                  *)
   15 (*  The Seed7 Runtime Library is distributed in the hope that it    *)
   16 (*  will be useful, but WITHOUT ANY WARRANTY; without even the      *)
   17 (*  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR *)
   18 (*  PURPOSE.  See the GNU Lesser General Public License for more    *)
   19 (*  details.                                                        *)
   20 (*                                                                  *)
   21 (*  You should have received a copy of the GNU Lesser General       *)
   22 (*  Public License along with this program; if not, write to the    *)
   23 (*  Free Software Foundation, Inc., 51 Franklin Street,             *)
   24 (*  Fifth Floor, Boston, MA  02110-1301, USA.                       *)
   25 (*                                                                  *)
   26 (********************************************************************)
   27 
   28 
   29 include "external_file.s7i";
   30 
   31 
   32 (**
   33  *  [[file|File]] implementation type for UTF-8 files.
   34  *  This type supports UTF-8 encoded sequential files of the
   35  *  operating system. UTF-8 files are seekable, therefore they
   36  *  support the functions [[external_file#length(in_external_file)|length]],
   37  *  [[#seek(in_utf8_file,in_integer)|seek]] and
   38  *  [[external_file#tell(in_external_file)|tell]].
   39  *)
   40 const type: utf8_file is sub external_file struct  # derived from external_file
   41   end struct;  # declares no fields of its own
   42 
   43 (* Low-level helpers bound to the UT8_* actions; each one operates directly on a clib_file. *)
   44 const func char: utf8_getc (ref clib_file: inFile)                    is action "UT8_GETC";
   45 const func string: utf8_gets (in clib_file: inFile,
   46                               in integer: maxLength)                  is action "UT8_GETS";
   47 const func string: utf8_word_read (ref clib_file: inFile,
   48                                    inout char: terminationChar)       is action "UT8_WORD_READ";
   49 const func string: utf8_line_read (ref clib_file: inFile,
   50                                    inout char: terminationChar)       is action "UT8_LINE_READ";
   51 const proc: utf8_write (ref clib_file: outFile, in string: stri)      is action "UT8_WRITE";
   52 const proc: utf8_seek (ref clib_file: aFile, in integer: position)    is action "UT8_SEEK";
   53 
   54 
   55 (**
   56  *  Open a Unicode file that is stored in the UTF-8 encoding.
   57  *  The file is identified by ''path'' and opened according to ''mode''.
   58  *  Binary modes and text modes are available:
   59  *  *Binary modes:
   60  *  ** "r"   Open file for reading.
   61  *  ** "w"   Truncate to zero length or create file for writing.
   62  *  ** "a"   Append; open or create file for writing at end-of-file.
   63  *  ** "r+"  Open file for update (reading and writing).
   64  *  ** "w+"  Truncate to zero length or create file for update.
   65  *  ** "a+"  Append; open or create file for update, writing at end-of-file.
   66  *  *Text modes:
   67  *  ** "rt"  Open file for reading.
   68  *  ** "wt"  Truncate to zero length or create file for writing.
   69  *  ** "at"  Append; open or create file for writing at end-of-file.
   70  *  ** "rt+" Open file for update (reading and writing).
   71  *  ** "wt+" Truncate to zero length or create file for update.
   72  *  ** "at+" Append; open or create file for update, writing at end-of-file.
   73  *  Note that these modes differ from the ones used by the C function
   74  *  fopen().
   75  *  @param path Path of the file to be opened. It must be given in
   76  *         the standard path representation.
   77  *  @param mode One of the mode strings listed above.
   78  *  @return the opened file, or [[null_file#STD_NULL|STD_NULL]]
   79  *          if the file could not be opened or ''path'' refers to
   80  *          a directory.
   81  *  @exception MEMORY_ERROR Not enough memory to convert the path
   82  *             to the system path type.
   83  *  @exception RANGE_ERROR The ''mode'' is not one of the allowed
   84  *             values, or ''path'' does not use the standard path
   85  *             representation, or ''path'' cannot be converted
   86  *             to the system path type.
   87  *)
   88 const func file: openUtf8 (in string: path, in string: mode) is func
   89   result
   90     var file: resultFile is STD_NULL;  # stays STD_NULL when opening fails
   91   local
   92     var utf8_file: utf8File is utf8_file.value;
   93     var clib_file: clibFile is CLIB_NULL_FILE;
   94   begin
   95     clibFile := openClibFile(path, mode);
   96     if clibFile <> CLIB_NULL_FILE then
   97       utf8File.name := path;
   98       utf8File.ext_file := clibFile;
   99       resultFile := toInterface(utf8File);
  100     end if;
  101   end func;
  102 
  103 
  104 (**
  105  *  Write the string ''stri'' to the UTF-8 file ''outFile''.
  106  *  @exception FILE_ERROR A system function returns an error.
  107  *)
  108 const proc: write (in utf8_file: outFile, in string: stri) is func
  109   begin
  110     utf8_write(outFile.ext_file, stri);  # delegate to the UT8_WRITE action on the wrapped clib_file
  111   end func;
  112 
  113 
  114 (**
  115  *  Read one character from the UTF-8 file ''inFile''.
  116  *  @return the character read, or [[char#EOF|EOF]] at the end of the file.
  117  *  @exception RANGE_ERROR The file contains an illegal encoding.
  118  *)
  119 const func char: getc (in utf8_file: inFile) is
  120   return utf8_getc(inFile.ext_file);  # delegate to the UT8_GETC action on the wrapped clib_file
  121 
  122 
  123 (**
  124  *  Read a string with a maximum length of ''maxLength'' from a UTF-8 file.
  125  *  @return the string read.
  126  *  @exception RANGE_ERROR The parameter ''maxLength'' is negative, or
  127  *             the file contains an illegal encoding.
  128  *)
  129 const func string: gets (in utf8_file: inFile, in integer: maxLength) is
  130   return utf8_gets(inFile.ext_file, maxLength);  # delegate to the UT8_GETS action on the wrapped clib_file
  131 
  132 
  133 (**
  134  *  Read a word from a UTF-8 file.
  135  *  Before reading the word it skips spaces and tabs. The function
  136  *  accepts words ending with ' ', '\t', '\n', "\r\n" or [[char#EOF|EOF]].
  137  *  The word ending characters are not copied into the string.
  138  *  That means that the '\r' of a "\r\n" sequence is silently removed.
  139  *  When the function returns, inFile.bufferChar contains ' ',
  140  *  '\t', '\n' or [[char#EOF|EOF]].
  141  *  @return the word read.
  142  *  @exception RANGE_ERROR The file contains an illegal encoding.
  143  *  @exception MEMORY_ERROR Not enough memory to represent the result.
  144  *  @exception FILE_ERROR A system function returns an error.
  145  *)
  146 const func string: getwd (inout utf8_file: inFile) is
  147   return utf8_word_read(inFile.ext_file, inFile.bufferChar);  # bufferChar is handed over as the inout terminationChar
  148 
  149 
  150 (**
  151  *  Read a line from a UTF-8 file.
  152  *  The function accepts lines ending with '\n', "\r\n" or [[char#EOF|EOF]].
  153  *  The line ending characters are not copied into the string.
  154  *  That means that the '\r' of a "\r\n" sequence is silently removed.
  155  *  When the function returns, inFile.bufferChar contains '\n' or
  156  *  [[char#EOF|EOF]].
  157  *  @return the line read.
  158  *  @exception RANGE_ERROR The file contains an illegal encoding.
  159  *  @exception MEMORY_ERROR Not enough memory to represent the result.
  160  *  @exception FILE_ERROR A system function returns an error.
  161  *)
  162 const func string: getln (inout utf8_file: inFile) is
  163   return utf8_line_read(inFile.ext_file, inFile.bufferChar);  # bufferChar is handed over as the inout terminationChar
  164 
  165 
  166 (**
  167  *  Set the current file position of ''aFile'' to ''position''.
  168  *  The file position is measured in bytes from the start of the file.
  169  *  The first byte in the file has the position 1.
  170  *  If the file position would be in the middle of a UTF-8 encoded
  171  *  character the position is advanced to the beginning of the next
  172  *  UTF-8 character.
  173  *  @exception RANGE_ERROR The file position is negative or zero or
  174  *             the file position is not representable in the system
  175  *             file position type.
  176  *  @exception FILE_ERROR A system function returns an error.
  177  *)
  178 const proc: seek (in utf8_file: aFile, in integer: position) is func
  179   begin
  180     utf8_seek(aFile.ext_file, position);  # delegate to the UT8_SEEK action on the wrapped clib_file
  181   end func;
  182 
  183 
  184 (* System STD_UTF8_IN, STD_UTF8_OUT and STD_UTF8_ERR files *)
  185 
  186 (* Build a utf8_file value that wraps ''primitive_file'' and carries ''path'' as its name. *)
  187 const func utf8_file: INIT_STD_UTF8_FILE (
  188     ref clib_file: primitive_file, in string: path) is func
  189   result
  190     var utf8_file: stdFile is utf8_file.value;
  191   begin
  192     stdFile.name := path;
  193     stdFile.ext_file := primitive_file;
  194   end func;
  195 
  196 
  197 (**
  198  *  UTF-8 version of the standard input file of the operating system.
  199  *  Reading from ''STD_UTF8_IN'' can be done with e.g.:
  200  *   read(STD_UTF8_IN, aVariable);
  201  *  It is also possible to make ''STD_UTF8_IN'' the default
  202  *  input of ''read'' with:
  203  *   IN := STD_UTF8_IN;
  204  *  Afterwards
  205  *   read(aVariable);
  206  *  reads from ''STD_UTF8_IN''.
  207  *)
  208 var utf8_file: STD_UTF8_IN is  INIT_STD_UTF8_FILE(CLIB_INPUT,  "STD_UTF8_IN");  # wraps CLIB_INPUT
  209 
  210 
  211 (**
  212  *  UTF-8 version of the standard output file of the operating system.
  213  *  Writing to ''STD_UTF8_OUT'' can be done with e.g.:
  214  *   write(STD_UTF8_OUT, something);
  215  *  It is also possible to redirect the default output of ''write''
  216  *  to ''STD_UTF8_OUT'' with:
  217  *   OUT := STD_UTF8_OUT;
  218  *  Afterwards
  219  *   write(something);
  220  *  writes to ''STD_UTF8_OUT''.
  221  *)
  222 var utf8_file: STD_UTF8_OUT is INIT_STD_UTF8_FILE(CLIB_OUTPUT, "STD_UTF8_OUT");  # wraps CLIB_OUTPUT
  223 
  224 
  225 (**
  226  *  UTF-8 version of the standard error file of the operating system.
  227  *  Writing to ''STD_UTF8_ERR'' can be done with e.g.:
  228  *   write(STD_UTF8_ERR, something);
  229  *)
  230 var utf8_file: STD_UTF8_ERR is INIT_STD_UTF8_FILE(CLIB_ERROR,  "STD_UTF8_ERR");  # wraps CLIB_ERROR