"Fossies" - the Fresh Open Source Software Archive

Member "epstool-3.08/src/cmbcs.c" (10 Jun 2005, 4544 Bytes) of package /linux/misc/old/ghost/ghostgum/epstool-3.08-os2.zip:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file.

    1 /* Copyright (C) 2002 Ghostgum Software Pty Ltd.  All rights reserved.
    2 
    3   This software is provided AS-IS with no warranty, either express or
    4   implied.
    5 
    6   This software is distributed under licence and may not be copied,
    7   modified or distributed except as expressly authorised under the terms
    8   of the licence contained in the file LICENCE in this distribution.
    9 
   10   For more information about licensing, please refer to
   11   http://www.ghostgum.com.au/ or contact Ghostsgum Software Pty Ltd, 
   12   218 Gallaghers Rd, Glen Waverley VIC 3150, AUSTRALIA, 
   13   Fax +61 3 9886 6616.
   14 */
   15 
   16 /* $Id: cmbcs.c,v 1.6 2002/08/01 08:27:52 ghostgum Exp $ */
   17 /* Multiple Byte Character Set */
   18 
   19 /* 
   20  * GSview uses Unicode on Windows.
   21  * On Linux it may use a multiple byte character set,
   22  * such as UTF-8, EUC, Shift-JIS.
   23  * This file provides support for stepping over multiple byte
   24  * characters.  This is needed when searching for particular
   25  * characters such as a tab, space, slash or backslash.
   26  * We assume that the null character will not occur within 
   27  * a MBCS string, and use the C strlen(str)+1 to get the byte
   28  * count for allocating memory.
   29  *
   30  * For Japanese text on Unix, EUC is most commonly used, SJIS is often
   31  * used, UTF-8 and UCS-2 are rarely used.
   32  * For Japanese filenames on Unix, SJIS is most commonly used 
   33  * (for compatibility with Windows), EUC and UTF-8 are sometimes used,
   34  * UCS-2 is rarely used.
   35  * GSview only searches for characters in TCHAR strings, so we may be
   36  * able to handle filenames in a different encoding by doing the
   37  * translation in cs_to_narrow().
   38  * 
   39  * FIX: explain TCHAR, cs, MBCS.
   40  */
   41 
   42 #include "common.h"
   43 
   44 #ifndef UNICODE
   45 
   46 CODEPAGE global_codepage = CODEPAGE_SBCS;   /* GLOBAL: encoding that char_next() switches on; initialised to single byte */
   47 
   48 /* Return number of bytes from current character to start of
   49  * next character.
   50  */
   51 int char_next(const char *str)
   52 {
   53     int i;
   54     const unsigned char *t = (const unsigned char *)str;
   55     switch (global_codepage) {
   56     default:
   57     case CODEPAGE_SBCS:
   58         i = 1;
   59         break;
   60     case CODEPAGE_UTF8:
   61         if (t[0] == 0)
   62         i = 0;
   63         else if ((t[0] > 0) && (t[0] <= 0x7f))
   64         i = 1;
   65         else {
   66         /* multiple byte UTF-8 */
   67         /* scan until we find a byte in a suitable range */
   68         i = 0;
   69         while (t[i] && (t[i] >= 0x80) && (t[i] <= 0xbf))
   70             i++;
   71         }
   72         break;
   73     case CODEPAGE_EUC:
   74         if (t[0] == 0x8f) {
   75         /* 3 bytes */
   76         if (t[1] == '\0')
   77             i = 1;
   78         else if (t[2] == '\0')
   79             i = 2;
   80         else 
   81             i = 3;
   82         }
   83         else if (t[0] & 0x80) {
   84         /* 2 bytes */
   85         if (str[1] == '\0')
   86             i = 1;
   87         else 
   88             i = 2;
   89         }
   90         else
   91         i = 1;
   92     case CODEPAGE_SJIS:
   93         if (t[0] == 0) {
   94         i = 0;
   95         }
   96         else if ((t[0] > 0) && (t[0] <= 0x7f)) {
   97         i = 1;
   98         }
   99         else if ((t[0] >= 0x80) && (t[0] <= 0xbf)) {
  100         if (t[1] == '\0')
  101             i = 1;
  102         else
  103             i = 2;
  104         }
  105         else if ((t[0] >= 0xa0) && (t[0] <= 0xdf)) {
  106         i = 1;
  107         }
  108         else if ((t[0] >= 0xe0) && (t[0] <= 0xef)) {
  109         if (t[1] == '\0')
  110             i = 1;
  111         else
  112             i = 2;
  113         }
  114         else
  115         i = 1;
  116     }
  117     return i;
  118 }
  119 
  120 /* This implementation is for systems that don't support wide characters */
  121 /* Convert a cs (wide or narrow) string to a narrow string.
  122  * If the output narrow string needs to be null terminated,
  123  * the input string length needs to include the null.
  124  * Returns the number of characters written to the narrow string.
  125  * If nlen is 0, the function returns the needed buffer size for nstr.
  126  * If the function fails, it returns 0.
  127  */
  128 int
  129 char_to_narrow(char *nstr, int nlen, LPCTSTR wstr, int wlen)
  130 {
  131     /* no translation */
  132     if (nlen == 0)
  133     return wlen;
  134     if (nlen < wlen)
  135     return 0;
  136     memcpy(nstr, wstr, wlen);
  137     return wlen;
  138 }
  139 
  140 
  141 /* opposite of char_to_narrow */
  142 int 
  143 narrow_to_char(TCHAR *wstr, int wlen, const char *nstr, int nlen)
  144 {
  145     /* no translation */
  146     if (wlen == 0)
  147     return nlen;
  148     if (wlen < nlen)
  149     return 0;
  150     memcpy(wstr, nstr, nlen);
  151     return nlen;
  152 }
  153 
  154 #endif
  155 
  156 /* Convert ISO-Latin1 str to UTF-8 ustr.
  157  * Return byte length of UTF-8 string.
  158  * If ustr is NULL or insufficient space don't copy.
  159  * This is needed for the gtk+ user interface.
  160  */
  161 int 
  162 latin1_to_utf8(char *ustr, int ulen, const char *str, int slen)
  163 {
  164     int i, j;
  165     const char *p = str;
  166     int len = slen;
  167     for (i=0; i<slen; i++)
  168     if (p[i] & 0x80)
  169         len++;
  170     if ((ustr != NULL) && (ulen <= len)) {
  171     p = str;
  172         for (i=0, j=0; i<slen; i++) {
  173         if (*p & 0x80) {
  174         ustr[j++] = (char)(0xc0 | ((*p & 0xc0) >> 6));
  175         ustr[j++] = (char)(0x80 | (*p & 0x3f));
  176         }
  177         else
  178         ustr[j++] = *p;
  179         p++;
  180     }
  181     }
  182     return len;
  183 }
  184