"Fossies" - the Fresh Open Source Software Archive

Member "recoll-1.26.3/utils/base64.cpp" (4 Sep 2019, 10561 Bytes) of package /linux/privat/recoll-1.26.3.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "base64.cpp" see the Fossies "Dox" file reference documentation.

    1 /* Copyright (C) 2005 J.F.Dockes
    2  *   This program is free software; you can redistribute it and/or modify
    3  *   it under the terms of the GNU General Public License as published by
    4  *   the Free Software Foundation; either version 2 of the License, or
    5  *   (at your option) any later version.
    6  *
    7  *   This program is distributed in the hope that it will be useful,
    8  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
    9  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   10  *   GNU General Public License for more details.
   11  *
   12  *   You should have received a copy of the GNU General Public License
   13  *   along with this program; if not, write to the
   14  *   Free Software Foundation, Inc.,
   15  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
   16  */
   17 #include <stdio.h>
   18 
   19 #include <sys/types.h>
   20 #include <cstring>
   21 #include <string>
   22 
   23 using std::string;
   24 
   25 #undef DEBUG_BASE64 
   26 #ifdef DEBUG_BASE64
   27 #define DPRINT(X) fprintf X
   28 #else
   29 #define DPRINT(X)
   30 #endif
   31 
   32 // This is adapted from FreeBSD's code, quite modified for performance.
   33 // Tests on a Mac pro 2.1G with a 166MB base64 file
   34 //
   35 // The original version used strchr to lookup the base64 value from
   36 // the input code:
   37 //   real    0m13.053s user  0m12.574s sys   0m0.471s
   38 // Using a direct access, 256 entries table:
   39 //   real    0m3.073s user   0m2.600s sys    0m0.439s
   40 // Using a variable to hold the array length (instead of in.length()):
   41 //   real    0m2.972s user   0m2.527s sys    0m0.433s
   42 // Using values from the table instead of isspace() (final)
   43 //   real    0m2.513s user   0m2.059s sys    0m0.439s
   44 //
   45 // The table has one entry per char value (0-256). Invalid base64
   46 // chars take value 256, whitespace 255, Pad ('=') 254. 
   47 // Valid char points contain their base64 value (0-63) 
   48 static const int b64values[] = {
   49 /* 0 */ 256,/* 1 */ 256,/* 2 */ 256,/* 3 */ 256,/* 4 */ 256,
   50 /* 5 */ 256,/* 6 */ 256,/* 7 */ 256,/* 8 */ 256,
   51 /*9 ht */ 255,/* 10 nl */ 255,/* 11 vt */ 255,/* 12 np/ff*/ 255,/* 13 cr */ 255,
   52 /* 14 */ 256,/* 15 */ 256,/* 16 */ 256,/* 17 */ 256,/* 18 */ 256,/* 19 */ 256,
   53 /* 20 */ 256,/* 21 */ 256,/* 22 */ 256,/* 23 */ 256,/* 24 */ 256,/* 25 */ 256,
   54 /* 26 */ 256,/* 27 */ 256,/* 28 */ 256,/* 29 */ 256,/* 30 */ 256,/* 31 */ 256,
   55 /* 32 sp  */ 255,
   56 /* ! */ 256,/* " */ 256,/* # */ 256,/* $ */ 256,/* % */ 256,
   57 /* & */ 256,/* ' */ 256,/* ( */ 256,/* ) */ 256,/* * */ 256,
   58 /* + */ 62,
   59 /* , */ 256,/* - */ 256,/* . */ 256,
   60 /* / */ 63,
   61 /* 0 */ 52,/* 1 */ 53,/* 2 */ 54,/* 3 */ 55,/* 4 */ 56,/* 5 */ 57,/* 6 */ 58,
   62 /* 7 */ 59,/* 8 */ 60,/* 9 */ 61,
   63 /* : */ 256,/* ; */ 256,/* < */ 256,
   64 /* = */ 254,
   65 /* > */ 256,/* ? */ 256,/* @ */ 256,
   66 /* A */ 0,/* B */ 1,/* C */ 2,/* D */ 3,/* E */ 4,/* F */ 5,/* G */ 6,/* H */ 7,
   67 /* I */ 8,/* J */ 9,/* K */ 10,/* L */ 11,/* M */ 12,/* N */ 13,/* O */ 14,
   68 /* P */ 15,/* Q */ 16,/* R */ 17,/* S */ 18,/* T */ 19,/* U */ 20,/* V */ 21,
   69 /* W */ 22,/* X */ 23,/* Y */ 24,/* Z */ 25,
   70 /* [ */ 256,/* \ */ 256,/* ] */ 256,/* ^ */ 256,/* _ */ 256,/* ` */ 256,
   71 /* a */ 26,/* b */ 27,/* c */ 28,/* d */ 29,/* e */ 30,/* f */ 31,/* g */ 32,
   72 /* h */ 33,/* i */ 34,/* j */ 35,/* k */ 36,/* l */ 37,/* m */ 38,/* n */ 39,
   73 /* o */ 40,/* p */ 41,/* q */ 42,/* r */ 43,/* s */ 44,/* t */ 45,/* u */ 46,
   74 /* v */ 47,/* w */ 48,/* x */ 49,/* y */ 50,/* z */ 51,
   75 /* { */ 256,/* | */ 256,/* } */ 256,/* ~ */ 256,
   76 256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,
   77 256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,
   78 256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,
   79 256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,
   80 256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,
   81 256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,
   82 256,256,256,256,256,256,256,256,
   83 };
   84 static const char Base64[] =
   85     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
   86 static const char Pad64 = '=';
   87 
   88 bool base64_decode(const string& in, string& out)
   89 {
   90     int io = 0, state = 0, ch = 0;
   91     unsigned int ii = 0;
   92     out.clear();
   93     size_t ilen = in.length();
   94     out.reserve(ilen);
   95 
   96     for (ii = 0; ii < ilen; ii++) {
   97     ch = (unsigned char)in[ii];
   98     int value = b64values[ch];
   99 
  100     if (value == 255)        /* Skip whitespace anywhere. */
  101         continue;
  102     if (ch == Pad64)
  103         break;
  104     if (value == 256) {
  105         /* A non-base64 character. */
  106         DPRINT((stderr, "base64_dec: non-base64 char at pos %d\n", ii));
  107         return false;
  108     }
  109 
  110     switch (state) {
  111     case 0:
  112         out += value << 2;
  113         state = 1;
  114         break;
  115     case 1:
  116         out[io]   |=  value >> 4;
  117         out += (value & 0x0f) << 4 ;
  118         io++;
  119         state = 2;
  120         break;
  121     case 2:
  122         out[io]   |=  value >> 2;
  123         out += (value & 0x03) << 6;
  124         io++;
  125         state = 3;
  126         break;
  127     case 3:
  128         out[io] |= value;
  129         io++;
  130         state = 0;
  131         break;
  132     default:
  133         fprintf(stderr, "base64_dec: internal!bad state!\n");
  134         return false;
  135     }
  136     }
  137 
  138     /*
  139      * We are done decoding Base-64 chars.  Let's see if we ended
  140      * on a byte boundary, and/or with erroneous trailing characters.
  141      */
  142 
  143     if (ch == Pad64) {      /* We got a pad char. */
  144     ch = in[ii++];      /* Skip it, get next. */
  145     switch (state) {
  146     case 0:     /* Invalid = in first position */
  147     case 1:     /* Invalid = in second position */
  148         DPRINT((stderr, "base64_dec: pad char in state 0/1\n"));
  149         return false;
  150 
  151     case 2:     /* Valid, means one byte of info */
  152             /* Skip any number of spaces. */
  153         for (; ii < in.length(); ch = in[ii++])
  154         if (!isspace((unsigned char)ch))
  155             break;
  156         /* Make sure there is another trailing = sign. */
  157         if (ch != Pad64) {
  158         DPRINT((stderr, "base64_dec: missing pad char!\n"));
  159         // Well, there are bad encoders out there. Let it pass
  160         // return false;
  161         }
  162         ch = in[ii++];      /* Skip the = */
  163         /* Fall through to "single trailing =" case. */
  164         /* FALLTHROUGH */
  165 
  166     case 3:     /* Valid, means two bytes of info */
  167         /*
  168          * We know this char is an =.  Is there anything but
  169          * whitespace after it?
  170          */
  171         for (; ii < in.length(); ch = in[ii++])
  172         if (!isspace((unsigned char)ch)) {
  173             DPRINT((stderr, "base64_dec: non-white at eod: 0x%x\n", 
  174                 (unsigned int)((unsigned char)ch)));
  175             // Well, there are bad encoders out there. Let it pass
  176             //return false;
  177         }
  178 
  179         /*
  180          * Now make sure for cases 2 and 3 that the "extra"
  181          * bits that slopped past the last full byte were
  182          * zeros.  If we don't check them, they become a
  183          * subliminal channel.
  184          */
  185         if (out[io] != 0) {
  186         DPRINT((stderr, "base64_dec: bad extra bits!\n"));
  187         // Well, there are bad encoders out there. Let it pass
  188         out[io] = 0;
  189         // return false;
  190         }
  191         // We've appended an extra 0.
  192         out.resize(io);
  193     }
  194     } else {
  195     /*
  196      * We ended by seeing the end of the string.  Make sure we
  197      * have no partial bytes lying around.
  198      */
  199     if (state != 0) {
  200         DPRINT((stderr, "base64_dec: bad final state\n"));
  201         return false;
  202     }
  203     }
  204 
  205     DPRINT((stderr, "base64_dec: ret ok, io %d sz %d len %d value [%s]\n", 
  206         io, (int)out.size(), (int)out.length(), out.c_str()));
  207     return true;
  208 }
  209 
  210 #undef Assert
  211 #define Assert(X)
  212 
  213 void base64_encode(const string &in, string &out)
  214 {
  215     unsigned char input[3];
  216     unsigned char output[4];
  217 
  218     out.clear();
  219 
  220     string::size_type srclength = in.length();
  221     int sidx = 0;
  222     while (2 < srclength) {
  223     input[0] = in[sidx++];
  224     input[1] = in[sidx++];
  225     input[2] = in[sidx++];
  226     srclength -= 3;
  227 
  228     output[0] = input[0] >> 2;
  229     output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
  230     output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
  231     output[3] = input[2] & 0x3f;
  232     Assert(output[0] < 64);
  233     Assert(output[1] < 64);
  234     Assert(output[2] < 64);
  235     Assert(output[3] < 64);
  236 
  237     out += Base64[output[0]];
  238     out += Base64[output[1]];
  239     out += Base64[output[2]];
  240     out += Base64[output[3]];
  241     }
  242     
  243     /* Now we worry about padding. */
  244     if (0 != srclength) {
  245     /* Get what's left. */
  246     input[0] = input[1] = input[2] = '\0';
  247     for (string::size_type i = 0; i < srclength; i++)
  248         input[i] = in[sidx++];
  249     
  250     output[0] = input[0] >> 2;
  251     output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
  252     output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
  253     Assert(output[0] < 64);
  254     Assert(output[1] < 64);
  255     Assert(output[2] < 64);
  256 
  257     out += Base64[output[0]];
  258     out += Base64[output[1]];
  259     if (srclength == 1)
  260         out += Pad64;
  261     else
  262         out += Base64[output[2]];
  263     out += Pad64;
  264     }
  265     return;
  266 }
  267 
  268 #ifdef TEST_BASE64
  269 #include <stdio.h>
  270 #include <stdlib.h>
  271 
  272 #include "readfile.h"
  273 
  274 const char *thisprog;
  275 static char usage [] = "testfile\n\n"
  276 ;
  277 static void
  278 Usage(void)
  279 {
  280     fprintf(stderr, "%s: usage:\n%s", thisprog, usage);
  281     exit(1);
  282 }
  283 
  284 static int     op_flags;
  285 #define OPT_MOINS 0x1
  286 #define OPT_i     0x2 
  287 #define OPT_P     0x4 
  288 
  289 int main(int argc, char **argv)
  290 {
  291     thisprog = argv[0];
  292     argc--; argv++;
  293 
  294     while (argc > 0 && **argv == '-') {
  295     (*argv)++;
  296     if (!(**argv))
  297         /* Cas du "adb - core" */
  298         Usage();
  299     while (**argv)
  300         switch (*(*argv)++) {
  301         case 'i':   op_flags |= OPT_i; break;
  302         default: Usage();   break;
  303         }
  304     argc--; argv++;
  305     }
  306     
  307     if (op_flags & OPT_i)  {
  308     const char *values[] = {"", "1", "12", "123", "1234", 
  309                 "12345", "123456"};
  310     int nvalues = sizeof(values) / sizeof(char *);
  311     string in, out, back;
  312     int err = 0;
  313     for (int i = 0; i < nvalues; i++) {
  314         in = values[i];
  315         base64_encode(in, out);
  316         base64_decode(out, back);
  317         if (in != back) {
  318         fprintf(stderr, "In [%s] %d != back [%s] %d (out [%s] %d\n", 
  319             in.c_str(), int(in.length()), 
  320             back.c_str(), int(back.length()),
  321             out.c_str(), int(out.length())
  322             );
  323         err++;
  324         }
  325     }
  326     in.erase();
  327     in += char(0);
  328     in += char(0);
  329     in += char(0);
  330     in += char(0);
  331     base64_encode(in, out);
  332     base64_decode(out, back);
  333     if (in != back) {
  334         fprintf(stderr, "In [%s] %d != back [%s] %d (out [%s] %d\n", 
  335             in.c_str(), int(in.length()), 
  336             back.c_str(), int(back.length()),
  337             out.c_str(), int(out.length())
  338             );
  339         err++;
  340     }
  341     exit(!(err == 0));
  342     } else {
  343     if (argc > 1)
  344         Usage();
  345     string infile;
  346     if (argc == 1)
  347         infile = *argv++;argc--;
  348     string idata, reason;
  349     if (!file_to_string(infile, idata, &reason)) {
  350         fprintf(stderr, "Can't read file: %s\n", reason.c_str());
  351         exit(1);
  352     }
  353     string odata;
  354     if (!base64_decode(idata, odata)) {
  355         fprintf(stderr, "Decoding failed\n");
  356         exit(1);
  357     }
  358     fwrite(odata.c_str(), 1,
  359            odata.size() * sizeof(string::value_type), stdout);
  360     exit(0);
  361     }
  362 }
  363 #endif