"Fossies" - the Fresh Open Source Software Archive

Member "unrar/filestr.cpp" (4 May 2022, 4158 Bytes) of package /linux/misc/unrarsrc-6.1.7.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "filestr.cpp", see the Fossies "Dox" file reference documentation.

    1 #include "rar.hpp"
    2 
    3 bool ReadTextFile(
    4   const wchar *Name,
    5   StringList *List,
    6   bool Config,
    7   bool AbortOnError,
    8   RAR_CHARSET SrcCharset,
    9   bool Unquote,
   10   bool SkipComments,
   11   bool ExpandEnvStr)
   12 {
   13   wchar FileName[NM];
   14   *FileName=0;
   15 
   16   if (Name!=NULL)
   17     if (Config)
   18       GetConfigName(Name,FileName,ASIZE(FileName),true,false);
   19     else
   20       wcsncpyz(FileName,Name,ASIZE(FileName));
   21 
   22   File SrcFile;
   23   if (*FileName!=0)
   24   {
   25     bool OpenCode=AbortOnError ? SrcFile.WOpen(FileName):SrcFile.Open(FileName,0);
   26 
   27     if (!OpenCode)
   28     {
   29       if (AbortOnError)
   30         ErrHandler.Exit(RARX_OPEN);
   31       return false;
   32     }
   33   }
   34   else
   35     SrcFile.SetHandleType(FILE_HANDLESTD);
   36 
   37   uint DataSize=0,ReadSize;
   38   const int ReadBlock=4096;
   39 
   40   Array<byte> Data(ReadBlock);
   41   while ((ReadSize=SrcFile.Read(&Data[DataSize],ReadBlock))!=0)
   42   {
   43     DataSize+=ReadSize;
   44     Data.Add(ReadSize); // Always have ReadBlock available for next data.
   45   }
   46   // Set to really read size, so we can zero terminate it correctly.
   47   Data.Alloc(DataSize);
   48 
   49   int LittleEndian=DataSize>=2 && Data[0]==255 && Data[1]==254 ? 1:0;
   50   int BigEndian=DataSize>=2 && Data[0]==254 && Data[1]==255 ? 1:0;
   51   bool Utf8=DataSize>=3 && Data[0]==0xef && Data[1]==0xbb && Data[2]==0xbf;
   52 
   53   if (SrcCharset==RCH_DEFAULT)
   54     SrcCharset=DetectTextEncoding(&Data[0],DataSize);
   55 
   56   Array<wchar> DataW;
   57 
   58   if (SrcCharset==RCH_DEFAULT || SrcCharset==RCH_OEM || SrcCharset==RCH_ANSI)
   59   {
   60     Data.Push(0); // Zero terminate.
   61 #if defined(_WIN_ALL)
   62     if (SrcCharset==RCH_OEM)
   63       OemToCharA((char *)&Data[0],(char *)&Data[0]);
   64 #endif
   65     DataW.Alloc(Data.Size());
   66     CharToWide((char *)&Data[0],&DataW[0],DataW.Size());
   67   }
   68 
   69   if (SrcCharset==RCH_UNICODE)
   70   {
   71     size_t Start=2; // Skip byte order mark.
   72     if (!LittleEndian && !BigEndian) // No byte order mask.
   73     {
   74       Start=0;
   75       LittleEndian=1;
   76     }
   77     
   78     DataW.Alloc(Data.Size()/2+1);
   79     size_t End=Data.Size() & ~1; // We need even bytes number for UTF-16.
   80     for (size_t I=Start;I<End;I+=2)
   81       DataW[(I-Start)/2]=Data[I+BigEndian]+Data[I+LittleEndian]*256;
   82     DataW[(End-Start)/2]=0;
   83   }
   84 
   85   if (SrcCharset==RCH_UTF8)
   86   {
   87     Data.Push(0); // Zero terminate data.
   88     DataW.Alloc(Data.Size());
   89     UtfToWide((const char *)(Data+(Utf8 ? 3:0)),&DataW[0],DataW.Size());
   90   }
   91 
   92   wchar *CurStr=&DataW[0];
   93 
   94   while (*CurStr!=0)
   95   {
   96     wchar *NextStr=CurStr,*CmtPtr=NULL;
   97     while (*NextStr!='\r' && *NextStr!='\n' && *NextStr!=0)
   98     {
   99       if (SkipComments && NextStr[0]=='/' && NextStr[1]=='/')
  100       {
  101         *NextStr=0;
  102         CmtPtr=NextStr;
  103       }
  104       NextStr++;
  105     }
  106     bool Done=*NextStr==0;
  107 
  108     *NextStr=0;
  109     for (wchar *SpacePtr=(CmtPtr!=NULL ? CmtPtr:NextStr)-1;SpacePtr>=CurStr;SpacePtr--)
  110     {
  111       if (*SpacePtr!=' ' && *SpacePtr!='\t')
  112         break;
  113       *SpacePtr=0;
  114     }
  115     
  116     if (Unquote && *CurStr=='\"')
  117     {
  118       size_t Length=wcslen(CurStr);
  119       if (CurStr[Length-1]=='\"')
  120       {
  121         CurStr[Length-1]=0;
  122         CurStr++;
  123       }
  124     }
  125 
  126     bool Expanded=false;
  127 #if defined(_WIN_ALL)
  128     if (ExpandEnvStr && *CurStr=='%') // Expand environment variables in Windows.
  129     {
  130       wchar ExpName[NM];
  131       *ExpName=0;
  132       DWORD Result=ExpandEnvironmentStrings(CurStr,ExpName,ASIZE(ExpName));
  133       Expanded=Result!=0 && Result<ASIZE(ExpName);
  134       if (Expanded && *ExpName!=0)
  135         List->AddString(ExpName);
  136     }
  137 #endif
  138     if (!Expanded && *CurStr!=0)
  139       List->AddString(CurStr);
  140 
  141     if (Done)
  142       break;
  143     CurStr=NextStr+1;
  144     while (*CurStr=='\r' || *CurStr=='\n')
  145       CurStr++;
  146   }
  147   return true;
  148 }
  149 
  150 
  151 RAR_CHARSET DetectTextEncoding(const byte *Data,size_t DataSize)
  152 {
  153   if (DataSize>3 && Data[0]==0xef && Data[1]==0xbb && Data[2]==0xbf &&
  154       IsTextUtf8(Data+3,DataSize-3))
  155     return RCH_UTF8;
  156 
  157   bool LittleEndian=DataSize>2 && Data[0]==255 && Data[1]==254;
  158   bool BigEndian=DataSize>2 && Data[0]==254 && Data[1]==255;
  159 
  160   if (LittleEndian || BigEndian)  
  161     for (size_t I=LittleEndian ? 3 : 2;I<DataSize;I+=2)
  162       if (Data[I]<32 && Data[I]!='\r' && Data[I]!='\n')
  163         return RCH_UNICODE; // High byte in UTF-16 char is found.
  164 
  165   return RCH_DEFAULT;
  166 }