"Fossies" - the Fresh Open Source Software Archive 
Member "unrar/filestr.cpp" (4 May 2022, 4158 Bytes) of package /linux/misc/unrarsrc-6.1.7.tar.gz:
As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
a code folding option.
Alternatively, you can
view or
download the uninterpreted source code file here.
For more information about "filestr.cpp" see the
Fossies "Dox" file reference documentation.
1 #include "rar.hpp"
2
3 bool ReadTextFile(
4 const wchar *Name,
5 StringList *List,
6 bool Config,
7 bool AbortOnError,
8 RAR_CHARSET SrcCharset,
9 bool Unquote,
10 bool SkipComments,
11 bool ExpandEnvStr)
12 {
13 wchar FileName[NM];
14 *FileName=0;
15
16 if (Name!=NULL)
17 if (Config)
18 GetConfigName(Name,FileName,ASIZE(FileName),true,false);
19 else
20 wcsncpyz(FileName,Name,ASIZE(FileName));
21
22 File SrcFile;
23 if (*FileName!=0)
24 {
25 bool OpenCode=AbortOnError ? SrcFile.WOpen(FileName):SrcFile.Open(FileName,0);
26
27 if (!OpenCode)
28 {
29 if (AbortOnError)
30 ErrHandler.Exit(RARX_OPEN);
31 return false;
32 }
33 }
34 else
35 SrcFile.SetHandleType(FILE_HANDLESTD);
36
37 uint DataSize=0,ReadSize;
38 const int ReadBlock=4096;
39
40 Array<byte> Data(ReadBlock);
41 while ((ReadSize=SrcFile.Read(&Data[DataSize],ReadBlock))!=0)
42 {
43 DataSize+=ReadSize;
44 Data.Add(ReadSize); // Always have ReadBlock available for next data.
45 }
46 // Set to really read size, so we can zero terminate it correctly.
47 Data.Alloc(DataSize);
48
49 int LittleEndian=DataSize>=2 && Data[0]==255 && Data[1]==254 ? 1:0;
50 int BigEndian=DataSize>=2 && Data[0]==254 && Data[1]==255 ? 1:0;
51 bool Utf8=DataSize>=3 && Data[0]==0xef && Data[1]==0xbb && Data[2]==0xbf;
52
53 if (SrcCharset==RCH_DEFAULT)
54 SrcCharset=DetectTextEncoding(&Data[0],DataSize);
55
56 Array<wchar> DataW;
57
58 if (SrcCharset==RCH_DEFAULT || SrcCharset==RCH_OEM || SrcCharset==RCH_ANSI)
59 {
60 Data.Push(0); // Zero terminate.
61 #if defined(_WIN_ALL)
62 if (SrcCharset==RCH_OEM)
63 OemToCharA((char *)&Data[0],(char *)&Data[0]);
64 #endif
65 DataW.Alloc(Data.Size());
66 CharToWide((char *)&Data[0],&DataW[0],DataW.Size());
67 }
68
69 if (SrcCharset==RCH_UNICODE)
70 {
71 size_t Start=2; // Skip byte order mark.
72 if (!LittleEndian && !BigEndian) // No byte order mask.
73 {
74 Start=0;
75 LittleEndian=1;
76 }
77
78 DataW.Alloc(Data.Size()/2+1);
79 size_t End=Data.Size() & ~1; // We need even bytes number for UTF-16.
80 for (size_t I=Start;I<End;I+=2)
81 DataW[(I-Start)/2]=Data[I+BigEndian]+Data[I+LittleEndian]*256;
82 DataW[(End-Start)/2]=0;
83 }
84
85 if (SrcCharset==RCH_UTF8)
86 {
87 Data.Push(0); // Zero terminate data.
88 DataW.Alloc(Data.Size());
89 UtfToWide((const char *)(Data+(Utf8 ? 3:0)),&DataW[0],DataW.Size());
90 }
91
92 wchar *CurStr=&DataW[0];
93
94 while (*CurStr!=0)
95 {
96 wchar *NextStr=CurStr,*CmtPtr=NULL;
97 while (*NextStr!='\r' && *NextStr!='\n' && *NextStr!=0)
98 {
99 if (SkipComments && NextStr[0]=='/' && NextStr[1]=='/')
100 {
101 *NextStr=0;
102 CmtPtr=NextStr;
103 }
104 NextStr++;
105 }
106 bool Done=*NextStr==0;
107
108 *NextStr=0;
109 for (wchar *SpacePtr=(CmtPtr!=NULL ? CmtPtr:NextStr)-1;SpacePtr>=CurStr;SpacePtr--)
110 {
111 if (*SpacePtr!=' ' && *SpacePtr!='\t')
112 break;
113 *SpacePtr=0;
114 }
115
116 if (Unquote && *CurStr=='\"')
117 {
118 size_t Length=wcslen(CurStr);
119 if (CurStr[Length-1]=='\"')
120 {
121 CurStr[Length-1]=0;
122 CurStr++;
123 }
124 }
125
126 bool Expanded=false;
127 #if defined(_WIN_ALL)
128 if (ExpandEnvStr && *CurStr=='%') // Expand environment variables in Windows.
129 {
130 wchar ExpName[NM];
131 *ExpName=0;
132 DWORD Result=ExpandEnvironmentStrings(CurStr,ExpName,ASIZE(ExpName));
133 Expanded=Result!=0 && Result<ASIZE(ExpName);
134 if (Expanded && *ExpName!=0)
135 List->AddString(ExpName);
136 }
137 #endif
138 if (!Expanded && *CurStr!=0)
139 List->AddString(CurStr);
140
141 if (Done)
142 break;
143 CurStr=NextStr+1;
144 while (*CurStr=='\r' || *CurStr=='\n')
145 CurStr++;
146 }
147 return true;
148 }
149
150
151 RAR_CHARSET DetectTextEncoding(const byte *Data,size_t DataSize)
152 {
153 if (DataSize>3 && Data[0]==0xef && Data[1]==0xbb && Data[2]==0xbf &&
154 IsTextUtf8(Data+3,DataSize-3))
155 return RCH_UTF8;
156
157 bool LittleEndian=DataSize>2 && Data[0]==255 && Data[1]==254;
158 bool BigEndian=DataSize>2 && Data[0]==254 && Data[1]==255;
159
160 if (LittleEndian || BigEndian)
161 for (size_t I=LittleEndian ? 3 : 2;I<DataSize;I+=2)
162 if (Data[I]<32 && Data[I]!='\r' && Data[I]!='\n')
163 return RCH_UNICODE; // High byte in UTF-16 char is found.
164
165 return RCH_DEFAULT;
166 }