unrarsrc  6.1.7
About: unrar extracts, views and tests the contents of archives created with the RAR archiver.
  Fossies Dox: unrarsrc-6.1.7.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

filestr.cpp
Go to the documentation of this file.
1#include "rar.hpp"
2
// ReadTextFile: parameter list and body. The page index further down gives
// the full signature as
//   bool ReadTextFile(const wchar *Name, StringList *List, bool Config,
//                     bool AbortOnError, RAR_CHARSET SrcCharset, bool Unquote,
//                     bool SkipComments, bool ExpandEnvStr)
//
// Reads the entire source into memory, converts it to wide characters
// according to SrcCharset, splits it into lines at CR/LF and appends every
// non-empty resulting string to List.
//
// NOTE(review): this listing is missing original source lines 3, 30 and 35
// (the embedded numbering jumps 2->4, 29->31 and 34->36). The function header
// and the statements that belong to 'if (AbortOnError)' and to the final
// 'else' are therefore not visible here; check them against the original
// filestr.cpp before relying on the control flow as rendered.
//
// Parameters:
//   Name         - source file name. If NULL, FileName stays empty and the
//                  'else' branch at source line 34 is taken.
//   List         - receives the strings through AddString().
//   Config       - if true, Name is resolved through GetConfigName();
//                  otherwise it is copied as is.
//   AbortOnError - selects WOpen() instead of Open(FileName,0) for opening.
//   SrcCharset   - source encoding. RCH_DEFAULT requests detection with
//                  DetectTextEncoding().
//   Unquote      - if true, a line that both starts and ends with a double
//                  quote has that pair of quotes removed.
//   SkipComments - if true, a line is cut at the "//" sequence.
//   ExpandEnvStr - if true (Windows builds only), a line starting with '%'
//                  is passed through ExpandEnvironmentStrings().
//
// Returns true after the data has been processed. A 'return false' is
// visible in the open failure path.
4 const wchar *Name,
5 StringList *List,
6 bool Config,
7 bool AbortOnError,
8 RAR_CHARSET SrcCharset,
9 bool Unquote,
10 bool SkipComments,
11 bool ExpandEnvStr)
12{
13 wchar FileName[NM];
14 *FileName=0;
15
// The 'else' below pairs with 'if (Config)', so a NULL Name leaves
// FileName empty.
16 if (Name!=NULL)
17 if (Config)
18 GetConfigName(Name,FileName,ASIZE(FileName),true,false);
19 else
20 wcsncpyz(FileName,Name,ASIZE(FileName));
21
22 File SrcFile;
23 if (*FileName!=0)
24 {
25 bool OpenCode=AbortOnError ? SrcFile.WOpen(FileName):SrcFile.Open(FileName,0);
26
27 if (!OpenCode)
28 {
// NOTE(review): source line 30, the statement controlled by this 'if', is
// missing from the listing. As rendered 'return false' looks conditional;
// presumably it was not. The index mentions ErrHandler, Exit and RARX_OPEN.
// TODO confirm.
29 if (AbortOnError)
31 return false;
32 }
33 }
// NOTE(review): source line 35, the body of this 'else', is missing from the
// listing. The index mentions SetHandleType and FILE_HANDLESTD, so it
// presumably switches SrcFile to a standard stream. TODO confirm.
34 else
36
37 uint DataSize=0,ReadSize;
38 const int ReadBlock=4096;
39
// Read the whole source in ReadBlock sized portions until Read() returns 0.
40 Array<byte> Data(ReadBlock);
41 while ((ReadSize=SrcFile.Read(&Data[DataSize],ReadBlock))!=0)
42 {
43 DataSize+=ReadSize;
44 Data.Add(ReadSize); // Always have ReadBlock available for next data.
45 }
46 // Set to the size actually read, so we can zero terminate it correctly.
47 Data.Alloc(DataSize);
48
// Byte order mark checks: FF FE, FE FF and EF BB BF. The two endian flags are
// kept as int because they are also used as byte offsets when decoding
// UTF-16 below.
49 int LittleEndian=DataSize>=2 && Data[0]==255 && Data[1]==254 ? 1:0;
50 int BigEndian=DataSize>=2 && Data[0]==254 && Data[1]==255 ? 1:0;
51 bool Utf8=DataSize>=3 && Data[0]==0xef && Data[1]==0xbb && Data[2]==0xbf;
52
53 if (SrcCharset==RCH_DEFAULT)
54 SrcCharset=DetectTextEncoding(&Data[0],DataSize);
55
56 Array<wchar> DataW;
57
// Single byte or local multibyte text: convert with CharToWide().
58 if (SrcCharset==RCH_DEFAULT || SrcCharset==RCH_OEM || SrcCharset==RCH_ANSI)
59 {
60 Data.Push(0); // Zero terminate.
61#if defined(_WIN_ALL)
62 if (SrcCharset==RCH_OEM)
63 OemToCharA((char *)&Data[0],(char *)&Data[0]);
64#endif
65 DataW.Alloc(Data.Size());
66 CharToWide((char *)&Data[0],&DataW[0],DataW.Size());
67 }
68
// UTF-16 text: assemble every byte pair into one wide character.
69 if (SrcCharset==RCH_UNICODE)
70 {
71 size_t Start=2; // Skip byte order mark.
72 if (!LittleEndian && !BigEndian) // No byte order mark.
73 {
// Without a byte order mark the data is decoded from offset 0 as
// little endian.
74 Start=0;
75 LittleEndian=1;
76 }
77
78 DataW.Alloc(Data.Size()/2+1);
79 size_t End=Data.Size() & ~1; // We need even bytes number for UTF-16.
// Low byte is taken from I+BigEndian and high byte from I+LittleEndian,
// so exactly one of the flags selects the second byte of the pair.
80 for (size_t I=Start;I<End;I+=2)
81 DataW[(I-Start)/2]=Data[I+BigEndian]+Data[I+LittleEndian]*256;
82 DataW[(End-Start)/2]=0;
83 }
84
// UTF-8 text: convert with UtfToWide(), skipping the 3 byte signature
// if it is present.
85 if (SrcCharset==RCH_UTF8)
86 {
87 Data.Push(0); // Zero terminate data.
88 DataW.Alloc(Data.Size());
89 UtfToWide((const char *)(Data+(Utf8 ? 3:0)),&DataW[0],DataW.Size());
90 }
91
// Split the wide character buffer into strings in place. Line terminators
// and trimmed characters are overwritten with zeroes.
92 wchar *CurStr=&DataW[0];
93
94 while (*CurStr!=0)
95 {
// Find the end of the current line. If comments are skipped, cut the line
// at "//" and remember that position in CmtPtr.
96 wchar *NextStr=CurStr,*CmtPtr=NULL;
97 while (*NextStr!='\r' && *NextStr!='\n' && *NextStr!=0)
98 {
99 if (SkipComments && NextStr[0]=='/' && NextStr[1]=='/')
100 {
101 *NextStr=0;
102 CmtPtr=NextStr;
103 }
104 NextStr++;
105 }
// Remember if the end of data was reached before the terminator
// is overwritten.
106 bool Done=*NextStr==0;
107
108 *NextStr=0;
// Remove trailing spaces and tabs, scanning backwards from the character
// preceding the comment start, or the line end if there is no comment.
109 for (wchar *SpacePtr=(CmtPtr!=NULL ? CmtPtr:NextStr)-1;SpacePtr>=CurStr;SpacePtr--)
110 {
111 if (*SpacePtr!=' ' && *SpacePtr!='\t')
112 break;
113 *SpacePtr=0;
114 }
115
// Strip the enclosing quotes only if the string also ends with a quote.
116 if (Unquote && *CurStr=='\"')
117 {
118 size_t Length=wcslen(CurStr);
119 if (CurStr[Length-1]=='\"')
120 {
121 CurStr[Length-1]=0;
122 CurStr++;
123 }
124 }
125
126 bool Expanded=false;
127#if defined(_WIN_ALL)
128 if (ExpandEnvStr && *CurStr=='%') // Expand environment variables in Windows.
129 {
130 wchar ExpName[NM];
131 *ExpName=0;
132 DWORD Result=ExpandEnvironmentStrings(CurStr,ExpName,ASIZE(ExpName));
// Accept the result only if it is non-zero and smaller than the buffer
// size. An accepted but empty expansion adds nothing to the list.
133 Expanded=Result!=0 && Result<ASIZE(ExpName);
134 if (Expanded && *ExpName!=0)
135 List->AddString(ExpName);
136 }
137#endif
// Add the string as is if it was not expanded. Empty strings are not added.
138 if (!Expanded && *CurStr!=0)
139 List->AddString(CurStr);
140
141 if (Done)
142 break;
// Move behind the zeroed terminator and skip the following CR and LF
// characters.
143 CurStr=NextStr+1;
144 while (*CurStr=='\r' || *CurStr=='\n')
145 CurStr++;
146 }
147 return true;
148}
149
150
151RAR_CHARSET DetectTextEncoding(const byte *Data,size_t DataSize)
152{
153 if (DataSize>3 && Data[0]==0xef && Data[1]==0xbb && Data[2]==0xbf &&
154 IsTextUtf8(Data+3,DataSize-3))
155 return RCH_UTF8;
156
157 bool LittleEndian=DataSize>2 && Data[0]==255 && Data[1]==254;
158 bool BigEndian=DataSize>2 && Data[0]==254 && Data[1]==255;
159
160 if (LittleEndian || BigEndian)
161 for (size_t I=LittleEndian ? 3 : 2;I<DataSize;I+=2)
162 if (Data[I]<32 && Data[I]!='\r' && Data[I]!='\n')
163 return RCH_UNICODE; // High byte in UTF-16 char is found.
164
165 return RCH_DEFAULT;
166}
ErrorHandler ErrHandler
size_t Size()
Definition: array.hpp:94
void Add(size_t Items)
Definition: array.hpp:100
void Alloc(size_t Items)
Definition: array.hpp:139
void Push(T Item)
Definition: array.hpp:177
void Exit(RAR_EXIT ExitCode)
Definition: errhnd.cpp:236
Definition: file.hpp:57
void SetHandleType(FILE_HANDLETYPE Type)
Definition: file.hpp:124
virtual int Read(void *Data, size_t Size)
Definition: file.cpp:374
virtual bool Open(const wchar *Name, uint Mode=FMF_READ)
Definition: file.cpp:48
bool WOpen(const wchar *Name)
Definition: file.cpp:167
void AddString(const wchar *Str)
Definition: strlist.cpp:26
@ RARX_OPEN
Definition: errhnd.hpp:12
@ FILE_HANDLESTD
Definition: file.hpp:19
RAR_CHARSET DetectTextEncoding(const byte *Data, size_t DataSize)
Definition: filestr.cpp:151
bool ReadTextFile(const wchar *Name, StringList *List, bool Config, bool AbortOnError, RAR_CHARSET SrcCharset, bool Unquote, bool SkipComments, bool ExpandEnvStr)
Definition: filestr.cpp:3
RAR_CHARSET
Definition: options.hpp:51
@ RCH_UTF8
Definition: options.hpp:51
@ RCH_OEM
Definition: options.hpp:51
@ RCH_ANSI
Definition: options.hpp:51
@ RCH_DEFAULT
Definition: options.hpp:51
@ RCH_UNICODE
Definition: options.hpp:51
void GetConfigName(const wchar *Name, wchar *FullName, size_t MaxSize, bool CheckExist, bool Create)
Definition: pathfn.cpp:304
#define ASIZE(x)
Definition: rardefs.hpp:10
wchar_t wchar
Definition: rartypes.hpp:13
unsigned int uint
Definition: rartypes.hpp:8
void wcsncpyz(wchar *dest, const wchar *src, size_t maxlen)
Definition: strfn.cpp:275
bool CharToWide(const char *Src, wchar *Dest, size_t DestSize)
Definition: unicode.cpp:85
bool UtfToWide(const char *Src, wchar *Dest, size_t DestSize)
Definition: unicode.cpp:324
bool IsTextUtf8(const byte *Src)
Definition: unicode.cpp:400