apt  2.2.4
About: Apt (Advanced Package Tool) is a management system for software packages (Debian/Ubuntu). Release series 2.2.
  Fossies Dox: apt-2.2.4.tar.gz  (unofficial, still experimental, doxygen-generated source code documentation)

extracttar.cc
Go to the documentation of this file.
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 /* ######################################################################
4 
5  Extract a Tar - Tar Extractor
6 
7  Some performance measurements showed that zlib performed quite poorly
8  in comparison to a forked gzip process. This tar extractor makes use
9  of the fact that dup'd file descriptors have the same seek pointer
10  and that gzip will not read past the end of a compressed stream,
11  even if there is more data. We use the dup property to track extraction
12  progress and the gzip feature to just feed gzip a fd in the middle
13  of an AR file.
14 
15  ##################################################################### */
16  /*}}}*/
17 // Include Files /*{{{*/
18 #include <config.h>
19 
20 #include <apt-pkg/configuration.h>
21 #include <apt-pkg/dirstream.h>
22 #include <apt-pkg/error.h>
23 #include <apt-pkg/extracttar.h>
24 #include <apt-pkg/fileutl.h>
25 #include <apt-pkg/strutl.h>
26 
27 #include <algorithm>
28 #include <iostream>
29 #include <string>
30 #include <fcntl.h>
31 #include <signal.h>
32 #include <string.h>
33 #include <unistd.h>
34 
35 #include <apti18n.h>
36  /*}}}*/
37 
38 using namespace std;
39 
40 // The on-disk header for a tar file. Go() overlays this layout on each 512-byte block it reads; the fields below add up to 345 bytes.
42 {
43  char Name[100]; // entry name; may fill all 100 bytes without a terminating NUL
44  char Mode[8]; // parsed by Go() as an octal string
45  char UserID[8]; // Go() tries Base256ToNum first, then falls back to octal
46  char GroupID[8]; // Go() tries Base256ToNum first, then falls back to octal
47  char Size[12]; // Go() tries Base256ToNum first, then falls back to octal
48  char MTime[12]; // Go() tries Base256ToNum first, then falls back to octal
49  char Checksum[8]; // octal; Go() overwrites it with spaces before summing the block
50  char LinkFlag; // entry type; Go() switches on it
51  char LinkName[100]; // link target; may fill all 100 bytes without a terminating NUL
52  char MagicNumber[8];
53  char UserName[32];
54  char GroupName[32];
55  char Major[8]; // parsed by Go() as an octal string
56  char Minor[8]; // parsed by Go() as an octal string
57 };
58 
59 // We need to read long names (names and link targets) into memory, so let's
60 // have a limit (shamelessly stolen from libarchive) to avoid people OOMing
61 // us with large streams. The limit is 1048576 bytes (1 MiB).
62 static const unsigned long long APT_LONGNAME_LIMIT = 1048576llu;
63 
64 // The largest tar member size we allow extracting: 2**37 bytes, i.e. 128 GiB.
65 // We also should leave some wiggle room for code adding to the size, and
66 // possibly for conversion to signed, so this should not be larger than about 2**62.
67 static const unsigned long long APT_FILESIZE_LIMIT = 1llu << 37;
68 
69 // ExtractTar::ExtractTar - Constructor /*{{{*/
70 // ---------------------------------------------------------------------
71 /* Binds File to Fd and stores Max as MaxInSize and DecompressionProgram as DecompressProg. Nothing is opened here; Go() opens the input through StartGzip(). */
72 ExtractTar::ExtractTar(FileFd &Fd,unsigned long long Max,string DecompressionProgram)
73  : File(Fd), MaxInSize(Max), DecompressProg(DecompressionProgram)
74 {
75  GZPid = -1;
76  Eof = false;
77 }
78  /*}}}*/
79 // ExtractTar::~ExtractTar - Destructor /*{{{*/
80 // ---------------------------------------------------------------------
81 /* Calls Done() so the input stream is closed on destruction; the result of Done() is ignored. */
83 {
84  // Error close
85  Done();
86 }
87  /*}}}*/
88 // ExtractTar::Done - Reap the gzip sub process /*{{{*/
90 {
91  return InFd.Close(); // closes the input stream opened by StartGzip(); the result of Close() is passed through
92 }
93  /*}}}*/
94 // ExtractTar::StartGzip - Startup gzip /*{{{*/
95 // ---------------------------------------------------------------------
96 /* This opens InFd on the descriptor of File through the configured
97  compressor whose Name equals DecompressProg, so the decompressor reads
98  from the file itself. If no compressor matches, an error is reported. */
100 {
101  if (DecompressProg.empty())
102  {
104  return true;
105  }
106 
107  std::vector<APT::Configuration::Compressor> const compressors = APT::Configuration::getCompressors();
108  std::vector<APT::Configuration::Compressor>::const_iterator compressor = compressors.begin();
109  for (; compressor != compressors.end(); ++compressor) {
110  if (compressor->Name == DecompressProg) {
111  return InFd.OpenDescriptor(File.Fd(), FileFd::ReadOnly, *compressor, false); // last argument is AutoClose=false
112  }
113  }
114 
115  return _error->Error(_("Cannot find a configured compressor for '%s'"),
116  DecompressProg.c_str());
117 
118 }
119  /*}}}*/
120 // ExtractTar::Go - Perform extraction /*{{{*/
121 // ---------------------------------------------------------------------
122 /* This reads each 512 byte block from the archive, verifies the header
123  checksum and decodes the header fields into a pkgDirStream::Item. The item is passed to Stream.DoItem(); the Fd it hands back
124  decides whether the member data is skipped, written to Fd or passed to Stream.Process(). Returns false on error. */
126 {
127  if (StartGzip() == false)
128  return false;
129 
130  // Loop over all blocks
131  string LastLongLink, ItemLink;
132  string LastLongName, ItemName;
133  while (1)
134  {
135  bool BadRecord = false;
136  unsigned char Block[512];
137  if (InFd.Read(Block,sizeof(Block),true) == false) // third argument is AllowEof; end of input is detected just below
138  return false;
139 
140  if (InFd.Eof() == true)
141  break;
142 
143  // Get the checksum
144  TarHeader *Tar = (TarHeader *)Block;
145  unsigned long CheckSum;
146  if (StrToNum(Tar->Checksum,CheckSum,sizeof(Tar->Checksum),8) == false)
147  return _error->Error(_("Corrupted archive"));
148 
149  /* Compute the checksum field. The actual checksum is blanked out
150  with spaces so it is not included in the computation */
151  unsigned long NewSum = 0;
152  memset(Tar->Checksum,' ',sizeof(Tar->Checksum));
153  for (int I = 0; I != sizeof(Block); I++) // NOTE(review): compares a signed int with a size_t
154  NewSum += Block[I];
155 
156  /* Check for a block of nulls - in this case we kill gzip, GNU tar
157  does this.. */
158  if (NewSum == ' '*sizeof(Tar->Checksum)) // only the blanked checksum spaces contribute, so every other byte is zero
159  return Done();
160 
161  if (NewSum != CheckSum)
162  return _error->Error(_("Tar checksum failed, archive corrupted"));
163 
164  // Decode all of the fields (UID, GID, Size and MTime: base-256 first, octal as fallback)
165  pkgDirStream::Item Itm;
166  if (StrToNum(Tar->Mode,Itm.Mode,sizeof(Tar->Mode),8) == false ||
167  (Base256ToNum(Tar->UserID,Itm.UID,8) == false &&
168  StrToNum(Tar->UserID,Itm.UID,sizeof(Tar->UserID),8) == false) ||
169  (Base256ToNum(Tar->GroupID,Itm.GID,8) == false &&
170  StrToNum(Tar->GroupID,Itm.GID,sizeof(Tar->GroupID),8) == false) ||
171  (Base256ToNum(Tar->Size,Itm.Size,12) == false &&
172  StrToNum(Tar->Size,Itm.Size,sizeof(Tar->Size),8) == false) ||
173  (Base256ToNum(Tar->MTime,Itm.MTime,12) == false &&
174  StrToNum(Tar->MTime,Itm.MTime,sizeof(Tar->MTime),8) == false) ||
175  StrToNum(Tar->Major,Itm.Major,sizeof(Tar->Major),8) == false ||
176  StrToNum(Tar->Minor,Itm.Minor,sizeof(Tar->Minor),8) == false)
177  return _error->Error(_("Corrupted archive"));
178 
179  // Security check. Prevents overflows in the code below when rounding up in the skip/copy code,
180  // and provides modest protection against decompression bombs.
181  if (Itm.Size > APT_FILESIZE_LIMIT)
182  return _error->Error("Tar member too large: %llu > %llu bytes", Itm.Size, APT_FILESIZE_LIMIT);
183 
184  // Grab the filename and link target: use last long name if one was
185  // set, otherwise use the header value as-is, but remember that it may
186  // fill the entire 100-byte block and needs to be zero-terminated.
187  // See Debian Bug #689582.
188  if (LastLongName.empty() == false)
189  Itm.Name = (char *)LastLongName.c_str();
190  else
191  Itm.Name = (char *)ItemName.assign(Tar->Name, sizeof(Tar->Name)).c_str(); // copying into a std::string supplies the terminating NUL
192  if (Itm.Name[0] == '.' && Itm.Name[1] == '/' && Itm.Name[2] != 0) // drop a leading "./" unless the name is exactly "./"
193  Itm.Name += 2;
194 
195  if (LastLongLink.empty() == false)
196  Itm.LinkTarget = (char *)LastLongLink.c_str();
197  else
198  Itm.LinkTarget = (char *)ItemLink.assign(Tar->LinkName, sizeof(Tar->LinkName)).c_str();
199 
200  // Convert the type over. NOTE(review): this listing lacks the original lines between each case label and its break (205, 209, 213, ...).
201  switch (Tar->LinkFlag)
202  {
203  case NormalFile0:
204  case NormalFile:
206  break;
207 
208  case HardLink:
210  break;
211 
212  case SymbolicLink:
214  break;
215 
216  case CharacterDevice:
218  break;
219 
220  case BlockDevice:
222  break;
223 
224  case Directory:
226  break;
227 
228  case FIFO:
230  break;
231 
232  case GNU_LongLink:
233  {
234  unsigned long long Length = Itm.Size;
235  unsigned char Block[512];
236  if (Length > APT_LONGNAME_LIMIT)
237  return _error->Error("Long name to large: %llu bytes > %llu bytes", Length, APT_LONGNAME_LIMIT); // NOTE(review): message typo, "to large" should read "too large"
238  while (Length > 0)
239  {
240  if (InFd.Read(Block,sizeof(Block),true) == false)
241  return false;
242  if (Length <= sizeof(Block)) // final block; NOTE(review): GNU_LongName uses '<' here, same effect because both paths append the full block
243  {
244  LastLongLink.append(Block,Block+sizeof(Block)); // the whole 512-byte block is appended, padding included
245  break;
246  }
247  LastLongLink.append(Block,Block+sizeof(Block));
248  Length -= sizeof(Block);
249  }
250  continue; // the stored link target applies to the next header; this skips the erase() calls at the loop end
251  }
252 
253  case GNU_LongName:
254  {
255  unsigned long long Length = Itm.Size;
256  unsigned char Block[512];
257  if (Length > APT_LONGNAME_LIMIT)
258  return _error->Error("Long name to large: %llu bytes > %llu bytes", Length, APT_LONGNAME_LIMIT); // NOTE(review): message typo, "to large" should read "too large"
259  while (Length > 0)
260  {
261  if (InFd.Read(Block,sizeof(Block),true) == false)
262  return false;
263  if (Length < sizeof(Block)) // final partial block
264  {
265  LastLongName.append(Block,Block+sizeof(Block)); // the whole 512-byte block is appended, padding included
266  break;
267  }
268  LastLongName.append(Block,Block+sizeof(Block));
269  Length -= sizeof(Block);
270  }
271  continue; // the stored name applies to the next header; this skips the erase() calls at the loop end
272  }
273 
274  default:
275  BadRecord = true;
276  _error->Warning(_("Unknown TAR header type %u"), (unsigned)Tar->LinkFlag);
277  break;
278  }
279 
280  int Fd = -1;
281  if (not BadRecord && not Stream.DoItem(Itm, Fd)) // DoItem takes Fd by reference and may change it
282  return false;
283 
284  if (Fd == -1 || Fd < -2 || BadRecord) // nobody wants the data: skip it, rounded up to whole 512-byte blocks
285  {
286  if (Itm.Size > 0 && not InFd.Skip(((Itm.Size + (sizeof(Block) - 1)) / sizeof(Block)) * sizeof(Block)))
287  return false;
288  }
289  else if (Itm.Size != 0)
290  {
291  // Copy the file over the FD
292  auto Size = Itm.Size;
293  unsigned char Junk[32*1024]; // a multiple of 512, so the rounded-up read below never exceeds the buffer
294  do
295  {
296  auto const Read = std::min<unsigned long long>(Size, sizeof(Junk));
297  if (not InFd.Read(Junk, ((Read + (sizeof(Block) - 1)) / sizeof(Block)) * sizeof(Block))) // reads whole blocks; only the first Read bytes are member data
298  return false;
299 
300  if (Fd > 0) // NOTE(review): Fd == 0 matches neither branch, so that data is read and dropped
301  {
302  if (not FileFd::Write(Fd, Junk, Read))
303  return Stream.Fail(Itm, Fd);
304  }
305  // An Fd of -2 means to send to a special processing function
306  else if (Fd == -2)
307  {
308  if (not Stream.Process(Itm, Junk, Read, Itm.Size - Size)) // last argument: offset of this chunk within the member
309  return Stream.Fail(Itm, Fd);
310  }
311 
312  Size -= Read;
313  } while (Size != 0);
314  }
315 
316  // And finish up
317  if (not BadRecord && not Stream.FinishedFile(Itm, Fd))
318  return false;
319  LastLongName.erase();
320  LastLongLink.erase();
321  }
322 
323  return Done();
324 }
325  /*}}}*/
static bool std::string const metaIndex const *const pkgAcqMetaClearSig *const pkgAcquire::Item *const I
bool Eof
Definition: extracttar.h:40
std::string DecompressProg
Definition: extracttar.h:41
virtual ~ExtractTar()
Definition: extracttar.cc:82
@ GNU_LongLink
Definition: extracttar.h:34
@ SymbolicLink
Definition: extracttar.h:32
@ CharacterDevice
Definition: extracttar.h:32
@ GNU_LongName
Definition: extracttar.h:34
bool Done()
Definition: extracttar.cc:89
FileFd InFd
Definition: extracttar.h:39
bool StartGzip()
Definition: extracttar.cc:99
FileFd & File
Definition: extracttar.h:36
ExtractTar(FileFd &Fd, unsigned long long Max, std::string DecompressionProgram)
Definition: extracttar.cc:72
bool Go(pkgDirStream &Stream)
Definition: extracttar.cc:125
Definition: fileutl.h:39
bool OpenDescriptor(int Fd, unsigned int const Mode, CompressMode Compress, bool AutoClose=false)
Definition: fileutl.cc:2572
bool Skip(unsigned long long To)
Definition: fileutl.cc:2884
@ None
Definition: fileutl.h:79
@ ReadOnly
Definition: fileutl.h:59
bool Write(const void *From, unsigned long long Size)
Definition: fileutl.cc:2819
bool Eof()
Definition: fileutl.h:154
int Fd()
Definition: fileutl.h:147
bool Read(void *To, unsigned long long Size, bool AllowEof)
Definition: fileutl.h:89
bool Close()
Definition: fileutl.cc:2977
virtual bool FinishedFile(Item &Itm, int Fd)
Definition: dirstream.cc:89
virtual bool DoItem(Item &Itm, int &Fd)
Definition: dirstream.cc:30
virtual bool Fail(Item &Itm, int Fd)
Definition: dirstream.cc:110
virtual bool Process(Item &, const unsigned char *, unsigned long long, unsigned long long)
Definition: dirstream.h:52
static const unsigned long long APT_LONGNAME_LIMIT
Definition: extracttar.cc:62
static const unsigned long long APT_FILESIZE_LIMIT
Definition: extracttar.cc:67
APT_PUBLIC std::vector< Compressor > const getCompressors(bool const Cached=true)
Return a vector of Compressors supported for data.tar's.
unsigned long Major
Definition: dirstream.h:45
unsigned long MTime
Definition: dirstream.h:44
unsigned long long Size
Definition: dirstream.h:43
enum pkgDirStream::Item::Type_t Type
unsigned long Minor
Definition: dirstream.h:46
unsigned long UID
Definition: dirstream.h:41
unsigned long GID
Definition: dirstream.h:42
unsigned long Mode
Definition: dirstream.h:40
bool StrToNum(const char *Str, unsigned long &Res, unsigned Len, unsigned Base)
Definition: strutl.cc:1146
bool Base256ToNum(const char *Str, unsigned long long &Res, unsigned int Len)
Definition: strutl.cc:1191