w32tex
About: TeX Live provides a comprehensive TeX system including all the major TeX-related programs, macro packages, and fonts that are free software. Windows sources.
  Fossies Dox: w32tex-src.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

pdfdetach.cc
Go to the documentation of this file.
1 //========================================================================
2 //
3 // pdfdetach.cc
4 //
5 // Copyright 2010 Glyph & Cog, LLC
6 //
7 //========================================================================
8 
9 //========================================================================
10 //
11 // Modified under the Poppler project - http://poppler.freedesktop.org
12 //
13 // All changes made under the Poppler project to this file are licensed
14 // under GPL version 2 or later
15 //
16 // Copyright (C) 2011 Carlos Garcia Campos <carlosgc@gnome.org>
17 // Copyright (C) 2013 Yury G. Kudryashov <urkud.urkud@gmail.com>
18 // Copyright (C) 2014, 2017 Adrian Johnson <ajohnson@redneon.com>
19 // Copyright (C) 2018, 2020 Albert Astals Cid <aacid@kde.org>
20 // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
21 // Copyright (C) 2019, 2021 Oliver Sander <oliver.sander@tu-dresden.de>
22 // Copyright (C) 2020 <r.coeffier@bee-buzziness.com>
23 //
24 // To see a description of the changes please see the Changelog file that
25 // came with your tarball or type make ChangeLog if you are building from git
26 //
27 //========================================================================
28 
29 #include "config.h"
30 #include <poppler-config.h>
31 #include <cstdio>
32 #include "goo/gmem.h"
33 #include "parseargs.h"
34 #include "Annot.h"
35 #include "GlobalParams.h"
36 #include "Page.h"
37 #include "PDFDoc.h"
38 #include "PDFDocFactory.h"
39 #include "FileSpec.h"
40 #include "CharTypes.h"
41 #include "Catalog.h"
42 #include "UnicodeMap.h"
43 #include "PDFDocEncoding.h"
44 #include "Error.h"
45 #include "Win32Console.h"
46 
47 static bool doList = false;
48 static int saveNum = 0;
49 static char saveFile[128] = "";
50 static bool saveAll = false;
51 static char savePath[1024] = "";
52 static char textEncName[128] = "";
53 static char ownerPassword[33] = "\001";
54 static char userPassword[33] = "\001";
55 static bool printVersion = false;
56 static bool printHelp = false;
57 
58 static const ArgDesc argDesc[] = { { "-list", argFlag, &doList, 0, "list all embedded files" },
59  { "-save", argInt, &saveNum, 0, "save the specified embedded file (file number)" },
60  { "-savefile", argString, &saveFile, sizeof(saveFile), "save the specified embedded file (file name)" },
61  { "-saveall", argFlag, &saveAll, 0, "save all embedded files" },
62  { "-o", argString, savePath, sizeof(savePath), "file name for the saved embedded file" },
63  { "-enc", argString, textEncName, sizeof(textEncName), "output text encoding name" },
64  { "-opw", argString, ownerPassword, sizeof(ownerPassword), "owner password (for encrypted files)" },
65  { "-upw", argString, userPassword, sizeof(userPassword), "user password (for encrypted files)" },
66  { "-v", argFlag, &printVersion, 0, "print copyright and version info" },
67  { "-h", argFlag, &printHelp, 0, "print usage information" },
68  { "-help", argFlag, &printHelp, 0, "print usage information" },
69  { "--help", argFlag, &printHelp, 0, "print usage information" },
70  { "-?", argFlag, &printHelp, 0, "print usage information" },
71  {} };
72 
73 int main(int argc, char *argv[])
74 {
75  std::unique_ptr<PDFDoc> doc;
77  const UnicodeMap *uMap;
78  GooString *ownerPW, *userPW;
79  char uBuf[8];
80  char path[1024];
81  char *p;
82  bool ok;
83  bool hasSaveFile;
84  int exitCode;
85  std::vector<FileSpec *> embeddedFiles;
86  int nFiles, nPages, n, i, j;
87  FileSpec *fileSpec;
88  Page *page;
89  Annots *annots;
90  Annot *annot;
91  const GooString *s1;
92  Unicode u;
93  bool isUnicode;
94 
95  Win32Console win32Console(&argc, &argv);
96  exitCode = 99;
97 
98  // parse args
99  ok = parseArgs(argDesc, &argc, argv);
100  hasSaveFile = strlen(saveFile) > 0;
101  if ((doList ? 1 : 0) + ((saveNum != 0) ? 1 : 0) + ((hasSaveFile != 0) ? 1 : 0) + (saveAll ? 1 : 0) != 1) {
102  ok = false;
103  }
104  if (!ok || argc != 2 || printVersion || printHelp) {
105  fprintf(stderr, "pdfdetach version %s\n", PACKAGE_VERSION);
106  fprintf(stderr, "%s\n", popplerCopyright);
107  fprintf(stderr, "%s\n", xpdfCopyright);
108  if (!printVersion) {
109  printUsage("pdfdetach", "<PDF-file>", argDesc);
110  }
111  goto err0;
112  }
113  fileName = new GooString(argv[1]);
114 
115  // read config file
116  globalParams = std::make_unique<GlobalParams>();
117  if (textEncName[0]) {
119  }
120 
121  // get mapping to output encoding
122  if (!(uMap = globalParams->getTextEncoding())) {
123  error(errConfig, -1, "Couldn't get text encoding");
124  delete fileName;
125  goto err0;
126  }
127 
128  // open PDF file
129  if (ownerPassword[0] != '\001') {
130  ownerPW = new GooString(ownerPassword);
131  } else {
132  ownerPW = nullptr;
133  }
134  if (userPassword[0] != '\001') {
135  userPW = new GooString(userPassword);
136  } else {
137  userPW = nullptr;
138  }
139 
140  doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
141 
142  if (userPW) {
143  delete userPW;
144  }
145  if (ownerPW) {
146  delete ownerPW;
147  }
148  if (!doc->isOk()) {
149  exitCode = 1;
150  goto err2;
151  }
152 
153  for (i = 0; i < doc->getCatalog()->numEmbeddedFiles(); ++i)
154  embeddedFiles.push_back(doc->getCatalog()->embeddedFile(i));
155 
156  nPages = doc->getCatalog()->getNumPages();
157  for (i = 0; i < nPages; ++i) {
158  page = doc->getCatalog()->getPage(i + 1);
159  if (!page)
160  continue;
161  annots = page->getAnnots();
162  if (!annots)
163  break;
164 
165  for (j = 0; j < annots->getNumAnnots(); ++j) {
166  annot = annots->getAnnot(j);
167  if (annot->getType() != Annot::typeFileAttachment)
168  continue;
169  embeddedFiles.push_back(new FileSpec(static_cast<AnnotFileAttachment *>(annot)->getFile()));
170  }
171  }
172 
173  nFiles = embeddedFiles.size();
174 
175  // list embedded files
176  if (doList) {
177  printf("%d embedded files\n", nFiles);
178  for (i = 0; i < nFiles; ++i) {
179  fileSpec = embeddedFiles[i];
180  printf("%d: ", i + 1);
181  s1 = fileSpec->getFileName();
182  if (!s1) {
183  exitCode = 3;
184  goto err2;
185  }
186  if (s1->hasUnicodeMarker()) {
187  isUnicode = true;
188  j = 2;
189  } else {
190  isUnicode = false;
191  j = 0;
192  }
193  while (j < s1->getLength()) {
194  if (isUnicode) {
195  u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j + 1) & 0xff);
196  j += 2;
197  } else {
198  u = pdfDocEncoding[s1->getChar(j) & 0xff];
199  ++j;
200  }
201  n = uMap->mapUnicode(u, uBuf, sizeof(uBuf));
202  fwrite(uBuf, 1, n, stdout);
203  }
204  fputc('\n', stdout);
205  }
206 
207  // save all embedded files
208  } else if (saveAll) {
209  for (i = 0; i < nFiles; ++i) {
210  fileSpec = embeddedFiles[i];
211  if (savePath[0]) {
212  n = strlen(savePath);
213  if (n > (int)sizeof(path) - 2) {
214  n = sizeof(path) - 2;
215  }
216  memcpy(path, savePath, n);
217  path[n] = '/';
218  p = path + n + 1;
219  } else {
220  p = path;
221  }
222  s1 = fileSpec->getFileName();
223  if (!s1) {
224  exitCode = 3;
225  goto err2;
226  }
227  if (s1->hasUnicodeMarker()) {
228  isUnicode = true;
229  j = 2;
230  } else {
231  isUnicode = false;
232  j = 0;
233  }
234  while (j < s1->getLength()) {
235  if (isUnicode) {
236  u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j + 1) & 0xff);
237  j += 2;
238  } else {
239  u = pdfDocEncoding[s1->getChar(j) & 0xff];
240  ++j;
241  }
242  n = uMap->mapUnicode(u, uBuf, sizeof(uBuf));
243  if (p + n >= path + sizeof(path))
244  break;
245  memcpy(p, uBuf, n);
246  p += n;
247  }
248  *p = '\0';
249 
250  auto *embFile = fileSpec->getEmbeddedFile();
251  if (!embFile || !embFile->isOk()) {
252  exitCode = 3;
253  goto err2;
254  }
255  if (!embFile->save(path)) {
256  error(errIO, -1, "Error saving embedded file as '{0:s}'", p);
257  exitCode = 2;
258  goto err2;
259  }
260  }
261 
262  // save an embedded file
263  } else {
264  if (hasSaveFile) {
265  for (i = 0; i < nFiles; ++i) {
266  fileSpec = embeddedFiles[i];
267  s1 = fileSpec->getFileName();
268  if (strcmp(s1->c_str(), saveFile) == 0) {
269  saveNum = i + 1;
270  break;
271  }
272  }
273  }
274  if (saveNum < 1 || saveNum > nFiles) {
275  error(errCommandLine, -1, hasSaveFile ? "Invalid file name" : "Invalid file number");
276  goto err2;
277  }
278 
279  fileSpec = embeddedFiles[saveNum - 1];
280  if (savePath[0]) {
281  p = savePath;
282  } else {
283  p = path;
284  s1 = fileSpec->getFileName();
285  if (!s1) {
286  exitCode = 3;
287  goto err2;
288  }
289  if (s1->hasUnicodeMarker()) {
290  isUnicode = true;
291  j = 2;
292  } else {
293  isUnicode = false;
294  j = 0;
295  }
296  while (j < s1->getLength()) {
297  if (isUnicode) {
298  u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j + 1) & 0xff);
299  j += 2;
300  } else {
301  u = pdfDocEncoding[s1->getChar(j) & 0xff];
302  ++j;
303  }
304  n = uMap->mapUnicode(u, uBuf, sizeof(uBuf));
305  if (p + n >= path + sizeof(path))
306  break;
307  memcpy(p, uBuf, n);
308  p += n;
309  }
310  *p = '\0';
311  p = path;
312  }
313 
314  auto *embFile = fileSpec->getEmbeddedFile();
315  if (!embFile || !embFile->isOk()) {
316  exitCode = 3;
317  goto err2;
318  }
319  if (!embFile->save(p)) {
320  error(errIO, -1, "Error saving embedded file as '{0:s}'", p);
321  exitCode = 2;
322  goto err2;
323  }
324  }
325 
326  exitCode = 0;
327 
328  // clean up
329 err2:
330  for (auto &file : embeddedFiles)
331  delete file;
332 err0:
333 
334  return exitCode;
335 }
Definition: Annot.h:80
@ typeFileAttachment
Definition: Annot.h:645
GString * getType()
Definition: Annot.h:89
Definition: Annot.h:152
int getNumAnnots()
Definition: Annot.h:161
Annot * getAnnot(int i)
Definition: Annot.h:162
Page * getPage(int i)
Definition: Catalog.cc:300
int getNumPages()
Definition: Catalog.h:53
FileSpec * embeddedFile(int i)
Definition: Catalog.cc:425
EmbFile * getEmbeddedFile()
Definition: FileSpec.cc:157
const GooString * getFileName() const
Definition: FileSpec.h:62
void setTextEncoding(const char *encodingName)
UnicodeMap * getTextEncoding()
std::unique_ptr< PDFDoc > createPDFDoc(const GooString &uri, GooString *ownerPassword=nullptr, GooString *userPassword=nullptr, void *guiDataA=nullptr)
Catalog * getCatalog()
Definition: PDFDoc.h:75
GBool isOk()
Definition: PDFDoc.h:60
Definition: Page.h:112
int mapUnicode(Unicode u, char *buf, int bufSize)
Definition: UnicodeMap.cc:183
#define n
Definition: t4ht.c:1290
int strcmp()
Definition: coll.cpp:143
int printf()
#define error(a)
Definition: dviinfo.c:48
#define memcpy(d, s, n)
Definition: gsftopk.c:64
small capitals from c petite p scientific f u
Definition: afcover.h:88
small capitals from c petite p
Definition: afcover.h:72
small capitals from c petite p scientific i
Definition: afcover.h:80
#define PACKAGE_VERSION
Definition: config.h:105
@ argFlag
Definition: parseargs.h:22
@ argInt
Definition: parseargs.h:24
@ argString
Definition: parseargs.h:28
@ errCommandLine
Definition: Error.h:28
@ errIO
Definition: Error.h:30
@ errConfig
Definition: Error.h:27
GlobalParams * globalParams
Unicode pdfDocEncoding[256]
#define xpdfCopyright
Definition: config.h:31
int main(int argc, char *argv[])
Definition: pdfdetach.cc:64
static PDFDoc * doc
Definition: pdffonts.cc:89
#define fprintf
Definition: mendex.h:64
Code related to b fwrite(a, sizeof(char), b, stdout) @d C_printf(c
int getLength(char *s)
Definition: lengths.c:99
GBool parseArgs(ArgDesc *args, int *argc, char *argv[])
Definition: parseargs.c:19
void printUsage()
#define popplerCopyright
static char textEncName[128]
Definition: pdfdetach.cc:52
static int saveNum
Definition: pdfdetach.cc:48
static bool printVersion
Definition: pdfdetach.cc:55
static const ArgDesc argDesc[]
Definition: pdfdetach.cc:58
static bool printHelp
Definition: pdfdetach.cc:56
static bool saveAll
Definition: pdfdetach.cc:50
static bool doList
Definition: pdfdetach.cc:47
static char ownerPassword[33]
Definition: pdfdetach.cc:53
static char savePath[1024]
Definition: pdfdetach.cc:51
static char userPassword[33]
Definition: pdfdetach.cc:54
static char saveFile[128]
Definition: pdfdetach.cc:49
Definition: filedef.h:30
Definition: mendex.h:14
Definition: tpic.c:45
pointer path
Definition: t1imager.h:36
int j
Definition: t4ht.c:1589
s1
Definition: t4ht.c:1059
*job_name strlen((char *) job_name) - 4)
char * file
Definition: t4ht.c:931
page
Definition: tex4ht.c:3916
const char * fileName
Definition: ugrep.cpp:52
#define argv
Definition: xmain.c:270
#define argc
Definition: xmain.c:269