irods  4.2.8
About: iRODS (the integrated Rule Oriented Data System) is a distributed data-management system for creating data grids, digital libraries, persistent archives, and real-time data systems.
  Fossies Dox: irods-4.2.8.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

irods_path_recursion.cpp
Go to the documentation of this file.
2 
3 #include "rodsPath.h"
4 #include "rodsErrorTable.h"
5 #include "irods_exception.hpp"
6 #include "rodsLog.h"
7 
8 #include <stdlib.h>
9 #include <cstdlib>
10 #include <iomanip>
11 
12 #include <string>
13 #include <sstream>
14 #include <iostream>
15 #include <algorithm>
16 #include <map>
17 
18 #include <boost/filesystem.hpp>
19 #include <boost/filesystem/operations.hpp>
20 #include <boost/filesystem/convenience.hpp>
21 #include <boost/format.hpp>
22 
23 // Returns nothing, throws exception if file system loop is detected.
24 // Each path cannonical path which is inserted into the map, also gets
25 // a second string - the user path which led us here. This serves us
26 // for a descriptive error message if the same canonical path shows up
27 // again - with a different user path.
28 void
29 irods::check_for_filesystem_loop(boost::filesystem::path const & cpath,
30  boost::filesystem::path const & upath,
31  recursion_map_t &pathmap)
32 {
33  namespace fs = boost::filesystem;
34 
35  const std::string& pathstring(cpath.string()); // Canonical path string
36 
37  recursion_map_t::iterator it(pathmap.lower_bound(pathstring));
38  if (it == pathmap.end() || pathstring < it->first)
39  {
40  // not found - this is the first time we're seeing this canonical path
41  pathmap.insert(it, make_pair(pathstring, upath.string())); // hinted insertion
42 
43  } else {
44  // File system loop discovered.
45  // We have the current candidate fs:path (upath).
46  // Need another fs:path for the original user path that's in the map<>
47  const fs::path& origpath(it->second);
48 
49  // Either or both could be the offending symlink.
50  // Both cases are displayed differently.
51 
52  std::ostringstream sstr;
53  if (fs::is_symlink(upath) && fs::is_symlink(origpath))
54  {
55  // Both are symlinks
56  sstr << "File system loop detected: Both "
57  << "\"" << upath.string() << "\""
58  << " and \"" << it->second << "\""
59  << " are symbolic links to canonical path \"" << pathstring << "\"";
60  }
61  else
62  {
63  // Only one of them is a symlink:
64  // Figure out which of the two is the symlink for the message below
65  const std::string sympath( (fs::is_symlink(upath)? upath.string() : it->second ));
66 
67  sstr << "File system loop detected: \"" << sympath << "\""
68  << " is a symbolic link to canonical path \"" << pathstring << "\"";
69  }
70  THROW( USER_INPUT_PATH_ERR, sstr.str().c_str() );
71  }
72 }
73 
74 // Called in places where file system loop detection is not desired/needed.
75 // The rodsArguments_t object is for checking for the "--link" flag, and the
76 // character buffer is for the user filename.
77 // Checks for existence of path as a symlink or a directory.
78 // Will throw irods::exception if boost file system errors occur in the process.
79 bool
80 irods::is_path_valid_for_recursion( boost::filesystem::path const & userpath,
81  recursion_map_t &usermap,
82  bool dashdashlink )
83 {
84  namespace fs = boost::filesystem;
85 
86  const fs::path resolved = [&userpath](const fs::path& up)
87  {
88  try {
89  return fs::canonical(up);
90  }
91  catch(const fs::filesystem_error &_ec)
92  {
93  std::ostringstream sstr;
94  sstr << _ec.code().message() << "\n" << "Path = " << userpath.string();
95  THROW( USER_INPUT_PATH_ERR, sstr.str().c_str() );
96  }
97  }(userpath);
98 
99  const std::string& canpath = resolved.string();
100 
101  bool path_exists = [&canpath,&userpath](const fs::path& up)
102  {
103  try {
104  return fs::exists(up);
105  }
106  catch(const fs::filesystem_error &_ec)
107  {
108  std::ostringstream sstr;
109  sstr << _ec.code().message() << "\n" << "Path = " << userpath.string();
110  if (canpath.size() > 0)
111  {
112  sstr << "\nCanonical path = " << canpath;
113  }
114  THROW( USER_INPUT_PATH_ERR, sstr.str().c_str() );
115  }
116  }(resolved);
117 
118  if (path_exists)
119  {
120  if (dashdashlink && fs::is_symlink(userpath))
121  {
122  // A symlink with the --link flag turned on
123  return false;
124  }
125  else if (fs::is_directory( resolved ))
126  {
127  try {
128  // Adds the path to the usermap, if it's not there yet.
129  // Throws an irods::exception if a loop is found (there is
130  // already an instance of the path in usermap).
131  check_for_filesystem_loop(resolved, userpath, usermap);
132  } catch ( const irods::exception & _e ) {
133  return false;
134  }
135 
136  }
137  // Whether it is a file or directory, it can be included.
138  return true;
139  }
140  else
141  {
142  // The canonical path does not exist. The code can
143  // really never get here, because exists() would have
144  // thrown an exception above. But - belt and suspenders.
145  return false;
146  }
147 }
148 
149 // Called in from places where file system loop detection is not desired/needed,
150 // regardless of whether or not the recursion_map_t has been initialized by
151 // check_directories_for_loops().
152 //
153 // The rodsArguments_t object is for checking for the "--link" flag, and the
154 // character buffer is for the user filename.
155 //
156 // Checks for existence of path as a symlink or a directory.
157 // Will throw irods::exception if boost file system errors occur in the process.
158 bool
159 irods::is_path_valid_for_recursion( rodsArguments_t const * const rodsArgs, const char *myPath )
160 {
161  namespace fs = boost::filesystem;
162 
163  // This variant of the function is only concerned with whether the path
164  // is a symlink to a file or directory, whether the "--link" flag was
165  // specified, and if the path exists:
166  irods::recursion_map_t dummyset;
167  const fs::path p( myPath );
168 
169  if ( rodsArgs != NULL && rodsArgs->link == True )
170  {
171  // The "--link" flag has been specified. If it can be
172  // determined that the file exists and is a symlink, we
173  // can return immediately with false - the path is not
174  // valid for recursion.
175  try
176  {
177  if ( fs::exists( p ) && fs::is_symlink( p ) ) {
178  return false;
179  }
180  }
181  catch(const fs::filesystem_error &ec)
182  {
183  std::ostringstream sstr;
184 
185  sstr << ec.code().message() << "\nPath: " << p.string();
186  rodsLog( LOG_ERROR, sstr.str().c_str() );
187 
188  return false;
189  }
190  }
191  // The path is not a symlink, or the "--link" has not been specified.
192  // Check for recursion. This call may throws n exception if the path
193  // creates a file system loop, or if a boost::filesystem exception were
194  // thrown in the process.
195  return irods::is_path_valid_for_recursion(p, dummyset, (rodsArgs->link == True? true: false));
196 }
197 
198 // see .hpp for comment
199 int
200 irods::check_directories_for_loops( boost::filesystem::path const & dirpath,
201  irods::recursion_map_t& pathmap,
202  bool dashdashlink )
203 {
204  namespace fs = boost::filesystem;
205  int last_error = 0;
206 
207  try {
208  // Throws an exception if the path creates a file system
209  // loop, or if a boost::filesystem exception was thrown in
210  // the process. Does not care whether or not the path is valid
211  // for recursion, since when it is, it will be added to the map<>
212  // by is_path_valid_for_recursion(), or not added if it is not valid.
213  // We only care about the exception at this point.
214  irods::is_path_valid_for_recursion(dirpath, pathmap, dashdashlink);
215  } catch ( const irods::exception& _e ) {
217  return USER_INPUT_PATH_ERR;
218  }
219 
220  // This outer try/catch block will detect fs:: exceptions thrown by
221  // fs::recursive_directory_iterator, which will happen if the iterator
222  // comes across a directory it does not have permission to examine.
223  // Whereas this causes an error that interrupts the scan, the inner
224  // try/catch will simply note the error and continue the scan until
225  // all symbolic links are examined.
226  try {
227  // Default constructor creates an end iterator
228  fs::recursive_directory_iterator end_itr;
229  const fs::symlink_option opt = (dashdashlink? fs::symlink_option::none : fs::symlink_option::recurse);
230  for ( fs::recursive_directory_iterator itr( dirpath, opt ); itr != end_itr; ++itr )
231  {
232  fs::path p = itr->path();
233  try {
234  // See comment in the try{} clause above. We want to
235  // continue regardless if the path is valid for recursion.
236  irods::is_path_valid_for_recursion(p, pathmap, dashdashlink);
237  } catch ( const irods::exception& _e ) {
239  last_error = USER_INPUT_PATH_ERR;
240  }
241  }
242  } catch (const fs::filesystem_error& _fsx) {
243  // This is to catch exceptions from the iterator.
244  rodsLog( LOG_ERROR, _fsx.what() );
245 
246  // Cannot continue scanning - the iterator is messed up
247  return USER_INPUT_PATH_ERR;
248  }
249 
250  return last_error; // Can be 0 if no errors
251 }
252 
253 // Issue 3988: For irsync and iput mostly, scan all source physical directories
254 int
256  const std::vector<std::string>& dirvec,
257  bool dashdashlink)
258 {
259  namespace fs = boost::filesystem;
260 
261  int status = 0;
262  int savedStatus = 0;
263 
264  for (auto const& srcpath: dirvec)
265  {
266  fs::path p(srcpath);
267  if ((status = irods::check_directories_for_loops(p, pathmap, dashdashlink)) < 0 )
268  {
269  savedStatus = status;
270  }
271  }
272  if (savedStatus < 0)
273  {
274  return savedStatus;
275  }
276  return status;
277 }
278 
279 // This function does the filesystem loop and sanity check
280 // for both irsync and iput
281 int
283  rodsArguments_t const * const rodsArgs,
284  rodsPathInp_t const * const rodsPathInp )
285 {
286  irods::scantime sctime; // initialized to now()
287  int status = 0;
288  std::vector<std::string> dirvec;
289 
290  // Check if a directory scan is necessary. Only if there is at
291  // least one physical source directory targeted at a collection,
292  // is the scan initiated.
293  for ( int i = 0; i < rodsPathInp->numSrc; i++ )
294  {
295  if ( rodsPathInp->srcPath[i].objType == LOCAL_DIR_T &&
296  rodsPathInp->targPath[i].objType == COLL_OBJ_T)
297  {
298  dirvec.push_back(const_cast<const char *>(rodsPathInp->srcPath[i].outPath));
299  }
300  }
301 
302 #define DISPLAY_PREFLIGHT true
303 
304  if (dirvec.size() > 0)
305  {
306  if ( rodsArgs->recursive != True ) {
307  std::string strlist(dirvec.size() > 1? "directories " : "directory ");
308 
309  // make up the directory path list for the error message
310  size_t i = 0;
311  for (auto const& srcpath: dirvec)
312  {
313  if (i != 0)
314  {
315  strlist += ", ";
316  }
317  strlist += "\"";
318  strlist += srcpath + "\"";
319  ++i;
320  }
321  rodsLog( LOG_ERROR, "file_system_sanity_check: -r option must be used for %s.", strlist.c_str() );
322  return USER_INPUT_OPTION_ERR;
323  }
324 
325  if (DISPLAY_PREFLIGHT) { std::cout << "Running recursive pre-scan... " << std::flush; }
326 
328  dirvec,
329  (rodsArgs->link == True? true: false))) < 0)
330  {
331  if (DISPLAY_PREFLIGHT) {
332  std::cout << "pre-scan complete... errors found.\n" << std::flush;
333  std::cout << "Aborting data transfer.\n" << std::flush;
334  }
335  if (getenv(irods::chrono_env)) {
336  std::cout << "Directory scan duration: " << sctime.get_duration_string() << " seconds\n" << std::flush;
337  }
338  return status;
339  }
340  if (DISPLAY_PREFLIGHT) {
341  std::cout << "pre-scan complete... " << std::flush;
342  std::cout << "transferring data...\n" << std::flush;
343  }
344  if (getenv(irods::chrono_env)) {
345  std::cout << "Directory scan duration: " << sctime.get_duration_string() << " seconds\n" << std::flush;
346  }
347  }
348  return status;
349 }
350 
351 // Issue 4006: disallow mixed files and directory sources with the
352 // recursive (-r) option.
353 int
355  rodsPathInp_t const * const rodsPathInp )
356 {
357  // If the "-r" flag is not used, there's nothing to check.
358  if ( rodsArgs->recursive != True )
359  {
360  return 0;
361  }
362 
363  std::vector<std::string> filevec;
364 
365  // There cannot be any regular file sources on the command line
366  for ( int i = 0; i < rodsPathInp->numSrc; i++ )
367  {
368  if ( rodsPathInp->srcPath[i].objType == LOCAL_FILE_T )
369  {
370  filevec.push_back(const_cast<const char *>(rodsPathInp->srcPath[i].outPath));
371  }
372  }
373 
374  if (filevec.size() == 0)
375  {
376  // No regular file sources found
377  return 0;
378  }
379  std::string strlist(filevec.size() > 1? "files " : "file ");
380 
381  // make up the directory path list for the error message
382  size_t i = 0;
383  for (auto const& srcpath: filevec)
384  {
385  if (i != 0)
386  {
387  strlist += ", ";
388  }
389  strlist += "\"";
390  strlist += srcpath + "\"";
391  ++i;
392  }
394  "disallow_file_dir_mix_on_command_line: Cannot include regular %s on the command line with the \"-r\" flag.",
395  strlist.c_str() );
396  return USER_INPUT_OPTION_ERR;
397 }
398 
399 
400 // Class members for irods::scantime
401 //
403  start_(std::chrono::high_resolution_clock::now())
404 { ; }
405 
407 { ; }
408 
409 std::string
411 {
412  std::chrono::time_point<std::chrono::high_resolution_clock> endt( std::chrono::high_resolution_clock::now() );
413  double elapsedTime = std::chrono::duration<double, std::milli>(endt - start_).count();
414 
415  std::ostringstream sstr;
416  sstr << std::setprecision(4) << (elapsedTime / 1000.0);
417  return sstr.str();
418 }
rodsLog
void rodsLog(int level, const char *formatStr,...)
Definition: rodsLog.cpp:86
NULL
#define NULL
Definition: rodsDef.h:70
irods::scan_all_source_directories_for_loops
int scan_all_source_directories_for_loops(irods::recursion_map_t &, const std::vector< std::string > &, bool)
Definition: irods_path_recursion.cpp:255
True
#define True
Definition: parseCommandLine.h:11
rodsPath.h
THROW
#define THROW(_code, _msg)
Definition: irods_exception.hpp:68
rodsArguments_t
Definition: parseCommandLine.h:14
irods::scantime::~scantime
virtual ~scantime()
Definition: irods_path_recursion.cpp:406
irods::chrono_env
static const char * chrono_env
Definition: irods_path_recursion.hpp:67
irods::disallow_file_dir_mix_on_command_line
int disallow_file_dir_mix_on_command_line(rodsArguments_t const *const rodsArgs, rodsPathInp_t const *const rodsPathInp)
Definition: irods_path_recursion.cpp:354
RodsPathInp
Definition: rodsPath.h:33
irods_exception.hpp
DISPLAY_PREFLIGHT
#define DISPLAY_PREFLIGHT
irods::scantime
Definition: irods_path_recursion.hpp:70
irods::experimental::administration::client::v1::exists
auto exists(rcComm_t &conn, const user &user) -> bool
Definition: user_administration.cpp:359
pid_age.p
p
Definition: pid_age.py:13
RodsPathInp::srcPath
rodsPath_t * srcPath
Definition: rodsPath.h:35
LOG_ERROR
#define LOG_ERROR
Definition: rodsLog.h:43
RodsPath::objType
objType_t objType
Definition: rodsPath.h:19
irods::scantime::get_duration_string
std::string get_duration_string() const
Definition: irods_path_recursion.cpp:410
COLL_OBJ_T
@ COLL_OBJ_T
Definition: rodsType.h:39
RodsPathInp::targPath
rodsPath_t * targPath
Definition: rodsPath.h:37
rodsArguments_t::link
int link
Definition: parseCommandLine.h:57
irods::file_system_sanity_check
int file_system_sanity_check(irods::recursion_map_t &, rodsArguments_t const *const, rodsPathInp_t const *const)
Definition: irods_path_recursion.cpp:282
irods.pypyodbc.status
status
Definition: pypyodbc.py:467
rodsLog.h
irods::is_path_valid_for_recursion
bool is_path_valid_for_recursion(boost::filesystem::path const &, recursion_map_t &, bool)
Definition: irods_path_recursion.cpp:80
irods::exception::client_display_what
virtual const char * client_display_what() const
Definition: irods_exception.cpp:52
irods::check_directories_for_loops
int check_directories_for_loops(boost::filesystem::path const &, irods::recursion_map_t &, bool)
Definition: irods_path_recursion.cpp:200
RodsPath::outPath
char outPath[(1024+64)]
Definition: rodsPath.h:24
rodsArguments_t::recursive
int recursive
Definition: parseCommandLine.h:85
irods::scantime::scantime
scantime()
Definition: irods_path_recursion.cpp:402
USER_INPUT_OPTION_ERR
@ USER_INPUT_OPTION_ERR
Definition: rodsErrorTable.h:249
LOCAL_DIR_T
@ LOCAL_DIR_T
Definition: rodsType.h:42
RodsPathInp::numSrc
int numSrc
Definition: rodsPath.h:34
irods::exception
Definition: irods_exception.hpp:15
rodsErrorTable.h
irods_path_recursion.hpp
irods::check_for_filesystem_loop
void check_for_filesystem_loop(boost::filesystem::path const &, boost::filesystem::path const &, recursion_map_t &)
Definition: irods_path_recursion.cpp:29
LOCAL_FILE_T
@ LOCAL_FILE_T
Definition: rodsType.h:41
irods::recursion_map_t
std::map< std::string, std::string > recursion_map_t
Definition: irods_path_recursion.hpp:20
USER_INPUT_PATH_ERR
@ USER_INPUT_PATH_ERR
Definition: rodsErrorTable.h:248