irods  4.2.8
About: iRODS (the integrated Rule Oriented Data System) is a distributed data-management system for creating data grids, digital libraries, persistent archives, and real-time data systems.
  Fossies Dox: irods-4.2.8.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

scanUtil.cpp
Go to the documentation of this file.
1 
3 /* Written by Jean-Yves Nief of CCIN2P3 and copyright assigned to Data Intensive Cyberinfrastructure Foundation */
4 
5 #include "rodsPath.h"
6 #include "rodsErrorTable.h"
7 #include "rodsLog.h"
8 #include "scanUtil.h"
9 #include "miscUtil.h"
10 #include "rcGlobalExtern.h"
11 #include "rcMisc.h"
12 
13 #include <boost/filesystem/operations.hpp>
14 #include <boost/filesystem/convenience.hpp>
15 
16 using namespace boost::filesystem;
17 
18 int
20  rodsArguments_t *myRodsArgs,
21  rodsPathInp_t *rodsPathInp,
22  const char * hostname ) {
23 
24  if ( rodsPathInp->numSrc != 1 ) {
25  rodsLog( LOG_ERROR, "scanObj: gave %i input source path, should give one and only one", rodsPathInp->numSrc );
26  return USER_INPUT_PATH_ERR;
27  }
28 
29  char * inpPathO = rodsPathInp->srcPath[0].outPath;
30  if ( rodsPathInp->srcPath[0].objType == LOCAL_FILE_T ||
31  rodsPathInp->srcPath[0].objType == LOCAL_DIR_T ) {
32  path p( inpPathO );
33  if ( !exists( p ) ) {
34  rodsLog( LOG_ERROR, "scanObj: %s does not exist", inpPathO );
35  return USER_INPUT_PATH_ERR;
36  }
37  /* don't do anything if it is symlink */
38  if ( is_symlink( p ) ) {
39  return 0;
40  }
41 
42  char inpPath[ LONG_NAME_LEN ];
43  snprintf( inpPath, sizeof( inpPath ), "%s", inpPathO );
44  // if it is part of a mounted collection, abort
45  if ( is_directory( p ) ) {
46  if ( int status = checkIsMount( conn, inpPath ) ) {
47  rodsLog( LOG_ERROR, "The directory %s or one of its "
48  "subdirectories to be scanned is declared as being "
49  "used for a mounted collection: abort!", inpPath );
50  return status;
51  }
52  }
53  return scanObjDir( conn, myRodsArgs, inpPath, hostname );
54  }
55  else if ( rodsPathInp->srcPath[0].objType == UNKNOWN_OBJ_T ||
56  rodsPathInp->srcPath[0].objType == COLL_OBJ_T ) {
57  return scanObjCol( conn, myRodsArgs, inpPathO );
58  }
59  else {
60  rodsLog( LOG_ERROR, "scanObj: %s does not exist", inpPathO );
61  return USER_INPUT_PATH_ERR;
62  }
63 }
64 
65 int
66 scanObjDir( rcComm_t *conn, rodsArguments_t *myRodsArgs, const char *inpPath, const char *hostname ) {
67  int status = 0;
68  char fullPath[LONG_NAME_LEN] = "\0";
69 
70  /* check if it is a directory */
71  path srcDirPath( inpPath );
72  if ( is_symlink( srcDirPath ) ) {
73  /* don't do anything if it is symlink */
74  return 0;
75  }
76  else if ( !is_directory( srcDirPath ) ) {
77  status = chkObjExist( conn, inpPath, hostname );
78  return status;
79  }
80 
81  // This variable will contain either the 0, or the last error
82  // encountered as the loop below iterates through all of the
83  // entries in the physical directory.
84  int return_status = 0;
85  directory_iterator end_itr; // default construction yields past-the-end
86  for ( directory_iterator itr( srcDirPath ); itr != end_itr; ++itr ) {
87  path cp = itr->path();
88  snprintf( fullPath, LONG_NAME_LEN, "%s",
89  cp.c_str() );
90  if ( is_symlink( cp ) ) {
91  /* don't do anything if it is symlink */
92  continue;
93  }
94  else if ( is_directory( cp ) ) {
95  if ( myRodsArgs->recursive == True ) {
96  status = scanObjDir( conn, myRodsArgs, fullPath, hostname );
97  }
98  }
99  else {
100  status = chkObjExist( conn, fullPath, hostname );
101  }
102  if (status != 0) {
103  return_status = status;
104  }
105  }
106  return return_status;
107 }
108 
109 int
110 scanObjCol( rcComm_t *conn, rodsArguments_t *myRodsArgs, const char *inpPath ) {
111  int isColl, status;
112  genQueryInp_t genQueryInp1, genQueryInp2;
113  genQueryOut_t *genQueryOut1 = NULL, *genQueryOut2 = NULL;
114  char condStr1[MAX_NAME_LEN], condStr2[MAX_NAME_LEN];
115  char firstPart[MAX_NAME_LEN] = "";
116 
117  /* check if inpPath is a file or a collection */
118  const char *lastPart = strrchr( inpPath, '/' ) + 1;
119  strncpy( firstPart, inpPath, strlen( inpPath ) - strlen( lastPart ) - 1 );
120  memset( &genQueryInp1, 0, sizeof( genQueryInp1 ) );
121  addInxIval( &genQueryInp1.selectInp, COL_COLL_ID, 1 );
122  genQueryInp1.maxRows = MAX_SQL_ROWS;
123 
124  snprintf( condStr1, MAX_NAME_LEN, "='%s'", firstPart );
125  addInxVal( &genQueryInp1.sqlCondInp, COL_COLL_NAME, condStr1 );
126  snprintf( condStr1, MAX_NAME_LEN, "='%s'", lastPart );
127  addInxVal( &genQueryInp1.sqlCondInp, COL_DATA_NAME, condStr1 );
128 
129  status = rcGenQuery( conn, &genQueryInp1, &genQueryOut1 );
130  if ( status == CAT_NO_ROWS_FOUND ) {
131  isColl = 1;
132  }
133  else {
134  isColl = 0;
135  }
136 
137  /* for each files check if the physical file associated to it exists on the
138  physical resource */
139  memset( &genQueryInp2, 0, sizeof( genQueryInp2 ) );
140  addInxIval( &genQueryInp2.selectInp, COL_D_DATA_PATH, 1 );
141  addInxIval( &genQueryInp2.selectInp, COL_DATA_SIZE, 1 );
142  addInxIval( &genQueryInp2.selectInp, COL_R_LOC, 1 );
143  addInxIval( &genQueryInp2.selectInp, COL_R_ZONE_NAME, 1 );
144  addInxIval( &genQueryInp2.selectInp, COL_DATA_NAME, 1 );
145  addInxIval( &genQueryInp2.selectInp, COL_COLL_NAME, 1 );
146  addInxIval( &genQueryInp2.selectInp, COL_D_RESC_ID, 1 );
147  genQueryInp2.maxRows = MAX_SQL_ROWS;
148 
149  if ( isColl ) {
150  if ( myRodsArgs->recursive == True ) {
151  snprintf( condStr2, MAX_NAME_LEN, "like '%s%s'", inpPath, "%" );
152  }
153  else {
154  snprintf( condStr2, MAX_NAME_LEN, "='%s'", inpPath );
155  }
156  addInxVal( &genQueryInp2.sqlCondInp, COL_COLL_NAME, condStr2 );
157  }
158  else {
159  snprintf( condStr2, MAX_NAME_LEN, "='%s'", firstPart );
160  addInxVal( &genQueryInp2.sqlCondInp, COL_COLL_NAME, condStr2 );
161  snprintf( condStr2, MAX_NAME_LEN, "='%s'", lastPart );
162  addInxVal( &genQueryInp2.sqlCondInp, COL_DATA_NAME, condStr2 );
163  }
164 
165  /* check if the physical file corresponding to the iRODS object does exist */
166  status = rcGenQuery( conn, &genQueryInp2, &genQueryOut2 );
167  if (0 == status) {
168  status = statPhysFile( conn, genQueryOut2 );
169  }
170  else {
171  printf( "Could not find the requested data object or collection in iRODS.\n" );
172  }
173 
174  while ( (0 == status || getIrodsErrno(status) == UNIX_FILE_STAT_ERR)
175  && genQueryOut2->continueInx > 0)
176  {
177  genQueryInp2.continueInx = genQueryOut2->continueInx;
178  status = rcGenQuery( conn, &genQueryInp2, &genQueryOut2 );
179  if ( 0 == status ) {
180  status = statPhysFile( conn, genQueryOut2 );
181  }
182  }
183 
184  freeGenQueryOut( &genQueryOut1 );
185  freeGenQueryOut( &genQueryOut2 );
186 
187  return status;
188 
189 }
190 
191 int
192 statPhysFile( rcComm_t *conn, genQueryOut_t *genQueryOut2 ) {
193 
194  int rcPriv = 0, rcStat = 0, rcOther = 0;
195 
196  for ( int i = 0; i < genQueryOut2->rowCnt; i++ ) {
197  sqlResult_t *dataPathStruct = getSqlResultByInx( genQueryOut2, COL_D_DATA_PATH );
198  sqlResult_t *dataSizeStruct = getSqlResultByInx( genQueryOut2, COL_DATA_SIZE );
199  sqlResult_t *locStruct = getSqlResultByInx( genQueryOut2, COL_R_LOC );
200  sqlResult_t *zoneStruct = getSqlResultByInx( genQueryOut2, COL_R_ZONE_NAME );
201  sqlResult_t *dataNameStruct = getSqlResultByInx( genQueryOut2, COL_DATA_NAME );
202  sqlResult_t *collNameStruct = getSqlResultByInx( genQueryOut2, COL_COLL_NAME );
203  sqlResult_t *rescIDStruct = getSqlResultByInx( genQueryOut2, COL_D_RESC_ID );
204  if ( dataPathStruct == NULL || dataSizeStruct == NULL || locStruct == NULL ||
205  zoneStruct == NULL || dataNameStruct == NULL || collNameStruct == NULL ||
206  rescIDStruct == NULL ) {
207  printf( "getSqlResultByInx returned null in statPhysFile." );
208  return -1;
209  }
210 
211  char *dataPath = &dataPathStruct->value[dataPathStruct->len * i];
212  char *loc = &locStruct->value[locStruct->len * i];
213  char *zone = &zoneStruct->value[zoneStruct->len * i];
214  char *dataName = &dataNameStruct->value[dataNameStruct->len * i];
215  char *collName = &collNameStruct->value[collNameStruct->len * i];
216  char *rescID = &rescIDStruct->value[rescIDStruct->len * i];
217 
218  /* check if the physical file does exist on the filesystem */
219  fileStatInp_t fileStatInp;
220  rstrcpy( fileStatInp.addr.hostAddr, loc, sizeof( fileStatInp.addr.hostAddr ) );
221  rstrcpy( fileStatInp.addr.zoneName, zone, sizeof( fileStatInp.addr.zoneName ) );
222  rstrcpy( fileStatInp.fileName, dataPath, sizeof( fileStatInp.fileName ) );
223  fileStatInp.rescId = strtoll( rescID, 0, 0 );
224  snprintf( fileStatInp.objPath, sizeof( fileStatInp.objPath ), "%s/%s", collName, dataName );
225  rodsStat_t *fileStatOut;
226  int status = rcFileStat( conn, &fileStatInp, &fileStatOut );
227  if ( SYS_NO_API_PRIV == status ) {
228  printf( "User must be a rodsadmin to scan iRODS data objects.\n" );
229  rcPriv = status;
230  }
231  else if ( status < 0 ) {
233  printf( "Physical file %s on server %s is missing, corresponding to "
234  "iRODS object %s/%s\n", dataPath, loc, collName, dataName );
235  rcStat = status;
236  }
237  else {
238  rcOther = status;
239  }
240  }
241  } // for each data object i = 0 to rowCnt-1
242 
243  /*
244  * Prioritize the error returned
245  */
246  if (rcPriv != 0) { return rcPriv; }
247  else if (rcStat != 0) { return rcStat; }
248  else if (rcOther!= 0) { return rcOther; }
249 
250  return 0;
251 }
252 
253 int
254 chkObjExist( rcComm_t *conn, const char *inpPath, const char *hostname ) {
255  int status;
256  genQueryInp_t genQueryInp;
257  genQueryOut_t *genQueryOut = NULL;
258  char condStr[MAX_NAME_LEN];
259 
260  memset( &genQueryInp, 0, sizeof( genQueryInp ) );
261  addInxIval( &genQueryInp.selectInp, COL_D_DATA_ID, 1 );
262  genQueryInp.maxRows = 0;
263  /*
264  Use the AUTO_CLOSE option to close down the statement after the
265  query, avoiding later 'too many concurrent statements' errors (and
266  CAT_SQL_ERR: -806000) later. This could also be done by asking for 2
267  rows (maxRows), but the rows are not needed, just the status.
268  This may also fix a segfault error which might be related.
269  */
270  genQueryInp.options = AUTO_CLOSE;
271 
272  snprintf( condStr, MAX_NAME_LEN, "='%s'", inpPath );
273  addInxVal( &genQueryInp.sqlCondInp, COL_D_DATA_PATH, condStr );
274  snprintf( condStr, MAX_NAME_LEN, "like '%s%%' || ='%s'", hostname, hostname );
275  addInxVal( &genQueryInp.sqlCondInp, COL_R_LOC, condStr );
276 
277  status = rcGenQuery( conn, &genQueryInp, &genQueryOut );
278  if ( status == CAT_NO_ROWS_FOUND ) {
279  printf( "%s is not registered in iRODS\n", inpPath );
280  }
281 
282  clearGenQueryInp( &genQueryInp );
283  freeGenQueryOut( &genQueryOut );
284 
285  return status;
286 
287 }
288 
289 int
290 checkIsMount( rcComm_t *conn, const char *inpPath ) {
291  int i, minLen, status, status1;
292  genQueryInp_t genQueryInp;
293  genQueryOut_t *genQueryOut = NULL;
294  char condStr[MAX_NAME_LEN], *dirMPath;
295 
296  memset( &genQueryInp, 0, sizeof( genQueryInp ) );
297  addInxIval( &genQueryInp.selectInp, COL_COLL_INFO1, 1 );
298  genQueryInp.maxRows = MAX_SQL_ROWS;
299 
300  snprintf( condStr, MAX_NAME_LEN, "='%s'", "mountPoint" );
301  addInxVal( &genQueryInp.sqlCondInp, COL_COLL_TYPE, condStr );
302 
303  status1 = rcGenQuery( conn, &genQueryInp, &genQueryOut );
304  if ( status1 == CAT_NO_ROWS_FOUND ) {
305  status = 0; /* there is no mounted collection, so no potential problem */
306  }
307  else { /* check if inpPath is part of one of the mounted collections */
308  status = 0;
309  for ( i = 0; i < genQueryOut->rowCnt; i++ ) {
310  dirMPath = genQueryOut->sqlResult[0].value;
311  dirMPath += i * genQueryOut->sqlResult[0].len;
312  if ( strlen( dirMPath ) <= strlen( inpPath ) ) {
313  minLen = strlen( dirMPath );
314  }
315  else {
316  minLen = strlen( inpPath );
317  }
318  if ( strncmp( dirMPath, inpPath, minLen ) == 0 ) {
319  status = -1;
320  }
321  }
322  }
323 
324  clearGenQueryInp( &genQueryInp );
325  freeGenQueryOut( &genQueryOut );
326 
327  return status;
328 
329 }
rodsLog
void rodsLog(int level, const char *formatStr,...)
Definition: rodsLog.cpp:86
MAX_SQL_ROWS
#define MAX_SQL_ROWS
Definition: rodsGenQuery.h:16
rcComm_t
Definition: rcConnect.h:95
NULL
#define NULL
Definition: rodsDef.h:70
True
#define True
Definition: parseCommandLine.h:11
rodsPath.h
GenQueryInp::continueInx
int continueInx
Definition: rodsGenQuery.h:28
SYS_NO_API_PRIV
@ SYS_NO_API_PRIV
Definition: rodsErrorTable.h:81
rodsArguments_t
Definition: parseCommandLine.h:14
COL_R_LOC
#define COL_R_LOC
Definition: rodsGenQuery.h:148
scanObj
int scanObj(rcComm_t *conn, rodsArguments_t *myRodsArgs, rodsPathInp_t *rodsPathInp, const char *hostname)
Definition: scanUtil.cpp:19
isColl
int isColl(rsComm_t *rsComm, char *objName, rodsLong_t *collId)
Definition: objMetaOpr.cpp:204
UNKNOWN_OBJ_T
@ UNKNOWN_OBJ_T
Definition: rodsType.h:37
RodsPathInp
Definition: rodsPath.h:33
statPhysFile
int statPhysFile(rcComm_t *conn, genQueryOut_t *genQueryOut2)
Definition: scanUtil.cpp:192
GenQueryInp
Definition: rodsGenQuery.h:24
AUTO_CLOSE
#define AUTO_CLOSE
Definition: rodsGenQuery.h:89
irods::experimental::administration::client::v1::exists
auto exists(rcComm_t &conn, const user &user) -> bool
Definition: user_administration.cpp:359
rcMisc.h
pid_age.p
p
Definition: pid_age.py:13
fileStatInp_t
Definition: fileStat.h:7
LONG_NAME_LEN
#define LONG_NAME_LEN
Definition: rodsDef.h:57
COL_COLL_ID
#define COL_COLL_ID
Definition: rodsGenQuery.h:188
rcGlobalExtern.h
RodsPathInp::srcPath
rodsPath_t * srcPath
Definition: rodsPath.h:35
addInxVal
int addInxVal(inxValPair_t *inxValPair, int inx, const char *value)
Definition: rcMisc.cpp:921
LOG_ERROR
#define LOG_ERROR
Definition: rodsLog.h:43
RodsPath::objType
objType_t objType
Definition: rodsPath.h:19
GenQueryInp::selectInp
inxIvalPair_t selectInp
Definition: rodsGenQuery.h:53
GenQueryInp::maxRows
int maxRows
Definition: rodsGenQuery.h:25
GenQueryOut::sqlResult
sqlResult_t sqlResult[50]
Definition: rodsGenQuery.h:72
COLL_OBJ_T
@ COLL_OBJ_T
Definition: rodsType.h:39
COL_DATA_NAME
#define COL_DATA_NAME
Definition: rodsGenQuery.h:165
rodsStat
Definition: rodsType.h:52
fileStatInp_t::objPath
char objPath[(1024+64)]
Definition: fileStat.h:11
getSqlResultByInx
sqlResult_t * getSqlResultByInx(genQueryOut_t *genQueryOut, int attriInx)
Definition: rcMisc.cpp:1387
fileStatInp_t::addr
rodsHostAddr_t addr
Definition: fileStat.h:8
rcGenQuery
int rcGenQuery(rcComm_t *conn, genQueryInp_t *genQueryInp, genQueryOut_t **genQueryOut)
Definition: rcGenQuery.cpp:89
getIrodsErrno
int getIrodsErrno(int irodError)
Definition: rcMisc.cpp:3272
GenQueryInp::sqlCondInp
inxValPair_t sqlCondInp
Definition: rodsGenQuery.h:56
MAX_NAME_LEN
#define MAX_NAME_LEN
Definition: rodsDef.h:61
fileStatInp_t::rescId
rodsLong_t rescId
Definition: fileStat.h:12
addInxIval
int addInxIval(inxIvalPair_t *inxIvalPair, int inx, int value)
Definition: rcMisc.cpp:883
GenQueryOut
Definition: rodsGenQuery.h:67
irods.pypyodbc.status
status
Definition: pypyodbc.py:467
rodsLog.h
GenQueryOut::rowCnt
int rowCnt
Definition: rodsGenQuery.h:68
scanObjCol
int scanObjCol(rcComm_t *conn, rodsArguments_t *myRodsArgs, const char *inpPath)
Definition: scanUtil.cpp:110
scanUtil.h
COL_COLL_TYPE
#define COL_COLL_TYPE
Definition: rodsGenQuery.h:198
RodsPath::outPath
char outPath[(1024+64)]
Definition: rodsPath.h:24
SqlResult::value
char * value
Definition: rodsGenQuery.h:64
rodsArguments_t::recursive
int recursive
Definition: parseCommandLine.h:85
COL_R_ZONE_NAME
#define COL_R_ZONE_NAME
Definition: rodsGenQuery.h:145
fileStatInp_t::fileName
char fileName[(1024+64)]
Definition: fileStat.h:9
rodsHostAddr_t::hostAddr
char hostAddr[256]
Definition: rodsDef.h:297
CAT_NO_ROWS_FOUND
@ CAT_NO_ROWS_FOUND
Definition: rodsErrorTable.h:423
SqlResult
Definition: rodsGenQuery.h:61
COL_COLL_NAME
#define COL_COLL_NAME
Definition: rodsGenQuery.h:189
LOCAL_DIR_T
@ LOCAL_DIR_T
Definition: rodsType.h:42
chkObjExist
int chkObjExist(rcComm_t *conn, const char *inpPath, const char *hostname)
Definition: scanUtil.cpp:254
RodsPathInp::numSrc
int numSrc
Definition: rodsPath.h:34
checkIsMount
int checkIsMount(rcComm_t *conn, const char *inpPath)
Definition: scanUtil.cpp:290
SqlResult::len
int len
Definition: rodsGenQuery.h:63
rodsErrorTable.h
UNIX_FILE_STAT_ERR
@ UNIX_FILE_STAT_ERR
Definition: rodsErrorTable.h:306
miscUtil.h
rstrcpy
char * rstrcpy(char *dest, const char *src, int maxLen)
Definition: stringOpr.cpp:51
COL_D_DATA_PATH
#define COL_D_DATA_PATH
Definition: rodsGenQuery.h:172
LOCAL_FILE_T
@ LOCAL_FILE_T
Definition: rodsType.h:41
COL_COLL_INFO1
#define COL_COLL_INFO1
Definition: rodsGenQuery.h:199
freeGenQueryOut
int freeGenQueryOut(genQueryOut_t **genQueryOut)
Definition: rcMisc.cpp:1133
scanObjDir
int scanObjDir(rcComm_t *conn, rodsArguments_t *myRodsArgs, const char *inpPath, const char *hostname)
Definition: scanUtil.cpp:66
USER_INPUT_PATH_ERR
@ USER_INPUT_PATH_ERR
Definition: rodsErrorTable.h:248
COL_DATA_SIZE
#define COL_DATA_SIZE
Definition: rodsGenQuery.h:169
clearGenQueryInp
void clearGenQueryInp(void *voidInp)
Definition: rcMisc.cpp:1118
rodsHostAddr_t::zoneName
char zoneName[64]
Definition: rodsDef.h:298
GenQueryInp::options
int options
Definition: rodsGenQuery.h:34
rcFileStat
int rcFileStat(rcComm_t *conn, fileStatInp_t *fileStatInp, rodsStat_t **fileStatOut)
Definition: rcFileStat.cpp:24
COL_D_RESC_ID
#define COL_D_RESC_ID
Definition: rodsGenQuery.h:185
COL_D_DATA_ID
#define COL_D_DATA_ID
Definition: rodsGenQuery.h:163