CifFile.cpp (pymol-v1.8.6.0.tar.bz2) | : | CifFile.cpp (pymol-v2.1.0.tar.bz2) | ||
---|---|---|---|---|
skipping to change at line 15 | skipping to change at line 15 | |||
* | * | |||
* (c) 2014 Schrodinger, Inc. | * (c) 2014 Schrodinger, Inc. | |||
*/ | */ | |||
#include <stdio.h> | #include <stdio.h> | |||
#include <stdlib.h> | #include <stdlib.h> | |||
#include <string.h> | #include <string.h> | |||
#include <vector> | #include <vector> | |||
#include <iostream> | #include <iostream> | |||
#include <stdexcept> | ||||
#include "CifFile.h" | #include "CifFile.h" | |||
#include "File.h" | #include "File.h" | |||
#include "MemoryDebug.h" | #include "MemoryDebug.h" | |||
#include "strcasecmp.h" | ||||
// basic IO and string handling | // basic IO and string handling | |||
/* | /* | |||
* atof which ignores uncertainty notation | * atof which ignores uncertainty notation | |||
* 1.23(45)e2 -> 1.23e2 | * 1.23(45)e2 -> 1.23e2 | |||
*/ | */ | |||
double scifloat(const char *str) { | double scifloat(const char *str) { | |||
const char *close, *open = strchr(str, '('); | const char *close, *open = strchr(str, '('); | |||
if (open && (close = strchr(open, ')'))) { | if (open && (close = strchr(open, ')'))) { | |||
skipping to change at line 93 | skipping to change at line 93 | |||
if (*p <= 'Z' && *p >= 'A') | if (*p <= 'Z' && *p >= 'A') | |||
*p -= 'Z' - 'z'; | *p -= 'Z' - 'z'; | |||
} | } | |||
} | } | |||
// CIF stuff | // CIF stuff | |||
static const char * EMPTY_STRING = ""; | static const char * EMPTY_STRING = ""; | |||
static cif_array EMPTY_ARRAY(NULL); | static cif_array EMPTY_ARRAY(NULL); | |||
/* | ||||
* Class to store CIF loops. Only for parsing, do not use in any higher level | ||||
* reading functions. | ||||
*/ | ||||
class cif_loop { | ||||
public: | ||||
int ncols; | ||||
int nrows; | ||||
const char **values; | ||||
// methods | ||||
const char * get_value_raw(int row, int col) const; | ||||
}; | ||||
// get table value, return NULL if indices out of bounds | // get table value, return NULL if indices out of bounds | |||
const char * cif_loop::get_value_raw(int row, int col) const { | const char * cif_loop::get_value_raw(int row, int col) const { | |||
if (row >= nrows) | if (row >= nrows) | |||
return NULL; | return NULL; | |||
return values[row * ncols + col]; | return values[row * ncols + col]; | |||
} | } | |||
// get the number of elements in this array | ||||
int cif_array::get_nrows() const { | ||||
return (col < 0) ? 1 : pointer.loop->nrows; | ||||
} | ||||
// get array value, return NULL if row-index out of bounds | // get array value, return NULL if row-index out of bounds | |||
const char * cif_array::get_value_raw(int row) const { | // or value in ['.', '?'] | |||
const char * cif_array::get_value(int row) const { | ||||
if (col < 0) | if (col < 0) | |||
return (row > 0) ? NULL : pointer.value; | return (row > 0) ? NULL : pointer.value; | |||
return pointer.loop->get_value_raw(row, col); | return pointer.loop->get_value_raw(row, col); | |||
}; | ||||
// get array value, return NULL if value in ['.', '?'] | ||||
const char * cif_array::get_value(int row) const { | ||||
const char * s = get_value_raw(row); | ||||
return (s && (s[0] == '?' || s[0] == '.') && !s[1]) ? NULL : s; | ||||
} | } | |||
// get array value, return an empty string if missing | // get array value, return an empty string if missing | |||
const char * cif_array::as_s(int row) const { | const char * cif_array::as_s(int row) const { | |||
const char * s = get_value(row); | const char * s = get_value(row); | |||
return s ? s : EMPTY_STRING; | return s ? s : EMPTY_STRING; | |||
} | } | |||
// get array value as integer, return d (default 0) if missing | // get array value as integer, return d (default 0) if missing | |||
int cif_array::as_i(int row, int d) const { | int cif_array::as_i(int row, int d) const { | |||
const char * s = get_value(row); | const char * s = get_value(row); | |||
return s ? atoi(s) : d; | return s ? atoi(s) : d; | |||
} | } | |||
// get array value as double, return d (default 0.0) if missing | // get array value as double, return d (default 0.0) if missing | |||
double cif_array::as_d(int row, double d) const { | double cif_array::as_d(int row, double d) const { | |||
const char * s = get_value(row); | const char * s = get_value(row); | |||
return s ? scifloat(s) : d; | return s ? scifloat(s) : d; | |||
} | } | |||
// true if all values in ['.', '?'] | ||||
bool cif_array::is_missing_all() const { | ||||
int n = get_nrows(); | ||||
for (int i = 0; i < n; ++i) { | ||||
if (!is_missing(i)) | ||||
return false; | ||||
} | ||||
return true; | ||||
} | ||||
// templated getters | // templated getters | |||
template <> const char* cif_array::as<const char* >(int row) const { return as_s(row); } | template <> const char* cif_array::as<const char* >(int row) const { return get_value(row); } | |||
template <> std::string cif_array::as<std::string >(int row) const { return as_s(row); } | template <> std::string cif_array::as<std::string >(int row) const { return as_s(row); } | |||
template <> int cif_array::as<int >(int row) const { return as_i(row); } | template <> int cif_array::as<int >(int row) const { return as_i(row); } | |||
template <> double cif_array::as<double >(int row) const { return as_d(row); } | template <> double cif_array::as<double >(int row) const { return as_d(row); } | |||
template <> float cif_array::as<float >(int row) const { return as_d(row); } | template <> float cif_array::as<float >(int row) const { return as_d(row); } | |||
/* | /* | |||
* Get a pointer to array or NULL if not found | * Get a pointer to array or NULL if not found | |||
* | * | |||
* Can lookup up to 3 different aliases, the first one found is returned. | * Can lookup up to 3 different aliases, the first one found is returned. | |||
* Also supports an alias shortcut for the trivial case where mmCIF uses | * Also supports an alias shortcut for the trivial case where mmCIF uses | |||
skipping to change at line 219 | skipping to change at line 245 | |||
it_end = loops.end(); it != it_end; ++it) | it_end = loops.end(); it != it_end; ++it) | |||
delete *it; | delete *it; | |||
} | } | |||
// parse CIF contents | // parse CIF contents | |||
bool cif_file::parse() { | bool cif_file::parse() { | |||
char *p = contents; | char *p = contents; | |||
char quote; | char quote; | |||
char prev = '\0'; | char prev = '\0'; | |||
std::vector<char> codes; | std::vector<bool> keypossible; | |||
// tokenize | // tokenize | |||
while (true) { | while (true) { | |||
while (iswhitespace(*p)) | while (iswhitespace(*p)) | |||
prev = *(p++); | prev = *(p++); | |||
if (!*p) | if (!*p) | |||
break; | break; | |||
if (*p == '#') { | if (*p == '#') { | |||
while (!(islinefeed0(*++p))); | while (!(islinefeed0(*++p))); | |||
prev = *p; | prev = *p; | |||
} else if (isquote(*p)) { // will NULL the closing quote | } else if (isquote(*p)) { // will NULL the closing quote | |||
quote = *p; | quote = *p; | |||
codes.push_back('Q'); | keypossible.push_back(false); | |||
tokens.push_back(p + 1); | tokens.push_back(p + 1); | |||
while (*++p && !(*p == quote && iswhitespace0(p[1]))); | while (*++p && !(*p == quote && iswhitespace0(p[1]))); | |||
if (*p) | if (*p) | |||
*(p++) = 0; | *(p++) = 0; | |||
prev = *p; | prev = *p; | |||
} else if (*p == ';' && islinefeed(prev)) { // will NULL the line feed before the closing semicolon | } else if (*p == ';' && islinefeed(prev)) { // will NULL the line feed before the closing semicolon | |||
codes.push_back('Q'); | keypossible.push_back(false); | |||
tokens.push_back(p + 1); | tokens.push_back(p + 1); | |||
while (*++p && !(islinefeed(*p) && p[1] == ';')); | while (*++p && !(islinefeed(*p) && p[1] == ';')); | |||
if (*p) { | if (*p) { | |||
*p = 0; | *p = 0; | |||
p += 2; | p += 2; | |||
} | } | |||
prev = ';'; | prev = ';'; | |||
} else { // will null the whitespace | } else { // will null the whitespace | |||
codes.push_back('R'); | char * q = p++; | |||
tokens.push_back(p); | ||||
while (!iswhitespace0(*p)) ++p; | while (!iswhitespace0(*p)) ++p; | |||
prev = *p; | prev = *p; | |||
if (*p) | if (p - q == 1 && (*q == '?' || *q == '.')) { | |||
*(p++) = 0; | // store values '.' (inapplicable) and '?' (unknown) as null-pointers | |||
q = NULL; | ||||
keypossible.push_back(false); | ||||
} else { | ||||
if (*p) | ||||
*(p++) = 0; | ||||
keypossible.push_back(true); | ||||
} | ||||
tokens.push_back(q); | ||||
} | } | |||
} | } | |||
cif_data *current_data = NULL, *current_frame = NULL, *global_block = NULL; | cif_data *current_data = NULL, *current_frame = NULL, *global_block = NULL; | |||
// parse into dictionary | // parse into dictionary | |||
for (unsigned int i = 0, n = tokens.size(); i < n; i++) { | for (unsigned int i = 0, n = tokens.size(); i < n; i++) { | |||
if (codes[i] == 'Q') { | if (!keypossible[i]) { | |||
std::cout << "ERROR" << std::endl; | std::cout << "ERROR" << std::endl; | |||
break; | break; | |||
} else if (tokens[i][0] == '_') { | } else if (tokens[i][0] == '_') { | |||
if (i + 1 == n) { | ||||
std::cout << "ERROR truncated" << std::endl; | ||||
break; | ||||
} | ||||
if (current_frame) { | if (current_frame) { | |||
tolowerinplace(tokens[i]); | tolowerinplace(tokens[i]); | |||
current_frame->dict[tokens[i]].set_value(tokens[i + 1]); | current_frame->dict[tokens[i]].set_value(tokens[i + 1]); | |||
} | } | |||
i++; | i++; | |||
} else if (strcasecmp("loop_", tokens[i]) == 0) { | } else if (strcasecmp("loop_", tokens[i]) == 0) { | |||
int ncols = 0; | int ncols = 0; | |||
int nrows = 0; | int nrows = 0; | |||
cif_loop *loop = NULL; | cif_loop *loop = NULL; | |||
// loop data | // loop data | |||
if (current_frame) { | if (current_frame) { | |||
loop = new cif_loop; | loop = new cif_loop; | |||
// add to loops list | // add to loops list | |||
current_frame->loops.push_back(loop); | current_frame->loops.push_back(loop); | |||
} | } | |||
// columns | // columns | |||
while (++i < n && codes[i] != 'Q' && tokens[i][0] == '_') { | while (++i < n && keypossible[i] && tokens[i][0] == '_') { | |||
tolowerinplace(tokens[i]); | tolowerinplace(tokens[i]); | |||
if (current_frame) { | if (current_frame) { | |||
current_frame->dict[tokens[i]].set_loop(loop, ncols); | current_frame->dict[tokens[i]].set_loop(loop, ncols); | |||
} | } | |||
ncols++; | ncols++; | |||
} | } | |||
if (loop) { | if (loop) { | |||
// loop data | // loop data | |||
loop->values = (const char **) &tokens[i]; | loop->values = (const char **) &tokens[i]; | |||
loop->ncols = ncols; | loop->ncols = ncols; | |||
} | } | |||
// rows | // rows | |||
while (i < n && (codes[i] == 'Q' || !isspecial(tokens[i]))) { | while (i < n && !(keypossible[i] && isspecial(tokens[i]))) { | |||
i += ncols; | i += ncols; | |||
if (i > n) { | ||||
std::cout << "ERROR truncated loop" << std::endl; | ||||
break; | ||||
} | ||||
nrows++; | nrows++; | |||
} | } | |||
// loop data | // loop data | |||
if (loop) { | if (loop) { | |||
loop->nrows = nrows; | loop->nrows = nrows; | |||
} | } | |||
i--; | i--; | |||
} else if (strncasecmp("data_", tokens[i], 5) == 0) { | } else if (strncasecmp("data_", tokens[i], 5) == 0) { | |||
const char * key(tokens[i] + 5); | const char * key(tokens[i] + 5); | |||
datablocks[key] = current_data = current_frame = new cif_data; | datablocks[key] = current_data = current_frame = new cif_data; | |||
} else if (strncasecmp("global_", tokens[i], 5) == 0) { | } else if (strncasecmp("global_", tokens[i], 5) == 0) { | |||
// STAR feature, not supported in CIF | // STAR feature, not supported in CIF | |||
global_block = current_data = current_frame = new cif_data; | global_block = current_data = current_frame = new cif_data; | |||
} else if (strncasecmp("save_", tokens[i], 5) == 0) { | } else if (strncasecmp("save_", tokens[i], 5) == 0) { | |||
if (tokens[i][5]) { | if (tokens[i][5] && current_data) { | |||
// begin | // begin | |||
const char * key(tokens[i] + 5); | const char * key(tokens[i] + 5); | |||
current_data->saveframes[key] = current_frame = new cif_data; | current_data->saveframes[key] = current_frame = new cif_data; | |||
} else { | } else { | |||
// end | // end | |||
current_frame = current_data; | current_frame = current_data; | |||
} | } | |||
} else { | } else { | |||
std::cout << "ERROR" << std::endl; | std::cout << "ERROR" << std::endl; | |||
break; | break; | |||
End of changes. 19 change blocks. | ||||
20 lines changed or deleted | 64 lines changed or added |