"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "layer2/CifMoleculeReader.cpp" between
pymol-open-source-2.2.0.tar.gz and pymol-open-source-2.3.0.tar.gz

About: PyMOL is a Python-enhanced molecular graphics tool. It excels at 3D visualization of proteins, small molecules, density, surfaces, and trajectories. It also includes molecular editing, ray tracing, and movies. Open Source version.

CifMoleculeReader.cpp  (pymol-open-source-2.2.0):CifMoleculeReader.cpp  (pymol-open-source-2.3.0)
skipping to change at line 13 skipping to change at line 13
* *
* (c) 2014 Schrodinger, Inc. * (c) 2014 Schrodinger, Inc.
*/ */
#include <algorithm> #include <algorithm>
#include <string> #include <string>
#include <map> #include <map>
#include <set> #include <set>
#include <vector> #include <vector>
#include <memory> #include <memory>
#include <array>
#include "os_predef.h" #include "os_predef.h"
#include "os_std.h" #include "os_std.h"
#include "MemoryDebug.h" #include "MemoryDebug.h"
#include "Err.h" #include "Err.h"
#include "AssemblyHelpers.h" #include "AssemblyHelpers.h"
#include "AtomInfo.h" #include "AtomInfo.h"
#include "Base.h" #include "Base.h"
skipping to change at line 97 skipping to change at line 98
printf("error: i(%d) < 1\n", i); printf("error: i(%d) < 1\n", i);
return; return;
} }
if (i > size()) if (i > size())
resize(i); resize(i);
(*this)[i - 1] = mon_id; (*this)[i - 1] = mon_id;
} }
const char * get(int i) const { const char * get(int i) const {
if (i < 1 || i > size()) if (i < 1 || i > size())
return NULL; return nullptr;
return (*this)[i - 1].c_str(); return (*this)[i - 1].c_str();
} }
}; };
// structure to collect information about a data block // structure to collect information about a data block
struct CifContentInfo { struct CifContentInfo {
PyMOLGlobals * G; PyMOLGlobals * G;
CifDataType type; CifDataType type;
bool fractional; bool fractional;
bool use_auth; bool use_auth;
std::set<lexidx_t> chains_filter; std::set<lexidx_t> chains_filter;
std::set<std::string> polypeptide_entities; // entity ids std::set<std::string> polypeptide_entities; // entity ids
std::map<std::string, seqvec_t> sequences; // entity_id -> [resn1, resn2, ... ] std::map<std::string, seqvec_t> sequences; // entity_id -> [resn1, resn2, ... ]
bool is_excluded_chain(const char * chain) { bool is_excluded_chain(const char * chain) {
if (chains_filter.empty()) if (chains_filter.empty())
return false; return false;
auto result = OVLexicon_BorrowFromCString(G->Lexicon, chain); auto borrowed = LexBorrow(G, chain);
if (OVreturn_IS_OK(result)) if (borrowed != LEX_BORROW_NOTFOUND)
return is_excluded_chain(result.word); return is_excluded_chain(borrowed);
return false; return false;
} }
bool is_excluded_chain(lexidx_t chain) { bool is_excluded_chain(const lexborrow_t& chain) {
return (!chains_filter.empty() && return (!chains_filter.empty() &&
chains_filter.count(chain) == 0); chains_filter.count(reinterpret_cast<const lexidx_t&>(chain)) == 0);
} }
bool is_polypeptide(const char * entity_id) { bool is_polypeptide(const char * entity_id) {
return polypeptide_entities.count(entity_id); return polypeptide_entities.count(entity_id);
} }
CifContentInfo(PyMOLGlobals * G, bool use_auth=true) : CifContentInfo(PyMOLGlobals * G, bool use_auth=true) :
G(G), G(G),
type(CIF_UNKNOWN), type(CIF_UNKNOWN),
fractional(false), fractional(false),
skipping to change at line 218 skipping to change at line 219
if (!find1(dict, value1, key1)) if (!find1(dict, value1, key1))
return false; return false;
if (!find1(dict, value2, key2)) if (!find1(dict, value2, key2))
return false; return false;
return true; return true;
} }
static void AtomInfoSetEntityId(PyMOLGlobals * G, AtomInfoType * ai, const char * entity_id) { static void AtomInfoSetEntityId(PyMOLGlobals * G, AtomInfoType * ai, const char * entity_id) {
ai->custom = LexIdx(G, entity_id); ai->custom = LexIdx(G, entity_id);
#ifdef _PYMOL_IP_EXTRAS #ifdef _PYMOL_IP_PROPERTIES
PropertySet(G, ai, "entity_id", entity_id); PropertySet(G, ai, "entity_id", entity_id);
#endif #endif
} }
/* /*
* Add one bond without checking if it already exists * Add one bond without checking if it already exists
*/ */
static void ObjectMoleculeAddBond2(ObjectMolecule * I, int i1, int i2, int order ) { static void ObjectMoleculeAddBond2(ObjectMolecule * I, int i1, int i2, int order ) {
VLACheck(I->Bond, BondType, I->NBond); VLACheck(I->Bond, BondType, I->NBond);
BondTypeInit2(I->Bond + I->NBond, i1, i2, order); BondTypeInit2(I->Bond + I->NBond, i1, i2, order);
skipping to change at line 241 skipping to change at line 242
/* /*
* Distance based connectivity for discrete objects * Distance based connectivity for discrete objects
*/ */
static void ObjectMoleculeConnectDiscrete(ObjectMolecule * I) { static void ObjectMoleculeConnectDiscrete(ObjectMolecule * I) {
for (int i = 0; i < I->NCSet; i++) { for (int i = 0; i < I->NCSet; i++) {
if (!I->CSet[i]) if (!I->CSet[i])
continue; continue;
int nbond = 0; int nbond = 0;
BondType * bond = NULL; BondType * bond = nullptr;
ObjectMoleculeConnect(I, &nbond, &bond, I->AtomInfo, I->CSet[i], true, 3); ObjectMoleculeConnect(I, &nbond, &bond, I->AtomInfo, I->CSet[i], true, 3);
if (!bond) if (!bond)
continue; continue;
if (!I->Bond) { if (!I->Bond) {
I->Bond = bond; I->Bond = bond;
} else { } else {
VLASize(I->Bond, BondType, I->NBond + nbond); VLASize(I->Bond, BondType, I->NBond + nbond);
memcpy(I->Bond + I->NBond, bond, nbond * sizeof(*bond)); std::copy(bond, bond + nbond, I->Bond + I->NBond);
VLAFreeP(bond); VLAFreeP(bond);
} }
I->NBond += nbond; I->NBond += nbond;
} }
} }
/* /*
* Get the distance between two atoms in ObjectMolecule * Get the distance between two atoms in ObjectMolecule
*/ */
skipping to change at line 359 skipping to change at line 360
/* /*
* parse $PYMOL_DATA/chem_comp_bond-top100.cif (subset of components.cif) into * parse $PYMOL_DATA/chem_comp_bond-top100.cif (subset of components.cif) into
* a static (global) dictionary. * a static (global) dictionary.
*/ */
static bond_dict_t * get_global_components_bond_dict(PyMOLGlobals * G) { static bond_dict_t * get_global_components_bond_dict(PyMOLGlobals * G) {
static bond_dict_t bond_dict; static bond_dict_t bond_dict;
if (bond_dict.empty()) { if (bond_dict.empty()) {
const char * pymol_data = getenv("PYMOL_DATA"); const char * pymol_data = getenv("PYMOL_DATA");
if (!pymol_data || !pymol_data[0]) if (!pymol_data || !pymol_data[0])
return NULL; return nullptr;
std::string path(pymol_data); std::string path(pymol_data);
path.append(PATH_SEP).append("chem_comp_bond-top100.cif"); path.append(PATH_SEP).append("chem_comp_bond-top100.cif");
cif_file cif(path.c_str()); cif_file cif(path.c_str());
for (m_str_cifdatap_t::iterator data_it = cif.datablocks.begin(), for (const auto& datablock : cif.datablocks) {
data_it_end = cif.datablocks.end(); data_it != data_it_end; ++data_it) { read_chem_comp_bond_dict(datablock.second, bond_dict);
read_chem_comp_bond_dict(data_it->second, bond_dict);
} }
} }
return &bond_dict; return &bond_dict;
} }
/* /*
* True for N-H1 and N-H3, those are not in the chemical components dictionary. * True for N-H1 and N-H3, those are not in the chemical components dictionary.
*/ */
static bool is_N_H1_or_H3(PyMOLGlobals * G, static bool is_N_H1_or_H3(PyMOLGlobals * G,
skipping to change at line 405 skipping to change at line 405
if (i_end - i_start < 2) if (i_end - i_start < 2)
return; return;
auto G = I->Obj.G; auto G = I->Obj.G;
AtomInfoType *a1, *a2, *ai = I->AtomInfo; AtomInfoType *a1, *a2, *ai = I->AtomInfo;
int order; int order;
// get residue bond dictionary // get residue bond dictionary
auto res_dict = bond_dict->get(G, LexStr(G, ai[i_start].resn)); auto res_dict = bond_dict->get(G, LexStr(G, ai[i_start].resn));
if (res_dict == NULL) if (res_dict == nullptr)
return; return;
// for all pairs of atoms in given set // for all pairs of atoms in given set
for (int i1 = i_start + 1; i1 < i_end; i1++) { for (int i1 = i_start + 1; i1 < i_end; i1++) {
for (int i2 = i_start; i2 < i1; i2++) { for (int i2 = i_start; i2 < i1; i2++) {
a1 = ai + i1; a1 = ai + i1;
a2 = ai + i2; a2 = ai + i2;
// don't connect different alt codes // don't connect different alt codes
if (a1->alt[0] && a2->alt[0] && strcmp(a1->alt, a2->alt) != 0) { if (a1->alt[0] && a2->alt[0] && strcmp(a1->alt, a2->alt) != 0) {
skipping to change at line 447 skipping to change at line 447
ObjectMoleculeAddBond2(I, i1, i2, order); ObjectMoleculeAddBond2(I, i1, i2, order);
} }
} }
} }
/* /*
* Add intra residue bonds based on components.cif, and common polymer * Add intra residue bonds based on components.cif, and common polymer
* connecting bonds (C->N, O3*->P) * connecting bonds (C->N, O3*->P)
*/ */
static int ObjectMoleculeConnectComponents(ObjectMolecule * I, static int ObjectMoleculeConnectComponents(ObjectMolecule * I,
bond_dict_t * bond_dict=NULL) { bond_dict_t * bond_dict=nullptr) {
PyMOLGlobals * G = I->Obj.G; PyMOLGlobals * G = I->Obj.G;
int i_start = 0, i_prev_c = 0, i_prev_o3 = 0; int i_start = 0, i_prev_c = 0, i_prev_o3 = 0;
if (!bond_dict) { if (!bond_dict) {
// read components.cif // read components.cif
if (!(bond_dict = get_global_components_bond_dict(G))) if (!(bond_dict = get_global_components_bond_dict(G)))
return false; return false;
} }
skipping to change at line 512 skipping to change at line 512
VLASize(I->Bond, BondType, I->NBond); VLASize(I->Bond, BondType, I->NBond);
return true; return true;
} }
/* /*
* secondary structure hash * secondary structure hash
*/ */
class sshashkey { class sshashkey {
public: public:
lexidx_t chain; // borrowed ref lexborrow_t chain; // borrowed ref
int resv; int resv;
char inscode; char inscode;
void assign(int asym_id_, int resv_, char ins_code_ = '\0') { void assign(const lexborrow_t& asym_id_, int resv_, char ins_code_ = '\0') {
chain = asym_id_; chain = asym_id_;
resv = resv_; resv = resv_;
inscode = ins_code_; inscode = ins_code_;
} }
// comparable to sshashkey and AtomInfoType // comparable to sshashkey and AtomInfoType
template <typename T> int compare(const T &other) const { template <typename T> int compare(const T &other) const {
int test = resv - other.resv; int test = resv - other.resv;
if (test == 0) { if (test == 0) {
test = (chain - other.chain); test = (chain - other.chain);
skipping to change at line 542 skipping to change at line 542
bool operator<(const sshashkey &other) const { return compare(other) < 0; } bool operator<(const sshashkey &other) const { return compare(other) < 0; }
bool operator>(const sshashkey &other) const { return compare(other) > 0; } bool operator>(const sshashkey &other) const { return compare(other) > 0; }
}; };
class sshashvalue { class sshashvalue {
public: public:
char ss; char ss;
sshashkey end; sshashkey end;
}; };
typedef std::map<sshashkey, sshashvalue> sshashmap; typedef std::map<sshashkey, sshashvalue> sshashmap;
// std::array for pre-C++11
template <typename T, size_t N>
class myarray {
T m_data[N];
public:
T * data() { return m_data; }
};
// PDBX_STRUCT_OPER_LIST type // PDBX_STRUCT_OPER_LIST type
typedef std::map<std::string, myarray<float, 16> > oper_list_t; typedef std::map<std::string, std::array<float, 16> > oper_list_t;
// type for parsed PDBX_STRUCT_OPER_LIST // type for parsed PDBX_STRUCT_OPER_LIST
typedef std::vector<std::vector<std::string> > oper_collection_t; typedef std::vector<std::vector<std::string> > oper_collection_t;
/* /*
* Parse operation expressions like (1,2)(3-6) * Parse operation expressions like (1,2)(3-6)
*/ */
static oper_collection_t parse_oper_expression(const std::string &expr) { static oper_collection_t parse_oper_expression(const std::string &expr) {
using namespace std; using namespace std;
oper_collection_t collection; oper_collection_t collection;
// first step to split parenthesized chunks // first step to split parenthesized chunks
vector<string> a_vec = strsplit(expr, ')'); vector<string> a_vec = strsplit(expr, ')');
// loop over chunks (still include leading '(') // loop over chunks (still include leading '(')
for (auto a_it = a_vec.begin(); a_it != a_vec.end(); ++a_it) { for (auto& a_item : a_vec) {
const char * a_chunk = a_it->c_str(); const char * a_chunk = a_item.c_str();
// finish chunk // finish chunk
while (*a_chunk == '(') while (*a_chunk == '(')
++a_chunk; ++a_chunk;
// skip empty chunks // skip empty chunks
if (!*a_chunk) if (!*a_chunk)
continue; continue;
collection.resize(collection.size() + 1); collection.resize(collection.size() + 1);
oper_collection_t::reference ids = collection.back(); oper_collection_t::reference ids = collection.back();
// split chunk by commas // split chunk by commas
vector<string> b_vec = strsplit(a_chunk, ','); vector<string> b_vec = strsplit(a_chunk, ',');
// look for ranges // look for ranges
for (vector<string>::iterator for (auto& b_item : b_vec) {
b_it = b_vec.begin();
b_it != b_vec.end(); ++b_it) {
// "c_d" will have either one (no range) or two items // "c_d" will have either one (no range) or two items
vector<string> c_d = strsplit(*b_it, '-'); vector<string> c_d = strsplit(b_item, '-');
ids.push_back(c_d[0]); ids.push_back(c_d[0]);
if (c_d.size() == 2) if (c_d.size() == 2)
for (int i = atoi(c_d[0].c_str()) + 1, for (int i = atoi(c_d[0].c_str()) + 1,
j = atoi(c_d[1].c_str()) + 1; i < j; ++i) j = atoi(c_d[1].c_str()) + 1; i < j; ++i)
{ {
char i_str[16]; char i_str[16];
snprintf(i_str, sizeof(i_str), "%d", i); snprintf(i_str, sizeof(i_str), "%d", i);
ids.push_back(i_str); ids.push_back(i_str);
skipping to change at line 621 skipping to change at line 611
* assembly_chains: output set * assembly_chains: output set
* assembly_id: ID of the assembly or NULL to use first assembly * assembly_id: ID of the assembly or NULL to use first assembly
*/ */
static bool get_assembly_chains(PyMOLGlobals * G, static bool get_assembly_chains(PyMOLGlobals * G,
const cif_data * data, const cif_data * data,
std::set<lexidx_t> &assembly_chains, std::set<lexidx_t> &assembly_chains,
const char * assembly_id) { const char * assembly_id) {
const cif_array *arr_id, *arr_asym_id_list; const cif_array *arr_id, *arr_asym_id_list;
if ((arr_id = data->get_arr("_pdbx_struct_assembly_gen.assembly_id") if ((arr_id = data->get_arr("_pdbx_struct_assembly_gen.assembly_id")
) == NULL || ) == nullptr ||
(arr_asym_id_list = data->get_arr("_pdbx_struct_assembly_gen.asym_id_list" (arr_asym_id_list = data->get_arr("_pdbx_struct_assembly_gen.asym_id_list"
)) == NULL) )) == nullptr)
return false; return false;
for (int i = 0, nrows = arr_id->get_nrows(); i < nrows; ++i) { for (int i = 0, nrows = arr_id->get_nrows(); i < nrows; ++i) {
if (strcmp(assembly_id, arr_id->as_s(i))) if (strcmp(assembly_id, arr_id->as_s(i)))
continue; continue;
const char * asym_id_list = arr_asym_id_list->as_s(i); const char * asym_id_list = arr_asym_id_list->as_s(i);
std::vector<std::string> chains = strsplit(asym_id_list, ','); std::vector<std::string> chains = strsplit(asym_id_list, ',');
for (auto it = chains.begin(); it != chains.end(); ++it) { for (auto& chain : chains) {
assembly_chains.insert(LexIdx(G, it->c_str())); assembly_chains.insert(LexIdx(G, chain.c_str()));
} }
} }
return !assembly_chains.empty(); return !assembly_chains.empty();
} }
/* /*
* Read assembly * Read assembly
* *
* atInfo: atom info array to use for chain check * atInfo: atom info array to use for chain check
skipping to change at line 657 skipping to change at line 647
*/ */
static static
CoordSet ** read_pdbx_struct_assembly(PyMOLGlobals * G, CoordSet ** read_pdbx_struct_assembly(PyMOLGlobals * G,
const cif_data * data, const cif_data * data,
const AtomInfoType * atInfo, const AtomInfoType * atInfo,
const CoordSet * cset, const CoordSet * cset,
const char * assembly_id) { const char * assembly_id) {
const cif_array *arr_id, *arr_assembly_id, *arr_oper_expr, *arr_asym_id_list; const cif_array *arr_id, *arr_assembly_id, *arr_oper_expr, *arr_asym_id_list;
if ((arr_id = data->get_arr("_pdbx_struct_oper_list.id")) == NULL || if ((arr_id = data->get_arr("_pdbx_struct_oper_list.id")) == nullptr ||
(arr_assembly_id = data->get_arr("_pdbx_struct_assembly_gen.assembly_id")) == NULL || (arr_assembly_id = data->get_arr("_pdbx_struct_assembly_gen.assembly_id")) == nullptr ||
(arr_oper_expr = data->get_arr("_pdbx_struct_assembly_gen.oper_expression")) == NULL || (arr_oper_expr = data->get_arr("_pdbx_struct_assembly_gen.oper_expression")) == nullptr ||
(arr_asym_id_list = data->get_arr("_pdbx_struct_assembly_gen.asym_id_list")) == NULL) (arr_asym_id_list = data->get_arr("_pdbx_struct_assembly_gen.asym_id_list")) == nullptr)
return NULL; return nullptr;
const cif_array * arr_matrix[] = { const cif_array * arr_matrix[] = {
data->get_opt("_pdbx_struct_oper_list.matrix[1][1]"), data->get_opt("_pdbx_struct_oper_list.matrix[1][1]"),
data->get_opt("_pdbx_struct_oper_list.matrix[1][2]"), data->get_opt("_pdbx_struct_oper_list.matrix[1][2]"),
data->get_opt("_pdbx_struct_oper_list.matrix[1][3]"), data->get_opt("_pdbx_struct_oper_list.matrix[1][3]"),
data->get_opt("_pdbx_struct_oper_list.vector[1]"), data->get_opt("_pdbx_struct_oper_list.vector[1]"),
data->get_opt("_pdbx_struct_oper_list.matrix[2][1]"), data->get_opt("_pdbx_struct_oper_list.matrix[2][1]"),
data->get_opt("_pdbx_struct_oper_list.matrix[2][2]"), data->get_opt("_pdbx_struct_oper_list.matrix[2][2]"),
data->get_opt("_pdbx_struct_oper_list.matrix[2][3]"), data->get_opt("_pdbx_struct_oper_list.matrix[2][3]"),
data->get_opt("_pdbx_struct_oper_list.vector[2]"), data->get_opt("_pdbx_struct_oper_list.vector[2]"),
skipping to change at line 692 skipping to change at line 682
for (int i = 0, nrows = arr_id->get_nrows(); i < nrows; ++i) { for (int i = 0, nrows = arr_id->get_nrows(); i < nrows; ++i) {
float * matrix = oper_list[arr_id->as_s(i)].data(); float * matrix = oper_list[arr_id->as_s(i)].data();
identity44f(matrix); identity44f(matrix);
for (int j = 0; j < 12; ++j) { for (int j = 0; j < 12; ++j) {
matrix[j] = arr_matrix[j]->as_d(i); matrix[j] = arr_matrix[j]->as_d(i);
} }
} }
CoordSet ** csets = NULL; CoordSet ** csets = nullptr;
int csetbeginidx = 0; int csetbeginidx = 0;
// assembly // assembly
for (int i = 0, nrows = arr_oper_expr->get_nrows(); i < nrows; ++i) { for (int i = 0, nrows = arr_oper_expr->get_nrows(); i < nrows; ++i) {
if (strcmp(assembly_id, arr_assembly_id->as_s(i))) if (strcmp(assembly_id, arr_assembly_id->as_s(i)))
continue; continue;
const char * oper_expr = arr_oper_expr->as_s(i); const char * oper_expr = arr_oper_expr->as_s(i);
const char * asym_id_list = arr_asym_id_list->as_s(i); const char * asym_id_list = arr_asym_id_list->as_s(i);
oper_collection_t collection = parse_oper_expression(oper_expr); oper_collection_t collection = parse_oper_expression(oper_expr);
std::vector<std::string> chains = strsplit(asym_id_list, ','); std::vector<std::string> chains = strsplit(asym_id_list, ',');
std::set<lexidx_t> chains_set; std::set<lexborrow_t> chains_set;
for (auto it = chains.begin(); it != chains.end(); ++it) { for (auto& chain : chains) {
auto result = OVLexicon_BorrowFromCString(G->Lexicon, it->c_str()); auto borrowed = LexBorrow(G, chain.c_str());
if (OVreturn_IS_OK(result)) { if (borrowed != LEX_BORROW_NOTFOUND) {
chains_set.insert(result.word); chains_set.insert(borrowed);
} }
} }
// new coord set VLA // new coord set VLA
int ncsets = 1; int ncsets = 1;
for (auto c_it = collection.begin(); c_it != collection.end(); ++c_it) { for (const auto& c_item : collection) {
ncsets *= c_it->size(); ncsets *= c_item.size();
} }
if (!csets) { if (!csets) {
csets = VLACalloc(CoordSet*, ncsets); csets = VLACalloc(CoordSet*, ncsets);
} else { } else {
csetbeginidx = VLAGetSize(csets); csetbeginidx = VLAGetSize(csets);
VLASize(csets, CoordSet*, csetbeginidx + ncsets); VLASize(csets, CoordSet*, csetbeginidx + ncsets);
} }
// for cartesian product // for cartesian product
skipping to change at line 746 skipping to change at line 736
int j = c_src_len; int j = c_src_len;
while (j < c_src_len * c_it->size()) { while (j < c_src_len * c_it->size()) {
// cartesian product // cartesian product
for (int k = 0; k < c_src_len; ++k, ++j) { for (int k = 0; k < c_src_len; ++k, ++j) {
c_csets[j] = CoordSetCopy(c_csets[k]); c_csets[j] = CoordSetCopy(c_csets[k]);
} }
} }
// transform // transform
j = 0; j = 0;
for (auto s_it = c_it->begin(); s_it != c_it->end(); ++s_it) { for (auto& s_item : *c_it) {
const float * matrix = oper_list[*s_it].data(); const float * matrix = oper_list[s_item].data();
// cartesian product // cartesian product
for (int k = 0; k < c_src_len; ++k, ++j) { for (int k = 0; k < c_src_len; ++k, ++j) {
CoordSetTransform44f(c_csets[j], matrix); CoordSetTransform44f(c_csets[j], matrix);
} }
} }
// cartesian product // cartesian product
// Note: currently, "1m4x" seems to be the only structure in the PDB // Note: currently, "1m4x" seems to be the only structure in the PDB
// which uses a cartesian product expression // which uses a cartesian product expression
skipping to change at line 818 skipping to change at line 808
const cif_array * cell[6] = { const cif_array * cell[6] = {
data->get_arr("_cell?length_a"), data->get_arr("_cell?length_a"),
data->get_arr("_cell?length_b"), data->get_arr("_cell?length_b"),
data->get_arr("_cell?length_c"), data->get_arr("_cell?length_c"),
data->get_arr("_cell?angle_alpha"), data->get_arr("_cell?angle_alpha"),
data->get_arr("_cell?angle_beta"), data->get_arr("_cell?angle_beta"),
data->get_arr("_cell?angle_gamma") data->get_arr("_cell?angle_gamma")
}; };
for (int i = 0; i < 6; i++) for (int i = 0; i < 6; i++)
if (cell[i] == NULL) if (cell[i] == nullptr)
return NULL; return nullptr;
CSymmetry * symmetry = SymmetryNew(G); CSymmetry * symmetry = SymmetryNew(G);
if (!symmetry) if (!symmetry)
return NULL; return nullptr;
for (int i = 0; i < 3; i++) { for (int i = 0; i < 3; i++) {
symmetry->Crystal->Dim[i] = cell[i]->as_d(); symmetry->Crystal->Dim[i] = cell[i]->as_d();
symmetry->Crystal->Angle[i] = cell[i + 3]->as_d(); symmetry->Crystal->Angle[i] = cell[i + 3]->as_d();
} }
strncpy(symmetry->SpaceGroup, strncpy(symmetry->SpaceGroup,
data->get_opt("_symmetry?space_group_name_h-m")->as_s(), data->get_opt("_symmetry?space_group_name_h-m")->as_s(),
WordLength - 1); WordLength - 1);
skipping to change at line 857 skipping to change at line 847
return symmetry; return symmetry;
} }
/* /*
* Read CHEM_COMP_ATOM * Read CHEM_COMP_ATOM
*/ */
static CoordSet ** read_chem_comp_atom_model(PyMOLGlobals * G, cif_data * data, static CoordSet ** read_chem_comp_atom_model(PyMOLGlobals * G, cif_data * data,
AtomInfoType ** atInfoPtr) { AtomInfoType ** atInfoPtr) {
const cif_array *arr_x, *arr_y = NULL, *arr_z = NULL; const cif_array *arr_x, *arr_y = nullptr, *arr_z = nullptr;
// setting to exclude one or more coordinate columns // setting to exclude one or more coordinate columns
unsigned mask = SettingGetGlobal_i(G, cSetting_chem_comp_cartn_use); unsigned mask = SettingGetGlobal_i(G, cSetting_chem_comp_cartn_use);
const char * feedback = ""; const char * feedback = "";
if (!mask) { if (!mask) {
mask = 0xFF; mask = 0xFF;
} }
if ((mask & 0x01) if ((mask & 0x01)
skipping to change at line 887 skipping to change at line 877
feedback = ".model_Cartn_{x,y,z}"; feedback = ".model_Cartn_{x,y,z}";
} else if ((mask & 0x04) } else if ((mask & 0x04)
&& (arr_x = data->get_arr("_chem_comp_atom.x")) && (arr_x = data->get_arr("_chem_comp_atom.x"))
&& !arr_x->is_missing_all()) { && !arr_x->is_missing_all()) {
arr_y = data->get_arr("_chem_comp_atom.y"); arr_y = data->get_arr("_chem_comp_atom.y");
arr_z = data->get_arr("_chem_comp_atom.z"); arr_z = data->get_arr("_chem_comp_atom.z");
feedback = ".{x,y,z}"; feedback = ".{x,y,z}";
} }
if (!arr_x || !arr_y || !arr_z) { if (!arr_x || !arr_y || !arr_z) {
return NULL; return nullptr;
} }
PRINTFB(G, FB_Executive, FB_Details) PRINTFB(G, FB_Executive, FB_Details)
" ExecutiveLoad-Detail: Detected chem_comp CIF (%s)\n", feedback " ExecutiveLoad-Detail: Detected chem_comp CIF (%s)\n", feedback
ENDFB(G); ENDFB(G);
const cif_array * arr_name = data->get_opt("_chem_comp_atom.atom_id"); const cif_array * arr_name = data->get_opt("_chem_comp_atom.atom_id");
const cif_array * arr_symbol = data->get_opt("_chem_comp_atom.type_symbol"); const cif_array * arr_symbol = data->get_opt("_chem_comp_atom.type_symbol");
const cif_array * arr_resn = data->get_opt("_chem_comp_atom.comp_id"); const cif_array * arr_resn = data->get_opt("_chem_comp_atom.comp_id");
const cif_array * arr_partial_charge = data->get_opt("_chem_comp_atom.partial_charge"); const cif_array * arr_partial_charge = data->get_opt("_chem_comp_atom.partial_charge");
skipping to change at line 925 skipping to change at line 915
ai->rank = atomCount; ai->rank = atomCount;
ai->id = atomCount + 1; ai->id = atomCount + 1;
LexAssign(G, ai->name, arr_name->as_s(i)); LexAssign(G, ai->name, arr_name->as_s(i));
LexAssign(G, ai->resn, arr_resn->as_s(i)); LexAssign(G, ai->resn, arr_resn->as_s(i));
strncpy(ai->elem, arr_symbol->as_s(i), cElemNameLen); strncpy(ai->elem, arr_symbol->as_s(i), cElemNameLen);
ai->partialCharge = arr_partial_charge->as_d(i); ai->partialCharge = arr_partial_charge->as_d(i);
ai->formalCharge = arr_formal_charge->as_i(i); ai->formalCharge = arr_formal_charge->as_i(i);
ai->hetatm = 1; ai->hetatm = true;
ai->visRep = auto_show; ai->visRep = auto_show;
AtomInfoSetStereo(ai, arr_stereo->as_s(i)); AtomInfoSetStereo(ai, arr_stereo->as_s(i));
AtomInfoAssignParameters(G, ai); AtomInfoAssignParameters(G, ai);
AtomInfoAssignColors(G, ai); AtomInfoAssignColors(G, ai);
coord[atomCount * 3 + 0] = arr_x->as_d(i); coord[atomCount * 3 + 0] = arr_x->as_d(i);
coord[atomCount * 3 + 1] = arr_y->as_d(i); coord[atomCount * 3 + 1] = arr_y->as_d(i);
coord[atomCount * 3 + 2] = arr_z->as_d(i); coord[atomCount * 3 + 2] = arr_z->as_d(i);
skipping to change at line 987 skipping to change at line 977
* *
* atInfoPtr: atom info array to fill * atInfoPtr: atom info array to fill
* info: data content configuration to populate with collected information * info: data content configuration to populate with collected information
* *
* return: models as VLA of coordinate sets * return: models as VLA of coordinate sets
*/ */
static CoordSet ** read_atom_site(PyMOLGlobals * G, cif_data * data, static CoordSet ** read_atom_site(PyMOLGlobals * G, cif_data * data,
AtomInfoType ** atInfoPtr, CifContentInfo &info, bool discrete) { AtomInfoType ** atInfoPtr, CifContentInfo &info, bool discrete) {
const cif_array *arr_x, *arr_y, *arr_z; const cif_array *arr_x, *arr_y, *arr_z;
const cif_array *arr_name = NULL, *arr_resn = NULL, *arr_resi = NULL, const cif_array *arr_name = nullptr, *arr_resn = nullptr, *arr_resi = nullptr,
*arr_chain = NULL, *arr_symbol, *arr_chain = nullptr, *arr_symbol,
*arr_group_pdb, *arr_alt, *arr_ins_code = NULL, *arr_b, *arr_u, *arr_group_pdb, *arr_alt, *arr_ins_code = nullptr, *arr_b, *arr_u,
*arr_q, *arr_ID, *arr_mod_num, *arr_entity_id, *arr_segi; *arr_q, *arr_ID, *arr_mod_num, *arr_entity_id, *arr_segi;
if ((arr_x = data->get_arr("_atom_site?cartn_x")) && if ((arr_x = data->get_arr("_atom_site?cartn_x")) &&
(arr_y = data->get_arr("_atom_site?cartn_y")) && (arr_y = data->get_arr("_atom_site?cartn_y")) &&
(arr_z = data->get_arr("_atom_site?cartn_z"))) { (arr_z = data->get_arr("_atom_site?cartn_z"))) {
} else if ( } else if (
(arr_x = data->get_arr("_atom_site?fract_x")) && (arr_x = data->get_arr("_atom_site?fract_x")) &&
(arr_y = data->get_arr("_atom_site?fract_y")) && (arr_y = data->get_arr("_atom_site?fract_y")) &&
(arr_z = data->get_arr("_atom_site?fract_z"))) { (arr_z = data->get_arr("_atom_site?fract_z"))) {
info.fractional = true; info.fractional = true;
} else { } else {
return NULL; return nullptr;
} }
if (info.use_auth) { if (info.use_auth) {
arr_name = data->get_arr("_atom_site.auth_atom_id"); arr_name = data->get_arr("_atom_site.auth_atom_id");
arr_resn = data->get_arr("_atom_site.auth_comp_id"); arr_resn = data->get_arr("_atom_site.auth_comp_id");
arr_resi = data->get_arr("_atom_site.auth_seq_id"); arr_resi = data->get_arr("_atom_site.auth_seq_id");
arr_chain = data->get_arr("_atom_site.auth_asym_id"); arr_chain = data->get_arr("_atom_site.auth_asym_id");
arr_ins_code = data->get_arr("_atom_site.pdbx_pdb_ins_code"); arr_ins_code = data->get_arr("_atom_site.pdbx_pdb_ins_code");
} }
skipping to change at line 1069 skipping to change at line 1059
int mod_num, ncsets = 0; int mod_num, ncsets = 0;
// collect number of atoms per model and number of coord sets // collect number of atoms per model and number of coord sets
std::map<int, int> atoms_per_model; std::map<int, int> atoms_per_model;
for (int i = 0, n = nrows; i < n; i++) { for (int i = 0, n = nrows; i < n; i++) {
mod_num = model_to_state(arr_mod_num->as_i(i, 1)); mod_num = model_to_state(arr_mod_num->as_i(i, 1));
if (mod_num < 1) { if (mod_num < 1) {
PRINTFB(G, FB_ObjectMolecule, FB_Errors) PRINTFB(G, FB_ObjectMolecule, FB_Errors)
" Error: model numbers < 1 not supported: %d\n", mod_num ENDFB(G); " Error: model numbers < 1 not supported: %d\n", mod_num ENDFB(G);
return NULL; return nullptr;
} }
atoms_per_model[mod_num - 1] += 1; atoms_per_model[mod_num - 1] += 1;
if (ncsets < mod_num) if (ncsets < mod_num)
ncsets = mod_num; ncsets = mod_num;
} }
// set up coordinate sets // set up coordinate sets
CoordSet ** csets = VLACalloc(CoordSet*, ncsets); CoordSet ** csets = VLACalloc(CoordSet*, ncsets);
skipping to change at line 1138 skipping to change at line 1128
cset->IdxToAtm[idx] = atomCount; cset->IdxToAtm[idx] = atomCount;
VLACheck(*atInfoPtr, AtomInfoType, atomCount); VLACheck(*atInfoPtr, AtomInfoType, atomCount);
ai = *atInfoPtr + atomCount; ai = *atInfoPtr + atomCount;
ai->rank = atomCount; ai->rank = atomCount;
ai->alt[0] = arr_alt->as_s(i)[0]; ai->alt[0] = arr_alt->as_s(i)[0];
ai->id = arr_ID->as_i(i); ai->id = arr_ID->as_i(i);
ai->b = (arr_u != NULL) ? ai->b = (arr_u != nullptr) ?
arr_u->as_d(i) * 78.95683520871486 : // B = U * 8 * pi^2 arr_u->as_d(i) * 78.95683520871486 : // B = U * 8 * pi^2
arr_b->as_d(i); arr_b->as_d(i);
ai->q = arr_q->as_d(i, 1.0); ai->q = arr_q->as_d(i, 1.0);
strncpy(ai->elem, arr_symbol->as_s(i), cElemNameLen); strncpy(ai->elem, arr_symbol->as_s(i), cElemNameLen);
ai->chain = LexIdx(G, arr_chain->as_s(i)); ai->chain = LexIdx(G, arr_chain->as_s(i));
ai->name = LexIdx(G, arr_name->as_s(i)); ai->name = LexIdx(G, arr_name->as_s(i));
ai->resn = LexIdx(G, arr_resn->as_s(i)); ai->resn = LexIdx(G, arr_resn->as_s(i));
ai->segi = segi; // steal reference ai->segi = std::move(segi); // steal reference
if ('H' == arr_group_pdb->as_s(i)[0]) { if ('H' == arr_group_pdb->as_s(i)[0]) {
ai->hetatm = 1; ai->hetatm = true;
ai->flags = cAtomFlag_ignore; ai->flags = cAtomFlag_ignore;
} }
ai->resv = arr_resi->as_i(i); ai->resv = arr_resi->as_i(i);
ai->temp1 = arr_label_seq_id->as_i(i); // for add_missing_ca ai->temp1 = arr_label_seq_id->as_i(i); // for add_missing_ca
if (arr_ins_code) { if (arr_ins_code) {
ai->setInscode(arr_ins_code->as_s(i)[0]); ai->setInscode(arr_ins_code->as_s(i)[0]);
} }
skipping to change at line 1180 skipping to change at line 1170
ai->formalCharge = arr_formal_charge->as_i(i); ai->formalCharge = arr_formal_charge->as_i(i);
AtomInfoAssignParameters(G, ai); AtomInfoAssignParameters(G, ai);
if (arr_color) { if (arr_color) {
ai->color = arr_color->as_i(i); ai->color = arr_color->as_i(i);
} else { } else {
AtomInfoAssignColors(G, ai); AtomInfoAssignColors(G, ai);
} }
if (arr_entity_id != NULL) { if (arr_entity_id != nullptr) {
AtomInfoSetEntityId(G, ai, arr_entity_id->as_s(i)); AtomInfoSetEntityId(G, ai, arr_entity_id->as_s(i));
} }
atomCount++; atomCount++;
} }
VLASize(*atInfoPtr, AtomInfoType, atomCount); VLASize(*atInfoPtr, AtomInfoType, atomCount);
return csets; return csets;
} }
/* /*
* Update `info` with entity polymer information * Update `info` with entity polymer information
*/ */
static bool read_entity_poly(PyMOLGlobals * G, const cif_data * data, CifContentInfo &info) { static bool read_entity_poly(PyMOLGlobals * G, const cif_data * data, CifContentInfo &info) {
const cif_array *arr_entity_id = NULL, *arr_type = NULL, const cif_array *arr_entity_id = nullptr, *arr_type = nullptr,
*arr_num = NULL, *arr_mon_id = NULL; *arr_num = nullptr, *arr_mon_id = nullptr;
if (!(arr_entity_id = data->get_arr("_entity_poly.entity_id")) || if (!(arr_entity_id = data->get_arr("_entity_poly.entity_id")) ||
!(arr_type = data->get_arr("_entity_poly.type"))) !(arr_type = data->get_arr("_entity_poly.type")))
return false; return false;
const cif_array * arr_seq_one_letter = data->get_arr("_entity_poly.pdbx_seq_on e_letter_code"); const cif_array * arr_seq_one_letter = data->get_arr("_entity_poly.pdbx_seq_on e_letter_code");
// polypeptides // polypeptides
for (int i = 0, n = arr_entity_id->get_nrows(); i < n; i++) { for (int i = 0, n = arr_entity_id->get_nrows(); i < n; i++) {
if (!strncasecmp("polypeptide", arr_type->as_s(i), 11)) { if (!strncasecmp("polypeptide", arr_type->as_s(i), 11)) {
skipping to change at line 1251 skipping to change at line 1241
info.sequences[arr_entity_id->as_s(i)].set( info.sequences[arr_entity_id->as_s(i)].set(
arr_num->as_i(i), arr_num->as_i(i),
arr_mon_id->as_s(i)); arr_mon_id->as_s(i));
} }
} }
} }
return true; return true;
} }
#if 0
/*
 * Read missing residues.
 *
 * This is under the assumption that _pdbx_poly_seq_scheme has complete
 * mon_id, asym_id, and seq_id arrays, but auth_seq_num only for residues with
 * coordinates (present in _atom_site) and ? values for missing residues.
 *
 * Append CA atoms to atInfoPtr, with no modification to coord sets. Sorting
 * will be necessary to move those atoms to the correct place in the sequence.
 *
 * G:         passed on to LexIdx and the AtomInfo* helpers
 * data:      data block to read the _pdbx_poly_seq_scheme columns from
 * atInfoPtr: atom info VLA; grown as needed and finally sized to the new
 *            atom count
 * info:      provides use_auth, is_excluded_chain() and is_polypeptide()
 *
 * return: false if a required column is missing, otherwise true (also if no
 * atom was appended)
 */
static bool read_pdbx_poly_seq_scheme(PyMOLGlobals * G, const cif_data * data,
    AtomInfoType ** atInfoPtr, CifContentInfo &info) {
  const cif_array *arr_resn = nullptr, *arr_resi = nullptr,
        *arr_chain = nullptr, *arr_segi = nullptr, *arr_ins_code = nullptr,
        *arr_auth_seq_num = nullptr, *arr_entity_id = nullptr;

  // required columns
  if (!(arr_resn = data->get_arr("_pdbx_poly_seq_scheme.mon_id")) ||
      !(arr_segi = data->get_arr("_pdbx_poly_seq_scheme.asym_id")) ||
      !(arr_entity_id = data->get_arr("_pdbx_poly_seq_scheme.entity_id")) ||
      !(arr_auth_seq_num = data->get_arr("_pdbx_poly_seq_scheme.auth_seq_num")))
    return false;

  // author identifiers are only looked up on request
  if (info.use_auth) {
    arr_resi = data->get_arr("_pdbx_poly_seq_scheme.pdb_seq_num");
    arr_chain = data->get_arr("_pdbx_poly_seq_scheme.pdb_strand_id");
    arr_ins_code = data->get_arr("_pdbx_poly_seq_scheme.pdb_ins_code");
  }

  // fall back to seq_id for the residue number ...
  if (!arr_resi && !(arr_resi = data->get_arr("_pdbx_poly_seq_scheme.seq_id")))
    return false;

  // ... and to asym_id for the chain
  if (!arr_chain)
    arr_chain = arr_segi;

  int atomCount = VLAGetSize(*atInfoPtr);

  for (int i = 0, n = arr_resn->get_nrows(); i < n; i++) {
    // only rows without auth_seq_num are treated as missing residues
    if (!arr_auth_seq_num->is_missing(i))
      continue;

    const char * segi = arr_segi->as_s(i);

    if (info.is_excluded_chain(segi))
      continue;

    const char * entity_id = arr_entity_id->as_s(i);

    if (!info.is_polypeptide(entity_id))
      continue;

    VLACheck(*atInfoPtr, AtomInfoType, atomCount); // auto-zero
    AtomInfoType * ai = *atInfoPtr + atomCount;

    ai->rank = atomCount;
    ai->id = -1;

    ai->elem[0] = 'C';
    ai->name = LexIdx(G, "CA");
    ai->resn = LexIdx(G, arr_resn->as_s(i));
    ai->segi = LexIdx(G, segi);
    ai->chain = LexIdx(G, arr_chain->as_s(i));

    ai->resv = arr_resi->as_i(i);

    if (arr_ins_code) {
      ai->setInscode(arr_ins_code->as_s(i)[0]);
    }

    AtomInfoAssignParameters(G, ai);
    AtomInfoAssignColors(G, ai);
    AtomInfoSetEntityId(G, ai, entity_id);

    atomCount++;
  }

  // set the VLA size to the final atom count
  VLASize(*atInfoPtr, AtomInfoType, atomCount);

  return true;
}
#endif
/* /*
* Sub-routine for `add_missing_ca` * Sub-routine for `add_missing_ca`
*/ */
static void add_missing_ca_sub(PyMOLGlobals * G, static void add_missing_ca_sub(PyMOLGlobals * G,
AtomInfoType *& atInfo, AtomInfoType *& atInfo,
int& current_resv, int& current_resv,
int& atomCount, int& atomCount,
const int i_ref, int resv, const int i_ref, int resv,
const seqvec_t * current_seq, const seqvec_t * current_seq,
const char * entity_id) const char * entity_id)
skipping to change at line 1395 skipping to change at line 1300
* Use the _entity_poly and _entity_poly_seq information to identify * Use the _entity_poly and _entity_poly_seq information to identify
* missing residues in partially present chains. Add CA atoms for those * missing residues in partially present chains. Add CA atoms for those
* to present complete sequences in the sequence viewer. * to present complete sequences in the sequence viewer.
*/ */
static bool add_missing_ca(PyMOLGlobals * G, static bool add_missing_ca(PyMOLGlobals * G,
AtomInfoType *& atInfo, CifContentInfo &info) { AtomInfoType *& atInfo, CifContentInfo &info) {
int oldAtomCount = VLAGetSize(atInfo); int oldAtomCount = VLAGetSize(atInfo);
int atomCount = oldAtomCount; int atomCount = oldAtomCount;
int current_resv = 0; int current_resv = 0;
const seqvec_t * current_seq = NULL; const seqvec_t * current_seq = nullptr;
const char * current_entity_id = ""; const char * current_entity_id = "";
for (int i = 0; i < oldAtomCount; ++i) { for (int i = 0; i < oldAtomCount; ++i) {
const char * entity_id = LexStr(G, atInfo[i].custom); const char * entity_id = LexStr(G, atInfo[i].custom);
if (i == 0 if (i == 0
|| atInfo[i].chain != atInfo[i - 1].chain || atInfo[i].chain != atInfo[i - 1].chain
|| strcmp(entity_id, current_entity_id)) { || strcmp(entity_id, current_entity_id)) {
// finish prev seq // finish prev seq
if (current_seq && i > 0) { if (current_seq && i > 0) {
add_missing_ca_sub(G, add_missing_ca_sub(G,
atInfo, current_resv, atomCount, atInfo, current_resv, atomCount,
i - 1, current_seq->size() + 1, i - 1, current_seq->size() + 1,
current_seq, current_entity_id); current_seq, current_entity_id);
} }
current_resv = 0; current_resv = 0;
current_seq = NULL; current_seq = nullptr;
current_entity_id = entity_id; current_entity_id = entity_id;
if (info.is_polypeptide(entity_id) && !info.is_excluded_chain(atInfo[i].se gi)) { if (info.is_polypeptide(entity_id) && !info.is_excluded_chain(atInfo[i].se gi)) {
// get new sequence // get new sequence
auto it = info.sequences.find(entity_id); auto it = info.sequences.find(entity_id);
if (it != info.sequences.end()) { if (it != info.sequences.end()) {
current_seq = &it->second; current_seq = &it->second;
} }
} }
skipping to change at line 1455 skipping to change at line 1360
return true; return true;
} }
/* /*
* Read secondary structure from STRUCT_CONF or STRUCT_SHEET_RANGE * Read secondary structure from STRUCT_CONF or STRUCT_SHEET_RANGE
*/ */
static bool read_ss_(PyMOLGlobals * G, cif_data * data, char ss, static bool read_ss_(PyMOLGlobals * G, cif_data * data, char ss,
sshashmap &ssrecords, CifContentInfo &info) sshashmap &ssrecords, CifContentInfo &info)
{ {
const cif_array *arr_beg_chain = NULL, *arr_beg_resi = NULL, const cif_array *arr_beg_chain = nullptr, *arr_beg_resi = nullptr,
*arr_end_chain = NULL, *arr_end_resi = NULL, *arr_end_chain = nullptr, *arr_end_resi = nullptr,
*arr_beg_ins_code = NULL, *arr_end_ins_code = NULL; *arr_beg_ins_code = nullptr, *arr_end_ins_code = nullptr;
std::string prefix = "_struct_conf."; std::string prefix = "_struct_conf.";
if (ss == 'S') if (ss == 'S')
prefix = "_struct_sheet_range."; prefix = "_struct_sheet_range.";
if (info.use_auth && if (info.use_auth &&
(arr_beg_chain = data->get_arr((prefix + "beg_auth_asym_id").c_str())) && (arr_beg_chain = data->get_arr((prefix + "beg_auth_asym_id").c_str())) &&
(arr_beg_resi = data->get_arr((prefix + "beg_auth_seq_id").c_str())) && (arr_beg_resi = data->get_arr((prefix + "beg_auth_seq_id").c_str())) &&
(arr_end_chain = data->get_arr((prefix + "end_auth_asym_id").c_str())) && (arr_end_chain = data->get_arr((prefix + "end_auth_asym_id").c_str())) &&
(arr_end_resi = data->get_arr((prefix + "end_auth_seq_id").c_str()))) { (arr_end_resi = data->get_arr((prefix + "end_auth_seq_id").c_str()))) {
skipping to change at line 1479 skipping to change at line 1384
arr_beg_ins_code = data->get_arr((prefix + "pdbx_beg_pdb_ins_code").c_str()) ; arr_beg_ins_code = data->get_arr((prefix + "pdbx_beg_pdb_ins_code").c_str()) ;
arr_end_ins_code = data->get_arr((prefix + "pdbx_end_pdb_ins_code").c_str()) ; arr_end_ins_code = data->get_arr((prefix + "pdbx_end_pdb_ins_code").c_str()) ;
} else if ( } else if (
!(arr_beg_chain = data->get_arr((prefix + "beg_label_asym_id").c_str())) | | !(arr_beg_chain = data->get_arr((prefix + "beg_label_asym_id").c_str())) | |
!(arr_beg_resi = data->get_arr((prefix + "beg_label_seq_id").c_str())) || !(arr_beg_resi = data->get_arr((prefix + "beg_label_seq_id").c_str())) ||
!(arr_end_chain = data->get_arr((prefix + "end_label_asym_id").c_str())) | | !(arr_end_chain = data->get_arr((prefix + "end_label_asym_id").c_str())) | |
!(arr_end_resi = data->get_arr((prefix + "end_label_seq_id").c_str()))) { !(arr_end_resi = data->get_arr((prefix + "end_label_seq_id").c_str()))) {
return false; return false;
} }
const cif_array *arr_conf_type_id = (ss == 'S') ? NULL : const cif_array *arr_conf_type_id = (ss == 'S') ? nullptr :
data->get_arr("_struct_conf.conf_type_id"); data->get_arr("_struct_conf.conf_type_id");
int nrows = arr_beg_chain->get_nrows(); int nrows = arr_beg_chain->get_nrows();
sshashkey key; sshashkey key;
for (int i = 0; i < nrows; i++) { for (int i = 0; i < nrows; i++) {
// first character of conf_type_id (one of H, S, T) // first character of conf_type_id (one of H, S, T)
char ss_i = arr_conf_type_id ? arr_conf_type_id->as_s(i)[0] : ss; char ss_i = arr_conf_type_id ? arr_conf_type_id->as_s(i)[0] : ss;
// exclude TURN_* (include HELX_* and STRN) // exclude TURN_* (include HELX_* and STRN)
skipping to change at line 1645 skipping to change at line 1550
if (mmcif) { if (mmcif) {
id_dict[ai->id] = ai; id_dict[ai->id] = ai;
} else { } else {
std::string key(LexStr(G, ai->name)); std::string key(LexStr(G, ai->name));
name_dict[key] = ai; name_dict[key] = ai;
} }
} }
// read aniso table // read aniso table
for (int i = 0; i < arr_u11->get_nrows(); i++) { for (int i = 0; i < arr_u11->get_nrows(); i++) {
ai = NULL; ai = nullptr;
if (mmcif) { if (mmcif) {
find1(id_dict, ai, arr_label->as_i(i)); find1(id_dict, ai, arr_label->as_i(i));
} else { } else {
find1(name_dict, ai, arr_label->as_s(i)); find1(name_dict, ai, arr_label->as_s(i));
} }
if (!ai) { if (!ai) {
// expected for multi-models // expected for multi-models
continue; continue;
skipping to change at line 1680 skipping to change at line 1585
/* /*
* Read GEOM_BOND * Read GEOM_BOND
* *
* return: BondType VLA * return: BondType VLA
*/ */
static BondType * read_geom_bond(PyMOLGlobals * G, cif_data * data, static BondType * read_geom_bond(PyMOLGlobals * G, cif_data * data,
AtomInfoType * atInfo) { AtomInfoType * atInfo) {
const cif_array *arr_ID_1, *arr_ID_2; const cif_array *arr_ID_1, *arr_ID_2;
if ((arr_ID_1 = data->get_arr("_geom_bond.atom_site_id_1", if ((arr_ID_1 = data->get_arr("_geom_bond.atom_site_id_1",
"_geom_bond_atom_site_label_1")) == NULL || "_geom_bond_atom_site_label_1")) == nullptr ||
(arr_ID_2 = data->get_arr("_geom_bond.atom_site_id_2", (arr_ID_2 = data->get_arr("_geom_bond.atom_site_id_2",
"_geom_bond_atom_site_label_2")) == NULL) "_geom_bond_atom_site_label_2")) == nullptr)
return NULL; return nullptr;
const cif_array *arr_symm_1 = data->get_opt("_geom_bond?site_symmetry_1"); const cif_array *arr_symm_1 = data->get_opt("_geom_bond?site_symmetry_1");
const cif_array *arr_symm_2 = data->get_opt("_geom_bond?site_symmetry_2"); const cif_array *arr_symm_2 = data->get_opt("_geom_bond?site_symmetry_2");
int nrows = arr_ID_1->get_nrows(); int nrows = arr_ID_1->get_nrows();
int nAtom = VLAGetSize(atInfo); int nAtom = VLAGetSize(atInfo);
int nBond = 0; int nBond = 0;
BondType *bondvla, *bond; BondType *bondvla, *bond;
bondvla = bond = VLACalloc(BondType, 6 * nAtom); bondvla = bond = VLACalloc(BondType, 6 * nAtom);
skipping to change at line 1745 skipping to change at line 1650
/* /*
* Read CHEMICAL_CONN_BOND * Read CHEMICAL_CONN_BOND
* *
* return: BondType VLA * return: BondType VLA
*/ */
static BondType * read_chemical_conn_bond(PyMOLGlobals * G, cif_data * data) { static BondType * read_chemical_conn_bond(PyMOLGlobals * G, cif_data * data) {
const cif_array *arr_number, *arr_atom_1, *arr_atom_2, *arr_type; const cif_array *arr_number, *arr_atom_1, *arr_atom_2, *arr_type;
if ((arr_number = data->get_arr("_atom_site?chemical_conn_number")) == NULL || if ((arr_number = data->get_arr("_atom_site?chemical_conn_number")) == nullptr
(arr_atom_1 = data->get_arr("_chemical_conn_bond?atom_1")) == NULL || ||
(arr_atom_2 = data->get_arr("_chemical_conn_bond?atom_2")) == NULL || (arr_atom_1 = data->get_arr("_chemical_conn_bond?atom_1")) == nullptr ||
(arr_type = data->get_arr("_chemical_conn_bond?type")) == NULL) (arr_atom_2 = data->get_arr("_chemical_conn_bond?atom_2")) == nullptr ||
return NULL; (arr_type = data->get_arr("_chemical_conn_bond?type")) == nullptr)
return nullptr;
int nAtom = arr_number->get_nrows(); int nAtom = arr_number->get_nrows();
int nBond = arr_atom_1->get_nrows(); int nBond = arr_atom_1->get_nrows();
BondType *bondvla, *bond; BondType *bondvla, *bond;
bondvla = bond = VLACalloc(BondType, nBond); bondvla = bond = VLACalloc(BondType, nBond);
// chemical_conn_number -> atom index // chemical_conn_number -> atom index
std::map<int, int> number_dict; std::map<int, int> number_dict;
skipping to change at line 1799 skipping to change at line 1704
static bool read_struct_conn_(PyMOLGlobals * G, cif_data * data, static bool read_struct_conn_(PyMOLGlobals * G, cif_data * data,
AtomInfoType * atInfo, CoordSet * cset, AtomInfoType * atInfo, CoordSet * cset,
CifContentInfo &info) { CifContentInfo &info) {
const cif_array *col_type_id = data->get_arr("_struct_conn.conn_type_id"); const cif_array *col_type_id = data->get_arr("_struct_conn.conn_type_id");
if (!col_type_id) if (!col_type_id)
return false; return false;
const cif_array const cif_array
*col_asym_id[2] = {NULL, NULL}, *col_asym_id[2] = {nullptr, nullptr},
*col_comp_id[2] = {NULL, NULL}, *col_comp_id[2] = {nullptr, nullptr},
*col_seq_id[2] = {NULL, NULL}, *col_seq_id[2] = {nullptr, nullptr},
*col_atom_id[2] = {NULL, NULL}, *col_atom_id[2] = {nullptr, nullptr},
*col_alt_id[2] = {NULL, NULL}, *col_alt_id[2] = {nullptr, nullptr},
*col_ins_code[2] = {NULL, NULL}, *col_ins_code[2] = {nullptr, nullptr},
*col_symm[2] = {NULL, NULL}; *col_symm[2] = {nullptr, nullptr};
if (info.use_auth) { if (info.use_auth) {
col_asym_id[0] = data->get_arr("_struct_conn.ptnr1_auth_asym_id"); col_asym_id[0] = data->get_arr("_struct_conn.ptnr1_auth_asym_id");
col_comp_id[0] = data->get_arr("_struct_conn.ptnr1_auth_comp_id"); col_comp_id[0] = data->get_arr("_struct_conn.ptnr1_auth_comp_id");
col_seq_id[0] = data->get_arr("_struct_conn.ptnr1_auth_seq_id"); col_seq_id[0] = data->get_arr("_struct_conn.ptnr1_auth_seq_id");
col_atom_id[0] = data->get_arr("_struct_conn.ptnr1_auth_atom_id"); col_atom_id[0] = data->get_arr("_struct_conn.ptnr1_auth_atom_id");
col_asym_id[1] = data->get_arr("_struct_conn.ptnr2_auth_asym_id"); col_asym_id[1] = data->get_arr("_struct_conn.ptnr2_auth_asym_id");
col_comp_id[1] = data->get_arr("_struct_conn.ptnr2_auth_comp_id"); col_comp_id[1] = data->get_arr("_struct_conn.ptnr2_auth_comp_id");
col_seq_id[1] = data->get_arr("_struct_conn.ptnr2_auth_seq_id"); col_seq_id[1] = data->get_arr("_struct_conn.ptnr2_auth_seq_id");
col_atom_id[1] = data->get_arr("_struct_conn.ptnr2_auth_atom_id"); col_atom_id[1] = data->get_arr("_struct_conn.ptnr2_auth_atom_id");
skipping to change at line 1945 skipping to change at line 1850
/* /*
* Read bonds from CHEM_COMP_BOND * Read bonds from CHEM_COMP_BOND
* *
* return: BondType VLA * return: BondType VLA
*/ */
static BondType * read_chem_comp_bond(PyMOLGlobals * G, cif_data * data, static BondType * read_chem_comp_bond(PyMOLGlobals * G, cif_data * data,
AtomInfoType * atInfo) { AtomInfoType * atInfo) {
const cif_array *col_ID_1, *col_ID_2, *col_comp_id; const cif_array *col_ID_1, *col_ID_2, *col_comp_id;
if ((col_ID_1 = data->get_arr("_chem_comp_bond.atom_id_1")) == NULL || if ((col_ID_1 = data->get_arr("_chem_comp_bond.atom_id_1")) == nullptr ||
(col_ID_2 = data->get_arr("_chem_comp_bond.atom_id_2")) == NULL || (col_ID_2 = data->get_arr("_chem_comp_bond.atom_id_2")) == nullptr ||
(col_comp_id = data->get_arr("_chem_comp_bond.comp_id")) == NULL) (col_comp_id = data->get_arr("_chem_comp_bond.comp_id")) == nullptr)
return NULL; return nullptr;
// "_chem_comp_bond.type" seems to be non-standard here. It's found in the // "_chem_comp_bond.type" seems to be non-standard here. It's found in the
// wild with values like "double" and "aromatic". mmcif_nmr-star.dic defines // wild with values like "double" and "aromatic". mmcif_nmr-star.dic defines
// it, but with different vocabulary (e.g. "amide", "ether", etc.). // it, but with different vocabulary (e.g. "amide", "ether", etc.).
const cif_array *col_order = data->get_opt( const cif_array *col_order = data->get_opt(
"_chem_comp_bond.value_order", "_chem_comp_bond.value_order",
"_chem_comp_bond.type"); "_chem_comp_bond.type");
int nrows = col_ID_1->get_nrows(); int nrows = col_ID_1->get_nrows();
skipping to change at line 2011 skipping to change at line 1916
/* /*
* Read bonds from _pymol_bond (non-standard extension) * Read bonds from _pymol_bond (non-standard extension)
* *
* return: BondType VLA * return: BondType VLA
*/ */
static BondType * read_pymol_bond(PyMOLGlobals * G, cif_data * data, static BondType * read_pymol_bond(PyMOLGlobals * G, cif_data * data,
AtomInfoType * atInfo) { AtomInfoType * atInfo) {
const cif_array *col_ID_1, *col_ID_2, *col_order; const cif_array *col_ID_1, *col_ID_2, *col_order;
if ((col_ID_1 = data->get_arr("_pymol_bond.atom_site_id_1")) == NULL || if ((col_ID_1 = data->get_arr("_pymol_bond.atom_site_id_1")) == nullptr ||
(col_ID_2 = data->get_arr("_pymol_bond.atom_site_id_2")) == NULL || (col_ID_2 = data->get_arr("_pymol_bond.atom_site_id_2")) == nullptr ||
(col_order = data->get_arr("_pymol_bond.order")) == NULL) (col_order = data->get_arr("_pymol_bond.order")) == nullptr)
return NULL; return nullptr;
int nrows = col_ID_1->get_nrows(); int nrows = col_ID_1->get_nrows();
int nAtom = VLAGetSize(atInfo); int nAtom = VLAGetSize(atInfo);
BondType *bondvla, *bond; BondType *bondvla, *bond;
bondvla = bond = VLACalloc(BondType, nrows); bondvla = bond = VLACalloc(BondType, nrows);
// ID -> atom index // ID -> atom index
std::map<int, int> id_dict; std::map<int, int> id_dict;
skipping to change at line 2053 skipping to change at line 1958
return bondvla; return bondvla;
} }
/* /*
* Create a new (multi-state) object-molecule from datablock * Create a new (multi-state) object-molecule from datablock
*/ */
static ObjectMolecule *ObjectMoleculeReadCifData(PyMOLGlobals * G, static ObjectMolecule *ObjectMoleculeReadCifData(PyMOLGlobals * G,
cif_data * datablock, int discrete, bool quiet) cif_data * datablock, int discrete, bool quiet)
{ {
CoordSet ** csets = NULL; CoordSet ** csets = nullptr;
int ncsets; int ncsets;
CifContentInfo info(G, SettingGetGlobal_b(G, cSetting_cif_use_auth)); CifContentInfo info(G, SettingGetGlobal_b(G, cSetting_cif_use_auth));
const char * assembly_id = SettingGetGlobal_s(G, cSetting_assembly); const char * assembly_id = SettingGetGlobal_s(G, cSetting_assembly);
// title "echo tag" // title "echo tag"
const char * title = datablock->get_opt("_struct.title")->as_s(); const char * title = datablock->get_opt("_struct.title")->as_s();
if (!quiet && title[0] && if (!quiet && title[0] &&
strstr(SettingGetGlobal_s(G, cSetting_pdb_echo_tags), "TITLE")) { strstr(SettingGetGlobal_s(G, cSetting_pdb_echo_tags), "TITLE")) {
PRINTFB(G, FB_ObjectMolecule, FB_Details) PRINTFB(G, FB_ObjectMolecule, FB_Details)
"TITLE %s\n", title ENDFB(G); "TITLE %s\n", title ENDFB(G);
skipping to change at line 2092 skipping to change at line 1997
read_ss(G, datablock, I->AtomInfo, info); read_ss(G, datablock, I->AtomInfo, info);
// trace atoms // trace atoms
read_pdbx_coordinate_model(G, datablock, I); read_pdbx_coordinate_model(G, datablock, I);
// polymer information // polymer information
read_entity_poly(G, datablock, info); read_entity_poly(G, datablock, info);
// missing residues // missing residues
if (!I->DiscreteFlag && !SettingGetGlobal_i(G, cSetting_retain_order)) { if (!I->DiscreteFlag && !SettingGetGlobal_i(G, cSetting_retain_order)) {
#if 0
read_pdbx_poly_seq_scheme(G, datablock, &I->AtomInfo, info);
#else
add_missing_ca(G, I->AtomInfo, info); add_missing_ca(G, I->AtomInfo, info);
#endif
} }
} else if ((csets = read_chem_comp_atom_model(G, datablock, &I->AtomInfo))) { } else if ((csets = read_chem_comp_atom_model(G, datablock, &I->AtomInfo))) {
info.type = CIF_CHEM_COMP; info.type = CIF_CHEM_COMP;
} else { } else {
ObjectMoleculeFree(I); ObjectMoleculeFree(I);
return NULL; return nullptr;
} }
// get number of atoms and coordinate sets // get number of atoms and coordinate sets
I->NAtom = VLAGetSize(I->AtomInfo); I->NAtom = VLAGetSize(I->AtomInfo);
ncsets = VLAGetSize(csets); ncsets = VLAGetSize(csets);
// initialize the new coordsets (not data, but indices, etc.) // initialize the new coordsets (not data, but indices, etc.)
for (int i = 0; i < ncsets; i++) { for (int i = 0; i < ncsets; i++) {
if (csets[i]) { if (csets[i]) {
csets[i]->Obj = I; csets[i]->Obj = I;
skipping to change at line 2199 skipping to change at line 2100
// if none of the above created I->Bond, then do distance based bonding // if none of the above created I->Bond, then do distance based bonding
if (!I->Bond) { if (!I->Bond) {
if (I->DiscreteFlag) { if (I->DiscreteFlag) {
ObjectMoleculeConnectDiscrete(I); ObjectMoleculeConnectDiscrete(I);
} else if (cset) { } else if (cset) {
ObjectMoleculeConnect(I, &I->NBond, &I->Bond, I->AtomInfo, cset, true, 3); ObjectMoleculeConnect(I, &I->NBond, &I->Bond, I->AtomInfo, cset, true, 3);
} }
// guess valences for distance based bonding // guess valences for distance based bonding
if (SettingGetGlobal_b(G, cSetting_pdb_hetatm_guess_valences)) { if (SettingGetGlobal_b(G, cSetting_pdb_hetatm_guess_valences)) {
ObjectMoleculeGuessValences(I, 0, NULL, NULL, false); ObjectMoleculeGuessValences(I, 0, nullptr, nullptr, false);
} }
} else { } else {
if (!I->NBond) if (!I->NBond)
I->NBond = VLAGetSize(I->Bond); I->NBond = VLAGetSize(I->Bond);
// bonds from coordset // bonds from coordset
if (cset && cset->TmpBond && cset->NTmpBond) { if (cset && cset->TmpBond && cset->NTmpBond) {
for (int i = 0; i < cset->NTmpBond; ++i) { for (int i = 0; i < cset->NTmpBond; ++i) {
ObjectMoleculeAddBond2(I, ObjectMoleculeAddBond2(I,
cset->IdxToAtm[cset->TmpBond[i].index[0]], cset->IdxToAtm[cset->TmpBond[i].index[0]],
skipping to change at line 2258 skipping to change at line 2159
*/ */
ObjectMolecule *ObjectMoleculeReadCifStr(PyMOLGlobals * G, ObjectMolecule * I, ObjectMolecule *ObjectMoleculeReadCifStr(PyMOLGlobals * G, ObjectMolecule * I,
const char *st, int frame, const char *st, int frame,
int discrete, int quiet, int multiplex, int discrete, int quiet, int multiplex,
int zoom) int zoom)
{ {
if (I) { if (I) {
PRINTFB(G, FB_ObjectMolecule, FB_Errors) PRINTFB(G, FB_ObjectMolecule, FB_Errors)
" Error: loading mmCIF into existing object not supported, please use 'cre ate'\n" " Error: loading mmCIF into existing object not supported, please use 'cre ate'\n"
" to append to an existing object.\n" ENDFB(G); " to append to an existing object.\n" ENDFB(G);
return NULL; return nullptr;
} }
if (multiplex > 0) { if (multiplex > 0) {
PRINTFB(G, FB_ObjectMolecule, FB_Errors) PRINTFB(G, FB_ObjectMolecule, FB_Errors)
" Error: loading mmCIF with multiplex=1 not supported, please use 'split_s tates'.\n" " Error: loading mmCIF with multiplex=1 not supported, please use 'split_s tates'.\n"
" after loading the object." ENDFB(G); " after loading the object." ENDFB(G);
return NULL; return nullptr;
} }
const char * filename = NULL; const char * filename = nullptr;
#ifndef _PYMOL_NO_CXX11
auto cif = std::make_shared<cif_file>(filename, st); auto cif = std::make_shared<cif_file>(filename, st);
#else
cif_file _cif_stack(filename, st);
auto cif = &_cif_stack;
#endif
for (auto it = cif->datablocks.begin(); it != cif->datablocks.end(); ++it) { for (auto it = cif->datablocks.begin(); it != cif->datablocks.end(); ++it) {
ObjectMolecule * obj = ObjectMoleculeReadCifData(G, it->second, discrete, qu iet); ObjectMolecule * obj = ObjectMoleculeReadCifData(G, it->second, discrete, qu iet);
if (!obj) { if (!obj) {
PRINTFB(G, FB_ObjectMolecule, FB_Warnings) PRINTFB(G, FB_ObjectMolecule, FB_Warnings)
" mmCIF-Warning: no coordinates found in data_%s\n", it->first ENDFB(G); " mmCIF-Warning: no coordinates found in data_%s\n", it->first ENDFB(G);
continue; continue;
} }
#if !defined(_PYMOL_NOPY) && !defined(_PYMOL_NO_CXX11) #ifndef _PYMOL_NOPY
// we only provide access from the Python API so far // we only provide access from the Python API so far
if (SettingGetGlobal_b(G, cSetting_cif_keepinmemory)) { if (SettingGetGlobal_b(G, cSetting_cif_keepinmemory)) {
obj->m_cifdata = it->second; obj->m_cifdata = it->second;
obj->m_ciffile = cif; obj->m_ciffile = cif;
} }
#endif #endif
if (cif->datablocks.size() == 1 || multiplex == 0) if (cif->datablocks.size() == 1 || multiplex == 0)
return obj; return obj;
// multiplexing // multiplexing
ObjectSetName((CObject*) obj, it->first); ObjectSetName((CObject*) obj, it->first);
ExecutiveDelete(G, obj->Obj.Name); ExecutiveDelete(G, obj->Obj.Name);
ExecutiveManageObject(G, (CObject*) obj, zoom, true); ExecutiveManageObject(G, (CObject*) obj, zoom, true);
} }
return NULL; return nullptr;
} }
/* /*
* Bond dictionary getter, with on-demand download of residue dictionaries * Bond dictionary getter, with on-demand download of residue dictionaries
*/ */
const bond_dict_t::mapped_type * bond_dict_t::get(PyMOLGlobals * G, const char * resn, bool try_download) { const bond_dict_t::mapped_type * bond_dict_t::get(PyMOLGlobals * G, const char * resn, bool try_download) {
auto key = make_key(resn); auto key = make_key(resn);
auto it = find(key); auto it = find(key);
if (it != end()) if (it != end())
return &it->second; return &it->second;
if (unknown_resn.count(key)) if (unknown_resn.count(key))
return NULL; return nullptr;
#ifndef _PYMOL_NOPY #ifndef _PYMOL_NOPY
if (try_download) { if (try_download) {
int blocked = PAutoBlock(G); int blocked = PAutoBlock(G);
bool downloaded = false; bool downloaded = false;
// call into Python // call into Python
PyObject * pyfilename = PYOBJECT_CALLMETHOD(G->P_inst->cmd, PyObject * pyfilename = PYOBJECT_CALLMETHOD(G->P_inst->cmd,
"download_chem_comp", "siO", resn, "download_chem_comp", "siO", resn,
!Feedback(G, FB_Executive, FB_Details), !Feedback(G, FB_Executive, FB_Details),
G->P_inst->cmd); G->P_inst->cmd);
if (pyfilename) { if (pyfilename) {
const char * filename = PyString_AsString(pyfilename); const char * filename = PyString_AsString(pyfilename);
// update // update
if ((downloaded = (filename && filename[0]))) { if ((downloaded = (filename && filename[0]))) {
cif_file cif(filename); cif_file cif(filename);
for (auto it = cif.datablocks.begin(); it != cif.datablocks.end(); ++it) for (auto &item : cif.datablocks)
read_chem_comp_bond_dict(it->second, *this); read_chem_comp_bond_dict(item.second, *this);
} }
Py_DECREF(pyfilename); Py_DECREF(pyfilename);
} }
PAutoUnblock(G, blocked); PAutoUnblock(G, blocked);
if (downloaded) { if (downloaded) {
// second attempt to look up, from eventually updated dictionary // second attempt to look up, from eventually updated dictionary
return get(G, resn, false); return get(G, resn, false);
skipping to change at line 2358 skipping to change at line 2254
} }
#endif #endif
PRINTFB(G, FB_Executive, FB_Warnings) PRINTFB(G, FB_Executive, FB_Warnings)
" ExecutiveLoad-Warning: No _chem_comp_bond data for residue '%s'\n", resn " ExecutiveLoad-Warning: No _chem_comp_bond data for residue '%s'\n", resn
ENDFB(G); ENDFB(G);
// don't try downloading again // don't try downloading again
unknown_resn.insert(key); unknown_resn.insert(key);
return NULL; return nullptr;
} }
// vi:sw=2:ts=2:expandtab // vi:sw=2:ts=2:expandtab
 End of changes. 65 change blocks. 
210 lines changed or deleted 107 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)