"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "drbd/drbd_req.c" between
drbd-9.1.8.tar.gz and drbd-9.1.9.tar.gz

About: DRBD (9.x) is a block device designed for building high-availability clusters. It does this by mirroring a whole block device over a (dedicated) network; you can think of it as a "network RAID-1". The driver code has been integrated into the vanilla Linux kernel.

drbd_req.c (drbd-9.1.8) vs. drbd_req.c (drbd-9.1.9)
In the listing below, lines present only in drbd-9.1.8 are prefixed with "-", lines present only in drbd-9.1.9 with "+"; unprefixed lines are common to both versions.

skipping to change at line 607 (drbd-9.1.8) / line 607 (drbd-9.1.9)
        if (connection->todo.req_next != req)
                return;
        rcu_read_lock();
        list_for_each_entry_continue_rcu(req, &connection->resource->transfer_log, tl_requests) {
                const unsigned s = req->net_rq_state[connection->peer_node_id];
                connection->send.seen_dagtag_sector = req->dagtag_sector;
                if (s & RQ_NET_QUEUED) {
                        found_req = req;
                        break;
                }
+               /* Found a request which is for this peer but not yet queued.
+                * Do not skip past it. */
+               if (s & RQ_NET_PENDING && !(s & RQ_NET_SENT))
+                       break;
        }
        rcu_read_unlock();
        connection->todo.req_next = found_req;
}

+/**
+ * set_cache_ptr_if_null() - Set caching pointer to given request if not currently set.
+ * @cache_ptr: Pointer to set.
+ * @req: Request to potentially set the pointer to.
+ *
+ * The caching pointer system is designed to track the oldest request in the
+ * transfer log fulfilling some condition. In particular, a combination of
+ * flags towards a given peer. This condition must guarantee that the request
+ * will not be destroyed.
+ *
+ * This system is implemented by set_cache_ptr_if_null() and
+ * advance_cache_ptr(). A request must be in the transfer log and fulfil the
+ * condition before set_cache_ptr_if_null() is called. If
+ * set_cache_ptr_if_null() is called before this request is in the transfer log
+ * or before it fulfils the condition, the pointer may be advanced past this
+ * request, or unset, which also has the effect of skipping the request.
+ *
+ * Once the condition is no longer fulfilled for a request, advance_cache_ptr()
+ * must be called. If the caching pointer currently points to this request,
+ * this will advance it to the next request fulfilling the condition.
+ *
+ * set_cache_ptr_if_null() may be called concurrently with itself and with
+ * advance_cache_ptr(). However, advance_cache_ptr() must not be called
+ * concurrently for a given caching pointer. If it were, the call for the older
+ * request may advance the pointer to the newer request, although the newer
+ * request has concurrently been modified such that it no longer fulfils the
+ * condition.
+ */
static void set_cache_ptr_if_null(struct drbd_request **cache_ptr, struct drbd_request *req)
{
        struct drbd_request *prev_req, *old_req = NULL;
        rcu_read_lock();
        prev_req = cmpxchg(cache_ptr, old_req, req);
        while (prev_req != old_req) {
                if (prev_req && req->dagtag_sector > prev_req->dagtag_sector)
                        break;
                old_req = prev_req;
                prev_req = cmpxchg(cache_ptr, old_req, req);
        }
        rcu_read_unlock();
}
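The core of set_cache_ptr_if_null() above is a lock-free "keep the oldest" update: try to install the request, and back off as soon as the slot already holds a request with a smaller dagtag. The standalone sketch below models that pattern with C11 atomics and a plain sequence number standing in for dagtag_sector; it is an illustration only, not DRBD code (the kernel version uses cmpxchg() under rcu_read_lock(), presumably so the current cache entry can be dereferenced safely while comparing).

#include <stdatomic.h>
#include <stddef.h>

struct item {
        unsigned long seq;      /* stands in for dagtag_sector: smaller means older */
};

/* Install 'it' as the cached "oldest interesting item" unless the cache
 * already holds an item that is older than 'it'.  Safe to call concurrently
 * with itself, mirroring the documented rules for set_cache_ptr_if_null(). */
static void cache_set_if_older(_Atomic(struct item *) *cache, struct item *it)
{
        struct item *expected = NULL;

        while (!atomic_compare_exchange_weak(cache, &expected, it)) {
                /* 'expected' now holds the current cache contents.  If that
                 * entry is older than 'it', leave it in place; otherwise
                 * retry the exchange against the value we just observed. */
                if (expected && it->seq > expected->seq)
                        return;
        }
}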
+/* See set_cache_ptr_if_null(). */
static void advance_cache_ptr(struct drbd_connection *connection,
                              struct drbd_request __rcu **cache_ptr, struct drbd_request *req,
                              unsigned int is_set, unsigned int is_clear)
{
        struct drbd_request *old_req;
        struct drbd_request *found_req = NULL;
        rcu_read_lock();
        old_req = rcu_dereference(*cache_ptr);
        if (old_req != req) {

skipping to change at line 702 (drbd-9.1.8) / line 735 (drbd-9.1.9)
        /* apply */
        spin_lock(&req->rq_lock); /* local IRQ already disabled */
        old_local = req->local_rq_state;
        req->local_rq_state &= ~clear_local;
        req->local_rq_state |= set_local;
        if (idx != -1) {
                old_net = req->net_rq_state[idx];
-               req->net_rq_state[idx] &= ~clear;
-               req->net_rq_state[idx] |= set;
+               WRITE_ONCE(req->net_rq_state[idx], (req->net_rq_state[idx] & ~clear) | set);
                connection = peer_device->connection;
        }
        /* no change? */
        unchanged = req->local_rq_state == old_local &&
                    (idx == -1 || req->net_rq_state[idx] == old_net);
        spin_unlock(&req->rq_lock);
        if (unchanged)

skipping to change at line 950 (drbd-9.1.8) / line 982 (drbd-9.1.9)
                mod_rq_state(req, m, peer_device, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
                break;
        case DISCARD_COMPLETED_NOTSUPP:
        case DISCARD_COMPLETED_WITH_ERROR:
                /* I'd rather not detach from local disk just because it
                 * failed a REQ_OP_DISCARD. */
                mod_rq_state(req, m, peer_device, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
                break;
-       case QUEUE_FOR_NET_READ:
+       case NEW_NET_READ:
                /* READ, and
                 * no local disk,
                 * or target area marked as invalid,
                 * or just got an io-error. */
                /* from __drbd_make_request
                 * or from bio_endio during read io-error recovery */
                /* So we can verify the handle in the answer packet.
                 * Corresponding drbd_remove_request_interval is in
                 * drbd_req_complete() */
                D_ASSERT(device, drbd_interval_empty(&req->i));
                spin_lock_irqsave(&device->interval_lock, flags);
                drbd_insert_interval(&device->read_requests, &req->i);
                spin_unlock_irqrestore(&device->interval_lock, flags);
                D_ASSERT(device, !(req->net_rq_state[idx] & RQ_NET_MASK));
                D_ASSERT(device, !(req->local_rq_state & RQ_LOCAL_MASK));
-               mod_rq_state(req, m, peer_device, 0, RQ_NET_PENDING|RQ_NET_QUEUED);
+               mod_rq_state(req, m, peer_device, 0, RQ_NET_PENDING);
                break;
-       case QUEUE_FOR_NET_WRITE:
+       case NEW_NET_WRITE:
                /* assert something? */
                /* from __drbd_make_request only */
                /* NOTE
                 * In case the req ended up on the transfer log before being
                 * queued on the worker, it could lead to this request being
                 * missed during cleanup after connection loss.
                 * So we have to do both operations here,
                 * within the same lock that protects the transfer log.
                 *
                 * _req_add_to_epoch(req); this has to be after the
                 * _maybe_start_new_epoch(req); which happened in
                 * __drbd_make_request, because we now may set the bit
                 * again ourselves to close the current epoch.
                 *
                 * Add req to the (now) current epoch (barrier). */
                D_ASSERT(device, !(req->net_rq_state[idx] & RQ_NET_MASK));
                /* queue work item to send data */
-               mod_rq_state(req, m, peer_device, 0, RQ_NET_PENDING|RQ_NET_QUEUED|RQ_EXP_BARR_ACK|
+               mod_rq_state(req, m, peer_device, 0, RQ_NET_PENDING|RQ_EXP_BARR_ACK|
                             drbd_protocol_state_bits(peer_device->connection));
                /* Close the epoch, in case it outgrew the limit.
                 * Or if this is a "batch bio", and some of our peers is "old",
                 * because a batch bio "storm" (like, large scale discarding
                 * during mkfs time) would be likely to starve out the peers
                 * activity log, if it is smaller than ours (or we don't have
                 * any). And a fix for the resulting potential distributed
                 * deadlock was only implemented with P_CONFIRM_STABLE with
                 * protocol version 114.

skipping to change at line 1017 (drbd-9.1.8) / line 1049 (drbd-9.1.9)
                else {
                        rcu_read_lock();
                        nc = rcu_dereference(peer_device->connection->transport.net_conf);
                        p = nc->max_epoch_size;
                        rcu_read_unlock();
                }
                if (device->resource->current_tle_writes >= p)
                        start_new_tl_epoch(device->resource);
                break;
-       case QUEUE_FOR_SEND_OOS:
+       case NEW_NET_OOS:
+               mod_rq_state(req, m, peer_device, 0, RQ_NET_PENDING);
+               break;
+       case ADDED_TO_TRANSFER_LOG:
                mod_rq_state(req, m, peer_device, 0, RQ_NET_QUEUED);
                break;
        case SEND_CANCELED:
        case SEND_FAILED:
                /* Just update flags so it is no longer marked as on the sender
                 * queue; real cleanup will be done from
                 * tl_walk(,CONNECTION_LOST*). */
                mod_rq_state(req, m, peer_device, RQ_NET_QUEUED, 0);
                break;

skipping to change at line 1044 (drbd-9.1.8) / line 1080 (drbd-9.1.9)
                        mod_rq_state(req, m, peer_device, RQ_NET_QUEUED|RQ_NET_PENDING,
                                     RQ_NET_SENT|RQ_NET_OK);
                else
                        mod_rq_state(req, m, peer_device, RQ_NET_QUEUED, RQ_NET_SENT);
                /* It is still not yet RQ_NET_DONE until the
                 * corresponding epoch barrier got acked as well,
                 * so we know what to dirty on connection loss. */
                break;
        case OOS_HANDED_TO_NETWORK:
-               /* Was not set PENDING, no longer QUEUED, so is now DONE
+               /* No longer PENDING or QUEUED, so is now DONE
                 * as far as this connection is concerned. */
-               mod_rq_state(req, m, peer_device, RQ_NET_QUEUED, RQ_NET_DONE);
+               mod_rq_state(req, m, peer_device, RQ_NET_PENDING|RQ_NET_QUEUED, RQ_NET_DONE);
                break;
        case CONNECTION_LOST:
        case CONNECTION_LOST_WHILE_SUSPENDED:
                /* Only apply to requests that were for this peer but not done. */
                if (!(req->net_rq_state[idx] & RQ_NET_MASK) || req->net_rq_state[idx] & RQ_NET_DONE)
                        break;
                /* For protocol A, or when not suspended, we consider the
                 * request to be lost towards this peer.

skipping to change at line 1392 (drbd-9.1.8) / line 1428 (drbd-9.1.9)
                (peer_disk_state == D_INCONSISTENT &&
                 (repl_state == L_ESTABLISHED ||
                  (repl_state >= L_WF_BITMAP_T && repl_state < L_AHEAD)));
        /* Before proto 96 that was >= CONNECTED instead of >= L_WF_BITMAP_T.
           That is equivalent since before 96 IO was frozen in the L_WF_BITMAP*
           states. */
}

static bool drbd_should_send_out_of_sync(struct drbd_peer_device *peer_device)
{
-       return peer_device->repl_state[NOW] == L_AHEAD ||
-               peer_device->repl_state[NOW] == L_WF_BITMAP_S;
-       /* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
-          since we enter state L_AHEAD only if proto >= 96 */
+       enum drbd_disk_state peer_disk_state = peer_device->disk_state[NOW];
+       enum drbd_repl_state repl_state = peer_device->repl_state[NOW];
+       return repl_state == L_AHEAD ||
+               repl_state == L_WF_BITMAP_S ||
+               (peer_disk_state == D_OUTDATED && repl_state >= L_ESTABLISHED);
+       /* proto 96 check omitted, there was no L_AHEAD back then,
+        * peer disk was never Outdated while connection was established,
+        * and IO was frozen during bitmap exchange */
}

/* Prefer to read from protcol C peers, then B, last A */
static u64 calc_nodes_to_read_from(struct drbd_device *device)
{
        struct drbd_peer_device *peer_device;
        u64 candidates[DRBD_PROT_C] = {};
        int wp;
        rcu_read_lock();

skipping to change at line 1547 (drbd-9.1.8) / line 1590 (drbd-9.1.9)
                remote = drbd_should_do_remote(peer_device, NOW);
                send_oos = drbd_should_send_out_of_sync(peer_device);
                if (!remote && !send_oos)
                        continue;
                D_ASSERT(device, !(remote && send_oos));
                if (remote) {
                        ++count;
-                       _req_mod(req, QUEUE_FOR_NET_WRITE, peer_device);
+                       _req_mod(req, NEW_NET_WRITE, peer_device);
                } else
-                       _req_mod(req, QUEUE_FOR_SEND_OOS, peer_device);
+                       _req_mod(req, NEW_NET_OOS, peer_device);
        }
        return count;
}

+static void drbd_queue_request(struct drbd_request *req)
+{
+       struct drbd_device *device = req->device;
+       struct drbd_peer_device *peer_device;
+       for_each_peer_device(peer_device, device) {
+               if (req->net_rq_state[peer_device->node_id] & RQ_NET_PENDING)
+                       _req_mod(req, ADDED_TO_TRANSFER_LOG, peer_device);
+       }
+}
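The new drbd_queue_request() above only fires ADDED_TO_TRANSFER_LOG for peers that are already RQ_NET_PENDING; combined with the NEW_NET_WRITE/NEW_NET_OOS cases earlier and the call site added in drbd_send_and_submit() further below, write queueing now appears to happen in two phases. The following is a hypothetical outline of that ordering, pieced together from this diff only; it is not the actual drbd_send_and_submit() body, and the function name is invented for illustration.

/* Hypothetical outline only, inferred from the hunks in this diff. */
static void drbd_submit_write_outline(struct drbd_request *req,
                                      struct drbd_resource *resource)
{
        /* 1. Per peer, choose replication or out-of-sync handling; this sets
         *    RQ_NET_PENDING (NEW_NET_WRITE / NEW_NET_OOS) but not yet
         *    RQ_NET_QUEUED. */
        drbd_process_write_request(req);

        /* 2. Publish the request in the transfer log, so RCU walkers and the
         *    caching pointers can find it. */
        list_add_tail_rcu(&req->tl_requests, &resource->transfer_log);

        /* 3. Only now mark the request queued for each pending peer; the
         *    set_cache_ptr_if_null() comment requires a request to be in the
         *    transfer log before its condition (here RQ_NET_QUEUED) holds. */
        drbd_queue_request(req);
}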
static void drbd_process_discard_or_zeroes_req(struct drbd_request *req, int flags)
{
        int err = drbd_issue_discard_or_zero_out(req->device,
                        req->i.sector, req->i.size >> 9, flags);
        if (err)
                req->private_bio->bi_status = BLK_STS_IOERR;
        bio_endio(req->private_bio);
}

static void

skipping to change at line 1888 (drbd-9.1.8) / line 1942 (drbd-9.1.9)
                        if (prev_write) {
                                kref_get(&req->kref);
                                prev_write->destroy_next = req;
                        }
                        if (!drbd_process_write_request(req))
                                no_remote = true;
                } else {
                        if (peer_device)
-                               _req_mod(req, QUEUE_FOR_NET_READ, peer_device);
+                               _req_mod(req, NEW_NET_READ, peer_device);
                        else
                                no_remote = true;
                }
                /* req may now be accessed by other threads - do not modify
                 * "immutable" fields after this point */
                list_add_tail_rcu(&req->tl_requests, &resource->transfer_log);
+               /* Do this after adding to the transfer log so that the
+                * caching pointer req_not_net_done is set if
+                * necessary. */
+               drbd_queue_request(req);
        }
        spin_unlock(&resource->tl_update_lock);
        if (rw == WRITE)
                wake_all_senders(resource);
        else if (peer_device)
                wake_up(&peer_device->connection->sender_work.q_wait);
        if (no_remote == false) {
                struct drbd_plug_cb *plug = drbd_check_plugged(resource);
End of changes. 17 change blocks. 17 lines changed or deleted, 76 lines changed or added.
