"Fossies" - the Fresh Open Source Software archive 
Member "evms-2.5.5/plugins/bbr/bbr.c" of archive evms-2.5.5.tar.gz:
/*
* (C) Copyright IBM Corp. 2001, 2003
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Module: bbr.c
*
* BBR == bad block relocation
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>
#include <plugin.h>
#include "bbr.h"
#include "bbroptions.h"
#include "dm.h"
/*
 * Forward declaration. remap_lsn() below calls WriteBBRTable() before any
 * definition of it appears; being static, it has to be defined further down
 * in this same source file.
 */
static int WriteBBRTable( storage_object_t * parent,
storage_object_t * child,
u_int64_t lsn,
u_int64_t sector_count,
vsector_t * bbr_table,
boolean backup );
/*-------------------------------------------------------------------------------------+
+ +
+ PRIVATE DATA AREAS AND SUBROUTINES +
+ +
+-------------------------------------------------------------------------------------*/
// the plugin record for this bbr plugin (only declared at this point in the file)
static plugin_record_t bbr_plugin_record;
// ptr to our plugin record ... i_can_modify_object() compares object->plugin against it
plugin_record_t * my_plugin_record = &bbr_plugin_record;
// table of engine services (open_object, read_object, progress, engine_alloc, ...)
// starts out NULL ... presumably filled in when the engine sets up the plugin, TODO confirm
struct engine_functions_s * EngFncs=NULL;
/*
* Only called when debugging ... to display current bbr mapping table.
*
static void display_bbr_remap_info( storage_object_t *object, BBR_Private_Data *pdata )
{
int i,j;
evms_bbr_table_t *table=pdata->bbr_table;
LOG_DEBUG("BBR REMAP INFO...\n");
LOG_DEBUG(" TABLE INFO ...\n");
LOG_DEBUG(" 1st table LSN: %"PRIu64"\n", pdata->bbr_table_lsn1 );
LOG_DEBUG(" 2nd table LSN: %"PRIu64"\n", pdata->bbr_table_lsn2 );
LOG_DEBUG(" table size: %"PRIu64"\n", pdata->bbr_table_size_in_sectors );
LOG_DEBUG(" table ptr: %p\n", pdata->bbr_table );
for(i=0; i<pdata->bbr_table_size_in_sectors; i++) {
LOG_DEBUG(" TABLE SECTOR %d\n", i );
LOG_DEBUG(" in use count: %d\n", table->in_use_cnt );
if (table->in_use_cnt > 0) {
for (j=0; j<EVMS_BBR_ENTRIES_PER_SECT; j++) {
if ( table->entries[j].replacement_sect != 0 ) {
LOG_DEBUG("BAD LSN: %"PRIu64" REMAP LSN: %"PRIu64"\n", table->entries[j].bad_sect, table->entries[j].replacement_sect );
}
}
}
++table;
}
LOG_DEBUG(" REPL BLOCKS...\n");
LOG_DEBUG(" total replacement sectors = %"PRIu64"\n", pdata->bbr_table_size_in_sectors );
LOG_DEBUG(" 1st replacement sector LSN = %"PRIu64"\n", pdata->replacement_blocks_lsn );
}
*/
/*
 * Performs a synchronous READ or WRITE of whole sectors on a storage object
 * by way of the engine open_object / read_object / write_object services.
 *
 * object - storage object to do the i/o on
 * lsn    - first sector of the transfer
 * count  - number of sectors to transfer
 * buffer - callers data buffer
 * rw     - non-zero asks for a read, zero asks for a write. The callers in
 *          this file pass SECTOR_IO_READ.
 *          NOTE(review): the SECTOR_IO_* values live in a header that is not
 *          visible here ... confirm that SECTOR_IO_READ is non-zero.
 *
 * Returns: 0   if every requested byte was transferred
 *          EIO if the object could not be opened or the transfer came up short
 */
static int kernel_bbr_sector_io( storage_object_t * object,
                                 lsn_t lsn,
                                 sector_count_t count,
                                 void * buffer,
                                 int rw )
{
    int status = EIO;
    int handle;
    int32_t transferred;
    sector_count_t length;

    LOG_ENTRY();

    handle = EngFncs->open_object(object, O_RDWR | O_SYNC);
    if (handle < 0) {
        // could not open the object ... report EIO
        LOG_EXIT_INT(status);
        return status;
    }

    // size of the transfer in bytes
    length = count << EVMS_VSECTOR_SIZE_SHIFT;

    if (!rw) {
        transferred = EngFncs->write_object(object, handle, buffer,
                                            length,
                                            lsn << EVMS_VSECTOR_SIZE_SHIFT);
    } else {
        transferred = EngFncs->read_object(object, handle, buffer,
                                           length,
                                           lsn << EVMS_VSECTOR_SIZE_SHIFT);
    }

    // only a complete transfer counts as success
    if (transferred == length) {
        status = 0;
    }

    EngFncs->close_object(object, handle );

    LOG_EXIT_INT(status);
    return status;
}
/*
 * Called to see if the kernel bbr plugin is active on this object. The only
 * thing examined is the SOFLAG_ACTIVE bit in the objects flags.
 *
 * Returns: TRUE if SOFLAG_ACTIVE is set on the object
 *          FALSE otherwise
 */
static boolean isa_kernel_bbr_object( storage_object_t *object )
{
    boolean active;

    LOG_ENTRY();

    active = ( object->flags & SOFLAG_ACTIVE ) ? TRUE : FALSE;

    LOG_EXIT_BOOL(active);
    return active;
}
/*
 * Called to get a count of bad blocks being remapped in a bbr mapping
 * table. The count is the sum of the in_use_cnt field of every sector of
 * the table.
 *
 * bbr_table    - first sector of the mapping table ... may be NULL
 * sector_count - number of sectors in the table
 *
 * Returns: count of remapped sectors if successful
 *          otherwise ... 0 (NULL table or zero sector count)
 */
static sector_count_t get_bad_block_count( evms_bbr_table_t *bbr_table,
                                           sector_count_t sector_count)
{
    // index has the same width as sector_count so the loop test never
    // mixes a signed int with a 64 bit count
    sector_count_t i;
    sector_count_t bad_blocks = 0;

    LOG_ENTRY();

    if (bbr_table != NULL) {
        for (i = 0; i < sector_count; i++) {
            bad_blocks += bbr_table[i].in_use_cnt;
        }
    }

    LOG_DEBUG(" returning count= %"PRIu64"\n", bad_blocks );
    LOG_EXIT_INT((int)bad_blocks);
    return bad_blocks;
}
/*
 * Called to get a count of bad blocks being remapped to good sectors
 * according to the bbr tables stored on the object itself.
 *
 * Both copies of the table (bbr_table_lsn1 and bbr_table_lsn2) are read
 * through kernel_bbr_sector_io() into a temporary buffer and counted with
 * get_bad_block_count(). A copy that cannot be read counts as 0. The larger
 * of the two counts is returned.
 *
 * Returns: count of remapped sectors if successful
 *          otherwise ... 0
 */
static sector_count_t get_kernel_bbr_remap_sector_count( storage_object_t *object )
{
    BBR_Private_Data *pdata = (BBR_Private_Data *)object->private_data;
    sector_count_t count_copy1 = 0;
    sector_count_t count_copy2 = 0;
    sector_count_t result;
    sector_count_t table_sectors;
    evms_bbr_table_t *scratch = NULL;

    LOG_ENTRY();

    // only need a buffer if there is a table to read
    if (pdata && pdata->bbr_table_size_in_sectors) {
        scratch = malloc( pdata->bbr_table_size_in_sectors<<EVMS_VSECTOR_SIZE_SHIFT);
    }

    if (scratch) {
        table_sectors = pdata->bbr_table_size_in_sectors;

        // 1st copy of the table
        if (kernel_bbr_sector_io( object,
                                  pdata->bbr_table_lsn1,
                                  table_sectors,
                                  (void *) scratch,
                                  SECTOR_IO_READ ) == 0) {
            count_copy1 = get_bad_block_count(scratch,table_sectors);
        }

        // 2nd copy of the table ... same buffer is reused
        if (kernel_bbr_sector_io( object,
                                  pdata->bbr_table_lsn2,
                                  table_sectors,
                                  (void *) scratch,
                                  SECTOR_IO_READ ) == 0) {
            count_copy2 = get_bad_block_count(scratch,table_sectors);
        }

        free(scratch);
    }

    // report whichever copy shows more remapped sectors
    result = (count_copy1 >= count_copy2) ? count_copy1 : count_copy2;

    LOG_DEBUG(" returning count= %"PRIu64"\n", result );
    LOG_EXIT_INT((int)result);
    return result;
}
/*
 * Called to get a count of bad blocks being remapped to good sectors by
 * examining the in-memory bbr mapping table held in the objects private
 * data.
 *
 * Returns: count of remapped sectors if successful
 *          otherwise ... 0 (including when there is no private data)
 */
static sector_count_t get_engine_remap_sector_count( storage_object_t *object )
{
    BBR_Private_Data *priv = (BBR_Private_Data *)object->private_data;
    sector_count_t remapped = 0;

    LOG_ENTRY();

    if (priv != NULL) {
        remapped = get_bad_block_count( priv->bbr_table,
                                        priv->bbr_table_size_in_sectors);
    }

    LOG_DEBUG(" returning count= %"PRIu64"\n", remapped );
    LOG_EXIT_INT((int)remapped);
    return remapped;
}
/*
 * Called to test if the kernel bbr feature is actively remapping sectors
 * on this object. The table copies are only read when the object is an
 * active kernel bbr object.
 *
 * Returns: TRUE if there is at least 1 sector being remapped
 *          FALSE otherwise
 */
static boolean kernel_bbr_remap_active( storage_object_t *object )
{
    boolean remapping = FALSE;

    LOG_ENTRY();

    if ( ( isa_kernel_bbr_object( object ) == TRUE ) &&
         ( get_kernel_bbr_remap_sector_count( object ) > 0 ) ) {
        remapping = TRUE;
    }

    LOG_EXIT_INT(remapping);
    return remapping;
}
/*
 * Called to tell the kernel bbr feature to stop remapping sectors so the
 * bbr object can be resized.
 *
 * This is currently a stub: nothing is sent to the kernel and the object
 * parameter is not used.
 *
 * Returns: ENOSYS always
 */
static int stop_kernel_bbr_remapping( storage_object_t *object )
{
    const int not_implemented = ENOSYS;

    LOG_ENTRY();
    LOG_EXIT_INT(not_implemented);
    return not_implemented;
}
/*
 * Called to convert an LSN to its remapped LSN by searching the in-memory
 * bbr mapping table. Table sectors with no entries in use are skipped.
 *
 * Returns: the LSN of the replacement block if the sector has been remapped
 *          otherwise ... it returns the callers LSN
 */
static lsn_t get_lsn( BBR_Private_Data *pdata, lsn_t lsn )
{
    int sector_idx;
    int slot;
    evms_bbr_table_t *sector;

    LOG_ENTRY();

    for (sector_idx = 0; sector_idx < pdata->bbr_table_size_in_sectors; sector_idx++) {

        sector = &pdata->bbr_table[sector_idx];

        if (sector->in_use_cnt > 0) {

            for (slot = 0; slot < EVMS_BBR_ENTRIES_PER_SECT; slot++) {

                // an entry only counts if it maps our lsn AND has a replacement
                if ( ( sector->entries[slot].bad_sect == lsn ) &&
                     ( sector->entries[slot].replacement_sect != 0 ) ) {
                    LOG_EXIT_INT((int)(sector->entries[slot].replacement_sect));
                    return sector->entries[slot].replacement_sect;
                }
            }
        }
    }

    // not remapped ... hand back the callers lsn
    LOG_EXIT_INT((int)lsn);
    return lsn;
}
/*
 * Called to get the next available (unused) replacement block LSN.
 *
 * Replacement blocks are handed out in order, so the next free one sits
 * right behind the blocks already in use. The number in use is the sum of
 * in_use_cnt over every sector of the bbr table.
 *
 * Returns: success: LSN of the replacement block
 *          failure: returns invalid LSN ( LSN==0 )
 */
static lsn_t get_next_avail_repl_block_lsn( BBR_Private_Data *pdata )
{
    int idx;
    int used = 0;
    lsn_t next = 0;

    LOG_ENTRY();

    // count replacement sectors being used
    for (idx = 0; idx < pdata->bbr_table_size_in_sectors; idx++) {
        used += pdata->bbr_table[idx].in_use_cnt;
    }

    if ( used == 0 ) {
        // nothing is remapped yet ... grab the 1st replacement block
        next = pdata->replacement_blocks_lsn;
    }
    else if ( used < pdata->replacement_blocks_size_in_sectors ) {
        // next avail follows the blocks already in use
        next = pdata->replacement_blocks_lsn + used;
    }
    else {
        // error, there are no replacement blocks available
        LOG_ERROR("error, unable to provide remap because all replacement blocks are used\n");
        next = 0;
    }

    LOG_EXIT_INT((int)next);
    return next;
}
/*
 * Called to remap a bad block LSN to a replacement BLOCK LSN.
 *
 * The 1st free entry of the in-memory bbr table (bad_sect and
 * replacement_sect both 0) is claimed for the mapping. Unless the parent
 * object is flagged SOFLAG_NEW, both on-disk copies of the table are then
 * rewritten with WriteBBRTable(); the return codes of those writes are not
 * examined.
 *
 * Returns: success: LSN of the replacement block
 *          failure: invalid LSN (LSN==0) ... no free table entry or no free
 *                   replacement block
 */
static lsn_t remap_lsn( storage_object_t *parent, BBR_Private_Data *pdata, lsn_t lsn )
{
    int sector_idx;
    int slot;
    evms_bbr_table_t *sector;
    lsn_t new_home;

    LOG_ENTRY();

    for (sector_idx = 0; sector_idx < pdata->bbr_table_size_in_sectors; sector_idx++) {

        sector = &pdata->bbr_table[sector_idx];

        // only look in table sectors that still have room
        if ( sector->in_use_cnt < EVMS_BBR_ENTRIES_PER_SECT ) {

            for (slot = 0; slot < EVMS_BBR_ENTRIES_PER_SECT; slot++) {

                // skip entries that are already taken
                if ( ( sector->entries[slot].bad_sect != 0 ) ||
                     ( sector->entries[slot].replacement_sect != 0 ) ) {
                    continue;
                }

                new_home = get_next_avail_repl_block_lsn(pdata);

                if ( new_home ) {

                    // update bbr table
                    sector->entries[slot].bad_sect = lsn;
                    sector->entries[slot].replacement_sect = new_home;
                    ++sector->in_use_cnt;

                    // if not a new bbr object then we need to
                    // sync the bbr table with the on-disk copy
                    if ( !(parent->flags & SOFLAG_NEW) ) {
                        WriteBBRTable( parent, pdata->child,
                                       pdata->bbr_table_lsn1,
                                       pdata->bbr_table_size_in_sectors,
                                       (vsector_t *)pdata->bbr_table, FALSE );
                        WriteBBRTable( parent, pdata->child,
                                       pdata->bbr_table_lsn2,
                                       pdata->bbr_table_size_in_sectors,
                                       (vsector_t *)pdata->bbr_table, FALSE );
                    }
                }

                // done with the 1st free entry, whether or not a
                // replacement block could be had
                LOG_EXIT_INT((int)new_home);
                return new_home;
            }
        }
    }

    LOG_ERROR("error, no repl blks available\n");
    LOG_EXIT_INT(0);
    return 0;
}
/*
 * Called to test if we own the specified storage object. The object is
 * ours when it was claimed by this plugin, has a private data area, and
 * that private data carries the BBR signature. This is also the place to
 * add any further inspection of an object prior to making changes to it.
 *
 * Returns: TRUE if the object is one of ours
 *          FALSE otherwise (including a NULL object)
 */
static boolean i_can_modify_object( storage_object_t *object )
{
    boolean ours = FALSE;
    BBR_Private_Data *priv;

    LOG_ENTRY();

    if ( ( object != NULL ) &&
         ( object->plugin == my_plugin_record ) &&   // we claimed it
         ( object->private_data != NULL ) ) {        // and it has private data

        priv = (BBR_Private_Data *)object->private_data;

        // test for BBR signature
        if ( priv->signature == EVMS_BBR_SIGNATURE ) {
            ours = TRUE;
        }
    }

    LOG_EXIT_BOOL(ours);
    return ours;
}
/*
 * Debug routine - dumps the fields of a feature header to the log file at
 * debug level, one field per line.
 */
static void display_feature_header(evms_feature_header_t * feature_header)
{
    const evms_feature_header_t *fh = feature_header;

    LOG_DEBUG("Feature Header ...\n");

    // flags, feature id and version
    LOG_DEBUG("\t FH flags: 0x%X\n", fh->flags );
    LOG_DEBUG("\t FH feat id: 0x%X\n", fh->feature_id );
    LOG_DEBUG("\t FH major: 0x%X\n", fh->version.major );
    LOG_DEBUG("\t FH minor: 0x%X\n", fh->version.minor );
    LOG_DEBUG("\t FH patch: 0x%X\n", fh->version.patchlevel );

    // sequencing and alignment
    LOG_DEBUG("\t FH sequence: %"PRIu64"\n", fh->sequence_number );
    LOG_DEBUG("\t FH padding: %"PRIu64"\n", fh->alignment_padding );

    // location and size of the two feature data areas
    LOG_DEBUG("\t FH lsn1: %"PRIu64"\n", fh->feature_data1_start_lsn );
    LOG_DEBUG("\t FH data size1: %"PRIu64"\n", fh->feature_data1_size );
    LOG_DEBUG("\t FH lsn2: %"PRIu64"\n", fh->feature_data2_start_lsn );
    LOG_DEBUG("\t FH data size2: %"PRIu64"\n", fh->feature_data2_size );

    LOG_DEBUG("\t FH obj name: %s\n", fh->object_name);
}
/*
 * Get the useable area of a BBR child object.
 *
 * The useable size is the childs size less the bbr metadata: the two
 * feature data areas recorded in the parents private data plus 2 more
 * sectors.
 *
 * Returns: useable size in sectors
 *          0 if parent or child is NULL, if the parent has no private
 *            data, or if the child is too small to hold the metadata
 */
static sector_count_t get_child_useable_size( storage_object_t *parent, storage_object_t *child)
{
    sector_count_t child_useable_size=0;
    sector_count_t metadata_sectors=0;
    BBR_Private_Data *pdata;

    if (parent == NULL || child == NULL) {
        return 0;
    }

    pdata = (BBR_Private_Data *) parent->private_data;
    if (pdata == NULL) {
        return 0;
    }

    metadata_sectors = pdata->feature_header_data1_size + pdata->feature_header_data2_size + 2;

    // the sizes are unsigned ... subtracting from a child that is smaller
    // than its metadata would wrap around to an enormous useable size
    if (child->size > metadata_sectors) {
        child_useable_size = child->size - metadata_sectors;
    }
    else if (child->size < metadata_sectors) {
        LOG_ERROR("error, child %s has %"PRIu64" sectors but bbr metadata needs %"PRIu64" sectors\n",
                  child->name, child->size, metadata_sectors );
    }

    return child_useable_size;
}
/*
 * Called to validate that a run of disk sectors are Ok.
 *
 * Just a simple read test, one sector at a time, on the specified run of
 * sectors on the storage object. Testing stops at the 1st read error.
 *
 * object            - storage object to test
 * start             - 1st sector of the run
 * count             - number of sectors in the run
 * good_sector_count - out: number of sectors read successfully prior to an
 *                     error or successful completion
 *
 * Returns: 0 if every sector was read
 *          otherwise ... the rc of the read that failed
 */
static int validate_sectors( storage_object_t *object,
                             u_int64_t start,
                             u_int64_t count,
                             u_int64_t *good_sector_count )
{
    int rc=0;
    int use_progress;
    // same width as count so the loop test never mixes a signed int with
    // a 64 bit count
    u_int64_t i;
    u_int64_t lsn = start;
    char buffer[EVMS_VSECTOR_SIZE];
    progress_t progress;

    LOG_ENTRY();

    *good_sector_count = 0;

    /*
     * Use the progress service if the UI supports it. Otherwise,
     * use the user message service to provide some status.
     */
    progress.id = 0;
    progress.title = "Performing I/O tests on replacements blocks...";
    progress.description = "";
    progress.type = DISPLAY_PERCENT;
    progress.count = 0;
    progress.total_count = count;
    progress.remaining_seconds = 0;
    progress.plugin_private_data = NULL;
    progress.ui_private_data = NULL;

    use_progress = EngFncs->progress(&progress) == 0;

    if (!use_progress) {
        // NOTE(review): "Performaing" is a typo, but the text is a
        // translation lookup key ... left alone here, fix it together
        // with the message catalogs.
        MESSAGE(_("Performaing I/O tests on replacement blocks for object %s. "
                  "This will take a moment or two.\n"),
                object->name );
    }

    for (i=0; (i<count)&&(rc==0); i++,lsn++) {

        // refresh the progress display every 100 sectors
        if (use_progress && !(i % 100)) {
            progress.count = i;
            EngFncs->progress(&progress);
        }

        rc = READ(object, lsn, 1, buffer);
        if (rc == 0) {
            ++ *good_sector_count;
        }
    }

    if (use_progress) {
        progress.title = "Finished testing replacement blocks.";
        progress.count = count;
        EngFncs->progress(&progress);
    }
    else {
        MESSAGE(_("Finished testing replacement blocks.\n"));
    }

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * Called to move a block from the source LBA to the destination LBA on the
 * child object. A single sector is read from src and, if the read worked,
 * written to dest.
 *
 * Returns: 0 if the sector was copied
 *          otherwise ... the rc of the read or write that failed
 */
static int move_block(storage_object_t *child, lba_t src, lba_t dest )
{
    char sector[EVMS_VSECTOR_SIZE];
    int status;

    LOG_ENTRY();

    // pick up the block from its old location
    status = READ( child, src, 1, sector);
    if (status != 0) {
        LOG_EXIT_INT(status);
        return status;
    }

    // and lay it down at the new location
    status = WRITE(child, dest, 1, sector);

    LOG_EXIT_INT(status);
    return status;
}
/*
* This routine is called when metadata is being moved (due to a resize) and the
* bbr table needs to be updated because the replacement blocks are moving.
*
* NOTE: this routine should only be called during commit and the movement MUST
* be coordinated with the kernel.
*/
static void remap_bbr_table_and_move_replacement_blocks( storage_object_t *parent,
BBR_Private_Data *pdata )
{
int rc = 0;
int i,j;
evms_bbr_table_t *old=NULL;
evms_bbr_table_t *new=NULL;
u_int64_t offset;
boolean expanding=FALSE;
int in_use;
storage_object_t *child;
int remap_sector_count=0;
LOG_ENTRY();
// get old and new bbr table ptrs
old = pdata->active_bbr_table;
new = pdata->bbr_table;
// need the bbr child object for moving blocks
child = GET_BBR_CHILD(parent);
// make sure we have an old bbr table and a new one.
if ( ( old != NULL ) &&
( new != NULL ) &&
( child ) ) {
LOG_DEBUG("Active BBR Table ....\n");
LOG_DEBUG(" LSN1: %"PRIu64"\n", pdata->active_bbr_table_lsn1 );
LOG_DEBUG(" LSN2: %"PRIu64"\n", pdata->active_bbr_table_lsn1 );
LOG_DEBUG(" sectors: %"PRIu64"\n", pdata->active_bbr_table_size_in_sectors );
LOG_DEBUG("New BBR Table ....\n");
LOG_DEBUG(" LSN1: %"PRIu64"\n", pdata->bbr_table_lsn1 );
LOG_DEBUG(" LSN2: %"PRIu64"\n", pdata->bbr_table_lsn1 );
LOG_DEBUG(" sectors: %"PRIu64"\n", pdata->bbr_table_size_in_sectors );
//
// calc offset between old and new locations
//
// ... AND ...
//
// figure out which direction the replacement blocks will be moving
//
if ( pdata->replacement_blocks_lsn > pdata->active_replacement_blocks_lsn ) {
expanding = TRUE;
offset = pdata->replacement_blocks_lsn - pdata->active_replacement_blocks_lsn;
}
else {
offset = pdata->active_replacement_blocks_lsn - pdata->replacement_blocks_lsn;
}
//
// in case the new and old bbr tables have different sizes ... we will only
// remap the smallest bbr table sector count
//
if ( pdata->active_bbr_table_size_in_sectors <= pdata->bbr_table_size_in_sectors ) {
remap_sector_count = pdata->active_bbr_table_size_in_sectors;
}
else {
remap_sector_count = pdata->bbr_table_size_in_sectors;
}
//
// remap bbr table ... one sector at a time ... one entry at a time
//
for (i=0; (i<remap_sector_count)&&(rc==0); i++) {
// initialize the number of entries being used.
in_use = 0;
// for each entry
for (j=0; j<EVMS_BBR_ENTRIES_PER_SECT; j++) {
// (1)dont need to remap bad sectors that are no longer in the object's address space.
// (2)dont need to remap entries that are not currently used
if ( ( old->entries[j].replacement_sect != 0 ) &&
( old->entries[j].bad_sect < parent->size ) ) {
new->entries[j].bad_sect = old->entries[j].bad_sect;
if (expanding==TRUE)
new->entries[j].replacement_sect = old->entries[j].replacement_sect + offset;
else
new->entries[j].replacement_sect = old->entries[j].replacement_sect - offset;
LOG_DEBUG(" entry : %d\n", j );
LOG_DEBUG(" bad sector : %"PRIu64"\n", old->entries[j].bad_sect );
LOG_DEBUG(" old lba : %"PRIu64"\n", old->entries[j].replacement_sect );
LOG_DEBUG(" new lba : %"PRIu64"\n", new->entries[j].replacement_sect );
++in_use;
}
else {
new->entries[j].bad_sect = 0;
new->entries[j].replacement_sect = 0;
}
}
// how many entries are used in this sector ?
new->in_use_cnt = in_use;
// goto next sector in bbr table
++new;
++old;
}
// Now refresh bbr table ptrs prior to moving replacement blocks.
old = pdata->active_bbr_table;
new = pdata->bbr_table;
// move the replacement blocks ... one sector at a time ... one entry at a time
if (expanding == TRUE) { // expanding ... start from last bbr table sector, always moving sectors
// from end of bbr table to higher LBA ... cant overlap
new += remap_sector_count-1;
old += remap_sector_count-1;
for (i=0; i<remap_sector_count; i++) {
// for each entry
for (j=EVMS_BBR_ENTRIES_PER_SECT-1; j>=0; j--) {
// dont move unused entries
if ( old->entries[j].replacement_sect != 0 ) {
rc = move_block( child,
old->entries[j].replacement_sect,
new->entries[j].replacement_sect );
}
}
// goto previous sector in bbr table
--new;
--old;
}
}
else { // shrinking ... start from 1st bbr table sector, always moving sectors
// from start of table to a lower LBA ... cant overlap
for (i=0; i<remap_sector_count; i++) {
// for each entry
for (j=0; j<EVMS_BBR_ENTRIES_PER_SECT; j++) {
// dont move unused entries
if ( old->entries[j].replacement_sect != 0 ) {
rc = move_block( child,
old->entries[j].replacement_sect,
new->entries[j].replacement_sect );
}
}
// goto next sector in bbr table
++new;
++old;
}
}
}
LOG_EXIT_VOID();
}
/*
 * This routine is called to initialize a new bbr mapping table that doesnt
 * map any replacement sectors. It stamps the bbr table signature on every
 * sector of the table; no other field is written here.
 *
 * A NULL table ptr is tolerated ... nothing is done in that case.
 */
static void initialize_bbr_table( evms_bbr_table_t *bbr_table, u_int64_t sector_count )
{
    int idx = 0;

    LOG_ENTRY();
    LOG_DEBUG("table addr= %p sector count= %"PRIu64"\n", bbr_table, sector_count);

    if ( bbr_table == NULL ) {
        LOG_EXIT_VOID();
        return;
    }

    // one sector at a time
    while (idx < sector_count) {
        bbr_table[idx].signature = EVMS_BBR_TABLE_SIGNATURE;
        idx++;
    }

    LOG_EXIT_VOID();
}
/*
 * Called to fill in BBR metadata info for a BBR object. This
 * routine is called from the CREATE api code.
 *
 * The goal is to reserve no more than 1% of the region for
 * replacement blocks.
 *
 * The minimum amount we'll reserve is 63 sectors ... about a track
 * of disk space in most cases.
 *
 * parent                  - the bbr object ... its private data is filled in
 * child                   - the object the bbr metadata will be placed on
 * replacement_block_count - number of replacement blocks wanted, or 0 to
 *                           have a default of 1% of the child calculated
 * name                    - object name to record for the feature header
 *
 * The metadata is placed at the end of the child, working down from the
 * last 2 sectors: replacement blocks, 1st bbr table, 1st metadata copy,
 * 2nd bbr table, 2nd metadata copy. The replacement blocks are read tested
 * before they are accepted and a new in-memory bbr table is allocated and
 * initialized.
 *
 * Returns: rc==0 if successful
 *          EINVAL if parent, child, name or the parents private data is
 *                 NULL, or the childs hard sector size is not 1, 2, 4, 8
 *                 or 16 vsectors
 *          ENOMEM if the bbr table cannot be allocated
 *          the rc from validate_sectors() if the replacement blocks fail
 *                 the read test
 *
 */
static int create_bbr_metadata( storage_object_t *parent,
                                storage_object_t *child,
                                u_int32_t replacement_block_count,
                                char *name )
{
    int rc=0;
    u_int64_t TotalReplacementSectors=0;
    BBR_Private_Data *pdata;
    u_int64_t good_sectors;
    u_int64_t repl_blocks_lsn=0;
    int vsects_per_hardsect;
    int metadata_size_in_vsects;

    LOG_ENTRY();
    LOG_DEBUG("block count= %d \n", replacement_block_count );

    // check the callers parms BEFORE looking at the childs geometry
    if ( parent == NULL ||
         child == NULL ||
         name == NULL ) {
        LOG_EXIT_INT(EINVAL);
        return EINVAL;
    }

    // get BBR private data area
    pdata = (BBR_Private_Data *) parent->private_data;
    if ( pdata == NULL ) {
        LOG_EXIT_INT(EINVAL);
        return EINVAL;
    }

    vsects_per_hardsect = child->geometry.bytes_per_sector >> EVMS_VSECTOR_SIZE_SHIFT;

    LOG_DEBUG("hard sector size = %d bytes\n", child->geometry.bytes_per_sector);
    LOG_DEBUG("there are %d vsectors in a hard sector\n", vsects_per_hardsect);

    // looking for power of 2
    if ( vsects_per_hardsect != 1 &&
         vsects_per_hardsect != 2 &&
         vsects_per_hardsect != 4 &&
         vsects_per_hardsect != 8 &&
         vsects_per_hardsect != 16 ) {
        LOG_EXIT_INT(EINVAL);
        return EINVAL;
    }

    // size of replacement blocks
    pdata->block_size = EVMS_VSECTOR_SIZE;

    // size of the bbr metadata sector in vsectors ... rounded up to a
    // multiple of the hard sector size
    metadata_size_in_vsects = sizeof(evms_bbr_metadata_t) >> EVMS_VSECTOR_SIZE_SHIFT;
    if (metadata_size_in_vsects % vsects_per_hardsect) {
        metadata_size_in_vsects -= metadata_size_in_vsects % vsects_per_hardsect;
        metadata_size_in_vsects += vsects_per_hardsect;
    }

    LOG_DEBUG("using %d vsects to map the single bbr metadata sector\n",metadata_size_in_vsects);

    // copy object name and serial number to BBR private data
    // NOTE(review): strncpy() does not terminate the name when it is
    // EVMS_VOLUME_NAME_SIZE chars or longer ... confirm the destination
    // buffer has room for, and already holds, a trailing NUL.
    strncpy( pdata->feature_header_object_name,
             name,
             EVMS_VOLUME_NAME_SIZE );

    // init to nothing moved
    pdata->repl_blocks_moved = FALSE;

    // figure out the number of replacement blocks needed. if the caller
    // doesnt pass us a replacement block count then well calculate a
    // default amount, trying for 1% of the storage region.
    if (replacement_block_count > 0) {
        pdata->replacement_blocks_needed = replacement_block_count;
    }
    else {
        pdata->replacement_blocks_needed = child->size / 100;
    }

    LOG_DEBUG("set repl blocks needed to %"PRIu64"\n", pdata->replacement_blocks_needed);

    // enforce MINIMUM and MAXIMUM and HARDSECTOR SIZING rules
    if ( pdata->replacement_blocks_needed < BBR_MIN_REPLACEMENT_BLOCKS ) {
        pdata->replacement_blocks_needed = BBR_MIN_REPLACEMENT_BLOCKS;
    }
    else if (pdata->replacement_blocks_needed > BBR_MAX_REPLACEMENT_BLOCKS ) {
        pdata->replacement_blocks_needed = BBR_MAX_REPLACEMENT_BLOCKS;
    }
    else if (pdata->replacement_blocks_needed % vsects_per_hardsect) {
        LOG_DEBUG("oops ... repl blocks are not multiple of hard sector size ... adjusting\n");
        pdata->replacement_blocks_needed -= pdata->replacement_blocks_needed % vsects_per_hardsect;
        pdata->replacement_blocks_needed += vsects_per_hardsect;
    }

    LOG_DEBUG("adjusted repl blocks needed to %"PRIu64"\n", pdata->replacement_blocks_needed);

    // total number of vsectors we need
    TotalReplacementSectors = pdata->replacement_blocks_needed;

    // ALIGN THE START OF THE REPLACEMENT BLOCKS ON A HARDSECTOR BOUNDARY
    //
    // this next step is very important, all other addresses are calculated
    // relative to the start of the replacement blocks.
    repl_blocks_lsn = (child->size - 2) - TotalReplacementSectors;

    LOG_DEBUG(" child end lsn : %"PRIu64"\n", child->size -1 );
    LOG_DEBUG(" repl blocks lsn : %"PRIu64"\n", repl_blocks_lsn);

    repl_blocks_lsn = rounddown_to_hardsect_boundary(child, repl_blocks_lsn);

    LOG_DEBUG("adjusted repl blocks lsn : %"PRIu64"\n", repl_blocks_lsn);

    // validate that we are not using BAD replacement sectors
    rc = validate_sectors( child, repl_blocks_lsn, TotalReplacementSectors, &good_sectors );
    if (rc == 0) {
        pdata->replacement_blocks_lsn = repl_blocks_lsn;
        pdata->replacement_blocks_size_in_sectors = TotalReplacementSectors;
    }
    else {
        LOG_ERROR("unable to lay down requested number of replacement sectors, only first %"PRIu64" sectors were Ok\n", good_sectors);
        LOG_EXIT_INT(rc);
        return rc;
    }

    // then the number of BBR table entries needed, based on the replacement
    // sectors being used.
    pdata->bbr_table_size_in_sectors = pdata->replacement_blocks_needed / EVMS_BBR_ENTRIES_PER_SECT;
    if ( (pdata->replacement_blocks_needed % EVMS_BBR_ENTRIES_PER_SECT) !=0 ){
        ++pdata->bbr_table_size_in_sectors;
    }

    LOG_DEBUG(" need bbr table sectors : %"PRIu64"\n", pdata->bbr_table_size_in_sectors );

    // force it to a multiple of hardsector size
    if (pdata->bbr_table_size_in_sectors % vsects_per_hardsect) {
        LOG_DEBUG("need to force bbr table sectors to multiple of hard sector size \n");
        pdata->bbr_table_size_in_sectors -= pdata->bbr_table_size_in_sectors % vsects_per_hardsect;
        pdata->bbr_table_size_in_sectors += vsects_per_hardsect;
    }

    LOG_DEBUG("adjstd bbr table sectors : %"PRIu64"\n", pdata->bbr_table_size_in_sectors );

    // malloc a bbr table
    pdata->bbr_table = EngFncs->engine_alloc( (pdata->bbr_table_size_in_sectors*EVMS_VSECTOR_SIZE) );
    if (pdata->bbr_table == NULL) {
        LOG_ERROR("unable to malloc a new BBR mapping table of %"PRIu64" sectors, rc= ENOMEM\n", pdata->bbr_table_size_in_sectors);
        LOG_EXIT_INT(ENOMEM);
        return ENOMEM;
    }
    else {
        initialize_bbr_table( pdata->bbr_table, pdata->bbr_table_size_in_sectors );
    }

    // Ok everything should be allocated in hard sector size amounts at this point. All that
    // remains is to place all metadata on hard sector boundaries.
    pdata ->bbr_table_lsn1 = pdata->replacement_blocks_lsn - pdata->bbr_table_size_in_sectors;
    pdata ->feature_header_data1_start_lsn = pdata->bbr_table_lsn1 - metadata_size_in_vsects;
    pdata ->feature_header_data1_size = pdata->bbr_table_size_in_sectors +
                                        pdata->replacement_blocks_size_in_sectors +
                                        metadata_size_in_vsects;
    pdata ->bbr_table_lsn2 = pdata->feature_header_data1_start_lsn - pdata->bbr_table_size_in_sectors;
    pdata ->feature_header_data2_start_lsn = pdata->bbr_table_lsn2 - metadata_size_in_vsects;
    pdata ->feature_header_data2_size = pdata->bbr_table_size_in_sectors +
                                        metadata_size_in_vsects;

    LOG_DEBUG("\tBBR LSN Calculations...\n");
    LOG_DEBUG("\t child name: %s\n", child->name );
    LOG_DEBUG("\t child size: %"PRIu64"\n", child->size );
    LOG_DEBUG("\t feature header lsn1: %"PRIu64"\n", child->size - 1 );
    LOG_DEBUG("\t feature header lsn2: %"PRIu64"\n", child->size - 2 );
    LOG_DEBUG("\t repl_sectors_lsn: %"PRIu64"\n", pdata->replacement_blocks_lsn );
    LOG_DEBUG("\t nr_repl_blocks: %"PRIu64" (%"PRIu64" sectors)\n", pdata->replacement_blocks_needed,pdata->replacement_blocks_size_in_sectors);
    LOG_DEBUG("\t bbr_table_lsn1: %"PRIu64"\n", pdata->bbr_table_lsn1 );
    LOG_DEBUG("\t bbr_table_ptr: %p\n", pdata->bbr_table );
    LOG_DEBUG("\t nr_bbr_table_sectors: %"PRIu64"\n", pdata->bbr_table_size_in_sectors );
    LOG_DEBUG("\t1st copy of metadata lsn: %"PRIu64"\n", pdata->feature_header_data1_start_lsn );
    LOG_DEBUG("\t 2nd copy bbr table lsn: %"PRIu64"\n", pdata->bbr_table_lsn2 );
    LOG_DEBUG("\t2nd copy of metadata lsn: %"PRIu64"\n", pdata->feature_header_data2_start_lsn );

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * Called to build a bbr feature header for the specified bbr object.
 * The feature header is constructed from our plugin runtime data. It
 * is not just a rewrite of a feature header we read earlier.
 *
 * The callers buffer is zeroed and then given the feature header
 * signature, the bbr feature id, the location and size of both feature
 * data areas and the object name. The finished header is dumped to the log.
 * Unless a backup copy is being built, the bbr child object is also
 * flagged SOFLAG_FEATURE_HEADER_DIRTY.
 *
 * Returns: feature header in the supplied buffer if RC=0
 *          EINVAL if parent, its private data or the buffer is NULL
 *
 */
static int BuildFeatureHeader( storage_object_t *parent, // bbr object
                               evms_feature_header_t *feature_header, // buffer
                               boolean backup )
{
    BBR_Private_Data *priv;

    LOG_ENTRY();

    if ( !parent || !parent->private_data || !feature_header ) {
        LOG_EXIT_INT(EINVAL);
        return EINVAL;
    }

    priv = (BBR_Private_Data *) parent->private_data;

    // start from a clean header
    memset(feature_header, 0, sizeof(evms_feature_header_t));

    // who we are
    feature_header->signature = EVMS_FEATURE_HEADER_SIGNATURE;
    feature_header->feature_id = EVMS_BBR_FEATURE_PLUGIN_ID;

    // 1st feature data area
    feature_header->feature_data1_start_lsn = priv->feature_header_data1_start_lsn;
    feature_header->feature_data1_size = priv->feature_header_data1_size;

    // 2nd feature data area
    feature_header->feature_data2_start_lsn = priv->feature_header_data2_start_lsn;
    feature_header->feature_data2_size = priv->feature_header_data2_size;

    strncpy( feature_header->object_name,
             priv->feature_header_object_name,
             EVMS_VOLUME_NAME_SIZE );

    display_feature_header(feature_header);

    if (!backup) {
        priv->child->flags |= SOFLAG_FEATURE_HEADER_DIRTY;
    }

    LOG_EXIT_INT(0);
    return 0;
}
/*
 * Called to build a BBR feature metadata structure from the BBR private data
 * info. The callers buffer is zeroed and then filled in with the BBR
 * signature, the location and size of the bbr table, the location and
 * number of the replacement blocks and the block size.
 *
 * The table location used is always that of the 1st copy of the table
 * (pdata->bbr_table_lsn1). Per the original authors note, the second copy
 * of the feature data can be produced simply by changing the lsn of the
 * bbr table to that of the second copy (pdata->bbr_table_lsn2).
 *
 */
static void BuildMetadata( storage_object_t *obj, evms_bbr_metadata_t *metadata )
{
    BBR_Private_Data *priv = (BBR_Private_Data *) obj->private_data;

    LOG_ENTRY();

    memset(metadata, 0, sizeof(evms_bbr_metadata_t) );

    metadata->signature = EVMS_BBR_SIGNATURE;

    // block size
    metadata->block_size = priv->block_size;

    // replacement sector info
    metadata->nr_replacement_blks = priv->replacement_blocks_needed;
    metadata->start_replacement_sect = priv->replacement_blocks_lsn;

    // bbr table info
    metadata->nr_sects_bbr_table = priv->bbr_table_size_in_sectors;
    metadata->start_sect_bbr_table = priv->bbr_table_lsn1;

    LOG_EXIT_VOID();
}
/*
 * This routine is called to convert a bbr table from disk format to cpu
 * format. The conversion is done in place, one sector at a time: the
 * signature, crc, sequence number and in-use count of each table sector are
 * run through DISK_TO_CPU32 and both fields of every entry through
 * DISK_TO_CPU64.
 *
 * A NULL table ptr is tolerated ... nothing is done in that case.
 */
static void Disk_BBR_Table_To_CPU_Table( evms_bbr_table_t *bbr_table, sector_count_t sector_count )
{
    sector_count_t remaining;
    int slot;
    vsector_t *cursor;
    evms_bbr_table_t *sector;

    LOG_ENTRY();

    if ( bbr_table == NULL ) {
        LOG_EXIT_VOID();
        return;
    }

    // step through the table in units of one vsector
    cursor = (vsector_t *) bbr_table;

    for (remaining = sector_count; remaining > 0; remaining--, cursor++) {

        sector = (evms_bbr_table_t *) cursor;

        // sector header
        sector->signature = DISK_TO_CPU32(sector->signature);
        sector->crc = DISK_TO_CPU32(sector->crc);
        sector->sequence_number = DISK_TO_CPU32(sector->sequence_number);
        sector->in_use_cnt = DISK_TO_CPU32(sector->in_use_cnt);

        // remap entries
        for (slot = 0; slot < EVMS_BBR_ENTRIES_PER_SECT; slot++) {
            sector->entries[slot].bad_sect = DISK_TO_CPU64(sector->entries[slot].bad_sect);
            sector->entries[slot].replacement_sect = DISK_TO_CPU64(sector->entries[slot].replacement_sect);
        }
    }

    LOG_EXIT_VOID();
}
/*
 * This routine is called to convert a bbr table from cpu format to disk
 * format. The conversion is done in place, one sector at a time: the
 * signature, crc, sequence number and in-use count of each table sector are
 * run through CPU_TO_DISK32 and both fields of every entry through
 * CPU_TO_DISK64.
 *
 * A NULL table ptr is tolerated ... nothing is done in that case.
 */
static void CPU_BBR_Table_To_Disk_Table( evms_bbr_table_t *bbr_table, sector_count_t sector_count )
{
    sector_count_t remaining;
    int slot;
    vsector_t *cursor;
    evms_bbr_table_t *sector;

    LOG_ENTRY();

    if ( bbr_table == NULL ) {
        LOG_EXIT_VOID();
        return;
    }

    // step through the table in units of one vsector
    cursor = (vsector_t *) bbr_table;

    for (remaining = sector_count; remaining > 0; remaining--, cursor++) {

        sector = (evms_bbr_table_t *) cursor;

        // sector header
        sector->signature = CPU_TO_DISK32(sector->signature);
        sector->crc = CPU_TO_DISK32(sector->crc);
        sector->sequence_number = CPU_TO_DISK32(sector->sequence_number);
        sector->in_use_cnt = CPU_TO_DISK32(sector->in_use_cnt);

        // remap entries
        for (slot = 0; slot < EVMS_BBR_ENTRIES_PER_SECT; slot++) {
            sector->entries[slot].bad_sect = CPU_TO_DISK64(sector->entries[slot].bad_sect);
            sector->entries[slot].replacement_sect = CPU_TO_DISK64(sector->entries[slot].replacement_sect);
        }
    }

    LOG_EXIT_VOID();
}
/*
 * Called to read one copy of the BBR table from the given object.
 *
 * object    - object to read the table from
 * bbr_table - callers buffer ... receives count sectors
 * lsn       - 1st sector of the table
 * count     - number of sectors in the table
 *
 * After the read every sector is checked, in order, for the bbr table
 * signature and for a good crc. The crc is calculated over the sector with
 * its crc field set to zero; the stored crc is put back afterwards. The
 * checking stops at the 1st bad sector. Only when every sector checks out
 * is the table converted from disk format to cpu format.
 *
 * Returns: BBR Table in the supplied buffer if RC=0
 *          the rc of the read if the read fails
 *          ENODATA if a sector has the wrong signature or a bad crc
 *
 */
static int ReadBBRTable( storage_object_t * object, void * bbr_table, u_int64_t lsn, u_int64_t count )
{
    int rc;
    int i;
    u_int32_t stored_crc;
    u_int32_t computed_crc;
    vsector_t *sector;
    evms_bbr_table_t *hdr;

    LOG_ENTRY();
    LOG_DEBUG("\tbbr table lsn= %"PRIu64" nr_sects= %"PRIu64"\n", lsn, count);

    // read the sectors off of the disk
    rc = READ( object, lsn, count, bbr_table );
    if (rc != 0) {
        LOG_EXIT_INT(rc);
        return rc;
    }

    // check signature and crc of each sector in table
    sector = (vsector_t *) bbr_table;
    for (i = 0; i < count; i++, sector++) {

        hdr = (evms_bbr_table_t *) sector;

        if ( DISK_TO_CPU32(hdr->signature) != EVMS_BBR_TABLE_SIGNATURE ) {
            LOG_ERROR("not our signature\n");
            rc = ENODATA;
            break;
        }

        // crc is calculated with the crc field zeroed ... then the
        // stored value goes back in
        stored_crc = DISK_TO_CPU32(hdr->crc);
        hdr->crc = 0;
        computed_crc = EngFncs->calculate_CRC( EVMS_INITIAL_CRC, sector, EVMS_VSECTOR_SIZE );
        hdr->crc = CPU_TO_DISK32(stored_crc);

        if ( stored_crc != computed_crc ) {
            LOG_ERROR("crc failed on bbr_table[%d] sector, expected %X calcd %X\n", i, stored_crc, computed_crc);
            rc = ENODATA;
            break;
        }
    }

    // whole table is good ... make it usable by the cpu
    if (rc == 0) {
        Disk_BBR_Table_To_CPU_Table( (evms_bbr_table_t *) bbr_table, count );
    }

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * Called to get a copy of the BBR table, merging the two on-disk copies
 * when both are readable.
 *
 * child          - storage object holding the BBR table copies
 * bbr_table      - caller allocated buffer of at least count sectors
 * count          - size of the table in sectors
 * bbr_table_lsn1 - starting sector of the 1st copy (must be non-zero)
 * bbr_table_lsn2 - starting sector of the 2nd copy; 0, or the same value
 *                  as bbr_table_lsn1, means there is no 2nd copy
 *
 * BBR Table memory is allocated by caller!!
 *
 * Returns: 0       - BBR table (cpu format) is in the supplied buffer
 *          EINVAL  - bad arguments
 *          ENODATA - no copy of the table could be read and validated
 *                    (this includes failing to allocate the work buffers)
 */
static int GetBBRTable( storage_object_t * child,
                        void * bbr_table,
                        sector_count_t count,
                        lba_t bbr_table_lsn1,
                        lba_t bbr_table_lsn2 )
{
    sector_count_t i;
    int rc=EINVAL;
    int rc1=ENODATA;
    int rc2=ENODATA;
    char *table1=NULL;
    char *table2=NULL;
    vsector_t *bbrtable1;
    vsector_t *bbrtable2;
    u_int32_t sequence1;
    u_int32_t sequence2;
    u_int32_t in_use1;
    u_int32_t in_use2;

    LOG_ENTRY();

    if ( (child == NULL) ||
         (bbr_table == NULL) ||
         (bbr_table_lsn1==0)) {
        LOG_EXIT_INT(rc);
        return rc;
    }

    // read table 1
    table1 = EngFncs->engine_alloc( count * EVMS_VSECTOR_SIZE );
    if (table1) {
        rc1 = ReadBBRTable( child, (void *)table1, bbr_table_lsn1, count );
    }
    else {
        rc1 = ENOMEM;
    }

    // read table 2 ... if we have a second copy
    if ( ( bbr_table_lsn1 != bbr_table_lsn2 ) &&
         ( bbr_table_lsn2 != 0 ) ) {
        table2 = EngFncs->engine_alloc( count * EVMS_VSECTOR_SIZE );
        if (table2) {
            rc2 = ReadBBRTable( child, (void *)table2, bbr_table_lsn2, count );
        }
        else {
            rc2 = ENOMEM;
        }
    }

    if (( rc1 == 0) && (rc2 == 0)){

        // ------------------ COMBINE THE TWO BBR MAPPING TABLES INTO ONE -------------------
        // merge 2 good tables into 1, using the most recent copy of each sector in the BBR table
        // by inspecting the sequence numbers and sector used counters. The most recent
        // copy of a BBR table sector should have a higher sequence number but if the kernel didnt
        // update this field yet ... then the in_use counter should tell us which is the most recent
        // sector
        //------------------------------------------------------------------------------------
        bbrtable1 = (vsector_t *)table1;
        bbrtable2 = (vsector_t *)table2;

        for(i=0; i<count; i++){

            sequence1 = ((evms_bbr_table_t *)bbrtable1)->sequence_number;
            sequence2 = ((evms_bbr_table_t *)bbrtable2)->sequence_number;

            // the in_use counters must come from the sector currently being
            // compared (bbrtable1/bbrtable2), not from the start of the
            // buffers, otherwise every sector is judged by sector 0's counter.
            in_use1 = ((evms_bbr_table_t *)bbrtable1)->in_use_cnt;
            in_use2 = ((evms_bbr_table_t *)bbrtable2)->in_use_cnt;

            if ( sequence2 > sequence1 ) {
                memcpy(bbrtable1, bbrtable2, sizeof(vsector_t));
            }
            else if ( in_use2 > in_use1 ) {
                memcpy(bbrtable1, bbrtable2, sizeof(vsector_t));
            }

            ++bbrtable1;
            ++bbrtable2;
        }

        // table1 now holds the merged result
        memcpy( bbr_table, table1, count * EVMS_VSECTOR_SIZE );
        rc = 0;
    }
    else if (rc1 == 0) {
        memcpy( bbr_table, table1, count * EVMS_VSECTOR_SIZE );
        rc = 0;
    }
    else if (rc2 == 0) {
        memcpy( bbr_table, table2, count * EVMS_VSECTOR_SIZE );
        rc = 0;
    }
    else {
        rc = ENODATA;
    }

    if (table1) EngFncs->engine_free( table1 );
    if (table2) EngFncs->engine_free( table2 );

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * Write one copy of the BBR mapping table.
 *
 * parent       - bbr object (its name is used when saving a backup)
 * child        - object the table is written to
 * lsn          - starting sector for this copy of the table
 * sector_count - size of the table in sectors
 * bbr_table    - table in cpu format; it is not modified
 * backup       - TRUE: hand the sectors to the engine's save_metadata
 *                FALSE: write the sectors to the child object
 *
 * The table is copied to a scratch buffer, converted to disk format and
 * each sector is stamped with a crc computed with its crc field zeroed.
 *
 * Assumes: evms_bbr_metadata_t and evms_bbr_table_t are padded to 512 bytes
 *
 * Returns: 0 if BBR Table is successfully written, ENOMEM if the scratch
 *          buffer could not be allocated, else the save/write error code.
 */
static int WriteBBRTable( storage_object_t * parent,
                          storage_object_t * child,
                          u_int64_t lsn,
                          u_int64_t sector_count,
                          vsector_t * bbr_table,
                          boolean backup )
{
    int rc = ENOMEM;
    u_int64_t remaining;
    u_int32_t sector_crc;
    vsector_t *sector;
    evms_bbr_table_t *hdr;
    void *disk_copy;

    LOG_ENTRY();
    LOG_DEBUG("Writing BBR mapping table on object %s at lsn %"PRIu64"\n", child->name, lsn );

    // scratch buffer that will hold the endian neutral table
    disk_copy = malloc( sector_count * EVMS_VSECTOR_SIZE );

    if (disk_copy != NULL) {

        memcpy(disk_copy, bbr_table, (sector_count * EVMS_VSECTOR_SIZE) );

        CPU_BBR_Table_To_Disk_Table( (evms_bbr_table_t*) disk_copy, (sector_count_t) sector_count );

        // crc every sector of the disk format table
        sector = (vsector_t *) disk_copy;
        for (remaining = sector_count; remaining > 0; remaining--, sector++) {
            hdr = (evms_bbr_table_t *) sector;
            hdr->crc = 0;
            sector_crc = EngFncs->calculate_CRC( EVMS_INITIAL_CRC, sector, EVMS_VSECTOR_SIZE );
            hdr->crc = CPU_TO_DISK32(sector_crc);
        }

        if (!backup) {
            rc = WRITE( child, lsn, sector_count, (void *) disk_copy );
        }
        else {
            rc = EngFncs->save_metadata( parent->name, child->name,
                                         lsn, sector_count, disk_copy );
        }

        free(disk_copy);
    }

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * Convert a BBR metadata sector, in place, from disk byte order to
 * cpu byte order. Each field is converted independently.
 */
static void Disk_Metadata_To_CPU( evms_bbr_metadata_t *metadata )
{
    // 32 bit fields
    metadata->signature  = DISK_TO_CPU32(metadata->signature);
    metadata->crc        = DISK_TO_CPU32(metadata->crc);
    metadata->block_size = DISK_TO_CPU32(metadata->block_size);
    metadata->flags      = DISK_TO_CPU32(metadata->flags);

    // 64 bit fields
    metadata->sequence_number        = DISK_TO_CPU64(metadata->sequence_number);
    metadata->start_sect_bbr_table   = DISK_TO_CPU64(metadata->start_sect_bbr_table);
    metadata->nr_sects_bbr_table     = DISK_TO_CPU64(metadata->nr_sects_bbr_table);
    metadata->start_replacement_sect = DISK_TO_CPU64(metadata->start_replacement_sect);
    metadata->nr_replacement_blks    = DISK_TO_CPU64(metadata->nr_replacement_blks);
}
/*
 * Convert a BBR metadata sector, in place, from cpu byte order to
 * disk byte order. Each field is converted independently.
 */
static void CPU_Metadata_To_Disk( evms_bbr_metadata_t *metadata )
{
    // 32 bit fields
    metadata->signature  = CPU_TO_DISK32(metadata->signature);
    metadata->crc        = CPU_TO_DISK32(metadata->crc);
    metadata->block_size = CPU_TO_DISK32(metadata->block_size);
    metadata->flags      = CPU_TO_DISK32(metadata->flags);

    // 64 bit fields
    metadata->sequence_number        = CPU_TO_DISK64(metadata->sequence_number);
    metadata->start_sect_bbr_table   = CPU_TO_DISK64(metadata->start_sect_bbr_table);
    metadata->nr_sects_bbr_table     = CPU_TO_DISK64(metadata->nr_sects_bbr_table);
    metadata->start_replacement_sect = CPU_TO_DISK64(metadata->start_replacement_sect);
    metadata->nr_replacement_blks    = CPU_TO_DISK64(metadata->nr_replacement_blks);
}
/*
 * Called to read the BBR feature metadata into the caller's buffer.
 *
 * object         - storage object holding the metadata
 * metadata       - caller's buffer; receives the chosen copy in cpu format
 * feature_header - tells us where the metadata copies live on the object
 *
 * All three pointers are required; if any is NULL the routine returns
 * ENODATA without touching the disk.
 *
 * The first copy is always read. The second copy is read only when the
 * feature header gives it a non-zero size. A copy is accepted when it
 * carries the BBR signature and its stored crc is either 0 or matches
 * the crc computed over the sector with the crc field zeroed. When both
 * copies are good the one with the higher sequence number is used, the
 * first copy winning a tie.
 *
 * Returns: 0 and feature meta data in the supplied buffer,
 *          ENODATA if no usable copy was found.
 */
static int ReadMetaData( storage_object_t * object,
                         evms_bbr_metadata_t * metadata,
                         evms_feature_header_t * feature_header )
{
    int rc=ENODATA;
    int rc1, rc2;
    int take_second = 0;
    evms_bbr_metadata_t metadata2;
    u_int32_t stored_crc;
    u_int32_t computed_crc;

    LOG_ENTRY();

    // without a feature header we have no idea where the metadata is
    if ( !object || !metadata || !feature_header ) {
        LOG_ERROR("unable to get the feature header\n");
        LOG_EXIT_INT(rc);
        return rc;
    }

    LOG_DEBUG("\tfeathdr->lsn1= %"PRIu64" feathdr->lsn2= %"PRIu64"\n",feature_header->feature_data1_start_lsn, feature_header->feature_data2_start_lsn);

    // first copy goes straight into the caller's buffer
    rc1 = READ( object, feature_header->feature_data1_start_lsn, 1, (void *) metadata );

    // second copy is optional ... treat it as bad unless it exists and reads Ok
    rc2 = ENODATA;
    if ( feature_header->feature_data2_size != 0 ) {
        rc2 = READ( object, feature_header->feature_data2_start_lsn, 1, (void *) &metadata2 );
    }

    LOG_DEBUG("\tread metadata, 1st copy rc= %d 2nd copy rc= %d\n", rc1, rc2 );

    // validate the first copy
    if (rc1==0) {
        if ( DISK_TO_CPU32(metadata->signature) != EVMS_BBR_SIGNATURE ) {
            rc1 = ENODATA;
        }
        else {
            stored_crc = DISK_TO_CPU32(metadata->crc);
            metadata->crc = 0;
            computed_crc = EngFncs->calculate_CRC( EVMS_INITIAL_CRC, metadata, sizeof(evms_bbr_metadata_t) );
            metadata->crc = CPU_TO_DISK32(stored_crc);

            if ( (stored_crc==0) || (stored_crc==computed_crc) ) {
                Disk_Metadata_To_CPU( metadata );
            }
            else {
                rc1 = ENODATA;
            }
        }
    }

    // validate the second copy
    if (rc2==0) {
        if ( DISK_TO_CPU32(metadata2.signature) != EVMS_BBR_SIGNATURE ) {
            rc2 = ENODATA;
        }
        else {
            stored_crc = DISK_TO_CPU32(metadata2.crc);
            metadata2.crc = 0;
            computed_crc = EngFncs->calculate_CRC( EVMS_INITIAL_CRC, &metadata2, sizeof(evms_bbr_metadata_t) );
            metadata2.crc = CPU_TO_DISK32(stored_crc);

            if ( (stored_crc==0) || (stored_crc==computed_crc) ) {
                Disk_Metadata_To_CPU( &metadata2 );
            }
            else {
                rc2 = ENODATA;
            }
        }
    }

    // decide which copy the caller gets
    if (rc1==0 && rc2==0) {
        LOG_DEBUG("\tboth copies of metadata are Ok, seq_number_1= %"PRIu64" seq_number_2= %"PRIu64"\n",
                  metadata->sequence_number, metadata2.sequence_number );

        if ( metadata2.sequence_number > metadata->sequence_number ) {
            LOG_DEBUG("\tusing 2nd copy of metadata cuz of seq numbers\n");
            take_second = 1;
        }
        else {
            LOG_DEBUG("\tusing 1st copy cuz seq numbers are same or 1st is > 2nd\n");
        }
        rc = 0;
    }
    else if (rc1==0) {
        LOG_DEBUG("\tusing 1st copy of metadata cuz 2nd is missing or bad\n");
        rc = 0;
    }
    else if (rc2==0) {
        LOG_DEBUG("\tusing 2nd copy of metadata cuz 1st is missing or bad\n");
        take_second = 1;
        rc = 0;
    }
    else {
        LOG_ERROR("both copies of metadata are missing or bad\n");
        rc = ENODATA;
    }

    // the first copy is already in the caller's buffer, the second is not
    if (take_second) {
        memcpy(metadata, &metadata2, sizeof(evms_bbr_metadata_t ));
    }

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * Called to write one copy of the BBR metadata sector, followed by the
 * matching copy of the BBR mapping table, for the given commit phase.
 *
 * parent         - bbr object being committed (private data supplies the LSNs)
 * metadata       - metadata in cpu format. It is converted to disk format
 *                  IN PLACE, so the caller must rebuild it before reuse.
 * feature_header - not referenced by this routine
 * commit_phase   - 1: write the 1st metadata copy and 1st table copy
 *                  2: write the 2nd copies, but only if the two metadata
 *                     LSNs recorded in the private data differ
 * backup         - TRUE: pass the sectors to the engine's save_metadata
 *                  FALSE: write them to the child object
 *
 * Both the metadata write and the table write are always attempted. The
 * return code is the first failure seen, so a failed metadata write is
 * not hidden by a successful table write and two error codes are never
 * added together.
 *
 * Returns: RC=0 if successful
 *          EINVAL if there is no child object or nothing was written for
 *          this commit_phase
 *          else the first write/save error
 */
static int WriteMetaData( storage_object_t * parent,
                          evms_bbr_metadata_t * metadata,
                          evms_feature_header_t * feature_header,
                          uint commit_phase,
                          boolean backup )
{
    int rc=EINVAL;
    int table_rc;
    u_int32_t crc;
    storage_object_t *child=NULL;
    struct plugin_functions_s *Fncs;
    BBR_Private_Data *pdata = (BBR_Private_Data *) parent->private_data;

    LOG_ENTRY();

    // get bbr child object
    child = GET_BBR_CHILD(parent);
    if (child==NULL) {
        LOG_ERROR("bbr object doesnt have a child object\n");
        LOG_EXIT_INT(rc);
        return rc;
    }

    Fncs = (struct plugin_functions_s *)child->plugin->functions.plugin;

    if (commit_phase == 1) {

        // 1st copy of metadata points to ... 1st copy of bbr mapping table
        metadata->start_sect_bbr_table = pdata->bbr_table_lsn1;

        // convert metadata to disk format
        CPU_Metadata_To_Disk( metadata );

        // crc the 1st copy of metadata
        metadata->crc = 0;
        crc = EngFncs->calculate_CRC( EVMS_INITIAL_CRC, metadata, EVMS_VSECTOR_SIZE );
        metadata->crc = CPU_TO_DISK32(crc);

        LOG_DEBUG("commit phase 1, writing metadata to LSN %"PRIu64"\n",pdata->feature_header_data1_start_lsn);

        // write it out to disk
        if (backup) {
            rc = EngFncs->save_metadata( parent->name, child->name,
                                         pdata->feature_header_data1_start_lsn,
                                         1, metadata );
        } else {
            rc = Fncs->write( child, pdata->feature_header_data1_start_lsn, 1, (void *) metadata );
        }

        // now write out the first copy of the bbr mapping table
        if (pdata->bbr_table) {
            table_rc = WriteBBRTable( parent, child,
                                      pdata->bbr_table_lsn1,
                                      pdata->bbr_table_size_in_sectors,
                                      (vsector_t *)pdata->bbr_table, backup );
            // keep the metadata write error if there was one
            if (rc == 0) {
                rc = table_rc;
            }
        }
    }
    else if (commit_phase == 2) {

        // if the feature header calls for two copies of metadata then
        // write out the second copy.
        // NOTE(review): with a single copy nothing is written here and
        // EINVAL is returned ... confirm callers expect that.
        if ( pdata->feature_header_data1_start_lsn != pdata->feature_header_data2_start_lsn ) {

            // 2nd copy of metadata points to ... 2nd copy of bbr mapping table
            metadata->start_sect_bbr_table = pdata->bbr_table_lsn2;

            // convert metadata to disk format
            CPU_Metadata_To_Disk( metadata );

            // crc the metadata sector
            metadata->crc = 0;
            crc = EngFncs->calculate_CRC( EVMS_INITIAL_CRC, metadata, EVMS_VSECTOR_SIZE );
            metadata->crc = CPU_TO_DISK32(crc);

            LOG_DEBUG("commit phase 2, writing metadata to LSN %"PRIu64"\n",pdata->feature_header_data2_start_lsn);

            // write out the second copy of the metadata
            if (backup) {
                rc = EngFncs->save_metadata( parent->name, child->name,
                                             pdata->feature_header_data2_start_lsn,
                                             1, metadata );
            } else {
                rc = Fncs->write( child, pdata->feature_header_data2_start_lsn, 1, (void *) metadata );
            }

            // write out the second copy of the bbr table
            if (pdata->bbr_table) {
                table_rc = WriteBBRTable( parent, child,
                                          pdata->bbr_table_lsn2,
                                          pdata->bbr_table_size_in_sectors,
                                          (vsector_t *)pdata->bbr_table, backup );
                // keep the metadata write error if there was one
                if (rc == 0) {
                    rc = table_rc;
                }
            }
        }
    }

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * Called to free up a parent BBR storage object.
 *
 * Releases the two mapping table buffers hanging off the private data,
 * then the private data itself, and finally asks the engine to free
 * the storage object.
 *
 * Returns: EINVAL if bbr is NULL, else the engine's free_evms_object rc
 */
static int free_bbr_object( storage_object_t * bbr )
{
    int rc = EINVAL;
    BBR_Private_Data *priv;

    LOG_ENTRY();

    if (bbr != NULL) {

        priv = (BBR_Private_Data *) bbr->private_data;

        if (priv != NULL) {
            if (priv->active_bbr_table != NULL) {
                EngFncs->engine_free(priv->active_bbr_table);
            }
            if (priv->bbr_table != NULL) {
                EngFncs->engine_free(priv->bbr_table);
            }
            free(priv);
        }

        rc = EngFncs->free_evms_object(bbr);
    }

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * Called to obtain memory for a BBR storage object, allocating
 * the private data area needed in addition to the storage object
 * struct itself.
 *
 * The new object is tagged with our plugin record and its zeroed private
 * data is stamped with the BBR signature.
 *
 * Returns: ptr to a bbr storage object if successful, NULL if not successful
 */
static storage_object_t * malloc_bbr_object( void )
{
    int rc;
    storage_object_t *bbr = NULL;
    BBR_Private_Data *pdata;

    LOG_ENTRY();

    rc = EngFncs->allocate_evms_object(NULL, &bbr);
    if ( rc != 0 ) {
        // never hand back whatever the failed call may have left behind
        bbr = NULL;
    }
    else {
        pdata = calloc(1,sizeof(BBR_Private_Data));
        if ( pdata == NULL ) {
            EngFncs->free_evms_object(bbr);
            bbr = NULL;
        }
        else {
            bbr->plugin = my_plugin_record;
            bbr->private_data = pdata;
            pdata->signature = EVMS_BBR_SIGNATURE;
            pdata->repl_blocks_moved = FALSE;
        }
    }

    LOG_EXIT_PTR(bbr);
    return bbr;
}
/*
 * Called to commit metadata for the specified bbr object.
 *
 * NOTHING MUST STOP THE COMMIT!!
 *
 * We may be resizing and other plugins may depend on us.
 *
 * parent       - bbr object to commit
 * commit_phase - 1 or 2; selects which metadata/table copy WriteMetaData writes
 * backup       - TRUE when metadata is being saved as a backup; the object
 *                is then processed even if it is not dirty and the private
 *                data / object flags are left untouched afterwards
 *
 * Returns: RC=0 if feature metadata was committed to the storage object
 *          by writing it out to disk (or if there was nothing to do because
 *          the object is not dirty).
 *          EINVAL if the object has no child or the child has no feature header.
 *          Otherwise the rc from BuildFeatureHeader or WriteMetaData.
 */
static int Commit_BBR_Object( storage_object_t *parent, uint commit_phase, boolean backup )
{
int rc;
BBR_Private_Data *pdata = (BBR_Private_Data *) parent->private_data;
evms_bbr_metadata_t metadata;
evms_feature_header_t *feature_header=NULL;
storage_object_t *child;
LOG_ENTRY();
// catch possible case when object isnt dirty
if (!(parent->flags & SOFLAG_DIRTY) && !backup) {
LOG_EXIT_INT(0);
return 0;
}
// get child object
child = GET_BBR_CHILD( parent );
// get feature header
if (child) {
feature_header = child->feature_header;
}
if ( child && feature_header ) {
// start from a zeroed metadata sector; BuildMetadata fills it in below
memset(&metadata, 0, sizeof(evms_bbr_metadata_t) );
// only need to build feature header once and mark it
// dirty so the engine will commit it.
if (commit_phase == 1) {
rc = BuildFeatureHeader( parent, feature_header, backup );
}
else {
rc = 0;
}
if (rc==0) {
BuildMetadata( parent, &metadata);
// need to pickup fresh copy of active BBR table if we are resizing
// because the kernel bbr feature could be modifying it.
// This is only done in phase 1, for objects that are not new and whose
// replacement blocks have been flagged as moved.
if ( ( commit_phase == 1) &&
( pdata->repl_blocks_moved == TRUE ) &&
( (parent->flags & SOFLAG_NEW) == 0)) {
// test if we still need to allocate the active bbr mapping table
if (pdata->active_bbr_table == NULL) {
pdata->active_bbr_table = EngFncs->engine_alloc( (pdata->active_bbr_table_size_in_sectors*EVMS_VSECTOR_SIZE) );
// now we need to worry!
if (pdata->active_bbr_table == NULL) {
MESSAGE(_("Error: Unable to malloc a new BBR mapping table "
"during commit of resized bbr object %s. "
"Unable to move remap information to new location and so "
"knowledge about remapped sectors will be lost.\n"),
parent->name);
// cant stop commit ... so just dont try to move bbr sectors
pdata->repl_blocks_moved = FALSE;
}
}
// read bbr table off disk if we have a bbr table buffer
if ( pdata->active_bbr_table != NULL) {
// the rc from this read only controls the message below; it is
// overwritten by the WriteMetaData call further down
rc = GetBBRTable( child,
pdata->active_bbr_table,
pdata->active_bbr_table_size_in_sectors,
pdata->active_bbr_table_lsn1,
pdata->active_bbr_table_lsn2 );
if (rc) {
MESSAGE(_("Error: Unable to read existing BBR mapping table "
"during commit of resized bbr object %s. "
"Unable to move remap information to new location and so "
"knowledge about remapped sectors will be lost.\n"),
parent->name);
// cant stop commit ... so just dont try to move bbr sectors
pdata->repl_blocks_moved = FALSE;
}
}
// NOW ... check if we still need to move the replacement blocks and remap bbr table
if ( pdata->repl_blocks_moved == TRUE ) {
remap_bbr_table_and_move_replacement_blocks( parent, pdata );
}
}
// write this phase's copy of the metadata and mapping table; this rc
// is what the routine returns
rc = WriteMetaData( parent, &metadata, feature_header, commit_phase, backup );
if (rc == 0 && !backup) {
// after the first commit phase we have active BBR data
// out on DISK so copy active info to metadata.
if ( commit_phase == 1 ) {
// we just completed 1st commit ... so ... there is definitely
// active bbr info ... so ... update active info area of bbr
// private data.
pdata->active_replacement_blocks_lsn = pdata->replacement_blocks_lsn;
pdata->active_replacement_blocks_needed = pdata->replacement_blocks_needed;
pdata->active_bbr_table_lsn1 = pdata->bbr_table_lsn1;
pdata->active_bbr_table_lsn2 = pdata->bbr_table_lsn2;
pdata->active_bbr_table_size_in_sectors = pdata->bbr_table_size_in_sectors;
// free old active table if we have one and realloc another
if (pdata->active_bbr_table) EngFncs->engine_free(pdata->active_bbr_table);
// alloc new active table and make copy of bbr table
pdata->active_bbr_table = EngFncs->engine_alloc( (pdata->bbr_table_size_in_sectors*EVMS_VSECTOR_SIZE) );
if (pdata->active_bbr_table == NULL) {
// dont error exit ... just try and malloc at a later time!
LOG_ERROR("error, unable to malloc a new BBR mapping table but can try again later.\n");
}
else {
memcpy(pdata->active_bbr_table, pdata->bbr_table, (pdata->bbr_table_size_in_sectors*EVMS_VSECTOR_SIZE));
}
pdata->repl_blocks_moved = FALSE;
}
else if (commit_phase == 2) {
// second phase written Ok ... object is no longer dirty or new
parent->flags &= ~SOFLAG_DIRTY;
parent->flags &= ~SOFLAG_NEW;
}
}
}
}
else {
rc = EINVAL;
}
LOG_EXIT_INT(rc);
return rc;
}
/*
 * This is how a feature applies itself to a storage object, either
 * during discovery or during create.
 *
 * The parent is added to the child's parent_objects list and the child
 * is added to the parent's child_objects list. The parent's size is then
 * set to the useable size calculated for the child.
 *
 * Note: the parent BBR object must have private data carrying the BBR
 *       signature.
 *
 * Returns: 0 if the child was consumed,
 *          EINVAL if the parent has no valid private data or the
 *          calculated useable size is zero.
 */
static int consume_storage_object( storage_object_t * parent,
                                   storage_object_t * child )
{
    int rc = EINVAL;
    u_int64_t usable_sectors;
    BBR_Private_Data *pdata;

    LOG_ENTRY();

    pdata = (BBR_Private_Data *) parent->private_data;

    // we can only size the object if the parent carries our private data
    if ( (pdata != NULL) && (pdata->signature == EVMS_BBR_SIGNATURE) ) {

        usable_sectors = get_child_useable_size(parent, child);

        if (usable_sectors > 0) {

            // bbr object goes on the child's list of parents
            EngFncs->insert_thing(child->parent_objects, parent,
                                  INSERT_BEFORE, NULL);

            // consumed object goes on the bbr object's list of children
            EngFncs->insert_thing(parent->child_objects, child,
                                  INSERT_BEFORE, NULL);

            // the parent only exposes what is left of the child once
            // the sectors we use ourselves are taken out
            parent->size = usable_sectors;

            rc = 0;
        }
    }

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * This routine expects to be called only from DISCOVERY with a child
 * storage object that has BBR installed on top of it. Meaning that
 * the child object has a feature_header that names BBR as the
 * current top most feature and that BBR metadata is layed down
 * on the disk.
 *
 * This routine is called to create a BBR storage object by consuming
 * a child storage object that contains BBR metadata and by filling
 * in the BBR private data from the feature header and feature metadata.
 *
 * child      - object carrying the BBR feature header (must be non-NULL,
 *              with a non-NULL feature_header)
 * metadata   - validated BBR metadata in cpu format
 * bbr_object - receives the new bbr object; written only on success
 *
 * On any failure the partially built object is freed. If the failure
 * happens after the object name was registered the name is unregistered
 * again.
 *
 * Returns: RC=0 if successful and the new bbr object is built Ok
 *          ENOMEM  if the object or a mapping table buffer cant be allocated
 *          ENODATA if no good copy of the BBR mapping table can be read
 *          else the rc from register_name or consume_storage_object
 */
static int Build_New_BBR_Object( storage_object_t *child,
                                 evms_bbr_metadata_t *metadata,
                                 storage_object_t **bbr_object )
{
    int rc;
    storage_object_t *parent;
    BBR_Private_Data *pdata=NULL;
    sector_count_t bad_blocks;

    LOG_ENTRY();

    // allocate memory for the new storage object
    parent = malloc_bbr_object();
    if (parent == NULL) {
        LOG_EXIT_INT(ENOMEM);
        return ENOMEM;
    }

    pdata = (BBR_Private_Data *) parent->private_data;

    // copy feature header info and feature metadata info to our private data
    pdata->child = child;
    pdata->feature_header_data1_size = child->feature_header->feature_data1_size;
    pdata->feature_header_data2_size = child->feature_header->feature_data2_size;
    pdata->feature_header_data1_start_lsn = child->feature_header->feature_data1_start_lsn;
    pdata->feature_header_data2_start_lsn = child->feature_header->feature_data2_start_lsn;

    // object name is "<disk group>/<feature header object name>" when the
    // child belongs to a disk group, else just the feature header name
    parent->name[0] = '\0';
    if (child->disk_group) {
        strncat(parent->name, child->disk_group->name, EVMS_NAME_SIZE);
        strncat(parent->name, "/", EVMS_NAME_SIZE-strlen(parent->name));
    }
    strncat(parent->name, child->feature_header->object_name,
            EVMS_NAME_SIZE-strlen(parent->name));

    memcpy( &pdata->feature_header_object_name,
            child->feature_header->object_name,
            EVMS_VOLUME_NAME_SIZE );

    pdata->replacement_blocks_lsn = metadata->start_replacement_sect;
    pdata->replacement_blocks_needed = metadata->nr_replacement_blks;
    pdata->replacement_blocks_size_in_sectors = metadata->nr_replacement_blks; // * metadata->block_size;
    pdata->block_size = metadata->block_size;
    pdata->bbr_table_lsn1 = metadata->start_sect_bbr_table;
    pdata->bbr_table_lsn2 = child->feature_header->feature_data2_start_lsn+1;
    pdata->bbr_table_size_in_sectors = metadata->nr_sects_bbr_table;

    // called by discovery routine ... there is definitely active bbr info out
    // on the disk ... so record this fact.
    pdata->active_replacement_blocks_lsn = pdata->replacement_blocks_lsn;
    pdata->active_replacement_blocks_needed = pdata->replacement_blocks_needed;
    pdata->active_bbr_table_lsn1 = pdata->bbr_table_lsn1;
    pdata->active_bbr_table_lsn2 = pdata->bbr_table_lsn2;
    pdata->active_bbr_table_size_in_sectors = pdata->bbr_table_size_in_sectors;

    pdata->active_bbr_table = EngFncs->engine_alloc( (pdata->bbr_table_size_in_sectors*EVMS_VSECTOR_SIZE) );
    if (pdata->active_bbr_table == NULL) {
        free_bbr_object(parent);
        LOG_ERROR("unable to malloc a new BBR mapping table, rc= ENOMEM\n");
        LOG_EXIT_INT(ENOMEM);
        return ENOMEM;
    }

    // malloc a bbr table
    pdata->bbr_table = EngFncs->engine_alloc( (pdata->bbr_table_size_in_sectors*EVMS_VSECTOR_SIZE) );
    if (pdata->bbr_table == NULL) {
        free_bbr_object(parent);
        LOG_ERROR("unable to malloc a new BBR mapping table, rc= ENOMEM\n");
        LOG_EXIT_INT(ENOMEM);
        return ENOMEM;
    }

    // read the table; the 2nd copy is only consulted when the feature
    // header says there is a 2nd copy of the feature data
    rc = GetBBRTable( child,
                      pdata->bbr_table,
                      pdata->bbr_table_size_in_sectors,
                      pdata->bbr_table_lsn1,
                      (pdata->feature_header_data2_size > 0) ? pdata->bbr_table_lsn2 : 0 );
    if (rc) {
        free_bbr_object(parent);
        LOG_ERROR("failed to read a good BBR mapping table, rc= ENODATA\n");
        LOG_EXIT_INT(ENODATA);
        return ENODATA;
    }

    pdata->sequence_number = metadata->sequence_number;

    // register the new evms object name
    rc = EngFncs->register_name( parent->name );
    if (rc != 0) {
        free_bbr_object(parent);
        LOG_EXIT_INT(rc);
        return rc;
    }

    // final step is to consume the child storage object
    rc = consume_storage_object( parent, child );
    if (rc != 0) {
        // the name was registered above ... dont leave it behind for an
        // object that is about to be freed
        EngFncs->unregister_name( parent->name );
        free_bbr_object(parent);
        LOG_EXIT_INT(rc);
        return rc;
    }

    *bbr_object = parent;

    // copy geometry to new storage object
    memcpy(&parent->geometry, &child->geometry, sizeof(geometry_t));

    // see if sectors have been remapped and warn evms user.
    if ( isa_kernel_bbr_object( parent ) == TRUE ) {
        bad_blocks = get_kernel_bbr_remap_sector_count( parent );
    }
    else {
        bad_blocks = get_engine_remap_sector_count( parent );
    }

    if (bad_blocks > 0) {
        char number_buffer[64];
        sprintf(number_buffer, "%"PRIu64, bad_blocks);
        MESSAGE(_("Warning: The kernel BBR feature is reporting that %s bad sector(s) were "
                  "discovered on object %s. "
                  "Though replacement sectors are being used to remedy the problem, "
                  "you would be well advised to take corrective actions by replacing "
                  "the storage object.\n"),
                number_buffer, parent->name );
    }

    // BBR_DEBUG if (pdata) display_bbr_remap_info( parent, pdata );

    LOG_EXIT_INT(rc);
    return rc;
}
/*-------------------------------------------------------------------------------------+
+ +
+ Start Of EVMS Plugin Functions +
+ (exported to engine via function table) +
+ +
+-------------------------------------------------------------------------------------*/
/*
 * Called by EVMS, after inspecting and validating our plugin record. This is the final
 * step when loading a plugin and means we have been validated by EVMS and will be used.
 *
 * All we do is remember the Engine's function table so that engine API(s)
 * can be called later through EngFncs. The table is saved before the
 * entry/exit logging is done.
 *
 * Returns: always 0
 */
static int BBR_SetupEVMSPlugin( engine_functions_t * engine_functions)
{
    EngFncs = engine_functions;

    LOG_ENTRY();
    LOG_EXIT_INT(0);
    return 0;
}
/*
 * Plugin cleanup routine. There is nothing to release here; apart from
 * the entry/exit logging macros the body is empty.
 */
static void BBR_Cleanup( void )
{
LOG_ENTRY();
// NOP
LOG_EXIT_VOID();
}
/*
 * I can allow an object to be a volume if:
 *
 * - I own the object (i_can_modify_object says TRUE)
 *
 * The flag argument is not looked at.
 *
 * Returns: 0 if allowed, EINVAL otherwise
 */
static int BBR_CanSetVolume( storage_object_t * object, boolean flag )
{
    int rc;

    LOG_ENTRY();

    rc = ( i_can_modify_object(object)==TRUE ) ? 0 : EINVAL;

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * I can expand the bbr object if:
 *
 * - I own the parent object
 * - kernel is not actively remapping bad blocks
 * - I have a child object
 * - the bbr child says OK to expand
 *
 * object           - bbr object
 * expand_limit     - passed through to the child's can_expand untouched
 * expansion_points - list the child places expand objects on
 *
 * Returns: the child's can_expand rc, or EINVAL if we never got as far
 *          as asking the child
 */
static int BBR_CanExpand( storage_object_t *object,
                          sector_count_t expand_limit,
                          list_anchor_t expansion_points )
{
    int rc = EINVAL;
    storage_object_t *child = NULL;
    struct plugin_functions_s *child_fncs;

    LOG_ENTRY();

    // only look for the child once both of our own checks pass
    if ( i_can_modify_object(object)==TRUE ) {
        if ( kernel_bbr_remap_active(object)==FALSE ) {
            child = GET_BBR_CHILD( object );
        }
    }

    if (child != NULL) {
        // pass cmd down feature stack using the child plugin function table
        child_fncs = (struct plugin_functions_s *)child->plugin->functions.plugin;
        rc = child_fncs->can_expand(child, expand_limit, expansion_points );
    }

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * I can remove bbr from this object if:
 *
 * - I own it (i_can_modify_object says TRUE)
 *
 * Returns: 0 if the delete is allowed, EINVAL otherwise
 */
static int BBR_CanDelete( storage_object_t * object)
{
    return ( i_can_modify_object(object)==TRUE ) ? 0 : EINVAL;
}
/*
 * I can allow a child object to expand if:
 *
 * - I own the parent object
 * - kernel is not actively remapping bad blocks
 * - A child exists for the bbr object
 *
 * The size argument is not looked at.
 *
 * Returns: 0 if the expand is allowed, EINVAL otherwise
 */
static int BBR_CanExpandBy(storage_object_t * object, sector_count_t *size)
{
    int rc = EINVAL;
    int eligible;

    LOG_ENTRY();

    // checks run in this order and stop at the first one that fails
    eligible = ( i_can_modify_object(object) == TRUE ) &&
               ( kernel_bbr_remap_active(object) == FALSE );

    if ( eligible && ( GET_BBR_CHILD( object ) != NULL ) ) {
        rc = 0;
    }

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * I can shrink this bbr object if:
 *
 * - I own it
 * - kernel is not actively remapping bad blocks
 * - the engine remap sector count for the object is zero
 * - I have a child object
 * - lower layer plugins say Ok to shrink
 *
 * Returns: the child's can_shrink rc, or EINVAL if we never got as far
 *          as asking the child
 */
static int BBR_CanShrink( storage_object_t * object,
                          sector_count_t shrink_limit,
                          list_anchor_t shrink_points )
{
    int rc = EINVAL;
    int eligible;
    storage_object_t *child;
    struct plugin_functions_s *child_fncs;

    LOG_ENTRY();

    // checks run in this order and stop at the first one that fails
    eligible = ( i_can_modify_object(object) == TRUE ) &&
               ( kernel_bbr_remap_active(object) == FALSE ) &&
               ( get_engine_remap_sector_count(object) == 0 );

    if (eligible) {
        child = GET_BBR_CHILD( object );
        if (child != NULL) {
            // pass cmd down feature stack using the child plugin function table
            child_fncs = (struct plugin_functions_s *)child->plugin->functions.plugin;
            rc = child_fncs->can_shrink(child, shrink_limit, shrink_points);
        }
    }

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * I can shrink the bbr object by size sectors if:
 *
 * - I own the object
 * - kernel is not actively remapping bad blocks
 * - the engine remap sector count for the object is zero
 * - the requested amount is smaller than the object itself
 *
 * Returns: 0 if the shrink is allowed, EINVAL otherwise
 */
static int BBR_CanShrinkBy( storage_object_t * object, sector_count_t *size)
{
    int rc = EINVAL;
    int eligible;

    LOG_ENTRY();

    // checks run in this order and stop at the first one that fails,
    // so *size is only read once the earlier checks have passed
    eligible = ( i_can_modify_object(object)==TRUE );
    eligible = eligible && ( kernel_bbr_remap_active(object)==FALSE );
    eligible = eligible && ( get_engine_remap_sector_count(object) == 0 );
    eligible = eligible && ( *size < object->size );

    if (eligible) {
        rc = 0;
    }

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * Function: bbr_get_devmap_info
 *
 * Refreshes the object's device mapper status and, if the object is
 * active, compares the kernel's target list against what we expect:
 *
 * - exactly one target, starting at 0 and as long as the bbr object
 * - a BBR target whose device, table locations, replacement block
 *   location, table size, replacement block count and block size all
 *   match the child object and our private data
 *
 * Any mismatch flags the object with SOFLAG_NEEDS_ACTIVATE. If the
 * targets cant be retrieved nothing is flagged.
 */
static void bbr_get_devmap_info( storage_object_t * bbr_object )
{
    BBR_Private_Data *pdata;
    storage_object_t *child;
    dm_target_t *targets = NULL;

    LOG_ENTRY();

    EngFncs->dm_update_status(bbr_object);

    if ( (bbr_object->flags & SOFLAG_ACTIVE) != 0 ) {

        pdata = (BBR_Private_Data*)bbr_object->private_data;
        child = GET_BBR_CHILD(bbr_object);

        if ( EngFncs->dm_get_targets( bbr_object, &targets ) == 0 ) {

            // first the shape of the mapping ...
            if ( (targets->next != NULL) ||
                 (targets->start != 0) ||
                 (targets->length != bbr_object->size) ) {
                LOG_DEBUG("kernel dm target info is incorrect ... needs activate\n");
                bbr_object->flags |= SOFLAG_NEEDS_ACTIVATE;
            }
            // ... then the bbr specific settings of the single target
            else if ( (targets->type != DM_TARGET_BBR) ||
                      (targets->data.bbr->device.major != child->dev_major) ||
                      (targets->data.bbr->device.minor != child->dev_minor) ||
                      (targets->data.bbr->device.start != bbr_object->start) ||
                      (targets->data.bbr->table1_lba != pdata->bbr_table_lsn1) ||
                      (targets->data.bbr->table2_lba != pdata->bbr_table_lsn2) ||
                      (targets->data.bbr->replacement_blocks_lba!= pdata->replacement_blocks_lsn) ||
                      (targets->data.bbr->table_size != pdata->bbr_table_size_in_sectors) ||
                      (targets->data.bbr->num_replacement_blocks != pdata->replacement_blocks_needed) ||
                      (targets->data.bbr->block_size != pdata->block_size) ) {
                LOG_DEBUG("kernel object has some incorrect bbr metadata\n");
                bbr_object->flags |= SOFLAG_NEEDS_ACTIVATE;
            }

            EngFncs->dm_deallocate_targets( targets );
        }
    }

    LOG_EXIT_VOID();
}
/*
 * Called by the engine with a list of storage objects that it believes
 * are BBR storage objects. Only the first object on the input list is
 * examined. BBR is applied as follows:
 *
 * - Validate the object by reading the BBR metadata named by its
 *   feature header. If no good copy can be read we fail this API call.
 *
 * - Build a new BBR storage object on top of the child (this also reads
 *   the BBR table and consumes the child object).
 *
 * - Check the new object's device mapper state and place the new object
 *   on the output list.
 *
 * input_objects  - list holding the candidate child object
 * output_objects - list that receives the new bbr object
 * FinalCall      - not looked at
 *
 * Returns: 0 if a bbr object was built and placed on the output list,
 *          EINVAL if the input list is empty,
 *          else the rc from ReadMetaData or Build_New_BBR_Object
 */
static int BBR_Feature_Discovery( list_anchor_t input_objects,
                                  list_anchor_t output_objects,
                                  boolean FinalCall )
{
    storage_object_t *child;
    storage_object_t *bbr_object = NULL;
    evms_bbr_metadata_t metadata;
    int rc;

    LOG_ENTRY();

    child = EngFncs->first_thing(input_objects, NULL);
    if (child == NULL) {
        // nothing on the list ... dont dereference a NULL object below
        LOG_ERROR("no storage object was supplied for discovery\n");
        LOG_EXIT_INT(EINVAL);
        return EINVAL;
    }

    LOG_DEBUG("\tExamining storage object %s...\n", child->name );

    // read and validate bbr feature metadata
    rc = ReadMetaData( child, &metadata, child->feature_header );
    if (!rc) {
        rc = Build_New_BBR_Object( child, &metadata, &bbr_object );
        if (!rc) {
            display_feature_header(child->feature_header);
            bbr_get_devmap_info(bbr_object);
            EngFncs->insert_thing(output_objects, bbr_object, INSERT_AFTER, NULL);
        }
    }
    else {
        LOG_ERROR("I/O Error reading metadata from storage object\n");
    }

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * Worker for delete and discard of a bbr object.
 *
 * object        - bbr object to remove
 * child_objects - list that receives the bbr child (may be NULL)
 * destroy       - TRUE: also put the last and the first sector of the
 *                 child on its plugin's kill list (zaps feature headers)
 *                 FALSE: only tear down the in-memory object
 *
 * When the kill list requests succeed (or are not needed) the bbr object
 * is removed from the child's parent list, its name is unregistered, the
 * child is handed back on child_objects and the bbr object is freed.
 *
 * Returns: 0 if the object was removed,
 *          EINVAL if we dont own the object or it has no child,
 *          EIO if either kill list request failed (nothing is torn down)
 */
static int BBR_w_delete(storage_object_t *object, list_anchor_t child_objects, boolean destroy)
{
    int rc = EINVAL;
    int rc_first_sector;
    storage_object_t *child;
    struct plugin_functions_s *Fncs;

    LOG_ENTRY();

    if ( i_can_modify_object(object) == TRUE ) {

        child = GET_BBR_CHILD(object);

        if (child) {

            if (destroy) {
                // zap feature headers. Both requests are always made, and a
                // failure of either one must fail the delete ... the second
                // result must not overwrite the first.
                Fncs = child->plugin->functions.plugin;
                rc = Fncs->add_sectors_to_kill_list( child, child->size-1, 1 );
                rc_first_sector = Fncs->add_sectors_to_kill_list( child, 0, 1 );
                if (rc == 0) {
                    rc = rc_first_sector;
                }
            } else {
                rc = 0;
            }

            // put child objects on engine list
            if (rc==0) {
                EngFncs->remove_thing(child->parent_objects, object);
                EngFncs->unregister_name( object->name );
                if (child_objects)
                    EngFncs->insert_thing(child_objects, child, INSERT_BEFORE, NULL);
                free_bbr_object(object);
            } else {
                rc = EIO;
            }
        }
    }

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * Deletes a BBR object.
 *
 * All of the work is done by BBR_w_delete(), which is called here with
 * destroy == TRUE.  In that mode the worker
 *
 * - queues the first and last sector of the BBR child on the child's
 *   kill list,
 * - removes the BBR object from the child's parent list,
 * - unregisters the BBR object name,
 * - puts the BBR child onto the list provided in the second parameter,
 * - frees the BBR object.
 *
 * Returns: RC=0 if successful, otherwise the error from BBR_w_delete()
 *
 */
static int BBR_Delete( storage_object_t * object, list_anchor_t child_objects )
{
    int status;

    LOG_ENTRY();

    status = BBR_w_delete(object, child_objects, TRUE);

    LOG_EXIT_INT(status);
    return status;
}
/*
 * Function: BBR_Discard
 *
 * Similar to delete, but nothing is queued for the disk: every object on
 * the list is passed to BBR_w_delete() with destroy == FALSE and without
 * a child list, so only the data structures related to the BBR objects
 * are released.
 *
 * Failures of individual objects are ignored; the call always returns 0.
 */
int BBR_Discard(list_anchor_t objects)
{
    list_element_t    iter;
    storage_object_t *bbr_obj;

    LOG_ENTRY();

    LIST_FOR_EACH(objects, iter, bbr_obj) {
        (void) BBR_w_delete(bbr_obj, NULL, FALSE);
    }

    LOG_EXIT_INT(0);
    return 0;
}
/*
 * A BBR object is expanded by:
 *
 * - calling down the plugin stack to expand the child object
 * - recalculating metadata
 * - remapping the bbr table
 * - moving the replacement blocks
 *
 * I check that:
 *
 * - I own the object
 * - kernel is not actively remapping bad blocks
 * - the plugin stack can expand the BBR child object
 * - the expanded child is actually larger in size
 *
 * The metadata is rebuilt in a freshly allocated BBR_Private_Data.  If the
 * object already has BBR info on disk (SOFLAG_NEW not set) the old layout
 * is remembered in the active_* fields so that the replacement blocks can
 * be moved at commit time.  If rebuilding the metadata fails, the original
 * private data is put back on the object and the new allocation is freed.
 *
 * Returns: RC=0 if we successfully expanded the BBR object
 *          EINVAL if the object is not ours, is being remapped by the
 *                 kernel, or has no child
 *          ENOMEM if memory could not be allocated or the child did not
 *                 actually grow
 *          otherwise the error from the child's expand routine or from
 *          create_bbr_metadata()
 *
 */
static int BBR_Expand( storage_object_t * object,
                       storage_object_t * expand_object,
                       list_anchor_t      objects,
                       option_array_t   * options )
{
    int rc = EINVAL;
    BBR_Private_Data *pdata;
    BBR_Private_Data *new_pdata = NULL;
    storage_object_t *child;
    u_int64_t starting_size;
    struct plugin_functions_s *Fncs;

    LOG_ENTRY();

    if ( ( i_can_modify_object(object) == TRUE ) &&
         ( kernel_bbr_remap_active(object) == FALSE ) ) {

        pdata = (BBR_Private_Data *) object->private_data;
        child = GET_BBR_CHILD(object);

        if (child) {

            // get his plugin function table
            Fncs = child->plugin->functions.plugin;

            // get starting size
            starting_size = child->size;

            // pass expand down the feature stack
            rc = Fncs->expand( child, expand_object, objects, options );

            // make sure that the feature actually expanded the object
            if ( rc == 0 && child->size <= starting_size ) {
                rc = ENOMEM;
            }

            // get new metadata memory so we will have the old information for
            // moving replacement blocks when we commit.
            if ( rc == 0 ) {
                new_pdata = calloc(1, sizeof(*new_pdata));
                if (new_pdata == NULL) {
                    rc = ENOMEM;
                }
            }

            if ( rc == 0 ) {
                // switch to new metadata ... it has to be attached first because
                // create_bbr_metadata() is handed the object, not the private data
                object->private_data       = (void *) new_pdata;
                new_pdata->signature       = pdata->signature;
                new_pdata->child           = pdata->child;
                new_pdata->sequence_number = pdata->sequence_number;

                // renew metadata
                rc = create_bbr_metadata( object, child, 0, pdata->feature_header_object_name );
                if ( rc != 0 ) {
                    // could not rebuild ... go back to the metadata we had so the
                    // object is not left with a half built private data area and
                    // the old one is not leaked
                    object->private_data = (void *) pdata;
                    free(new_pdata);
                    new_pdata = NULL;
                }
            }

            if ( rc == 0 ) {

                // IF ..... expanding an object that has BBR info out on disk
                // THEN ... we have to carry this info over to the new metadata
                if ((object->flags & SOFLAG_NEW)==0) {
                    new_pdata->active_replacement_blocks_lsn    = pdata->replacement_blocks_lsn;
                    new_pdata->active_replacement_blocks_needed = pdata->replacement_blocks_needed;
                    new_pdata->active_bbr_table_lsn1            = pdata->bbr_table_lsn1;
                    new_pdata->active_bbr_table_lsn2            = pdata->bbr_table_lsn2;
                    new_pdata->active_bbr_table                 = pdata->bbr_table;
                    new_pdata->active_bbr_table_size_in_sectors = pdata->bbr_table_size_in_sectors;
                    // remember we need to move the replacement blocks and remap them
                    new_pdata->repl_blocks_moved = TRUE;
                }

                // toss old metadata (the old bbr table pointer, if carried over,
                // now lives on in new_pdata->active_bbr_table)
                free(pdata);

                // resize bbr object
                object->size = get_child_useable_size(object, child);

                // refresh feature header info
                BuildFeatureHeader( object, child->feature_header, FALSE );

                // set object flags
                child->flags  |= SOFLAG_FEATURE_HEADER_DIRTY;
                object->flags |= SOFLAG_DIRTY;
                if (object->flags & SOFLAG_ACTIVE) {
                    object->flags |= SOFLAG_NEEDS_ACTIVATE;
                }
            }
        }
    }

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * A BBR object is shrunk by:
 *
 * - calling down the plugin stack to shrink the child object
 * - recalculating metadata
 * - remapping the bbr table
 * - moving the replacement blocks
 *
 * I check that:
 *
 * - I own the object
 * - kernel is not actively remapping bad blocks
 * - the plugin stack can shrink the BBR child object
 * - the child object is actually smaller in size
 *
 * The metadata is rebuilt in a freshly allocated BBR_Private_Data.  If the
 * object already has BBR info on disk (SOFLAG_NEW not set) the old layout
 * is remembered in the active_* fields.  Unlike expand, repl_blocks_moved
 * is NOT set here (see the note in the code).  If rebuilding the metadata
 * fails, the original private data is put back on the object and the new
 * allocation is freed.
 *
 * Returns: RC=0 if we successfully shrunk the BBR object
 *          EINVAL if the object is not ours, is being remapped by the
 *                 kernel, or has no child
 *          ENOMEM if memory could not be allocated or the child did not
 *                 actually get smaller
 *          otherwise the error from the child's shrink routine or from
 *          create_bbr_metadata()
 *
 */
static int BBR_Shrink( storage_object_t * object,
                       storage_object_t * shrink_object,
                       list_anchor_t      objects,
                       option_array_t   * options )
{
    int rc = EINVAL;
    BBR_Private_Data *pdata;
    BBR_Private_Data *new_pdata = NULL;
    storage_object_t *child;
    u_int64_t starting_size;
    struct plugin_functions_s *fncs;

    LOG_ENTRY();

    if ( ( i_can_modify_object(object) == TRUE ) &&
         ( kernel_bbr_remap_active(object) == FALSE ) ) {

        pdata = (BBR_Private_Data *) object->private_data;
        child = GET_BBR_CHILD(object);

        if (child) {

            // get his plugin function table
            fncs = child->plugin->functions.plugin;

            // starting size
            starting_size = child->size;

            // pass shrink cmd down the feature stack
            rc = fncs->shrink( child, shrink_object, objects, options );

            // make sure that the feature actually shrunk the object
            if ( rc == 0 && child->size >= starting_size ) {
                rc = ENOMEM;
            }

            // get new metadata memory
            if ( rc == 0 ) {
                new_pdata = calloc(1, sizeof(*new_pdata));
                if (new_pdata == NULL) {
                    rc = ENOMEM;
                }
            }

            if ( rc == 0 ) {
                // switch to new metadata ... it has to be attached first because
                // create_bbr_metadata() is handed the object, not the private data
                object->private_data       = (void *) new_pdata;
                new_pdata->signature       = pdata->signature;
                new_pdata->child           = pdata->child;
                new_pdata->sequence_number = pdata->sequence_number;

                // renew metadata
                rc = create_bbr_metadata( object, child, 0, pdata->feature_header_object_name );
                if ( rc != 0 ) {
                    // could not rebuild ... go back to the metadata we had so the
                    // object is not left with a half built private data area and
                    // the old one is not leaked
                    object->private_data = (void *) pdata;
                    free(new_pdata);
                    new_pdata = NULL;
                }
            }

            if ( rc == 0 ) {

                // IF ..... shrinking an object that has BBR info out on disk
                // THEN ... we have to carry this info over to the new metadata
                if ((object->flags & SOFLAG_NEW)==0) {
                    new_pdata->active_replacement_blocks_lsn    = pdata->replacement_blocks_lsn;
                    new_pdata->active_replacement_blocks_needed = pdata->replacement_blocks_needed;
                    new_pdata->active_bbr_table_lsn1            = pdata->bbr_table_lsn1;
                    new_pdata->active_bbr_table_lsn2            = pdata->bbr_table_lsn2;
                    new_pdata->active_bbr_table                 = pdata->bbr_table;
                    new_pdata->active_bbr_table_size_in_sectors = pdata->bbr_table_size_in_sectors;
                    // remember we need to move the replacement blocks and remap them
                    // ... need to change commit process to allow this post 1.2 release
                    // new_pdata->repl_blocks_moved = TRUE;
                }

                // toss old metadata (the old bbr table pointer, if carried over,
                // now lives on in new_pdata->active_bbr_table)
                free(pdata);

                // resize bbr object
                object->size = get_child_useable_size(object, child);

                // refresh feature header info
                BuildFeatureHeader( object, child->feature_header, FALSE );

                // set object flags
                child->flags  |= SOFLAG_FEATURE_HEADER_DIRTY;
                object->flags |= SOFLAG_DIRTY;
                if (object->flags & SOFLAG_ACTIVE) {
                    object->flags |= SOFLAG_NEEDS_ACTIVATE;
                }
            }
        }
    }

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * Passes the API call down to the BBR child object.
 *
 * I check that:
 *
 * - I own the object
 * - the logical sectors fall on the useable area of the BBR object
 *   (the range test is written so that lsn+count cannot wrap around)
 * - the BBR object has a child to pass the request to
 *
 * The lsn and count are handed to the child unchanged.
 *
 * Returns: RC=0 if sectors were added to the kill list successfully
 *          EINVAL if any of the checks above fails
 *          otherwise the error from the child's add_sectors_to_kill_list
 *
 */
static int BBR_AddSectorsToKillList( storage_object_t * object,
                                     lsn_t              lsn,
                                     sector_count_t     count)
{
    int rc = EINVAL;
    storage_object_t *child;
    struct plugin_functions_s *Fncs;
    BBR_Private_Data *pdata;

    LOG_ENTRY();

    // ownership is tested first so object is only dereferenced afterwards
    if ( ( i_can_modify_object(object)==TRUE ) &&
         ( lsn   <= object->size ) &&
         ( count <= object->size - lsn ) ) {

        pdata = (BBR_Private_Data *) object->private_data;
        child = GET_BBR_CHILD( object );

        if ( child ) {
            // get his plugin function table
            Fncs = child->plugin->functions.plugin;

            // pass cmd down feature stack
            rc = Fncs->add_sectors_to_kill_list(child, lsn, count);
        }
        // no child ... nothing was queued, so rc stays EINVAL
    }

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * Called to commit changes to a BBR storage object.
 *
 * We only do work during the metadata commit phases:
 *
 *   phase 0 = stop bbr remapping of the object
 *   phase 1 = commit 1st copy of metadata & 1st bbr table
 *   phase 2 = commit 2nd copy of metadata & 2nd bbr table
 *
 * Phases 1 and 2 are handled by Commit_BBR_Object(); every other phase is
 * a successful no-op.
 *
 * I check that:
 *
 * - I own the object
 *
 * Returns: RC=0 if the phase completed (or there was nothing to do)
 *          EINVAL if the object is not ours
 *          otherwise the error from Commit_BBR_Object()
 *
 */
static int BBR_CommitChanges( storage_object_t * object, uint commit_phase )
{
    int rc;

    LOG_ENTRY();
    LOG_DEBUG("bbr object: name= %s \n", object->name );

    if ( i_can_modify_object(object) != TRUE ) {
        rc = EINVAL;
    }
    else if ( commit_phase == 0 ) {
        stop_kernel_bbr_remapping( object );
        rc = 0;
    }
    else if ( commit_phase == 1 || commit_phase == 2 ) {
        rc = Commit_BBR_Object( object, commit_phase, FALSE );
        // BBR_DEBUG display_bbr_remap_info( object, (BBR_Private_Data *)object->private_data );
    }
    else {
        // not one of our phases
        rc = 0;
    }

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * Backs up the BBR metadata of an object.
 *
 * Runs Commit_BBR_Object() in backup mode for the first metadata write
 * and then, only if that succeeded, for the second metadata write.
 *
 * Returns: RC=0 if both passes succeeded, otherwise the error from the
 *          first pass that failed
 */
static int BBR_backup_metadata( storage_object_t * object )
{
    const uint passes[] = { FIRST_METADATA_WRITE, SECOND_METADATA_WRITE };
    int rc = 0;
    int pass;

    LOG_ENTRY();

    // stop at the first pass that reports an error
    for (pass = 0; pass < 2 && rc == 0; pass++) {
        rc = Commit_BBR_Object( object, passes[pass], TRUE );
    }

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * Reads count sectors, starting at lsn, from a BBR object into buffer.
 *
 * I check that:
 *
 * - I own the object
 * - the logical sectors fall on the useable area of the BBR object
 *   (the range test is written so that lsn+count cannot wrap around)
 * - the BBR object has a child
 *
 * If the object is a kernel bbr object the whole request is handed to
 * kernel_bbr_sector_io().  Otherwise the request is done one sector at a
 * time: the sector is looked up with get_lsn() and read from the child;
 * while the read fails, remap_lsn() is asked for another lsn and the read
 * is retried, until it works or remap_lsn() returns 0.
 *
 * Returns: RC=0 if all sectors were read
 *          EINVAL if any of the checks above fails
 *          otherwise the error from the kernel i/o or from the child read
 */
static int BBR_Read( storage_object_t * object,
                     lsn_t              lsn,
                     sector_count_t     count,
                     void             * buffer)
{
    int rc = EINVAL;
    storage_object_t *child;
    struct plugin_functions_s *Fncs;
    BBR_Private_Data *pdata;
    vsector_t *sector_ptr = (vsector_t *)buffer;
    sector_count_t i;       // same type as count, no mixed-sign compare
    lsn_t bbr_lsn;

    LOG_ENTRY();

    // ownership is tested first so object is only dereferenced afterwards
    if ( ( i_can_modify_object(object)==TRUE ) &&
         ( lsn   <= object->size ) &&
         ( count <= object->size - lsn ) ) {

        pdata = (BBR_Private_Data *) object->private_data;
        child = GET_BBR_CHILD( object );

        if (child) {

            if ( isa_kernel_bbr_object(object) == TRUE ) {
                // build bbr i/o ioctl and let kernel handle it.
                rc = kernel_bbr_sector_io( object, lsn, count, buffer, SECTOR_IO_READ );
            }
            else {
                Fncs = child->plugin->functions.plugin;

                for (i=0,rc=0; i<count && rc==0; i++) {

                    bbr_lsn = get_lsn( pdata, lsn+i );

                    rc = Fncs->read(child, bbr_lsn, 1, sector_ptr);

                    // keep asking for another lsn until the read works
                    // or there is nothing left to try
                    while ( rc ) {
                        bbr_lsn = remap_lsn(object, pdata, bbr_lsn);
                        if ( bbr_lsn == 0 ) {
                            break;
                        }
                        rc = Fncs->read(child, bbr_lsn, 1, sector_ptr);
                    }

                    // next sector sized slot in the caller's buffer
                    ++sector_ptr;
                }
            }
        }
    }

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * Writes count sectors, starting at lsn, from buffer to a BBR object.
 *
 * I check that:
 *
 * - I own the object
 * - the logical sectors fall on the useable area of the BBR object
 *   (the range test is written so that lsn+count cannot wrap around)
 * - the BBR object has a child
 *
 * If the object is a kernel bbr object the whole request is handed to
 * kernel_bbr_sector_io().  Otherwise the request is done one sector at a
 * time: the sector is looked up with get_lsn() and written to the child;
 * while the write fails, remap_lsn() is asked for another lsn and the
 * write is retried, until it works or remap_lsn() returns 0.
 *
 * Returns: RC=0 if all sectors were written
 *          EINVAL if any of the checks above fails
 *          otherwise the error from the kernel i/o or from the child write
 */
static int BBR_Write( storage_object_t * object,
                      lsn_t              lsn,
                      sector_count_t     count,
                      void             * buffer)
{
    int rc = EINVAL;
    storage_object_t *child;
    struct plugin_functions_s *Fncs;
    BBR_Private_Data *pdata;
    lsn_t bbr_lsn;
    vsector_t *sector_ptr = (vsector_t *)buffer;
    sector_count_t i;       // same type as count, no mixed-sign compare

    LOG_ENTRY();

    // ownership is tested first so object is only dereferenced afterwards
    if ( ( i_can_modify_object(object)==TRUE ) &&
         ( lsn   <= object->size ) &&
         ( count <= object->size - lsn ) ) {

        pdata = (BBR_Private_Data *) object->private_data;
        child = GET_BBR_CHILD(object);

        if (child) {

            if ( isa_kernel_bbr_object(object) == TRUE ) {
                // build bbr i/o ioctl and let kernel handle it.
                rc = kernel_bbr_sector_io( object, lsn, count, buffer, SECTOR_IO_WRITE );
            }
            else {
                Fncs = child->plugin->functions.plugin;

                for (i=0,rc=0; i<count && rc==0; i++) {

                    bbr_lsn = get_lsn( pdata, lsn+i );

                    rc = Fncs->write(child, bbr_lsn, 1, sector_ptr);

                    // keep asking for another lsn until the write works
                    // or there is nothing left to try
                    while ( rc ) {
                        bbr_lsn = remap_lsn(object, pdata, bbr_lsn);
                        if ( bbr_lsn == 0 ) {
                            break;
                        }
                        rc = Fncs->write(child, bbr_lsn, 1, sector_ptr);
                    }

                    // next sector sized slot in the caller's buffer
                    ++sector_ptr;
                }
            }
        }
    }

    LOG_EXIT_INT(rc);
    return rc;
}
/*
 * Notification that the object is (or is no longer) part of a volume.
 *
 * BBR has nothing to do for this yet: neither parameter is looked at and
 * only the entry/exit trace is produced.
 *
 */
static void BBR_SetVolume( storage_object_t * object, boolean flag )
{
    LOG_ENTRY();

    // Nothing to do yet.
    (void) object;
    (void) flag;

    LOG_EXIT_VOID();
}
/*-------------------------------------------------------------------------------------+
+ +
+ PLUGIN FUNCTION TABLE +
+ +
+--------------------------------------------------------------------------------------*/
// Function table handed to the engine through bbr_plugin_record.
static struct plugin_functions_s fft = {

    // located above
    .setup_evms_plugin        = BBR_SetupEVMSPlugin,
    .cleanup_evms_plugin      = BBR_Cleanup,
    .can_set_volume           = BBR_CanSetVolume,
    .can_delete               = BBR_CanDelete,
    .can_expand               = BBR_CanExpand,
    .can_expand_by            = BBR_CanExpandBy,
    .can_shrink               = BBR_CanShrink,
    .can_shrink_by            = BBR_CanShrinkBy,
    .discover                 = BBR_Feature_Discovery,
    .delete                   = BBR_Delete,
    .discard                  = BBR_Discard,
    .expand                   = BBR_Expand,
    .shrink                   = BBR_Shrink,
    .add_sectors_to_kill_list = BBR_AddSectorsToKillList,
    .commit_changes           = BBR_CommitChanges,
    .read                     = BBR_Read,
    .write                    = BBR_Write,
    .set_volume               = BBR_SetVolume,
    .can_activate             = BBR_can_activate,
    .activate                 = BBR_activate,
    .can_deactivate           = BBR_can_deactivate,
    .deactivate               = BBR_deactivate,
    .backup_metadata          = BBR_backup_metadata,

    // located in bbroptions.c
    .get_option_count         = BBR_GetOptionCount,
    .init_task                = BBR_InitTask,
    .set_option               = BBR_SetOption,
    .set_objects              = BBR_SetObjects,
    .get_info                 = BBR_GetInfo,
    .get_plugin_info          = BBR_GetPluginInfo
};
/*-------------------------------------------------------------------------------------+
+ +
+ BUILD AND EXPORT AN EVMS PLUGIN RECORD +
+ +
+--------------------------------------------------------------------------------------*/
// Plugin record describing the BBR feature: identity, version numbers,
// the engine/plugin API levels it requires, its names and its function table.
static plugin_record_t bbr_plugin_record = {
    .id                          = EVMS_BBR_FEATURE_PLUGIN_ID,
    .version                     = { MAJOR_VERSION, MINOR_VERSION, PATCH_LEVEL },
    .required_engine_api_version = { 15, 0, 0 },
    .required_plugin_api_version = { .plugin = { 13, 1, 0 } },
    .short_name                  = EVMS_BBR_FEATURE_PLUGIN_SHORT_NAME,
    .long_name                   = EVMS_BBR_FEATURE_PLUGIN_LONG_NAME,
    .oem_name                    = EVMS_IBM_OEM_NAME,
    .functions                   = { .plugin = &fft },
    .container_functions         = NULL
};
// NULL terminated vector of plugin record ptrs that we export for the
// EVMS Engine.  This plugin exports a single record.
plugin_record_t *evms_plugin_records[] = { &bbr_plugin_record, NULL };