/*
 *   (C) Copyright IBM Corp. 2001, 2003
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Module: LVM Region Manager
 * File: evms2/engine/plugins/lvm/lvm_pv.c
 *
 * Description: This file contains all functions related to the discovery,
 *              creation and management of physical volumes in the LVM region
 *              manager.
 */


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/utsname.h>
#include <time.h>
#include <plugin.h>
#include "lvmregmgr.h"

/**
 * lvm_get_pe_start
 *
 * Return the sector at which the first data PE begins on the given PV,
 * normalizing the in-memory PV metadata along the way. This cleans up
 * after LVM's temporary switch to version 2 PVs.
 *
 * What the function does:
 * - If the metadata claims version 2, it is reverted to version 1 and the
 *   recorded pe_start becomes the candidate result.
 * - If there is no candidate yet (not version 2, or pe_start was zero),
 *   the result is bytes_to_sectors(pe_on_disk.base + pe_on_disk.size).
 * - If pe_start disagrees with pe_on_disk.base + pe_on_disk.size, both
 *   pe_on_disk.size and pe_start are rewritten to match the result.
 *
 * Only the in-memory pv_disk_t is modified here; this function does not
 * itself mark anything dirty or write to disk.
 *
 * Returns the first data PE sector.
 **/
u_int64_t lvm_get_pe_start(lvm_physical_volume_t * pv_entry)
{
	pv_disk_t * pv = pv_entry->pv;
	u_int64_t start_sector;

	LOG_ENTRY();

	if (pv->version != 2) {
		start_sector = 0;
	} else {
		LOG_DETAILS("Detected version 2 metadata on PV %s.\n",
			    pv_entry->segment->name);
		LOG_DETAILS("Reverting to version 1.\n");
		pv->version = 1;
		start_sector = pv->pe_start;
	}

	/* Nothing usable recorded: derive the start from the PE map area. */
	if (start_sector == 0) {
		start_sector = bytes_to_sectors(pv->pe_on_disk.base +
						pv->pe_on_disk.size);
	}

	/* Make pe_start and the PE map size agree with the chosen sector. */
	if (bytes_to_sectors(pv->pe_on_disk.base +
			     pv->pe_on_disk.size) != pv->pe_start) {
		LOG_DETAILS("Detected pe_start/pe_on_disk.size inconsistency "
			    "on PV %s. Fixing.\n", pv_entry->segment->name);
		pv->pe_on_disk.size = sectors_to_bytes(start_sector) -
				      pv->pe_on_disk.base;
		pv->pe_start = start_sector;
	}

	LOG_EXIT_INT((int)start_sector);
	return start_sector;
}


/*** Physical Volume Memory Allocation Functions ***/


/**
 * lvm_allocate_pe_map
 *
 * Allocate the in-memory PE map for a PV (one lvm_physical_extent_t per
 * PE) and fill in the fields that never change: the owning PV, the PE
 * number and the PE's starting sector. The pv_entry must already have its
 * PV structure filled in.
 *
 * If pe_total is still zero, no map is allocated: pe_map is set to NULL
 * and the call succeeds.
 *
 * Returns 0 on success, or ENOMEM if the map could not be allocated.
 **/
int lvm_allocate_pe_map(lvm_physical_volume_t * pv_entry)
{
	u_int64_t pe_start;
	u_int32_t i;
	int rc = 0;

	LOG_ENTRY();

	/* Might not always have pe_total filled in yet. */
	if (pv_entry->pv->pe_total == 0) {
		pv_entry->pe_map = NULL;
		goto out;
	}

	pv_entry->pe_map = EngFncs->engine_alloc(pv_entry->pv->pe_total *
						 sizeof(lvm_physical_extent_t));
	if (!pv_entry->pe_map) {
		LOG_CRITICAL("Memory error creating PE map for PV %s.\n",
			     pv_entry->segment->name);
		rc = ENOMEM;
		goto out;
	}

	/* Initialize the constant fields in the PE map. */
	pe_start = lvm_get_pe_start(pv_entry);
	for (i = 0; i < pv_entry->pv->pe_total; i++) {
		pv_entry->pe_map[i].pv = pv_entry;
		pv_entry->pe_map[i].number = i;
		/* Widen before multiplying so the sector offset cannot wrap
		 * at 32 bits on large PVs.
		 */
		pv_entry->pe_map[i].sector = pe_start +
					     (u_int64_t)i * pv_entry->pv->pe_size;
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * lvm_deallocate_pe_map
 *
 * Free the PE map attached to this PV, if there is one, and clear the
 * pointer so the entry no longer refers to the released memory.
 **/
static void lvm_deallocate_pe_map(lvm_physical_volume_t * pv_entry)
{
	void * map;

	LOG_ENTRY();

	map = pv_entry->pe_map;
	if (map != NULL) {
		pv_entry->pe_map = NULL;
		EngFncs->engine_free(map);
	}

	LOG_EXIT_VOID();
}

/**
 * lvm_allocate_physical_volume
 *
 * Build a new lvm_physical_volume_t that wraps the given segment and PV
 * metadata, then create its PE map from the PE count in that metadata.
 *
 * The pv structure must already be filled in. It is attached to the new
 * entry by pointer (not copied). On failure it is released here: directly
 * if the entry itself cannot be allocated, or through
 * lvm_deallocate_physical_volume() if the PE map cannot be created.
 *
 * Returns the new entry, or NULL on failure.
 **/
lvm_physical_volume_t * lvm_allocate_physical_volume(storage_object_t * segment,
						     pv_disk_t * pv)
{
	lvm_physical_volume_t * new_entry;

	LOG_ENTRY();

	new_entry = EngFncs->engine_alloc(sizeof(lvm_physical_volume_t));
	if (new_entry) {
		/* Populate the entry before building its PE map. */
		new_entry->segment	= segment;
		new_entry->pv		= pv;
		new_entry->number	= pv->pv_number;
		new_entry->flags	= 0;
		new_entry->move_extents = 0;

		if (lvm_allocate_pe_map(new_entry)) {
			LOG_CRITICAL("Memory error creating PE map for physical volume %s\n",
				     segment->name);
			lvm_deallocate_physical_volume(new_entry);
			new_entry = NULL;
		}
	} else {
		LOG_CRITICAL("Memory error creating physical volume %s\n",
			     segment->name);
		EngFncs->engine_free(pv);
	}

	LOG_EXIT_PTR(new_entry);
	return new_entry;
}

/**
 * lvm_deallocate_physical_volume
 *
 * Tear down a physical volume entry: unhook it from its group's PV list
 * (if it is the entry registered under its number), remove its segment
 * from the container, and free the PE map, the PV metadata and finally
 * the entry itself. The segment object is not freed.
 **/
void lvm_deallocate_physical_volume(lvm_physical_volume_t * pv_entry)
{
	lvm_volume_group_t * vg = pv_entry->group;

	LOG_ENTRY();

	/* Drop the group's reference to this PV, if it holds one. */
	if (vg != NULL) {
		if (vg->pv_list[pv_entry->number] == pv_entry) {
			vg->pv_list[pv_entry->number] = NULL;
			vg->pv_count--;
		}
	}

	if (pv_entry->segment != NULL) {
		lvm_remove_segment_from_container(pv_entry->segment);
	}

	/* Release the PE map. */
	lvm_deallocate_pe_map(pv_entry);

	/* Release the PV metadata. */
	if (pv_entry->pv != NULL) {
		EngFncs->engine_free(pv_entry->pv);
		pv_entry->pv = NULL;
	}

	/* The segment is only forgotten, not destroyed. Depending on who
	 * asked for the deallocation, it may need to go on another list.
	 */
	pv_entry->number = 0;
	pv_entry->segment = NULL;

	EngFncs->engine_free(pv_entry);

	LOG_EXIT_VOID();
}


/*** Physical Volume Creation Functions ***/


/**
 * lvm_find_free_pv_number
 *
 * Scan the group's PV list, slots 1 through MAX_PV, for the first empty
 * slot.
 *
 * Returns the slot number, or -1 (after logging an error) if every slot
 * is occupied.
 **/
int lvm_find_free_pv_number(lvm_volume_group_t * group)
{
	int free_number = -1;
	int slot;

	LOG_ENTRY();

	for (slot = 1; slot <= MAX_PV; slot++) {
		if (group->pv_list[slot] == NULL) {
			free_number = slot;
			break;
		}
	}

	if (free_number < 0) {
		LOG_ERROR("Container %s has maximum number of objects.\n",
			  group->container->name);
	}

	LOG_EXIT_INT(free_number);
	return free_number;
}

/**
 * lvm_set_system_id
 *
 * Fill in the PV's system_id field with this machine's node name (from
 * uname()) immediately followed by the current time in seconds. The field
 * is cleared first and the text is truncated to fit NAME_LEN bytes.
 *
 * This function is based on the "system_id_set" function from the LVM
 * tools library, from Heinz Mauelshagen and Sistina Software
 * (www.sistina.com).
 *
 * Returns 0 on success, or EINVAL if uname() fails.
 **/
static int lvm_set_system_id(pv_disk_t * pv)
{
	struct utsname uts;
	int rc = 0;

	LOG_ENTRY();

	if (uname(&uts)) {
		LOG_ERROR("Error from uname()\n");
		rc = EINVAL;
	} else {
		memset(pv->system_id, 0, NAME_LEN);
		/* time_t is not guaranteed to be unsigned long; convert
		 * explicitly so the argument matches the %lu conversion.
		 */
		snprintf((char *)pv->system_id, NAME_LEN, "%s%lu",
			 uts.nodename, (unsigned long)time(NULL));
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * lvm_calculate_vgda_info
 *
 * Fill in the "base" and "size" fields describing where each on-disk
 * metadata structure lives. Maximum defaults are used for all sizes.
 * pe_on_disk.size is deliberately left alone: it can only be set once the
 * number of PEs is known, which lvm_calculate_pe_total() works out.
 *
 * The layout follows the LVM 0.9.1beta8 release: the LV array and the PE
 * map each start at the end of the preceding structure, rounded up to
 * LVM_VGDA_ALIGN.
 **/
static void lvm_calculate_vgda_info(pv_disk_t * pv)
{
	LOG_ENTRY();

	/* Sizes are independent of where each structure ends up. */
	pv->pv_on_disk.size		= LVM_PV_DISK_SIZE;
	pv->vg_on_disk.size		= LVM_VG_DISK_SIZE;
	pv->pv_uuidlist_on_disk.size	= (MAX_PV + 1) * NAME_LEN;
	pv->lv_on_disk.size		= (MAX_LV + 1) * sizeof(lv_disk_t);

	/* Fixed locations first ... */
	pv->pv_on_disk.base		= LVM_PV_DISK_BASE;
	pv->vg_on_disk.base		= LVM_VG_DISK_BASE;
	pv->pv_uuidlist_on_disk.base	= LVM_PV_UUIDLIST_DISK_BASE;

	/* ... then the ones that follow the previous structure. */
	pv->lv_on_disk.base		= round_up(pv->pv_uuidlist_on_disk.base
						   + pv->pv_uuidlist_on_disk.size, LVM_VGDA_ALIGN);
	pv->pe_on_disk.base		= round_up(pv->lv_on_disk.base
						   + pv->lv_on_disk.size, LVM_VGDA_ALIGN);

	LOG_EXIT_VOID();
}

/**
 * lvm_calculate_pe_total
 *
 * Determine how many PEs fit on this PV and record the result in
 * pe_total, pe_on_disk.size and pe_start. pv_size, pe_size and
 * pe_on_disk.base must already be set.
 *
 * This emulates the behavior of the LVM 0.9.1beta8 release: the PE map
 * size is rounded up to LVM_PE_ALIGN, and one full PE of space is left
 * between the PE map and the first data PE. The code is based on the
 * "setup_pe_table" function from the LVM tools library, from Heinz
 * Mauelshagen and Sistina Software (www.sistina.com).
 *
 * Returns 0 on success. Returns EINVAL, with pe_total set to zero, if the
 * PE size is zero or the object is too small to hold the metadata plus at
 * least one PE.
 **/
static int lvm_calculate_pe_total(pv_disk_t * pv)
{
	u_int32_t pe_map_size = 0;
	u_int32_t data_size;
	u_int32_t rest;
	int rc = 0;

	LOG_ENTRY();

	/* A zero PE size would divide by zero below. */
	if (!pv->pe_size) {
		LOG_ERROR("PE size is zero; cannot calculate number of PEs\n");
		pv->pe_total = 0;
		rc = EINVAL;
		goto out;
	}

	/* The object has to extend past the fixed metadata area. Without
	 * this check the unsigned subtraction below would wrap around and
	 * produce an enormous bogus PE count.
	 */
	if (pv->pv_size <= bytes_to_sectors(pv->pe_on_disk.base)) {
		LOG_ERROR("Not enough space on object for any PEs\n");
		pv->pe_total = 0;
		rc = EINVAL;
		goto out;
	}

	/* First guess at number of PEs on the PV. */
	rest = pv->pv_size - bytes_to_sectors(pv->pe_on_disk.base);
	pv->pe_total = rest / pv->pe_size;

	/* Eliminate PEs from the total until there is enough space to fit the
	 * PE maps in. There should be a full PE of space (or more) between the
	 * PE maps and the first real PE.
	 */
	for (; pv->pe_total; pv->pe_total--) {
		pe_map_size = round_up(bytes_to_sectors(pv->pe_total *
							sizeof(pe_disk_t)),
				       LVM_PE_ALIGN);
		data_size = pv->pe_total * pv->pe_size;

		/* Sum in 64 bits so the comparison cannot wrap. */
		if ((u_int64_t)pe_map_size + data_size + pv->pe_size <= rest) {
			break;
		}
	}

	if (!pv->pe_total) {
		LOG_ERROR("Not enough space on object for any PEs\n");
		rc = EINVAL;
		goto out;
	}

	pv->pe_on_disk.size = sectors_to_bytes(pe_map_size + pv->pe_size);
	pv->pe_start = bytes_to_sectors(pv->pe_on_disk.base +
					pv->pe_on_disk.size);

/* 0.9.1beta7 version of calculating PE map size.
   Leaving this code in here for historical purposes.
	new_pv_size	= pv->pv_size & ~(PE_SIZE_ALIGNMENT_SECTORS-1);
	rest		= new_pv_size - bytes_to_sectors(pv->pe_on_disk.base);
	pv->pe_total	= rest / pv->pe_size;
	rest		-= (pv->pe_total * pv->pe_size);
	while ( (rest * SECTOR_SIZE / sizeof(pe_disk_t)) < pv->pe_total ) {
		rest += pv->pe_size;
		pv->pe_total--;
	}
	pv->pe_on_disk.size = (new_pv_size - pv->pe_size * pv->pe_total)
				* SECTOR_SIZE - pv->pe_on_disk.base;
*/

out:
	LOG_EXIT_INT(rc);
	return rc;
}


/**
 * lvm_initialize_new_pv
 *
 * Reset a PV disk structure and give it the initial values of a freshly
 * created PV: the "HM" id, metadata version, size taken from the segment,
 * the VGDA layout, a new UUID and this system's ID. Group-dependent
 * fields are not set here.
 *
 * Returns 0 on success, or the error from lvm_create_uuid() or
 * lvm_set_system_id().
 **/
static int lvm_initialize_new_pv(pv_disk_t * pv,
				 storage_object_t * segment)
{
	int rc;

	LOG_ENTRY();

	memset(pv, 0, sizeof(*pv));

	pv->id[0]		= 'H';
	pv->id[1]		= 'M';
	pv->version		= LVM_METADATA_VERSION;
	pv->pv_major		= 3;
	pv->pv_allocatable	= PV_ALLOCATABLE;
	pv->pv_size		= segment->size;

	lvm_calculate_vgda_info(pv);

	/* Generate the PV's UUID; only continue to the system ID if that
	 * worked.
	 */
	memset(pv->pv_uuid, 0, NAME_LEN);
	rc = lvm_create_uuid((char *)pv->pv_uuid);
	if (rc == 0) {
		rc = lvm_set_system_id(pv);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * lvm_create_pv_from_segment
 *
 * Create a new PV out of the specified disk segment: allocate and
 * initialize a pv_disk_t, then wrap it in a new physical volume entry.
 *
 * Returns the new PV entry, or NULL on failure. No memory allocated here
 * is left behind on failure.
 **/
lvm_physical_volume_t * lvm_create_pv_from_segment(storage_object_t * segment)
{
	lvm_physical_volume_t * pv_entry = NULL;
	pv_disk_t * pv;
	int rc;

	LOG_ENTRY();

	/* Allocate a new pv_disk_t. */
	pv = EngFncs->engine_alloc(sizeof(pv_disk_t));
	if (!pv) {
		LOG_CRITICAL("Memory error creating new PV metadata for object %s.\n",
			     segment->name);
		goto out;
	}

	/* Fill in the initial PV metadata. */
	rc = lvm_initialize_new_pv(pv, segment);
	if (rc) {
		/* Nothing else refers to the metadata yet, so it must be
		 * released here or it would leak.
		 */
		EngFncs->engine_free(pv);
		goto out;
	}

	/* Create a new physical volume. On failure the callee releases the
	 * metadata itself, so it must not be freed again here.
	 */
	pv_entry = lvm_allocate_physical_volume(segment, pv);
	if (!pv_entry) {
		LOG_CRITICAL("Memory error creating PV for object %s\n",
			     segment->name);
	}

out:
	LOG_EXIT_PTR(pv_entry);
	return pv_entry;
}

/**
 * lvm_check_segment_for_pe_size
 *
 * Before a segment may go into a group, it must be large enough for the
 * group's PE size: the segment size divided by the PE size has to be at
 * least LVM_PE_SIZE_PV_SIZE_REL.
 *
 * A *pe_size of zero is replaced with LVM_MIN_PE_SIZE before checking.
 * If the segment is too small, warnings are logged, *pe_size is replaced
 * with segment->size / LVM_PE_SIZE_PV_SIZE_REL as adjusted by
 * lvm_check_pe_size(), and ENOSPC is returned.
 *
 * Returns 0 if the segment is large enough, ENOSPC otherwise.
 */
int lvm_check_segment_for_pe_size(storage_object_t * segment,
				  u_int32_t * pe_size)
{
	int rc = 0;

	LOG_ENTRY();

	if (!*pe_size) {
		*pe_size = LVM_MIN_PE_SIZE;
	}

	/* Large enough: nothing to report or adjust. */
	if (segment->size / *pe_size >= LVM_PE_SIZE_PV_SIZE_REL) {
		goto out;
	}

	LOG_WARNING("Object %s not large enough for PE size %d\n",
		    segment->name, *pe_size);
	LOG_WARNING("Object %s is %"PRIu64" sectors in size\n",
		    segment->name, segment->size);
	LOG_WARNING("Target PE size requires objects of %d or more sectors\n",
		    LVM_PE_SIZE_PV_SIZE_REL * *pe_size);

	/* Hand back a PE size derived from this segment's size. */
	*pe_size = segment->size / LVM_PE_SIZE_PV_SIZE_REL;
	lvm_check_pe_size(pe_size);
	rc = ENOSPC;

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * lvm_get_pv_for_segment
 *
 * Look up the physical volume entry that wraps this segment. The segment
 * must be consumed by a container owned by this plugin; that container's
 * group is then searched, slots 1 through MAX_PV.
 *
 * Returns the matching PV entry, or NULL if the segment is not in one of
 * this plugin's containers or no PV in the group refers to it.
 **/
lvm_physical_volume_t * lvm_get_pv_for_segment(storage_object_t * segment)
{
	lvm_physical_volume_t * match = NULL;
	lvm_volume_group_t * group;
	int slot;

	LOG_ENTRY();

	if (!segment->consuming_container ||
	    segment->consuming_container->plugin != my_plugin_record) {
		goto out;
	}

	group = segment->consuming_container->private_data;
	for (slot = 1; slot <= MAX_PV; slot++) {
		if (group->pv_list[slot] != NULL &&
		    group->pv_list[slot]->segment == segment) {
			match = group->pv_list[slot];
			break;
		}
	}

out:
	LOG_EXIT_PTR(match);
	return match;
}

/**
 * lvm_get_pv_for_name
 *
 * Search the group's PV list, slots 1 through MAX_PV, for a PV whose
 * segment name matches the given name. At most NAME_LEN characters are
 * compared.
 *
 * Returns the first matching PV entry, or NULL if there is none.
 **/
lvm_physical_volume_t * lvm_get_pv_for_name(char * name,
					    lvm_volume_group_t * group)
{
	lvm_physical_volume_t * match = NULL;
	int slot;

	LOG_ENTRY();

	for (slot = 1; slot <= MAX_PV; slot++) {
		if (group->pv_list[slot] != NULL &&
		    strncmp(name, group->pv_list[slot]->segment->name,
			    NAME_LEN) == 0) {
			match = group->pv_list[slot];
			break;
		}
	}

	LOG_EXIT_PTR(match);
	return match;
}

/**
 * lvm_update_pv_for_group
 *
 * Adjust a PV for the group it is being moved into: reset its allocation
 * counters, adopt the group's PE size, take the first free PV number in
 * the group, record the group's VG name, recompute the VGDA layout and PE
 * count, and replace the PE map with one sized for the new PE count.
 *
 * Returns 0 on success, ENOSPC if the group has no free PV number, or the
 * error from lvm_calculate_pe_total() or lvm_allocate_pe_map().
 **/
int lvm_update_pv_for_group(lvm_physical_volume_t * pv_entry,
			    lvm_volume_group_t * group)
{
	pv_disk_t * pv = pv_entry->pv;
	int rc = ENOSPC;

	LOG_ENTRY();

	/* Fields that need no calculation. */
	pv->pe_size		= group->vg->pe_size;
	pv->pv_status		= PV_ACTIVE;
	pv->pe_allocated	= 0;
	pv->lv_cur		= 0;

	/* Claim a PV number in the new group; the rest only happens if
	 * one was available.
	 */
	pv_entry->number = lvm_find_free_pv_number(group);
	if (pv_entry->number > 0) {
		pv->pv_number = pv_entry->number;

		/* Record the group's name. */
		memset(pv->vg_name, 0, NAME_LEN);
		lvm_translate_container_name_to_vg_name(group,
							(char *)pv->vg_name);

		/* Recompute the metadata layout and the PE count. */
		lvm_calculate_vgda_info(pv);
		rc = lvm_calculate_pe_total(pv);
		if (rc == 0) {
			/* Swap the old PE map for a correctly sized one. */
			lvm_deallocate_pe_map(pv_entry);
			rc = lvm_allocate_pe_map(pv_entry);
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * lvm_update_pv_for_no_group
 *
 * Roughly the reverse of lvm_update_pv_for_group. Called after a PV has
 * been removed from a group, it clears every group-related field in the
 * PV metadata, detaches the entry from the group and releases its PE map.
 **/
void lvm_update_pv_for_no_group(lvm_physical_volume_t * pv_entry)
{
	pv_disk_t * pv = pv_entry->pv;

	LOG_ENTRY();

	/* Group identity and status. */
	memset(pv->vg_name, 0, NAME_LEN);
	pv->pv_number			= 0;
	pv->pv_status			= 0;
	pv->lv_cur			= 0;

	/* On-disk locations of the group metadata. */
	pv->vg_on_disk.base		= 0;
	pv->vg_on_disk.size		= 0;
	pv->pv_uuidlist_on_disk.base	= 0;
	pv->pv_uuidlist_on_disk.size	= 0;
	pv->lv_on_disk.base		= 0;
	pv->lv_on_disk.size		= 0;
	pv->pe_on_disk.base		= 0;
	pv->pe_on_disk.size		= 0;

	/* Extent accounting. */
	pv->pe_size			= 0;
	pv->pe_total			= 0;
	pv->pe_allocated		= 0;
	pv->pe_start			= 0;

	/* In-memory entry. */
	pv_entry->number		= 0;
	pv_entry->group			= NULL;
	lvm_deallocate_pe_map(pv_entry);

	LOG_EXIT_VOID();
}

/**
 * lvm_get_available_stripes
 *
 * Count the PVs in this group, slots 1 through MAX_PV, that still have
 * extents which can be allocated to new LVs. A PV counts when its
 * allocated extents plus the extents reserved as move targets are fewer
 * than its total extents, the same test lvm_pv_has_available_extents()
 * applies.
 *
 * Returns the number of such PVs.
 **/
int lvm_get_available_stripes(lvm_volume_group_t * group)
{
	lvm_physical_volume_t * pv_entry;
	int pvs = 0;
	int i;

	LOG_ENTRY();
	for (i = 1; i <= MAX_PV; i++) {
		pv_entry = group->pv_list[i];
		/* Compare rather than test "total - used" for non-zero, so an
		 * over-committed PV (used > total) is never counted as
		 * having free extents.
		 */
		if (pv_entry &&
		    (pv_entry->pv->pe_allocated + pv_entry->move_extents) <
		    pv_entry->pv->pe_total) {
			pvs++;
		}
	}
	LOG_EXIT_INT(pvs);
	return pvs;
}

/**
 * lvm_get_selected_segment
 *
 * Take the first item from the given list and look up the PV entry that
 * corresponds to it via lvm_get_pv_for_segment(). The item is expected to
 * be a storage object consumed by an LVM container.
 *
 * Returns the PV entry, or NULL if the list yields no item or no PV
 * matches it.
 **/
lvm_physical_volume_t * lvm_get_selected_segment(list_anchor_t list)
{
	lvm_physical_volume_t * pv_entry;
	storage_object_t * first;

	LOG_ENTRY();

	first = EngFncs->first_thing(list, NULL);
	pv_entry = first ? lvm_get_pv_for_segment(first) : NULL;

	LOG_EXIT_PTR(pv_entry);
	return pv_entry;
}

/**
 * lvm_pv_has_available_extents
 *
 * Tell whether the PV still has unused extents: true when the total
 * number of extents exceeds the allocated extents plus the extents that
 * are targets of a move.
 *
 * Returns non-zero if extents are available, zero otherwise.
 **/
inline int lvm_pv_has_available_extents(lvm_physical_volume_t * pv_entry)
{
	pv_disk_t * pv = pv_entry->pv;

	return (pv->pe_total >
		(pv->pe_allocated + pv_entry->move_extents));
}

/**
 * lvm_pv_num_available_extents
 *
 * Compute how many extents on the PV are available: the total, less the
 * allocated extents, less the extents that are targets of a move.
 *
 * Returns that count.
 **/
inline int lvm_pv_num_available_extents(lvm_physical_volume_t * pv_entry)
{
	pv_disk_t * pv = pv_entry->pv;
	int available;

	available = pv->pe_total - pv->pe_allocated - pv_entry->move_extents;
	return available;
}

/**
 * lvm_pe_is_valid
 *
 * Check if the specified PE index is valid for the specified PV, i.e. it
 * is below the PV's total number of PEs. The index is unsigned, so no
 * lower-bound check is needed.
 *
 * Returns non-zero if the index is valid, zero otherwise.
 **/
inline int lvm_pe_is_valid(lvm_physical_volume_t * pv_entry, u_int32_t pe)
{
	return (pe < pv_entry->pv->pe_total);
}

/**
 * lvm_pe_is_available
 *
 * Tell whether a physical extent is unused: it has no LV number recorded
 * (not mapped to a volume) and no new_le set (not the target of a move).
 *
 * Returns non-zero if the PE is unused, zero otherwise.
 **/
inline int lvm_pe_is_available(lvm_physical_extent_t * pe)
{
	return !(pe->pe.lv_num || pe->new_le);
}

/**
 * lvm_get_available_objects
 *
 * Ask the engine for the disks, segments and regions of DATA_TYPE that
 * are valid input objects. This will also retrieve LVM regions, so let
 * the user beware. :)
 *
 * If a container is given, the search uses that container's disk group;
 * when the container has no disk group, NO_DISK_GROUP is added to the
 * search flags. With no container, no disk group is passed.
 *
 * Returns the engine's return code; the list is returned through objects.
 **/
int lvm_get_available_objects(storage_container_t *container,
			      list_anchor_t *objects)
{
	storage_container_t *disk_group = NULL;
	object_search_flags_t flags = VALID_INPUT_OBJECT;
	int rc;

	LOG_ENTRY();

	if (container) {
		disk_group = container->disk_group;
		if (!disk_group) {
			flags |= NO_DISK_GROUP;
		}
	}

	rc = EngFncs->get_object_list(DISK | SEGMENT | REGION, DATA_TYPE,
				      NULL, disk_group, flags, objects);

	LOG_EXIT_INT(rc);
	return rc;
}

