/*
 * Copyright (c) 2001-2003 The Trustees of Indiana University.  
 *                         All rights reserved.
 * Copyright (c) 1998-2001 University of Notre Dame. 
 *                         All rights reserved.
 * Copyright (c) 1994-1998 The Ohio State University.  
 *                         All rights reserved.
 * 
 * This file is part of the LAM/MPI software package.  For license
 * information, see the LICENSE file in the top level directory of the
 * LAM/MPI source distribution.
 * 
 * $HEADER$
 *
 * $Id: ssi_rpi_gm_dreg.c,v 1.7.2.8 2004/03/12 01:19:11 vsahay Exp $
 *
 *	Function:	- dynamic pinned memory handling
 */

#include <lam_config.h>

#include <string.h>

#include <args.h>
#include <rpisys.h>
#include <etc_misc.h>
#include <lamdebug.h>
#include <lamdebug.h>

#include <rpi_gm.h>
#include <rpi_gm_dreg.h>
#include <rpi_gm_interval.h>


/* Size of the stack buffers used by dma_malloc() to format the rank
   and the requested length for the help message */
#define ERRORSTR_SIZE 32

/* Set to non-zero to compile in the allocation bookkeeping below
   (mem_debug_malloc / mem_debug_free / lam_ssi_rpi_gm_dreg_query) */
#define WANT_MEM_DEBUG 0


#if !WANT_MEM_DEBUG

/* Bookkeeping disabled: the MEM_DEBUG_* hooks expand to nothing, so
   the calls sprinkled through this file compile away */

#define MEM_DEBUG_MALLOC(a, b, c)
#define MEM_DEBUG_FREE(a, b)
#define MEM_DEBUG_QUERY(a)

#else /* WANT_MEM_DEBUG */

#include <all_list.h>
/* Bookkeeping state, one slot per mem_debug_t category (ENV, SHORT,
   LONG); every array below is indexed by that enum */

/* number of allocations currently counted as outstanding */
static int num_allocs[3] = { 0, 0, 0 };
/* sum of the lengths of the outstanding allocations */
static int num_bytes[3] = { 0, 0, 0 };
/* next sequence number handed to a recorded allocation */
static int alloc_index[3] = { 0, 0, 0 };
/* category of a tracked allocation; the values double as array indices */
typedef enum { ENV, SHORT, LONG } mem_debug_t;
/* list of alloc_item records, created on first use by mem_debug_malloc() */
static LIST *allocs[3] = { NULL, NULL, NULL };
/*
 * One record per tracked DMA allocation.
 */
struct alloc_item {
  char *ptr;      /* address of the buffer; this is the comparison key */
  int length;     /* length that was recorded for the buffer */
  int index;      /* sequence number assigned when it was recorded */
};

/*
 *       mycomp()
 *
 *       Function:        - order two alloc_items by buffer address
 *       Accepts:         - ptr to the first alloc_item
 *                        - ptr to the second alloc_item
 *       Returns:         - -1, 0 or 1 as the first address is below,
 *                          equal to, or above the second
 *
 * The addresses are compared explicitly instead of being subtracted:
 * a pointer difference is wider than int on 64-bit platforms, so
 * returning it as an int can yield 0 for two distinct buffers (e.g.,
 * addresses exactly 4GB apart) or the wrong sign.
 */
static int 
mycomp(void *a, void *b) {
  struct alloc_item *aa = (struct alloc_item *) a;
  struct alloc_item *bb = (struct alloc_item *) b;

  if (aa->ptr < bb->ptr)
    return -1;
  if (aa->ptr > bb->ptr)
    return 1;
  return 0;
}

#define MEM_DEBUG_MALLOC(a, b, c) mem_debug_malloc((a), (b), (c))
/*
 *       mem_debug_malloc()
 *
 *       Function:        - record a newly obtained DMA buffer
 *       Accepts:         - category of the allocation (ENV, SHORT, LONG)
 *                        - the buffer
 *                        - its length in bytes
 *       Returns:         - nothing
 */
static void
mem_debug_malloc(mem_debug_t t, void *buffer, int len)
{
  struct alloc_item ai;

  /* A caller may hand us the result of a failed allocation.  There is
     nothing outstanding to track in that case, so do not let it
     inflate the counters or land in the list. */

  if (buffer == NULL)
    return;

  ++num_allocs[t];
  num_bytes[t] += len;

  /* Create the per-category list on first use */

  if (allocs[t] == NULL) {
    allocs[t] = al_init(sizeof(struct alloc_item), mycomp);

    /* NOTE(review): assumes al_init() reports failure by returning
       NULL -- confirm against all_list.  Without a list we can only
       keep the counters. */

    if (allocs[t] == NULL)
      return;
  }

  ai.ptr = buffer;
  ai.length = len;
  ai.index = alloc_index[t]++;
  al_insert(allocs[t], &ai);
#if 0
  printf("MCW rank %d allocated DMA buf %p (index %d, len %d)\n", 
         lam_myproc->p_gps.gps_grank, buffer, ai.index, len);
#endif
}

#define MEM_DEBUG_FREE(a, b) mem_debug_free((a), (b))
/*
 *       mem_debug_free()
 *
 *       Function:        - drop the record of a DMA buffer being released
 *       Accepts:         - category of the allocation (ENV, SHORT, LONG)
 *                        - the buffer
 *       Returns:         - nothing
 *
 * Prints a warning when the buffer has no record.  In that case the
 * statistics are left alone: num_bytes cannot be adjusted without the
 * recorded length, and decrementing only num_allocs would make the
 * two counters disagree.
 */
static void
mem_debug_free(mem_debug_t t, void *buf)
{
  struct alloc_item key;
  struct alloc_item *found = NULL;

  if (allocs[t] != NULL) {
    /* Only ptr takes part in the comparison (see mycomp()); the other
       fields are set so that no indeterminate values are passed on */
    key.ptr = buf;
    key.length = 0;
    key.index = 0;
    found = al_find(allocs[t], &key);
  }

  if (found == NULL) {
    printf("WARNING: MCW rank %d dma free'd DMA buf that was never alloced!\n",
           lam_myproc->p_gps.gps_grank);
    return;
  }

#if 0
  printf("MCW rank %d good dma free %p (index %d)\n",
         lam_myproc->p_gps.gps_grank, buf, found->index);
#endif
  --num_allocs[t];
  num_bytes[t] -= found->length;
  al_delete(allocs[t], found);
}

#define MEM_DEBUG_QUERY lam_ssi_rpi_gm_dreg_query
/*
 *       dreg_query()
 *
 *       Function:        - print the allocation statistics
 *       Accepts:         - category to report, or -1 to report every
 *                          category followed by the grand total
 *       Returns:         - nothing
 */
void
lam_ssi_rpi_gm_dreg_query(mem_debug_t t)
{
  int mallocs;
  int bytes;

  if (t == -1) {
    /* one line per category first, then the totals below */
    lam_ssi_rpi_gm_dreg_query(ENV);
    lam_ssi_rpi_gm_dreg_query(SHORT);
    lam_ssi_rpi_gm_dreg_query(LONG);

    mallocs = num_allocs[0] + num_allocs[1] + num_allocs[2];
    bytes = num_bytes[0] + num_bytes[1] + num_bytes[2];
  } else {
    mallocs = num_allocs[t];
    bytes = num_bytes[t];
  }

  printf("**** MCW rank %2d DMA MALLOC QUERY (type %d): "
         "num_mallocs %d, num_bytes %d\n", 
         lam_myproc->p_gps.gps_grank, t, mallocs, bytes);
}

#endif

/*
 * local variables
 */
/* GM port handed to every gm_dma_malloc() / gm_dma_free() call in
   this file; set by lam_ssi_rpi_gm_dma_init() */
static struct gm_port *my_port = NULL;
/* Singly linked list of idle envelope buffers and its length */
static struct lam_ssi_rpi_gm_buf_t *env_pool_head = NULL;
static int env_pool_size = 0;
/* Singly linked list of idle short-message buffers and its length */
static struct lam_ssi_rpi_gm_buf_t *short_pool_head = NULL;
static int short_pool_size = 0;


/*
 * local functions
 */
/* build the initial pool of pinned buffers for one list */
static int do_init(struct lam_ssi_rpi_gm_buf_t **head, int *list_size, 
		   int buffer_size);
/* take a buffer from the pool, or pin a new one if the pool is empty */
static char* do_malloc(struct lam_ssi_rpi_gm_buf_t **head, int *list_size, 
		       long buffer_size);
/* put a buffer back into the pool */
static void do_free(struct lam_ssi_rpi_gm_buf_t **head, int *size, char *buf);
/* release every buffer in the pool and empty the list */
static void do_destroy(struct lam_ssi_rpi_gm_buf_t **head);
/* gm_dma_malloc() wrapper that reports failures through show_help_file() */
static char* dma_malloc(long size);


/*
 *       dma_init()
 *
 *       Function:        - remember the GM port and build the initial
 *                          envelope and short-message buffer pools
 *       Accepts:         - the lam_ssi_rpi_proc whose GM port is used
 *       Returns:         - 0 on success, LAMERROR if either pool
 *                          could not be set up
 */
int
lam_ssi_rpi_gm_dma_init(struct lam_ssi_rpi_proc *p)
{
  my_port = p->cp_gm_port;

  /* The short pool is only attempted once the envelope pool exists */

  if (do_init(&env_pool_head, &env_pool_size, 
              lam_ssi_rpi_gm_dma_env_len) == 0 &&
      do_init(&short_pool_head, &short_pool_size, 
              lam_ssi_rpi_gm_dma_short_len) == 0)
    return 0;

  return LAMERROR;
}


/*
 *       dma_env_malloc()
 *
 *       Function:        - hand out a pinned envelope buffer, taken
 *                          from the envelope pool when one is idle
 *       Accepts:         - nothing
 *       Returns:         - the buffer, or NULL if the pool was empty
 *                          and a new buffer could not be allocated
 */
struct lam_ssi_rpi_gm_envl *
lam_ssi_rpi_gm_dma_env_malloc(void)
{
  char *envelope;

  envelope = do_malloc(&env_pool_head, &env_pool_size, 
                       lam_ssi_rpi_gm_dma_env_len);
  lam_debug_cond((lam_ssi_rpi_gm_did, "env_dma_malloc: returning %p\n", 
                  envelope));
  MEM_DEBUG_MALLOC(ENV, envelope, lam_ssi_rpi_gm_dma_env_len);

  return (struct lam_ssi_rpi_gm_envl *) envelope;
}


/*
 *       dma_short_malloc()
 *
 *       Function:        - hand out a pinned short-message buffer,
 *                          taken from the short pool when one is idle
 *       Accepts:         - nothing
 *       Returns:         - the buffer, or NULL if the pool was empty
 *                          and a new buffer could not be allocated
 */
char *
lam_ssi_rpi_gm_dma_short_malloc(void)
{
  char *short_buf;

  short_buf = do_malloc(&short_pool_head, &short_pool_size, 
                        lam_ssi_rpi_gm_dma_short_len);
  lam_debug_cond((lam_ssi_rpi_gm_did, "short_dma_malloc: returning %p\n", 
                  short_buf));
  MEM_DEBUG_MALLOC(SHORT, short_buf, lam_ssi_rpi_gm_dma_short_len);

  return short_buf;
}


/*
 *       dma_env_free()
 *
 *       Function:        - put an envelope buffer back into the
 *                          envelope pool
 *       Accepts:         - the envelope buffer being given back
 *       Returns:         - nothing
 */
void
lam_ssi_rpi_gm_dma_env_free(struct lam_ssi_rpi_gm_envl *buf)
{
  /* the pool stores plain byte pointers */
  char *raw = (char *) buf;

  do_free(&env_pool_head, &env_pool_size, raw);
  MEM_DEBUG_FREE(ENV, raw);
}


/*
 *       dma_short_free()
 *
 *       Function:        - put a short-message buffer back into the
 *                          short pool
 *       Accepts:         - the short-message buffer being given back
 *       Returns:         - nothing
 */
void
lam_ssi_rpi_gm_dma_short_free(char *buf)
{
  struct lam_ssi_rpi_gm_buf_t **pool = &short_pool_head;
  int *pool_count = &short_pool_size;

  do_free(pool, pool_count, buf);
  MEM_DEBUG_FREE(SHORT, buf);
}


/*
 *       dma_malloc()
 *
 *       Function:        - arbitrary length pinned allocation, for
 *                          long messages; no pooling
 *       Accepts:         - number of bytes to allocate
 *       Returns:         - the pinned buffer, or NULL on failure
 *
 * Uses a utility function (below) that doesn't do pooling, but does
 * print a nice error message if an error occurs.
 */
char * 
lam_ssi_rpi_gm_dma_malloc(long length)
{
  char *ret = dma_malloc(length);

  if (ret == NULL)
    return NULL;

  /* Only successful allocations go into the debug bookkeeping; a
     failed one must not be counted as outstanding memory */

  MEM_DEBUG_MALLOC(LONG, ret, length);

  /* Save the fact that this is pinned memory so that we don't try to
     pin it later */

  lam_ssi_rpi_gm_interval_use(ret, length, 0, 0);

  /* All done */

  return ret;
}


/*
 *       dma_free()
 *
 *       Function:        - arbitrary length free, for long messages;
 *                          no pooling, the memory goes straight back
 *                          to GM
 *       Accepts:         - the buffer to release
 *       Returns:         - nothing
 *
 * NOTE(review): lam_ssi_rpi_gm_dma_malloc() registers the buffer with
 * lam_ssi_rpi_gm_interval_use(), and nothing here undoes that --
 * confirm whether the interval code expects to be told about the
 * release.
 */
void 
lam_ssi_rpi_gm_dma_free(char* buf)
{
  /* give the pinned memory back to GM */
  gm_dma_free(my_port, buf);

  /* then drop it from the debug bookkeeping */
  MEM_DEBUG_FREE(LONG, buf);
}


/*
 *       dma_destroy()
 *
 *       Function:        - release everything held by the envelope
 *                          and short-message pools
 *       Accepts:         - nothing
 *       Returns:         - nothing
 */
void 
lam_ssi_rpi_gm_dma_destroy(void)
{
  /* tear down both lists ... */
  do_destroy(&env_pool_head);
  do_destroy(&short_pool_head);

  /* ... and note that they are now empty */
  env_pool_size = 0;
  short_pool_size = 0;

  MEM_DEBUG_QUERY(-1);
}


/***************************************************************************/

/*
 *       do_init()
 *
 *       Function:        - local function to indirect setting up a
 *                          single list: pre-allocates a few pinned
 *                          buffers and links them into a pool
 *       Accepts:         - where to store the head of the pool
 *                        - where to store the number of pooled buffers
 *                        - size in bytes of each pinned buffer
 *       Returns:         - 0 on success, LAMERROR if an allocation
 *                          failed
 *
 * The list built so far is stored through head / list_size even when
 * LAMERROR is returned, so that the buffers already obtained stay
 * reachable and do_destroy() can release them.
 */
static int 
do_init(struct lam_ssi_rpi_gm_buf_t **head, int *list_size, int buffer_size)
{
  /* Make 10 initial buffers.  The number ten was chosen totally
     arbitrarily, and doesn't really matter because
     lam_ssi_rpi_gm_gm_setup() is going to call the associated
     gm_*_malloc functions to allocate as many buffers as it wants
     (and it will likely be more than 10!). */

  enum { INITIAL_POOL_BUFFERS = 10 };

  int i;
  int pool_size = 0;
  int ret = 0;
  struct lam_ssi_rpi_gm_buf_t *pool_head = NULL;
  struct lam_ssi_rpi_gm_buf_t *buf_struc;

  for (i = 0; i < INITIAL_POOL_BUFFERS; ++i) {

    /* create the list node */

    buf_struc = malloc(sizeof(*buf_struc));
    if (buf_struc == NULL) {
      ret = LAMERROR;
      break;
    }

    /* create the pinned buffer; don't leak the node if that fails */

    buf_struc->buffer = dma_malloc(buffer_size);
    if (buf_struc->buffer == NULL) {
      free(buf_struc);
      ret = LAMERROR;
      break;
    }
    MEM_DEBUG_MALLOC((buffer_size == lam_ssi_rpi_gm_dma_env_len) ? 
                     ENV : SHORT, buf_struc->buffer, buffer_size);

    /* add to top of list */

    buf_struc->next = pool_head;
    pool_head = buf_struc;

    /* adjust pool size */

    ++pool_size;
  }

  /* Save the results (possibly a partial pool if we bailed out) */

  *head = pool_head;
  *list_size = pool_size;

  return ret;
}


/*
 *       do_malloc()
 *
 *       Function:        - get a pinned buffer, preferring the pool
 *       Accepts:         - head of the pool to draw from
 *                        - counter tracking the size of that pool
 *                        - size to allocate if the pool is empty
 *       Returns:         - the buffer (whatever dma_malloc() returned
 *                          if the pool was empty)
 */
static char* 
do_malloc(struct lam_ssi_rpi_gm_buf_t **head, int *list_size, 
          long buffer_length)
{
  struct lam_ssi_rpi_gm_buf_t *node = *head;
  char *pinned;

  /* nothing pooled: pin a brand new buffer */

  if (node == NULL)
    return dma_malloc(buffer_length);

  /* unlink the first node ... */

  *head = node->next;
  --(*list_size);

  /* ... keep its buffer and discard the node itself */

  pinned = node->buffer;
  free(node);

  return pinned;
}


/*
 *       dma_malloc()
 *
 *       Function:        - allocate pinned memory from GM and report
 *                          a failure through the help file
 *       Accepts:         - number of bytes to allocate
 *       Returns:         - the buffer, or NULL with errno set to
 *                          EMPIMEMERROR
 */
static char *
dma_malloc(long len)
{
  char *buffer;
  char myrankstr[ERRORSTR_SIZE];
  char lenstr[ERRORSTR_SIZE];

  /* do the allocation */
  buffer = gm_dma_malloc(my_port, len);
  if (buffer != NULL)
    return buffer;

  /* error path */
  errno = EMPIMEMERROR;  /* set errno so we know that memory has
                            failed */

  /* Bound the formatting by the real size of each buffer rather
     than a separately maintained constant */
  snprintf(myrankstr, sizeof(myrankstr), "%d", lam_myproc->p_gps.gps_grank);
  snprintf(lenstr, sizeof(lenstr), "%ld", len);

#if WANT_MEM_DEBUG
  MEM_DEBUG_QUERY(-1);

  /* num_allocs / num_bytes are per-category int arrays, so print
     their totals with conversions that match the argument types */
  printf("******** DMA MALLOC FAIL: MCW rank %d num_mallocs %d, "
         "num_bytes %d (this alloc: + %ld = %ld )\n",
         lam_myproc->p_gps.gps_grank,
         num_allocs[0] + num_allocs[1] + num_allocs[2],
         num_bytes[0] + num_bytes[1] + num_bytes[2],
         len,
         (long) (num_bytes[0] + num_bytes[1] + num_bytes[2]) + len);
#endif
  show_help_file("lam-ssi-rpi-gm-helpfile", "rpi-gm", "gm-dma-malloc-fail", 
                 myrankstr, lenstr, NULL);
  errno = EMPIMEMERROR;  /* show_help() seems to reset errno. tut
                            tut. */

  return NULL;
}

static void
do_free(struct lam_ssi_rpi_gm_buf_t **head, int *list_size, char *buf)
{
  /* construct slist info */

  struct lam_ssi_rpi_gm_buf_t *buf_struc;
  buf_struc = malloc(sizeof(*buf_struc));

  /* attach buffer */

  buf_struc->buffer = buf;

  /* add guy to list */

  if (*head == NULL) {
    *head = buf_struc;
    buf_struc->next = NULL;
  } else {
#if LAM_WANT_DEBUG
    /* If we're in debugging mode, add this to the *end* of the list
       so as to recycle buffers as little as possible -- or, at least
       re-use them with as great a period as possible */

    struct lam_ssi_rpi_gm_buf_t *cur = *head;
    buf_struc->next = NULL;

    while (cur->next != NULL)
      cur = cur->next;
    cur->next = buf_struc;
#else
    buf_struc->next = *head;
    *head = buf_struc;
#endif
  }

  /* adjust pool size */

  ++(*list_size);
}


/*
 *       do_destroy()
 *
 *       Function:        - release every node of a pool and leave the
 *                          list empty
 *       Accepts:         - head of the pool
 *       Returns:         - nothing
 *
 * A node's pinned buffer is handed back to GM only when a GM port is
 * set; the node itself is always freed.
 */
static void 
do_destroy(struct lam_ssi_rpi_gm_buf_t **head)
{
  struct lam_ssi_rpi_gm_buf_t *node = *head;
  struct lam_ssi_rpi_gm_buf_t *following;

  while (node != NULL) {
    /* grab the link before the node goes away */
    following = node->next;

    if (my_port != 0 && node->buffer != NULL)
      gm_dma_free(my_port, node->buffer);
    node->buffer = 0;

    free(node);
    node = following;
  }

  *head = 0;
}


