/*
    Theseus - maximum likelihood superpositioning of macromolecular structures

    Copyright (C) 2004-2008 Douglas L. Theobald

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the:

    Free Software Foundation, Inc.,
    59 Temple Place, Suite 330,
    Boston, MA  02111-1307  USA

    -/_|:|_|_\-
*/

/* -/_|:|_|_\- */
#include "Embed_local.h"


/*  Allocate the matrices needed by the Lele distance-matrix covariance
    calculations: a vlen x vlen inner-product matrix for every structure in
    the array and for the average structure. DistMatsAlloc() is called last.
*/
void
SetupLele(CoordsArray *cdsA)
{
    const int       natoms = cdsA->vlen;
    int             s = 0;

    while (s < cdsA->cnum)
    {
        cdsA->coords[s]->innerprod = MatInit(natoms, natoms);
        ++s;
    }

    cdsA->avecoords->innerprod = MatInit(natoms, natoms);

    DistMatsAlloc(cdsA);
}


/*  Invert the covariance matrix by Cholesky decomposition (LAPACK
    dpotrf/dpotri) and store the symmetric result in cdsA->WtMat.

    This function does not write to cdsA->CovMat itself: it builds a symmetric
    working copy from the lower triangle and diagonal of CovMat, normalizes
    the copy with NormalizeCovMat(), and factors/inverts the copy in place.

    If dpotrf reports a leading minor that is not positive definite
    (info > 0), a message is written to stderr, the working copy is released,
    and the job is handed to CovInvIndefLAPACK().
*/
void
CovInvLAPACK(CoordsArray *cdsA)
{
    int                      i, j;
    long int                 info = 0;
    char                     upper = 'U';
    long int                 vlen = (long int) cdsA->vlen;
    double                 **tmpmat = NULL;

    /* symmetric working copy taken from the lower triangle of CovMat */
    tmpmat = MatInit(cdsA->vlen, cdsA->vlen);
    for (i = 0; i < cdsA->vlen; ++i)
    {
        tmpmat[i][i] = cdsA->CovMat[i][i];
        for (j = 0; j < i; ++j)
            tmpmat[j][i] = tmpmat[i][j] = cdsA->CovMat[i][j];
    }

    /* normalize the copy of CovMat */
    NormalizeCovMat(tmpmat, cdsA->vlen);

    /* LAPACK dpotrf and dpotri compute the inverse using cholesky decomposition */
    DPOTRF(&upper, &vlen, &tmpmat[0][0], &vlen, &info);

    if (info == 0)
    {
        DPOTRI(&upper, &vlen, &tmpmat[0][0], &vlen, &info);
    }
    else if (info > 0)
    {
        fprintf(stderr,
                " \n\n LAPACK dpotrf Choleski decomposition of covariance matrix choked; \n");
        fprintf(stderr,
                "           leading minor of order %ld is not positive definite \n", info);
        fprintf(stderr,
                "           using LAPACK dsytrf/dsytri instead \n");

        /* the working copy is no longer needed; free it before handing off
           (this early return used to leak it) */
        MatDestroy(&tmpmat);
        CovInvIndefLAPACK(cdsA);
        return;
    }

    /* NOTE(review): a negative info from dpotrf (illegal argument) and any
       failure reported by dpotri are not checked; in those cases WtMat is
       filled from whatever tmpmat holds. */

    /* The result is read from the row-major lower triangle (the triangle
       LAPACK calls 'U' in column-major order, assuming MatInit() returns one
       contiguous block) and mirrored into the upper triangle of WtMat. */
    for (i = 0; i < cdsA->vlen; ++i)
    {
        cdsA->WtMat[i][i] = tmpmat[i][i];
        SCREAMF(cdsA->WtMat[i][i]);
        for (j = 0; j < i; ++j)
            cdsA->WtMat[j][i] = cdsA->WtMat[i][j] = tmpmat[i][j];
    }

    MatDestroy(&tmpmat);
}


/*  Invert an indefinite symmetric covariance matrix with LAPACK
    dsytrf/dsytri and store the symmetric result in cdsA->WtMat.

    cdsA->CovMat is normalized with NormalizeCovMat() and then factored and
    inverted IN PLACE, so on return CovMat no longer holds the covariance
    matrix. The result is read from the row-major lower triangle of CovMat
    and mirrored into both triangles of WtMat.

    Any LAPACK failure (illegal argument, or an exactly singular matrix) is
    reported on stderr and terminates the program.
*/
void
CovInvIndefLAPACK(CoordsArray *cdsA)
{
    int                      i, j;
    long int                 info = 0;
    char                     upper = 'U';
    long int                 vlen = (long int) cdsA->vlen;
    long int                 lwork = -1;
    long int                *ipiv = NULL;
    double                  *work = NULL;
    double                   wkopt = 0.0;

    ipiv = (long int *) malloc(vlen * sizeof(long int));

    /* normalize the CovMat */
    NormalizeCovMat(cdsA->CovMat, cdsA->vlen);

    /* Workspace query: with lwork == -1 dsytrf only reports the optimal
       workspace size (in wkopt) and does not touch the matrix. */
    DSYTRF(&upper, &vlen, &cdsA->CovMat[0][0], &vlen, ipiv, &wkopt, &lwork, &info);

    if (info == 0)
    {
        /* The real factorization. It must operate on the same matrix that
           was normalized above and that is copied into WtMat below. */
        lwork = (long int) wkopt;
        work = (double *) malloc(lwork * sizeof(double));
        DSYTRF(&upper, &vlen, &cdsA->CovMat[0][0], &vlen, ipiv, work, &lwork, &info);
        free(work);
        work = NULL;
    }

    /* covers both the workspace query and the factorization itself */
    if (info < 0)
    {
        fprintf(stderr, "\n\n");
        fprintf(stderr, " ERROR120: the %ld-th argument had an illegal value \n\n", info);
        exit(EXIT_FAILURE);
    }
    else if (info > 0)
    {
        fprintf(stderr, "\n\n");
        fprintf(stderr, " ERROR120: D(%ld, %ld) is exactly zero; matrix is singular \n\n", info, info);
        exit(EXIT_FAILURE);
    }

    /* dsytri needs a workspace of vlen doubles */
    work = (double *) malloc(vlen * sizeof(double));
    DSYTRI(&upper, &vlen, &cdsA->CovMat[0][0], &vlen, ipiv, work, &info);

    if (info > 0)
    {
        fprintf(stderr, "\n\n");
        fprintf(stderr, " ERROR121: D(%ld, %ld) is exactly zero; matrix is singular \n\n", info, info);
        exit(EXIT_FAILURE);
    }
    else if (info < 0)
    {
        fprintf(stderr, "\n\n");
        fprintf(stderr, " ERROR121: the %ld-th argument had an illegal value \n\n", info);
        exit(EXIT_FAILURE);
    }

    /* copy lower to upper to WtMat */
    for (i = 0; i < cdsA->vlen; ++i)
    {
        cdsA->WtMat[i][i] = cdsA->CovMat[i][i];
        SCREAMF(cdsA->WtMat[i][i]);
        for (j = 0; j < i; ++j)
            cdsA->WtMat[j][i] = cdsA->WtMat[i][j] = cdsA->CovMat[i][j];
    }

    free(ipiv);
    free(work);
    /*MatPrint(cdsA->WtMat, cdsA->vlen);*/
    SCREAMS("dsytrf/dsytri LAPACK");
}


/*  Compute the Lele covariance matrix of the structures in cdsA.

    The N x N inner-product matrix of every structure and of the average
    structure is refreshed with CoordsInnerProd(), then

        CovMat[i][j] = 1/(3 * cnum) * sum_k (innerprod_k[i][j] - innerprod_ave[i][j])

    Negative diagonal entries are clamped to zero, and the diagonal is copied
    to cdsA->var.
*/
void
CalcLeleCovMat(CoordsArray *cdsA)
{
    double          idf, cov_sum;
    int             i, j, k;

    idf = 1.0 / (3.0 * (double)(cdsA->cnum)); /* ML, biased, maybe should be n-1 to be unbiased?? */

    for (i = 0; i < cdsA->cnum; ++i)
        CoordsInnerProd(cdsA->coords[i]);

    CoordsInnerProd(cdsA->avecoords);

    for (i = 0; i < cdsA->vlen; ++i)
    {
        for (j = 0; j < cdsA->vlen; ++j)
        {
            cov_sum = 0.0;
            for (k = 0; k < cdsA->cnum; ++k)
                cov_sum += (cdsA->coords[k]->innerprod[i][j] - cdsA->avecoords->innerprod[i][j]);

            cdsA->CovMat[i][j] = cov_sum * idf;
        }
    }

    /* a variance cannot be negative: clamp the diagonal, then export it */
    for (i = 0; i < cdsA->vlen; ++i)
    {
        if (cdsA->CovMat[i][i] < 0.0)
            cdsA->CovMat[i][i] = 0.0;

        cdsA->var[i] = cdsA->CovMat[i][i];
    }
}


/*  Compute the per-atom Lele variances into cdsA->var:

        var[i] = 1/(3 * cnum) * sum_j |r_j(i)|^2  -  |ave(i)|^2 / 3

    where r_j(i) is atom i of structure j and ave(i) is atom i of the average
    structure. This is the diagonal of the matrix built by CalcLeleCovMat(),
    without the clamping of negative values done there.

    The caller must find the average structure first.
*/
void
CalcLeleVariances(CoordsArray *cdsA)
{
    double          idf;
    double         *var = cdsA->var;
    const Coords  **coords = (const Coords **) cdsA->coords;
    const double   *avex = (const double *) cdsA->avecoords->x,
                   *avey = (const double *) cdsA->avecoords->y,
                   *avez = (const double *) cdsA->avecoords->z;

    int             i, j;

    /* Same normalization as CalcLeleCovMat(): average over the cnum
       structures and the three dimensions. The mean term below is divided by
       3, so the sum of squares has to be as well, or identical structures
       would not give zero variance. (ML, biased; maybe should be n-1 to be
       unbiased??) */
    idf = 1.0 / (3.0 * (double)(cdsA->cnum));

    for (i = 0; i < cdsA->vlen; ++i)
    {
        var[i] = 0.0;
        for (j = 0; j < cdsA->cnum; ++j)
        {
            var[i] += coords[j]->x[i] * coords[j]->x[i]
                    + coords[j]->y[i] * coords[j]->y[i]
                    + coords[j]->z[i] * coords[j]->z[i];
        }

        var[i] *= idf;
        var[i] -= (avex[i] * avex[i] + avey[i] * avey[i] + avez[i] * avez[i]) / 3.0;
    }
}


/*  Left-multiply the coordinates by a square matrix:

        (N x N)(N x 3) = (N x 3),   N = coords->vlen

    The columns of the right-hand factor are coords->x, ->y and ->z; the
    columns of the product are written to coords->covx, ->covy and ->covz.
*/
void
NNxCoords(double **mat, Coords *coords)
{
    int             row, col;

    for (row = 0; row < coords->vlen; ++row)
    {
        const double   *matrow = mat[row];

        coords->covx[row] = 0.0;
        coords->covy[row] = 0.0;
        coords->covz[row] = 0.0;

        for (col = 0; col < coords->vlen; ++col)
        {
            coords->covx[row] += matrow[col] * coords->x[col];
            coords->covy[row] += matrow[col] * coords->y[col];
            coords->covz[row] += matrow[col] * coords->z[col];
        }
    }
}


/*  Right-multiply the coordinates by a square matrix:

        (3 x N)(N x N) = (3 x N),   N = coords->vlen

    The rows of the left-hand factor are coords->x, ->y and ->z; the rows of
    the product are written to coords->covx, ->covy and ->covz.
*/
void
CoordsxNN(Coords *coords, const double **mat)
{
    int             col, row;

    for (col = 0; col < coords->vlen; ++col)
    {
        coords->covx[col] = 0.0;
        coords->covy[col] = 0.0;
        coords->covz[col] = 0.0;

        for (row = 0; row < coords->vlen; ++row)
        {
            coords->covx[col] += coords->x[row] * mat[row][col];
            coords->covy[col] += coords->y[row] * mat[row][col];
            coords->covz[col] += coords->z[row] * mat[row][col];
        }
    }
}


/*  double
    RadGyrSqr(const Coords *coords)

    Returns the unweighted, unaveraged squared radius of gyration of a
    molecule: the sum over all atoms of x^2 + y^2 + z^2. That equals the sum
    of squared distances from the centroid only if the molecule has already
    been centered, which this function assumes.
*/
double
RadGyrSqr(const Coords *coords)
{
    double          total = 0.0;
    double          r2;
    int             atom;

    for (atom = 0; atom < coords->vlen; ++atom)
    {
        r2 = mysquare(coords->x[atom]) + mysquare(coords->y[atom]) + mysquare(coords->z[atom]);
        total += r2;
    }

    return(total);
}


/*  Weighted variant of RadGyrSqr(): returns the sum over all atoms of
    weights[i] * (x^2 + y^2 + z^2). weights must hold at least coords->vlen
    entries.
*/
double
RadGyrSqrW(const Coords *coords, const double *weights)
{
    double          total = 0.0;
    double          r2;
    int             atom;

    for (atom = 0; atom < coords->vlen; ++atom)
    {
        r2 = mysquare(coords->x[atom]) + mysquare(coords->y[atom]) + mysquare(coords->z[atom]);
        total += weights[atom] * r2;
    }

    return(total);
}


/*  Matrix-weighted variant of RadGyrSqr().

    CoordsxNN() first fills coords->covx/covy/covz with the coordinates
    multiplied by weightmat (overwriting those buffers). The return value is
    the sum over atoms of covx*x + covy*y + covz*z.
*/
double
RadGyrSqrCov(Coords *coords, const double **weightmat)
{
    double          total = 0.0;
    int             atom = 0;

    CoordsxNN(coords, weightmat);

    while (atom < coords->vlen)
    {
        total += (coords->covx[atom] * coords->x[atom]) +
                 (coords->covy[atom] * coords->y[atom]) +
                 (coords->covz[atom] * coords->z[atom]);
        ++atom;
    }

    return(total);
}


/*  Fill coords->innerprod with the N x N matrix of dot products between
    atom position vectors:

        (N x 3)(3 x N) = (N x N),   innerprod[i][j] = r_i . r_j

    Only the lower triangle and diagonal are computed; every value is
    mirrored into the upper triangle.
*/
void
CoordsInnerProd(Coords *coords)
{
    int             row, col;
    double          dot;
    double        **gram = coords->innerprod;

    for (row = 0; row < coords->vlen; ++row)
    {
        for (col = 0; col <= row; ++col)
        {
            /* accumulate onto +0.0 rather than assigning directly, so that a
               -0.0 dot product is stored as +0.0 */
            dot = 0.0;
            dot += (coords->x[row] * coords->x[col])
                 + (coords->y[row] * coords->y[col])
                 + (coords->z[row] * coords->z[col]);

            gram[row][col] = dot;
            gram[col][row] = dot;
        }
    }
}


/*  Fill coords->innerprod2 with the symmetric 3 x 3 matrix of coordinate
    cross-sums:

        (3 x N)(N x 3) = (3 x 3)

        | sum(x*x)  sum(x*y)  sum(x*z) |
        | sum(x*y)  sum(y*y)  sum(y*z) |
        | sum(x*z)  sum(y*z)  sum(z*z) |

    The matrix is allocated on first use. The sums are accumulated in locals
    and then stored, so every call overwrites the previous contents instead
    of adding onto them, and the result does not depend on MatInit()
    returning zeroed memory.
*/
void
CoordsInnerProd2(Coords *coords)
{
    int             k;
    double        **innerprod2 = NULL;
    const double   *x = (const double *) coords->x,
                   *y = (const double *) coords->y,
                   *z = (const double *) coords->z;
    double          xk, yk, zk;
    double          sxx = 0.0, syy = 0.0, szz = 0.0;
    double          sxy = 0.0, sxz = 0.0, syz = 0.0;

    if (coords->innerprod2 == NULL)
        coords->innerprod2 = MatInit(3, 3);

    innerprod2 = coords->innerprod2;

    for (k = 0; k < coords->vlen; ++k)
    {
        xk = x[k];
        yk = y[k];
        zk = z[k];

        sxx += (xk * xk);
        syy += (yk * yk);
        szz += (zk * zk);
        sxy += (xk * yk);
        sxz += (xk * zk);
        syz += (yk * zk);
    }

    innerprod2[0][0] = sxx;
    innerprod2[1][1] = syy;
    innerprod2[2][2] = szz;
    innerprod2[0][1] = innerprod2[1][0] = sxy;
    innerprod2[0][2] = innerprod2[2][0] = sxz;
    innerprod2[1][2] = innerprod2[2][1] = syz;

    /* Mat3Print(coords->innerprod2); */
}


/*  Replace the N x N matrix mat by its inner product with itself, in place.

    Entry (i, j) of the result is the dot product of rows i and j of the
    input, i.e. M M^T, which equals M^T M when M is symmetric as the name
    assumes. The product is built in a scratch matrix and copied back.
*/
void
MatInnerProdSymIp(double **mat, const int N)
{
    int             r, c, t;
    double          acc;
    double        **scratch = MatInit(N, N);

    for (r = 0; r < N; ++r)
    {
        for (c = 0; c < N; ++c)
        {
            acc = 0.0;
            for (t = 0; t < N; ++t)
                acc += (mat[r][t] * mat[c][t]);

            scratch[r][c] = acc;
        }
    }

    /* copy the finished product back over the input */
    for (r = 0; r < N; ++r)
    {
        for (c = 0; c < N; ++c)
            mat[r][c] = scratch[r][c];
    }

    MatDestroy(&scratch);
}


/*  Out-of-place inner product of an N x N matrix with itself.

    mato[i][j] receives the dot product of rows i and j of mati, i.e. M M^T,
    which equals M^T M when M is symmetric as the name assumes. mati is only
    read; every entry of mato is overwritten.
*/
void
MatInnerProdSymOp(double **mato, const double **mati, const int N)
{
    int             r, c, t;

    for (r = 0; r < N; ++r)
    {
        double         *outrow = mato[r];

        for (c = 0; c < N; ++c)
        {
            outrow[c] = 0.0;

            t = 0;
            while (t < N)
            {
                outrow[c] += (mati[r][t] * mati[c][t]);
                ++t;
            }
        }
    }
}


/*  Calculates the EDMA average of the squared-distance matrices, with the
    chi^2 variance correction for bias. This is an unbiased estimate of the
    average coords, *assuming that the coords are distributed normally*.

    Lele, Subhash (1993) "Euclidean Distance Matrix Analysis (EDMA):
    Estimation of mean form and mean form difference."
    Mathematical Geology 25(5):573-602 (pp. 579-580, Theorem 4)

    Outputs:
      cdsA->distmat->matrix[i]  lower triangle: squared distances of structure i
      cdsA->Var_matrix          lower triangle: variance of the squared distances
      cdsA->Dij_matrix          full symmetric matrix of corrected values,
                                with a zero diagonal

    The centering matrix H is not needed here (centering is done later, on
    Dij_matrix), so it is no longer allocated.
*/
void
CalcEDMADistMat(CoordsArray *cdsA)
{
    int             i, j, k;
    const int       len = cdsA->vlen;
    double          normalize, varsqr;
    double        **distmat;

    normalize = 1.0 / (double) cdsA->cnum;
    distmat = MatInit(len, len);

    /* (1) calculate the symmetric j x k atom squared distance e^i(l,m) matrix for all structure Coords i */
    for (i = 0; i < cdsA->cnum; ++i)
        for (j = 0; j < len; ++j)
            for (k = 0; k < j; ++k)
                cdsA->distmat->matrix[i][j][k] = SqrCoordsDist(cdsA->coords[i], j, cdsA->coords[i], k);

    /* (2) calculate the average squared distance matrix ave{e(l,m)} for the CoordsArray; */
    /*     only the lower triangle of distmat is used below */
    for (i = 0; i < len; ++i)
        for (j = 0; j < i; ++j)
            distmat[i][j] = 0.0;

    for (i = 0; i < cdsA->cnum; ++i)
        for (j = 0; j < len; ++j)
            for (k = 0; k < j; ++k)
                distmat[j][k] += cdsA->distmat->matrix[i][j][k];

    for (j = 0; j < len; ++j)
        for (k = 0; k < j; ++k)
            distmat[j][k] *= normalize;

    /* (3) find the difference between each  e^i(l,m)  and the average  ave{e(l,m)}, */
    /*     square it, find the average and put it in Var_matrix */
    /*     (only the lower triangle of Var_matrix is written) */
    for (i = 0; i < len; ++i)
        for (j = 0; j < i; ++j)
            cdsA->Var_matrix[i][j] = 0.0;

    for (i = 0; i < cdsA->cnum; ++i)
        for (j = 0; j < len; ++j)
            for (k = 0; k < j; ++k)
                cdsA->Var_matrix[j][k] += mysquare(cdsA->distmat->matrix[i][j][k] - distmat[j][k]);

    for (j = 0; j < len; ++j)
        for (k = 0; k < j; ++k)
            cdsA->Var_matrix[j][k] *= normalize;

    /* (4a) calculate ave(delta[l,m]), Lele 1993 Theorem 4, eqn. 2 */
    for (i = 0; i < len; ++i)
    {
        /* An atom is at distance zero from itself. Written explicitly because
           EmbedAveCoords() overwrites Dij_matrix in place, so a stale diagonal
           would otherwise survive into the next embedding. */
        cdsA->Dij_matrix[i][i] = 0.0;

        for (j = 0; j < i; ++j)
        {
            varsqr = mysquare(distmat[i][j]) - (1.5 * cdsA->Var_matrix[i][j]);
            if (varsqr > 0.0)
                cdsA->Dij_matrix[i][j] = sqrt(varsqr);
            else
                cdsA->Dij_matrix[i][j] = 0.0;
        }
    }
    /* MatPrint(cdsA->Dij_matrix, len); */

    /* (4b) copy lower left triangle to upper right triangle */
    for (i = 0; i < len; ++i)
        for (j = i+1; j < len; ++j)
            cdsA->Dij_matrix[i][j] = cdsA->Dij_matrix[j][i];

    MatDestroy(&distmat);
}


/*  Same as CalcEDMADistMat(), but every structure's contribution to atom
    pair (j, k) is weighted by the product of the two atoms' occupancies
    (coords[i]->o[j] * coords[i]->o[k]). Pairs whose total weight is zero get
    an average and a variance of zero.

    Lele, Subhash (1993) "Euclidean Distance Matrix Analysis (EDMA):
    Estimation of mean form and mean form difference."
    Mathematical Geology 25(5):573-602 (pp. 579-580, Theorem 4)

    Outputs:
      cdsA->distmat->matrix[i]  lower triangle: squared distances of structure i
      cdsA->Var_matrix          lower triangle: weighted variance of the squared distances
      cdsA->Dij_matrix          full symmetric matrix of corrected values,
                                with a zero diagonal
*/
void
CalcEDMADistMatOcc(CoordsArray *cdsA)
{
    int             i, j, k;
    const int       len = cdsA->vlen, cnum = cdsA->cnum;
    double          varsqr;
    double        **distmat;
    double          occ, occsum;

    distmat = MatInit(len, len);

    /* (1) calculate the symmetric j x k atom squared distance e^i(l,m) matrix for all structure Coords i */
    for (i = 0; i < cnum; ++i)
        for (j = 0; j < len; ++j)
            for (k = 0; k < j; ++k)
                cdsA->distmat->matrix[i][j][k] = SqrCoordsDist(cdsA->coords[i], j, cdsA->coords[i], k);

    /* (2) calculate the occupancy-weighted average squared distance matrix ave{e(l,m)}. */
    /*     It must go into distmat, because steps (3) and (4a) read it from there; */
    /*     Dij_matrix is only written in step (4). */
    for (j = 0; j < len; ++j)
    {
        for (k = 0; k < j; ++k)
        {
            distmat[j][k] = 0.0;
            occsum = 0.0;
            for (i = 0; i < cnum; ++i)
            {
                occ = cdsA->coords[i]->o[j] * cdsA->coords[i]->o[k];
                occsum += occ;
                distmat[j][k] += occ * cdsA->distmat->matrix[i][j][k];
            }

            if (occsum == 0.0)
                distmat[j][k] = 0.0;
            else
                distmat[j][k] /= occsum;
        }
    }

    /* (3) find the difference between each  e^i(l,m)  and the average  ave{e(l,m)}, */
    /*     square it, find the weighted average and put it in Var_matrix */
    /*     (only the lower triangle of Var_matrix is written) */
    for (j = 0; j < len; ++j)
    {
        for (k = 0; k < j; ++k)
        {
            cdsA->Var_matrix[j][k] = 0.0;
            occsum = 0.0;
            for (i = 0; i < cnum; ++i)
            {
                occ = cdsA->coords[i]->o[j] * cdsA->coords[i]->o[k];
                occsum += occ;
                cdsA->Var_matrix[j][k] += occ * mysquare(cdsA->distmat->matrix[i][j][k] - distmat[j][k]);
            }

            if (occsum == 0.0)
                cdsA->Var_matrix[j][k] = 0.0;
            else
                cdsA->Var_matrix[j][k] /= occsum;
        }
    }

    /* (4a) calculate ave(delta[l,m]), Lele 1993 Theorem 4, eqn. 2 */
    for (i = 0; i < len; ++i)
    {
        /* An atom is at distance zero from itself. Written explicitly because
           EmbedAveCoords() overwrites Dij_matrix in place, so a stale diagonal
           would otherwise survive into the next embedding. */
        cdsA->Dij_matrix[i][i] = 0.0;

        for (j = 0; j < i; ++j)
        {
            varsqr = mysquare(distmat[i][j]) - (1.5 * cdsA->Var_matrix[i][j]);
            if (varsqr > 0.0)
                cdsA->Dij_matrix[i][j] = sqrt(varsqr);
            else
                cdsA->Dij_matrix[i][j] = 0.0;
        }
    }
    /* MatPrint(cdsA->Dij_matrix, len); */

    /* (4b) copy lower left triangle to upper right triangle */
    for (i = 0; i < len; ++i)
        for (j = i+1; j < len; ++j)
            cdsA->Dij_matrix[i][j] = cdsA->Dij_matrix[j][i];

    MatDestroy(&distmat);
}



/*  Center/normalize with H,
    B(M) = -0.5 * H{Eu(M)}H
    based on Lele's three-step PCA algorithm given on page 581 Lele 1993

    H is never formed. It has 1 - 1/len on the diagonal and -1/len elsewhere,
    so applying it on one side amounts to subtracting the column means and
    on the other side the row means. The column pass reads the original
    matrix and writes a scratch copy; the row pass reads the scratch copy
    and writes the final values, scaled by -0.5, back into mat.

    mat must be stored as one contiguous len x len block, because it is
    copied with memcpy() starting at &mat[0][0].
*/
void
LeleCenterMat(double **mat, const int len)
{
    int             r, c;
    double        **scratch = NULL;
    double          shift;
    const double    neg_inv_len = -1.0 / (double) len;

    scratch = MatInit(len, len);

    memcpy(&scratch[0][0], &mat[0][0], len*len*sizeof(double));

    /* column pass: scratch = mat with each column shifted by minus its mean */
    for (c = 0; c < len; ++c)
    {
        shift = 0.0;
        for (r = 0; r < len; ++r)
            shift += mat[r][c];

        shift *= neg_inv_len;

        for (r = 0; r < len; ++r)
            scratch[r][c] += shift;
    }

    /* MatPrint(scratch, len); */

    memcpy(&mat[0][0], &scratch[0][0], len*len*sizeof(double));

    /* row pass: shift each row by minus its mean, then apply the -0.5 factor */
    for (r = 0; r < len; ++r)
    {
        shift = 0.0;
        for (c = 0; c < len; ++c)
            shift += scratch[r][c];

        shift *= neg_inv_len;

        for (c = 0; c < len; ++c)
            mat[r][c] = (mat[r][c] + shift) * -0.5;
    }

    MatDestroy(&scratch);
}


/*  Double-center a len x len matrix in place and scale it by -0.5:
    first the mean of every column is subtracted from that column, then the
    mean of every row of the result is subtracted from that row, and finally
    every element is multiplied by -0.5.
*/
void
DoubleCenterMat(double **mat, const int len)
{
    int             r, c;
    double          mean;

    /* columns */
    for (c = 0; c < len; ++c)
    {
        mean = 0.0;
        for (r = 0; r < len; ++r)
            mean += mat[r][c];

        mean /= len;

        for (r = 0; r < len; ++r)
            mat[r][c] -= mean;
    }

    /* rows, with the final -0.5 factor applied as each element is finished */
    for (r = 0; r < len; ++r)
    {
        double         *rowp = mat[r];

        mean = 0.0;
        for (c = 0; c < len; ++c)
            mean += rowp[c];

        mean /= len;

        for (c = 0; c < len; ++c)
            rowp[c] = (rowp[c] - mean) * -0.5;
    }
}


/*  Calculates the straight average of the squared-distance matrices,
    without the X^2 variance correction that CalcEDMADistMat() applies. This
    is a biased estimate of the average coords (the squared distances between
    two coords are too large by the sum of the variance for each coord), but
    I believe it is maximum likelihood of some sort.

    On return cdsA->Dij_matrix is a full symmetric matrix whose entry (j, k)
    is the squared distance between atoms j and k averaged over all cnum
    structures, with a zero diagonal.
 */
void
CalcMLDistMat(CoordsArray *cdsA)
{
    int             i, j, k;
    const int       len = cdsA->vlen;
    double          normalize, tmpx, tmpy, tmpz;
    double        **Dij_matrix = cdsA->Dij_matrix;
    const Coords  **coords = (const Coords **) cdsA->coords;
    const Coords   *coordsi;

    normalize = 1.0 / cdsA->cnum;

    /* Clear the lower triangle AND the diagonal. The diagonal is zero by
       definition, but it has to be written explicitly: EmbedAveCoords()
       overwrites Dij_matrix in place, so a stale diagonal would otherwise
       survive into the next embedding. */
    for (i = 0; i < len; ++i)
        for (j = 0; j <= i; ++j)
            Dij_matrix[i][j] = 0.0;

    /* (1) calculate the symmetric j x k atom squared distance e^i(l,m) matrix for all structure Coords i */
    /* (2) and sum it into the average squared distance matrix ave{e(l,m)} for the CoordsArray */
    for (i = 0; i < cdsA->cnum; ++i)
    {
        coordsi = coords[i];
        for (j = 0; j < len; ++j)
        {
            for (k = 0; k < j; ++k)
            {
                tmpx = coordsi->x[j] - coordsi->x[k];
                tmpy = coordsi->y[j] - coordsi->y[k];
                tmpz = coordsi->z[j] - coordsi->z[k];

                Dij_matrix[j][k] += (tmpx*tmpx + tmpy*tmpy + tmpz*tmpz);
            }
        }
    }

    for (j = 0; j < len; ++j)
        for (k = 0; k < j; ++k)
            Dij_matrix[j][k] *= normalize;
    /*MatPrint(cdsA->Dij_matrix, len);*/

    /* copy lower left triangle to upper right triangle */
    for (i = 0; i < len; ++i)
        for (j = i+1; j < len; ++j)
            Dij_matrix[i][j] = Dij_matrix[j][i];
}


/*  Same as CalcMLDistMat(), but weight by occupancy: every structure's
    squared distance for atom pair (j, k) is weighted by
    coords[i]->o[j] * coords[i]->o[k].

    Entries that come out as exactly 0.0 (for instance because no structure
    has both atoms) are then filled in from the other entries of the same row
    and column. On return cdsA->Dij_matrix is a full symmetric matrix with a
    zero diagonal.
*/
void
CalcMLDistMatOcc(CoordsArray *cdsA)
{
    int             i, j, k, m;
    const int       len = cdsA->vlen;
    const int       cnum = cdsA->cnum;
    double          occsum, occ, tmpx, tmpy, tmpz, fill;
    double        **Dij_matrix = cdsA->Dij_matrix;
    const Coords  **coords = (const Coords **) cdsA->coords;
    const Coords   *coordsi;

    /* (1) calculate the symmetric j x k atom squared distance e^i(l,m) matrix for all structure Coords i */
    /* (2) calculate the average squared distance matrix ave{e(l,m)} for the CoordsArray */
    for (j = 0; j < len; ++j)
    {
        /* An atom is at distance zero from itself. Written explicitly because
           EmbedAveCoords() overwrites Dij_matrix in place and the fill-in
           pass below reads the diagonal. */
        Dij_matrix[j][j] = 0.0;

        for (k = 0; k < j; ++k)
        {
            Dij_matrix[j][k] = 0.0;
            occsum = 0.0;
            for (i = 0; i < cnum; ++i)
            {
                coordsi = coords[i];
                occ = coordsi->o[j] * coordsi->o[k];
                occsum += occ;
                tmpx = coordsi->x[j] - coordsi->x[k];
                tmpy = coordsi->y[j] - coordsi->y[k];
                tmpz = coordsi->z[j] - coordsi->z[k];

                Dij_matrix[j][k] += occ * (tmpx*tmpx + tmpy*tmpy + tmpz*tmpz);
            }

            if (occsum == 0.0)
                Dij_matrix[j][k] = 0.0;
            else
                Dij_matrix[j][k] /= occsum;
        }
    }

    /* DLT debug -- could be made more efficient by saving indices of 0.0s */
    /* Fill in the entries that are still zero. Only the lower triangle holds
       current values at this point (the mirror copy is done last), so every
       row/column neighbor is read through its lower-triangle position. The
       sum is kept in a local so that the entry being filled is never added
       to itself. */
    for (j = 0; j < len; ++j)
    {
        for (k = 0; k < j; ++k)
        {
            if (Dij_matrix[j][k] == 0.0)
            {
                fill = 0.0;

                /* the rest of row j */
                for (m = 0; m < len; ++m)
                    if (m != k)
                        fill += (m > j) ? Dij_matrix[m][j] : Dij_matrix[j][m];

                /* the rest of column k */
                for (m = 0; m < len; ++m)
                    if (m != j)
                        fill += (m < k) ? Dij_matrix[k][m] : Dij_matrix[m][k];

                /* NOTE(review): the divisor is kept from the original code;
                   it is not the number of terms summed above -- confirm the
                   intended normalization. */
                Dij_matrix[j][k] = fill / (len*len - 1);
            }
        }
    }

    /* copy lower left triangle to upper right triangle */
    for (i = 0; i < len; ++i)
        for (j = 0; j < i; ++j)
            Dij_matrix[j][i] = Dij_matrix[i][j];
}


/* Calculate the average coordinates from the average distance matrix
   as calculated in CalcEDMADistMat(CoordsArray *cdsA) and
   CalcMLDistMat. This is a distance geometry embedding algorithm.

   See:
   Crippen and Havel (1978) Acta Cryst A34:282
   "Stable calculation of coordinates from distance data."

   Gower, J.C (1966) Biometrika 53:3-4:325-338.
   "Some distance properties of latent root and vector methods used in
   multivariate analysis."

   Both the above refs give equivalent methods. Most mol biologists
   know only the first, statisticians the second.

   First, double-center the NxN distance matrix and find its eigenvalues
   and eigenvectors. Second, take the three largest. Those three
   eigenvectors, multiplied by the sqrt of the corresponding eigenvalue,
   are the x, y, and z coordinate vectors for the structure, respectively.

   Side effects: cdsA->Dij_matrix is centered and then used as LAPACK
   scratch space, so its contents are destroyed. cdsA->avecoords x/y/z are
   overwritten and every avecoords->o[i] is set to 1.0. The superposition
   scratch members of cdsA->coords[0] are used for the chirality check.

   A LAPACK failure is reported on stderr and terminates the program.
*/
void
EmbedAveCoords(CoordsArray *cdsA)
{
    int                      i;
    long int                 info = 0;
    char                     uplo_u = 'U';
    char                     jobz_v = 'V';
    char                     range_i = 'I';
    long int                 vlen = (long int) cdsA->vlen;
    long int                 il, iu, m;
    double                   vl, vu, abstol;
    double                   w0, w1, w2;
    double                 **z = NULL;
    long int                *isuppz, *iwork;
    long int                 lwork, liwork, negone;
    double                  *work, *w;
    double                   deviation1, deviation2;
    Coords                  *avecoords = cdsA->avecoords;

    /* Center/normalize with H, */
    /* B(M) = -0.5 * H{Eu(M)}H */
    /* based on Lele's three-step PCA algorithm given on page 581 Lele 1993 */

    DoubleCenterMat(cdsA->Dij_matrix, vlen);

    negone = -1;
    vl = vu = 0.0;
    w = (double *) malloc(vlen * sizeof(double));
    /* eigenvalue indices are 1-based and ascending: ask for the last three */
    il = vlen - 2;
    iu = vlen - 0;
    abstol = 0.0;
    z = MatInit(3, vlen);
    isuppz = (long int*) malloc(6 * sizeof(long int));

    work = (double *) malloc(sizeof(double));
    iwork = (long int *) malloc(sizeof(long int));

    /* first dsyevr call lwork and liwork = -1 which
       makes dsyevr calculate optimum workspace given
       the other params */
    DSYEVR(&jobz_v, &range_i, &uplo_u,
            &vlen, &cdsA->Dij_matrix[0][0], &vlen,
            &vl, &vu, &il, &iu,
            &abstol, &m, w, &z[0][0], &vlen, isuppz,
            work, &negone, iwork, &negone, &info);

    /* Only trust work[0]/iwork[0] if the query itself succeeded. If it was
       rejected (e.g. fewer than three atoms makes il invalid) they were never
       written, and the error is reported below instead. */
    if (info == 0)
    {
        lwork = work[0];
        liwork = iwork[0];
        free(work);
        free(iwork);
        work = (double *) malloc(lwork * sizeof(double));
        iwork = (long int *) malloc(liwork * sizeof(long int));

        /* LAPACK dsyevr() computes selected eigenvalues, and optionally, eigenvectors of a
           real symmetric matrix. Here we only need the first three largest evals and
           corresponding evecs. */
        DSYEVR(&jobz_v, &range_i, &uplo_u,
                &vlen, &cdsA->Dij_matrix[0][0], &vlen,
                &vl, &vu, &il, &iu,
                &abstol, &m, w, &z[0][0], &vlen, isuppz,
                work, &lwork, iwork, &liwork, &info);
    }

    if (info > 0)
    {
        fprintf(stderr, "\n\n");
        fprintf(stderr, " ERROR119: LAPACK dsyevr() complete eigenvalue/eigenvector solving choked; \n");
        fprintf(stderr, "           the algorithm failed to converge; %ld \n", info);
        fprintf(stderr, "           off-diagonal elements of an intermediate tridiagonal \n");
        fprintf(stderr, "           form did not converge to zero \n\n");
        exit(EXIT_FAILURE);
    }
    else if (info < 0)
    {
        fprintf(stderr, "\n\n");
        fprintf(stderr, " ERROR120: the %ld-th argument had an illegal value \n\n", info);
        exit(EXIT_FAILURE);
    }

    /* w[0] <= w[1] <= w[2] are the three largest eigenvalues. If the distance
       matrix is not exactly embeddable one of them can be negative; treat it
       as zero (a flat axis) instead of letting sqrt() turn every coordinate
       along that axis into NaN. */
    w0 = (w[0] > 0.0) ? sqrt(w[0]) : 0.0;
    w1 = (w[1] > 0.0) ? sqrt(w[1]) : 0.0;
    w2 = (w[2] > 0.0) ? sqrt(w[2]) : 0.0;

    /* largest eigenvalue -> x, smallest of the three -> z */
    for (i = 0; i < cdsA->vlen; ++i)
    {
        avecoords->x[i] = w2 * z[2][i];
        avecoords->y[i] = w1 * z[1][i];
        avecoords->z[i] = w0 * z[0][i];
        avecoords->o[i] = 1.0;
    }

    /* WriteAveCoordsFile(cdsA, "test.pdb"); */

    /* check to see if the average structure has the wrong chirality,
       since embedding basically randomly reflects the structure:
       compare the value returned by ProcLAPACKSVDvan() for the embedded
       structure and for its mirror image (x negated), and keep whichever
       gives the smaller value */
    deviation1 = ProcLAPACKSVDvan(cdsA->coords[0],
                                  avecoords,
                                  cdsA->coords[0]->matrix,
                                  cdsA->coords[0]->tmpmat3a,
                                  cdsA->coords[0]->tmpmat3b,
                                  cdsA->coords[0]->tmpmat3c,
                                  cdsA->coords[0]->tmpvec3a);

    for (i = 0; i < cdsA->vlen; ++i)
        avecoords->x[i] = -avecoords->x[i];

    deviation2 = ProcLAPACKSVDvan(cdsA->coords[0],
                                  avecoords,
                                  cdsA->coords[0]->matrix,
                                  cdsA->coords[0]->tmpmat3a,
                                  cdsA->coords[0]->tmpmat3b,
                                  cdsA->coords[0]->tmpmat3c,
                                  cdsA->coords[0]->tmpvec3a);

    /* the unreflected structure fit better: undo the reflection */
    if (deviation1 < deviation2)
        for (i = 0; i < cdsA->vlen; ++i)
            avecoords->x[i] = -avecoords->x[i];

    /* PrintCoords(cdsA->avecoords); */
    free(work);
    free(iwork);
    free(w);
    MatDestroy(&z);
    free(isuppz);
}
