/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008,2009 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "GenomeAlignerIndex.h"

#include <QtCore/QFile>
#include <QtEndian>

namespace U2 {

// Extension (without the dot) of the textual header file written by serialize()
// and read by deserialize(); the file name is "<baseFileName>.<extension>".
const QString GenomeAlignerIndex::HEADER_EXTENSION("idx");
// Extension (without the dot) of the binary file opened by openIndexFile().
const QString GenomeAlignerIndex::SARRAY_EXTENSION("sarr");
// Not referenced in this file; presumably the extension of the stored reference
// sequence -- TODO confirm against the users of this constant.
const QString GenomeAlignerIndex::REF_INDEX_EXTENSION("ref");
// First line of the header file. It starts with '#', so deserialize() skips it.
const QString GenomeAlignerIndex::HEADER("#UGENE suffix array index\n");
// Human-readable summary line; serialize() fills %1..%3 with the reference file
// name, seqLength and w. It also starts with '#' and is skipped on reading.
const QString GenomeAlignerIndex::PARAMETERS("#file \"%1\", sequence's length = %2, w = %3\n");
// Commented legend naming the six comma-separated values that serialize() writes
// on the line that follows it.
const QString COMMENT("#sequence's length, window's size, index's size, chars in the bitMask, chars out of the bitMask, bitChar's length\n");

/**
 * Creates an empty index: every owned array pointer and the index file handle
 * start as NULL.
 *
 * The per-character bit table and the bit width of one character are taken
 * from the bit-mask helper `bt` for DNAAlphabet_NUCL.
 *
 * The numeric parameters are zeroed explicitly so that methods reading them
 * (serialize(), findInCache(), ...) never observe indeterminate values when
 * they are called before the index is built or deserialized.
 */
GenomeAlignerIndex::GenomeAlignerIndex()
: sArray(NULL), bitMask(NULL), seq(NULL), indexFile(NULL),
memSA(NULL), memBM(NULL), objLens(NULL)
{
    seqLength = 0;
    w = 0;
    indexLength = 0;
    wCharsInMask = 0;
    wAfterBits = 0;
    partsInMemCache = 0;

    bitTable = bt.getBitMaskCharBits(DNAAlphabet_NUCL);
    bitCharLen = bt.getBitMaskCharBitsNum(DNAAlphabet_NUCL);
}

/**
 * Closes and destroys the index file, if there is one, and then frees every
 * heap array owned by the index.
 */
GenomeAlignerIndex::~GenomeAlignerIndex() {
    if (indexFile != NULL) {
        indexFile->close();
        delete indexFile;
        indexFile = NULL;
    }

    // delete[] on a NULL pointer is a no-op, so the arrays need no guards.
    delete[] sArray;
    delete[] bitMask;
    delete[] memSA;
    delete[] memBM;
    delete[] seq;
    delete[] objLens;
}

void GenomeAlignerIndex::serialize(const QString &refFileName) {
    QString indexFileName = baseFileName + QString(".") + HEADER_EXTENSION;
    QFile file(indexFileName);
    if (!file.open(QIODevice::WriteOnly)) {
        return;
    }

    QByteArray data;
    data = HEADER.toAscii();
    data += PARAMETERS.arg(refFileName).arg(seqLength).arg(w).toAscii();
    data += COMMENT;
    data += QByteArray::number(seqLength, 10) + ", ";
    data += QByteArray::number(w, 10) + ", ";
    data += QByteArray::number(indexLength, 10) + ", ";
    data += QByteArray::number(wCharsInMask, 10) + ", ";
    data += QByteArray::number(wAfterBits, 10) + ", ";
    data += QByteArray::number(bitCharLen, 10) + "\n";
    file.write(data);

    file.close();
}

/**
 * Extracts the next comma-separated non-negative decimal integer from `data`.
 *
 * @param data    in/out: the token and the comma that follows it are removed;
 *                what remains is trimmed.
 * @param eol     set to true when no comma was found, i.e. the token taken is
 *                the last one on the line. It is never reset to false here.
 * @param intErr  set to true when the token is empty, contains anything but
 *                the digits '0'..'9', or does not fit into an int. It is never
 *                reset to false here.
 * @return the parsed value, or -1 when intErr has been raised by this call.
 */
inline int getNextInt(QByteArray &data, bool &eol, bool &intErr) {
    int commaIdx = data.indexOf(',');
    if (-1 == commaIdx) {
        commaIdx = data.length();
        eol = true;
    }

    const QByteArray token = data.left(commaIdx).trimmed();
    data = data.mid(commaIdx + 1).trimmed();

    // A missing value must not be silently read as 0.
    if (token.isEmpty()) {
        intErr = true;
        return -1;
    }

    // Only plain digits are accepted: no sign, no inner whitespace.
    for (int i = 0; i < token.length(); i++) {
        const char c = token.at(i);
        if (c < '0' || c > '9') {
            intErr = true;
            return -1;
        }
    }

    // A digit run that is too long for an int makes toInt() fail; it would
    // otherwise come back as 0.
    bool ok = false;
    const int value = token.toInt(&ok);
    if (!ok) {
        intErr = true;
        return -1;
    }
    return value;
}

/**
 * Reads the index parameters back from "<baseFileName>.<HEADER_EXTENSION>".
 *
 * Leading lines that start with '#' are skipped. The first other line must
 * hold six comma-separated integers: seqLength, w, indexLength, wCharsInMask,
 * wAfterBits and the bit width of one character. The first five are stored
 * into the members as they are parsed, even if a later check fails. The sixth
 * must equal the bitCharLen this object already has.
 *
 * @param ti  receives an error message when the file cannot be opened, the
 *            parameters line is empty, has too few values, contains a bad
 *            integer, has a non-positive w or indexLength, or was written for
 *            another bit width.
 */
void GenomeAlignerIndex::deserialize(TaskStateInfo& ti) {
    QFile headerFile(baseFileName + QString(".") + HEADER_EXTENSION);
    if (!headerFile.open(QIODevice::ReadOnly)) {
        ti.setError("Can't open file-index");
        return;
    }

    // Advance to the first line that is empty or is not a '#' comment.
    QByteArray line;
    for (;;) {
        line = headerFile.readLine().trimmed();
        if (line.isEmpty() || '#' != line.at(0)) {
            break;
        }
    }
    if (line.isEmpty()) {
        ti.setError("Empty parameters' line in the file-index");
        return;
    }

    bool lineExhausted = false;
    bool badNumber = false;

    seqLength = getNextInt(line, lineExhausted, badNumber);
    w = getNextInt(line, lineExhausted, badNumber);
    indexLength = getNextInt(line, lineExhausted, badNumber);
    wCharsInMask = getNextInt(line, lineExhausted, badNumber);
    wAfterBits = getNextInt(line, lineExhausted, badNumber);
    // The line must not end before the sixth value has been taken.
    if (lineExhausted) {
        ti.setError("Too little amount of parameters in the file-index");
        return;
    }

    const int storedBitCharLen = getNextInt(line, lineExhausted, badNumber);
    if (badNumber) {
        ti.setError("Bad integer for some parameter in the file-index");
        return;
    }

    if (w <= 0 || indexLength <= 0) {
        ti.setError("Negative index's parameters");
        return;
    }

    if (storedBitCharLen != bitCharLen) {
        ti.setError("This index was built for a sequence with another sequence type");
        return;
    }
    bitCharLen = storedBitCharLen;
}

bool GenomeAlignerIndex::openIndexFile() {
    indexFile = new QFile(baseFileName+QString(".")+GenomeAlignerIndex::SARRAY_EXTENSION);

    return indexFile->open(QIODevice::ReadOnly);
}

void GenomeAlignerIndex::createMemCache() {
    memSA = new quint32[2*partsInMemCache];
    memBM = new quint64[2*partsInMemCache];
    int elemSize = sizeof(quint32) + sizeof(quint64);
    quint32 partSize =  indexLength/partsInMemCache;
    QByteArray b(2*elemSize, '\0');
    char *buff = b.data();

    assert(NULL != indexFile && indexFile->isOpen());
    indexFile->seek(0);

    indexFile->read(buff, elemSize);
    memSA[0] = qFromBigEndian<quint32>((uchar*)buff);
    memBM[0] = qFromBigEndian<quint64>((uchar*)(buff+sizeof(quint32)));
    quint32 idx = 0;
    bool foundBorder = false;
    qint64 compRes = 0;
    for (int i=0; i<partsInMemCache; i++) {
        foundBorder = false;
        if (partsInMemCache-1 == i) {
            idx = indexLength - 1;
        } else {
            idx += partSize-1;
        }
        indexFile->seek(idx*elemSize);
        indexFile->read(buff, elemSize);
        memSA[2*i+1] = qFromBigEndian<quint32>((uchar*)buff);
        memBM[2*i+1] = qFromBigEndian<quint64>((uchar*)(buff + sizeof(quint32)));
        if (partsInMemCache-1 == i) {
            continue;
        }
        while (!foundBorder && idx < indexLength) {
            indexFile->read(buff, elemSize);
            memSA[2*i+2] = qFromBigEndian<quint32>((uchar*)(buff));
            memBM[2*i+2] = qFromBigEndian<quint64>((uchar*)(buff + sizeof(quint32)));
            compRes = compareWithoutArrays(memBM[2*i+1], memBM[2*i+2], memSA[2*i+1], memSA[2*i+2], seq);
            if (0 != compRes) {
                foundBorder = true;
            } else {
                memSA[2*i+1] = memSA[2*i+2];
            }
            idx++;
        }
        if (idx == indexLength) {
            partsInMemCache = i+1;
        }
    }
}

/**
 * Three-way comparison of two indexed windows given by value.
 *
 * The bit values are compared first. If they are equal and wAfterBits is not
 * zero, the wAfterBits characters that follow the first wCharsInMask
 * characters of each window in `seq` are compared one by one.
 *
 * @param x1, x2  bit values of the two windows.
 * @param a1, a2  offsets of the two windows in `seq`.
 * @param seq     sequence the offsets refer to.
 * @return a negative value if the first window orders before the second, a
 *         positive value if after, 0 if they are equal. When the bit values
 *         differ the result is -1 or 1; only the sign is meaningful.
 */
qint64 GenomeAlignerIndex::compareWithoutArrays(quint64 x1, quint64 x2, quint32 a1, quint32 a2, const char *seq) const {
    // Compare, do not subtract: x1-x2 is computed modulo 2^64 and, cast to
    // qint64, has the wrong sign whenever the values differ by 2^63 or more.
    if (x1 != x2) {
        return x1 < x2 ? -1 : 1;
    }
    if (wAfterBits == 0) {
        return 0;
    }

    const char* b1 = seq + a1 + wCharsInMask;
    const char* b2 = seq + a2 + wCharsInMask;
    for (const char* end = b1+wAfterBits; b1 < end; b1++, b2++) {
        const qint64 rc = *b1-*b2;
        if ( rc != 0 ) {
            return rc;
        }
    }
    return 0;
}

/**
 * Compares the characters that lie beyond the bit mask of a cached window
 * with a query.
 *
 * The indexed side starts at seq + memSA[bitMaskPos] + wCharsInMask; the query
 * side starts at `query` itself, so the caller passes a pointer that is
 * already advanced past the masked characters. wAfterBits characters are
 * compared.
 *
 * @param bitMaskPos  position in the memSA cache.
 * @param query       first query character to compare.
 * @return the difference (indexed character - query character) at the first
 *         mismatch, or 0 if all compared characters are equal.
 */
int GenomeAlignerIndex::compareAfterBits(quint32 bitMaskPos, const char* query) const {
    const char* indexed = seq + memSA[bitMaskPos] + wCharsInMask;
    for (int i = 0; i < wAfterBits; ++i) {
        const int diff = indexed[i] - query[i];
        if (0 != diff) {
            return diff;
        }
    }
    return 0;
}

/**
 * Packs the first wCharsInMask characters of `seq` into one 64-bit value.
 *
 * For every character the accumulator is shifted left by bitCharLen bits and
 * the character's entry from bitTable is OR-ed in, so the first character
 * ends up in the most significant position.
 *
 * @param seq  pointer to at least wCharsInMask readable characters.
 * @return the packed value.
 */
quint64 GenomeAlignerIndex::getBitValue(const char *seq) const {
    quint64 packed = 0;
    const char* const stop = seq + wCharsInMask;
    for (const char* p = seq; p < stop; ++p) {
        packed <<= bitCharLen;
        packed |= bitTable[uchar(*p)];
    }
    return packed;
}

int GenomeAlignerIndex::getPrefixSize() const {
    return w;
}

const char *GenomeAlignerIndex::getIndexedSequence() const {
    return seq;
}

int GenomeAlignerIndex::findInCache(const SAISearchContext &c) const {
    int low = 0;
    int high = 2*partsInMemCache-1;
    quint64 *a = memBM;
    int preAnswer = -1;
    while (low <= high) {
        int mid = (low + high) >> 1;
        int rc = a[mid] - c.bitValue;
        if (rc < 0) {
            if (1 < high - mid) {
                low = mid + 1;
            } else {
                if (0 != high%2 || a[high] <= c.bitValue) {
                    preAnswer = high >> 1;
                }
                break;
            }
        } else if (rc > 0) {
            if (1 < mid - low) {
                high = mid - 1;
            } else {
                if (0 == low%2 || a[low] >= c.bitValue) {
                    preAnswer = low >> 1;
                }
                break;
            }
        } else {
            preAnswer = mid >> 1;
            break;
        }
    }

    if (0 > preAnswer) {
        return preAnswer;
    }

    int leftBorder = compareAfterBits(2*preAnswer, c.query+wCharsInMask);
    if (leftBorder > 0) { //query < seq[left] < seq[right]
        return -1;
    }
    int rightBorder = compareAfterBits(2*preAnswer + 1, c.query+wCharsInMask);
    if (rightBorder > 0) { //seq[left] < seq[right] < query
        return -1;
    }

    return preAnswer;
}

} //namespace
