/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008,2009 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "ImportAnnotationsFromCSVTask.h" 

#include <core_api/AppContext.h>
#include <core_api/Counter.h>
#include <core_api/IOAdapter.h>
#include <core_api/ProjectModel.h>
#include <core_api/L10n.h>
#include <core_api/Log.h>

#include <gobjects/AnnotationTableObject.h>
#include <util_tasks/AddDocumentTask.h>
#include <util_tasks/LoadDocumentTask.h>
#include <util_tasks/SaveDocumentTask.h>
#include <util_text/TextUtils.h>

#include <script/ScriptTask.h>

#include <memory>

namespace GB2 {

//TODO: move to a separate header
// Name of the log category used for the messages emitted by this file.
#define ULOG_CAT_EXPORT_PLUGIN "Plugin: Export"
// File-local logger; run() below uses log.details() to report skipped CSV lines.
static LogCategory log(ULOG_CAT_EXPORT_PLUGIN);

// Creates the import task and registers the CSV reading subtask right away.
// The save/add subtasks are not created here: writeTask and addTask start as NULL
// and are filled in later by onSubTaskFinished().
ImportAnnotationsFromCSVTask::ImportAnnotationsFromCSVTask(ImportAnnotationsFromCSVTaskConfig& _config)
    : Task(tr("Import annotations from CSV"), TaskFlags_NR_FOSCOE),
      config(_config),
      readTask(NULL),
      writeTask(NULL),
      addTask(NULL)
{
    // The document format is required later, when a new destination document is created.
    assert(config.df != NULL);
    GCOUNTER(cvar,tvar,"ImportAnnotationsFromCSVTask");

    // First step of the pipeline: parse the CSV file into annotation data.
    readTask = new ReadCSVAsAnnotationsTask(config.csvFile, config.parsingOptions);
    addSubTask(readTask);
}


// Drives the import pipeline: called each time one of the subtasks finishes and
// returns the subtasks that must be run next (possibly none).
//
// Steps, in the order they are checked:
//   1. No document yet (neither cached nor found in the project by URL):
//      build a new document from the parsed annotations and schedule saving it.
//   2. The document was saved by this task and is not in the project: schedule adding it.
//   3. Otherwise the document is in the project: load it if needed, or append the
//      parsed annotations to its annotation table (creating the table if missing).
QList<Task*> ImportAnnotationsFromCSVTask::onSubTaskFinished(Task* subTask) {
    QList<Task*> followUps;
    if (hasErrors() || subTask == addTask) {
        // Either the task already failed or the last step (add to project) is done.
        return followUps;
    }

    GUrl docUrl(config.dstFile);
    Document* projDoc = AppContext::getProject()->findDocumentByURL(docUrl);
    const bool inProject = (projDoc != NULL);

    // Reuse the project's document when we do not have one of our own yet.
    if (doc.isNull() && inProject) {
        doc = projDoc;
    }

    // Step 1: there is no document at all -> create it and save it.
    if (doc.isNull()) {
        assert(subTask == readTask);
        QList<Annotation*> annotations = prepareAnnotations();
        doc = prepareNewDocument(annotations);
        writeTask = new SaveDocumentTask(doc);
        followUps.append(writeTask);
        return followUps;
    }

    // Step 2: the document was saved by us but is still outside of the project -> add it.
    if (writeTask != NULL && !inProject) {
        addTask = new AddDocumentTask(doc);
        followUps.append(addTask);
        return followUps;
    }

    // Step 3: the document lives in the project -> extend it with the new annotations.
    assert(inProject);
    if (!doc->isLoaded()) {
        // This method is invoked again once the load subtask finishes.
        followUps.append(new LoadUnloadedDocumentTask(doc));
        return followUps;
    }

    // The document format must support writing and annotation table objects.
    DocumentFormatConstraints constraints;
    constraints.flagsToSupport = DocumentFormatFlag_SupportWriting;
    constraints.supportedObjectTypes += GObjectTypes::ANNOTATION_TABLE;
    if (!doc->getDocumentFormat()->checkConstraints(constraints)) {
        setError(tr("Annotations can't be added to the document %1").arg(doc->getURLString()));
        return followUps;
    }
    if (doc->isStateLocked()) {
        setError(tr("Document is locked and can't be modified %1").arg(doc->getURLString()));
        return followUps;
    }

    // Use the first annotation table of the document; create one if there is none.
    AnnotationTableObject* table = NULL;
    QList<GObject*> tables = doc->findGObjectByType(GObjectTypes::ANNOTATION_TABLE);
    if (!tables.isEmpty()) {
        table = qobject_cast<AnnotationTableObject*>(tables.first());
    }
    if (table == NULL) {
        table = new AnnotationTableObject("Annotations");
        doc->addObject(table);
    }
    assert(table != NULL);

    QList<Annotation*> annotations = prepareAnnotations();
    table->addAnnotations(annotations);
    return followUps;
}

// Converts the result of the finished read subtask into Annotation objects:
// one newly allocated Annotation per parsed annotation data record, in the same order.
// The returned list holds raw pointers; this method does not delete them.
QList<Annotation*> ImportAnnotationsFromCSVTask::prepareAnnotations() const {
    assert(readTask != NULL && readTask->isFinished());

    const QList<SharedAnnotationData> parsed = readTask->getResult();
    QList<Annotation*> annotations;
    for (int i = 0, n = parsed.size(); i < n; i++) {
        annotations.append(new Annotation(parsed.at(i)));
    }
    return annotations;
}

// Creates a new document for config.dstFile using the configured document format
// and puts all given annotations into a single annotation table named "Annotations".
// Returns the new document; it is neither saved nor added to the project here.
Document* ImportAnnotationsFromCSVTask::prepareNewDocument(const QList<Annotation*>& annotations) const {
    // Pick the IO adapter factory that matches the destination URL.
    IOAdapterId adapterId = BaseIOAdapters::url2io(config.dstFile);
    IOAdapterFactory* adapterFactory = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(adapterId);
    Document* newDoc = config.df->createNewDocument(adapterFactory, config.dstFile);

    AnnotationTableObject* table = new AnnotationTableObject("Annotations");
    table->addAnnotations(annotations);
    // Reset the modified flag before the table is attached to the document
    // (presumably so the freshly imported table is not reported as changed).
    table->setModified(false);
    newDoc->addObject(table);

    return newDoc;
}

//////////////////////////////////////////////////////////////////////////
// ReadCSVAsAnnotationsTask

// Remembers the CSV file path and a copy of the parsing options.
// Nothing is read here; the file is processed in run().
ReadCSVAsAnnotationsTask::ReadCSVAsAnnotationsTask(const QString& _file, const CSVParsingConfig& _config)
    : Task(tr("Parse CSV file %1").arg(_file), TaskFlag_None),
      file(_file),
      config(_config)
{
}

#define BUFF_SIZE 8192
void ReadCSVAsAnnotationsTask::run() {
    GUrl url(file);
    IOAdapterId ioId = BaseIOAdapters::url2io(url);
    IOAdapterFactory* iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(ioId);
    std::auto_ptr<IOAdapter> io(iof->createIOAdapter());
    
    if (!io->open(url, IOAdapterMode_Read)) {
        setError(L10N::errorOpeningFileRead(url));
        return;
    }
    
    QByteArray block(BUFF_SIZE, '\0');
    int blockLen = 0;
    QString text;
    while ((blockLen = io->readBlock(block.data(), BUFF_SIZE)) > 0) {
        int sizeBefore = text.length();
        QString line = QString::fromLocal8Bit(block.data(), blockLen).trimmed();
        text.append(line);
        if (text.length() != sizeBefore + line.length()) {
            setError(L10N::errorReadingFile(url));
        }
        stateInfo.progress = io->getProgress();
    }
    int maxColumns = 0;
    QList<QStringList> parsedLines = parseLinesIntoTokens(text, config, maxColumns, stateInfo);
    
    foreach(QStringList lineTokens, parsedLines) {
        SharedAnnotationData a(new AnnotationData());
        bool ok = true;
        QString error;
        int startPos = -1;
        int startPosOffset = 0;
        int len = -1;
        int endPos = -1;

        for (int column = 0; column < lineTokens.size() && ok; column++) {
            if (column > config.columns.size()) {
                break;
            }
            const ColumnConfig& columnConf = config.columns.at(column);
            const QString& token = lineTokens.at(column);
            switch(columnConf.role) {
                case ColumnRole_Qualifier: 
                    assert(!columnConf.qualifierName.isEmpty());
                    a->qualifiers.append(Qualifier(columnConf.qualifierName, token));
                    break;
                case ColumnRole_Name:
                    a->name = token.isEmpty() ? config.defaultAnnotationName : token;
                    ok = Annotation::isValidAnnotationName(a->name);
                    if (!ok) {
                        error = tr("Invalid annotation name: '%1'").arg(a->name);
                    }
                    break;
                case ColumnRole_StartPos:
                    assert(startPos == -1);
                    startPos = token.toInt(&ok) - 1;
                    startPosOffset = columnConf.startPositionOffset;
                    if (!ok) {
                        error = tr("Start offset is not numeric: '%1'").arg(token);
                    }
                    break;
                case ColumnRole_EndPos:
                    assert(endPos == -1);
                    endPos = token.toInt(&ok) + (columnConf.endPositionIsInclusive ? 1 : 0) - 1;
                    if (!ok) {
                        error = tr("End offset is not numeric: '%1'").arg(token);
                    }
                    break;
                case ColumnRole_Length:
                    assert(len == -1);
                    len = token.toInt(&ok);
                    if (!ok) {
                        error = tr("Length is not numeric: '%1'").arg(token);
                    }
                    break;
                default:
                    assert(columnConf.role == ColumnRole_Ignore);
            }
        }
        
        //add annotation
        if (ok) {
            //set up default name
            if (a->name.isEmpty()) {
                a->name = config.defaultAnnotationName;
            }
            //set up location
            LRegion location;
            if (startPos != -1) {
                location.startPos = startPos + startPosOffset;
                if (endPos != -1) {
                    location.len = endPos - startPos;
                } else {
                    location.len = len;
                }
            } else {
                location.len = len;
                location.startPos = endPos - len;
            }
            if (location.len < 0) {
                location.startPos = location.startPos + location.len;
                location.len = - location.len;
            }
            if (location.startPos < 0 || location.startPos > location.endPos()) {
                log.details(tr("Invalid location: start: %1  len: %2, in line :%3, ignoring")
                    .arg(QString::number(location.startPos)).arg(QString::number(location.len)).arg(lineTokens.join(config.splitToken)));
            } else {
                a->location.append(location);
                result.append(a);
            }
        } else {
            //TODO: make configurable to allow stop parsing on any error!
            log.details(tr("Can't parse line: '%1', error = %2, ignoring").arg(lineTokens.join(config.splitToken)).arg(error));
        }
    }
}


// Splits 'text' into lines and every line into tokens.
//
// text       - the text to parse; lines are separated by '\n', empty lines are dropped
// config     - parsing options: the first config.linesToSkip (non-empty) lines are skipped,
//              as well as every line that starts with a non-empty config.prefixToSkip
// maxColumns - output: the largest number of tokens found in a single line (0 if none)
// ti         - receives errors of the per-line parser; also checked for cancellation
//
// Returns one token list per processed line. Processing stops at the first line
// after which 'ti' reports an error or cancellation; that line is not added to the result.
QList<QStringList> ReadCSVAsAnnotationsTask::parseLinesIntoTokens(const QString& text, const CSVParsingConfig& config, int& maxColumns, TaskStateInfo& ti) {
    QList<QStringList> result;
    assert(!config.splitToken.isEmpty() || !config.parsingScript.isEmpty());
    maxColumns = 0;
    QStringList lines = text.split('\n', QString::SkipEmptyParts);
    // Number passed to the per-line parser: counts only the lines that are actually parsed, starting from 1.
    int lineNum = 1;
    for (int l = 0; l < lines.size(); l++) {
        if (l < config.linesToSkip) {
            continue;
        }
        QString line = lines.at(l).trimmed();
        if (!config.prefixToSkip.isEmpty() && line.startsWith(config.prefixToSkip)) {
            continue;
        }

        QStringList tokens = parseLineIntoTokens(line, config, ti, lineNum);
        if (ti.cancelFlag || ti.hasErrors()) {
            // Do not keep running the parser (possibly a user script) for the rest
            // of the file once it has failed or the task was canceled.
            break;
        }
        lineNum++;
        maxColumns = qMax(maxColumns, tokens.size());
        result.append(tokens);
    }
    return result;
}

// Names of the variables passed to the line parsing script (see parseLineIntoTokens):
// LINE_VAR holds the text of the line, LINE_NUM_VAR holds the line number.
QString ReadCSVAsAnnotationsTask::LINE_VAR("line");
QString ReadCSVAsAnnotationsTask::LINE_NUM_VAR("lineNum");

// Splits a single line into tokens.
//
// line    - the line to split
// config  - if config.parsingScript is empty the line is split by config.splitToken
//           (empty parts are kept only if config.keepEmptyParts is set); otherwise
//           the script is run and its result is used as the token list
// ti      - receives script errors; its cancel flag is checked after the script run
// lineNum - passed to the script as the LINE_NUM_VAR variable
//
// The script may return a single string (one token) or an array (one token per
// element, converted to string). Any other result type is reported as an error.
// Returns an empty list when the script failed or was canceled.
QStringList ReadCSVAsAnnotationsTask::parseLineIntoTokens(const QString& line, const CSVParsingConfig& config, TaskStateInfo& ti, int lineNum) {
    QStringList result;
    if (config.parsingScript.isEmpty()) {
        result = line.split(config.splitToken, config.keepEmptyParts ? QString::KeepEmptyParts: QString::SkipEmptyParts);
        return result;
    }
    //run script
    QMap<QString, QScriptValue> vars;
    QScriptEngine engine;
    vars[LINE_VAR] = QScriptValue(&engine, line);
    vars[LINE_NUM_VAR] = QScriptValue(&engine, lineNum);
    QScriptValue scriptResult = ScriptTask::runScript(&engine, vars, config.parsingScript, ti);
    if (ti.cancelFlag || ti.hasErrors()) {
        return result;
    }
    if (scriptResult.isString()) {
        result.append(scriptResult.toString());
    } else if (scriptResult.isArray()) {
        QScriptValueIterator it(scriptResult);
        while (it.hasNext()) {
            it.next();
            // QScriptValueIterator also visits properties that are hidden from script
            // enumeration, e.g. the 'length' property of an array: these are not tokens.
            if (it.flags() & QScriptValue::SkipInEnumeration) {
                continue;
            }
            QScriptValue val = it.value();
            result.append(val.toString());
        }
    } else {
        ti.setError(tr("Script result is not an array of strings!"));
    }
    return result;
}

// Occurrence counter of a single character (byte); used by the separator guessing code.
struct CharStat {
    // A default-constructed entry describes "no character seen yet".
    CharStat()
        : ch(0),
          count(0)
    {
    }

    char ch;    // the character itself
    int  count; // how many times it was counted
};

// Builds a histogram of the bytes of 'line' (in its local 8-bit encoding).
// The result always has 256 entries, indexed by the unsigned byte value.
// A space or tab that directly repeats the previous byte is not counted, so a run
// of identical whitespace characters contributes a single occurrence.
static QVector<CharStat> countFreqs(const QString& line) {
    QVector<CharStat> stats(256);
    QByteArray bytes = line.toLocal8Bit();
    const char* cur = bytes.constData();
    const char* const end = cur + bytes.length();

    char last = 0;
    for (; cur != end; ++cur) {
        const char c = *cur;
        const bool isWhitespace = (c == ' ' || c == '\t');
        if (isWhitespace && c == last) { //do not count repeating ws
            continue;
        }
        CharStat& slot = stats[uchar(c)];
        slot.ch = c;
        slot.count++;
        last = c;
    }
    return stats;
}

// Combines the histogram of one more line into the accumulated one: every entry
// whose count differs between the two histograms is reset to 0 in 'globalFreqs'.
// Entries with equal counts are left untouched. Both vectors must have the same size.
static void mergeFreqs(QVector<CharStat>& globalFreqs, const QVector<CharStat>& localFreqs) {
    assert(globalFreqs.size() == localFreqs.size());

    const int total = globalFreqs.size();
    for (int idx = 0; idx < total; ++idx) {
        const bool sameCount = (globalFreqs.at(idx).count == localFreqs.at(idx).count);
        if (!sameCount) {
            globalFreqs[idx].count = 0;
        }
    }
}

// Tries to guess the column separator of a CSV text.
//
// The idea: a separator occurs the same number of times in every data line.
// The histogram of the first data line is the baseline; every further data line
// resets the counters of the characters whose count differs from it. The character
// with the largest remaining count wins (the lowest byte value wins a tie).
//
// text   - the text to analyze; lines are separated by '\n', empty lines are dropped
// config - the first config.linesToSkip (non-empty) lines are ignored, as well as
//          every line that starts with a non-empty config.prefixToSkip
//
// Returns a one-character string, or an empty string if fewer than two data lines
// were compared or no character has a stable non-zero count.
QString ReadCSVAsAnnotationsTask::guessSeparatorString(const QString& text, const CSVParsingConfig& config) {
    QVector<CharStat> globalFreqs;
    QStringList lines = text.split('\n', QString::SkipEmptyParts);
    int nMerges = 0;
    for (int l = 0; l < lines.size(); l++) {
        if (l < config.linesToSkip) {
            continue;
        }
        QString line = lines.at(l).trimmed();
        // Filter out prefixed (e.g. comment) lines before anything else, so that
        // such a line can never become the baseline histogram.
        if (!config.prefixToSkip.isEmpty() && line.startsWith(config.prefixToSkip)) {
            continue;
        }
        QVector<CharStat> lineFreqs = countFreqs(line);
        if (globalFreqs.isEmpty()) {
            // first data line: use it as the baseline
            globalFreqs = lineFreqs;
            continue;
        }
        mergeFreqs(globalFreqs, lineFreqs);
        nMerges++;
    }
    CharStat max;
    for (int i = 0; i < globalFreqs.size(); i++) {
        const CharStat& cs = globalFreqs.at(i);
        if (cs.count > max.count) {
            max = cs;
        }
    }
    if (max.count == 0 || nMerges == 0) {
        return QString();
    }
    return QString(QChar(max.ch));

}

} //namespace
