/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008,2009 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "FindWorker.h"

#include <workflow/IntegralBusModel.h>
#include <workflow/WorkflowEnv.h>
#include <workflow/WorkflowRegistry.h>
#include <workflow_support/CoreDataTypes.h>
#include <workflow_library/BioDatatypes.h>
#include <workflow_library/BioActorLibrary.h>
#include <workflow_support/DelegateEditors.h>
#include <workflow_support/CoreLibConstants.h>

#include <datatype/DNASequence.h>
#include <core_api/DNATranslation.h>
#include <core_api/DNAAlphabet.h>
#include <core_api/AppContext.h>
#include <core_api/Log.h>
#include <util_algorithm/FindAlgorithmTask.h>
#include <util_tasks/FailTask.h>
#include <util_tasks/TaskSignalMapper.h>

#include "CoreLib.h"

/* TRANSLATOR GB2::LocalWorkflow::FindWorker */

namespace GB2 {
namespace LocalWorkflow {

// File-local logger bound to the ULOG_CAT_WD category; used below to report
// null-sequence errors and the number of matches found.
static LogCategory log(ULOG_CAT_WD);

/***************************
 * FindWorkerFactory
 ***************************/
// Identifiers of the actor attributes declared in FindWorkerFactory::init()
// and read back by FindPrompter and FindWorker.
// NOTE(review): the single-letter prefixes (a_, b_, ...) look like a sort key
// that fixes the display order of the attributes — confirm.
static const QString STRAND_ATTR("d_strand");    // which strand(s) to search
static const QString NAME_ATTR("a_name");        // name given to result annotations
static const QString PATTERN_ATTR("b_pattern");  // subsequence pattern to look for
static const QString ERR_ATTR("c_maxerr");       // max allowed mismatches
static const QString ALGO_ATTR("f_algorithm");   // allow insertions/deletions
static const QString AMINO_ATTR("e_translate");  // search in translated sequence

// Identifier under which the actor prototype is described.
const QString FindWorkerFactory::ACTOR_ID("find.pattern");

void FindWorkerFactory::init() {

    QMap<Descriptor, DataTypePtr> m;
    m[BioActorLibrary::SEQ_SLOT()] = BioDataTypes::DNA_SEQUENCE_TYPE();
    m[BioActorLibrary::FEATURE_TABLE_SLOT()] = BioDataTypes::ANNOTATION_TABLE_TYPE();
    DataTypePtr inSet(new MapDataType(Descriptor("regioned.sequence"), m));
    DataTypeRegistry* dr = WorkflowEnv::getDataTypeRegistry();
    assert(dr);
    dr->registerEntry(inSet);

    QList<PortDescriptor*> p; QList<Attribute*> a;
    {
        Descriptor ind(CoreLibConstants::IN_PORT_ID, FindWorker::tr("Input data"), FindWorker::tr("An input sequence and set of regions to search in."));
        Descriptor oud(CoreLibConstants::OUT_PORT_ID, FindWorker::tr("Pattern annotations"), FindWorker::tr("Found regions"));
        p << new PortDescriptor(ind, inSet, true);
        p << new PortDescriptor(oud, BioDataTypes::ANNOTATION_TABLE_TYPE(), false, true);
    }
    
    //DNATranslation*     complementTT;
    //DNATranslation*     proteinTT;
    //LRegion             searchRegion;

    {
        Descriptor nd(NAME_ATTR, FindWorker::tr("Annotate as"), FindWorker::tr("Name of the result annotations marking found regions."));
        Descriptor pd(PATTERN_ATTR, FindWorker::tr("Pattern"), FindWorker::tr("A subsequence pattern to look for."));
        Descriptor ed(ERR_ATTR, FindWorker::tr("Max mismatches"), 
            FindWorker::tr("The search stringency measured in number of max allowed mismatching symbols to the pattern."));
        Descriptor ald(ALGO_ATTR, FindWorker::tr("Allow insertions/deletions"), 
            FindWorker::tr("Take into account possibility of insertions/deletions when searching. By default substitutions only considered."));
        Descriptor amd(AMINO_ATTR, FindWorker::tr("Search in translation"), FindWorker::tr("Translate a supplied nucleotide sequence to protein then search in the translated sequence."));
        Descriptor sd(STRAND_ATTR, FindWorker::tr("Search in"), FindWorker::tr("Which strands should be searched: direct, complement or both."));

        a << new Attribute(nd, CoreDataTypes::STRING_TYPE(), true, "misc_feature");
        a << new Attribute(pd, CoreDataTypes::STRING_TYPE(), true);
        a << new Attribute(ed, CoreDataTypes::NUM_TYPE(), false, 0);
        a << new Attribute(sd, CoreDataTypes::NUM_TYPE(), false, 0);
        a << new Attribute(ald, CoreDataTypes::BOOL_TYPE(), false, false);
        a << new Attribute(amd, CoreDataTypes::BOOL_TYPE(), false, false);
    }

    Descriptor desc(ACTOR_ID, FindWorker::tr("Find substrings"), FindWorker::tr("Finds regions of similarity to the specified string in each input sequence (nucleotide or protein one). "
        "<p>Under the hood is the well-known Smith-Waterman algorithm for performing local sequence alignment."));
    ActorPrototype* proto = new BusActorPrototype(desc, p, a);
    QMap<QString, PropertyDelegate*> delegates;    
    
    QVariantMap lenMap; lenMap["minimum"] = QVariant(0); lenMap["maximum"] = QVariant(INT_MAX);
    delegates[ERR_ATTR] = new SpinBoxDelegate(lenMap);
    
    QVariantMap strandMap; 
    strandMap[FindWorker::tr("both strands")] = QVariant(FindAlgorithmStrand_Both);
    strandMap[FindWorker::tr("direct strand")] = QVariant(FindAlgorithmStrand_Direct);
    strandMap[FindWorker::tr("complement strand")] = QVariant(FindAlgorithmStrand_Complement);
    delegates[STRAND_ATTR] = new ComboBoxDelegate(strandMap);
    
    proto->setEditor(new DelegateEditor(delegates));
    proto->setIconPath( ":core/images/find_dialog.png" );
    proto->setPrompter(new FindPrompter());
    WorkflowEnv::getProtoRegistry()->registerProto(BioActorLibrary::CATEGORY_BASIC(), proto);

    DomainFactory* localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
    localDomain->registerEntry(new FindWorkerFactory());
}


/***************************
 * FindPrompter
 ***************************/
// Builds the rich-text summary shown for the "find pattern" actor.
//
// The text names the producers connected to the sequence and annotation
// slots (when present), the pattern, the match stringency, the strand(s)
// searched and the name given to the resulting annotations.
//
// Returns the composed, translated markup string.
QString FindPrompter::composeRichDoc() {
    // qobject_cast yields NULL when the port is absent or of another type;
    // treat that the same as "nothing connected" instead of dereferencing it.
    BusPort* input = qobject_cast<BusPort*>(target->getPort(CoreLibConstants::IN_PORT_ID));
    Actor* seqProducer = input ? input->getProducer(BioActorLibrary::SEQ_SLOT_ID) : NULL;
    Actor* annProducer = input ? input->getProducer(BioActorLibrary::FEATURE_TABLE_SLOT_ID) : NULL;

    QString seqName = seqProducer ? tr("In each sequence from <u>%1</u>,").arg(seqProducer->getLabel()) : "";
    QString annName = annProducer ? tr(" within a set of regions from <u>%1</u>").arg(annProducer->getLabel()) : "";

    const FindAlgorithmStrand strand = FindAlgorithmStrand(getParameter(STRAND_ATTR).toInt());
    const int maxErr = getParameter(ERR_ATTR).toInt();
    QString pattern = getRequiredParam(PATTERN_ATTR);

    QString strandName;
    switch (strand) {
    case FindAlgorithmStrand_Both: strandName = FindWorker::tr("both strands"); break;
    case FindAlgorithmStrand_Direct: strandName = FindWorker::tr("direct strand"); break;
    case FindAlgorithmStrand_Complement: strandName = FindWorker::tr("complement strand"); break;
    default: break; // unknown value: leave the strand name empty
    }
    if (getParameter(AMINO_ATTR).toBool()) {
        strandName += tr(" of translated sequence");
    }

    QString resultName = getRequiredParam(NAME_ATTR);
    QString match = maxErr ? tr("matches with <u>no more than %1 errors</u>").arg(maxErr) : tr("exact matches");

    //FIXME mention search algorithm? (ALGO_ATTR is currently not reflected in the text)
    // All six values are substituted in ONE arg() call. Chained .arg() calls
    // re-scan already substituted text, so a label or pattern that itself
    // contains a place marker such as "%3" would be replaced by a later
    // argument and garble the output.
    QString doc = tr("%1 find pattern <u>%2</u> %3."
        "<br>Look for <u>%4</u> in <u>%5</u>."
        "<br>Output the list of found regions annotated as <u>%6</u>.")
        .arg(seqName, pattern, annName, match, strandName, resultName);

    return doc;
}

/***************************
 * FindWorker
 ***************************/
// Creates a worker for actor `a`. Both channels start out as NULL and are
// assigned in init().
FindWorker::FindWorker(Actor* a)
    : BaseWorker(a),
      input(NULL),
      output(NULL)
{
}

// Resolves the worker's input and output channels from `ports` using the
// shared core-library port identifiers.
void FindWorker::init() {
    output = ports.value(CoreLibConstants::OUT_PORT_ID);
    input = ports.value(CoreLibConstants::IN_PORT_ID);
}

// Returns true when an input channel exists and has a message waiting.
bool FindWorker::isReady() {
    if (!input) {
        return false;
    }
    return input->hasMessage();
}

// Processes one input message.
//
// Reads the search parameters from the actor, extracts the sequence (and,
// when present, the annotation table) from the input data and returns a
// search task whose completion is routed to sl_taskFinished().
//
// For a null sequence: returns a FailTask when failFast is set; otherwise
// logs the error, emits an empty annotation-table message and returns NULL.
Task* FindWorker::tick() {
    // The returned message object is not used below; presumably the call is
    // needed to set up script values before the parameters are read.
    // NOTE(review): the payload is fetched by a separate input->get() further
    // down — confirm this helper does not already dequeue the message.
    Message inputMessage = getMessageAndSetupScriptValues(input);

    cfg.strand = FindAlgorithmStrand(actor->getParameter(STRAND_ATTR)->getAttributeValue<int>());
    cfg.maxErr = actor->getParameter(ERR_ATTR)->getAttributeValue<int>();
    cfg.insDelAlg = actor->getParameter(ALGO_ATTR)->getAttributeValue<bool>();
    resultName = actor->getParameter(NAME_ATTR)->getAttributeValue<QString>();
    cfg.pattern = actor->getParameter(PATTERN_ATTR)->getAttributeValue<QString>().toAscii();

    const QVariantMap data = input->get().getData().toMap();
    DNASequence seq = data.value(BioActorLibrary::SEQ_SLOT_ID).value<DNASequence>();

    // Nothing to search in: fail or report, depending on failFast.
    if (seq.isNull()) {
        QString err = tr("Null sequence supplied to FindWorker: %1").arg(seq.getName());
        if (failFast) {
            return new FailTask(err);
        }
        log.error(err);
        output->put(Message(BioDataTypes::ANNOTATION_TABLE_TYPE(), QVariant()));
        if (input->isEnded()) {
            output->setEnded();
        }
        return NULL;
    }

    FindAlgorithmTaskSettings config(cfg);
    config.sequence = QByteArray(seq.constData(), seq.length());

    // A complement translation is required for anything but a direct-only
    // search; when none is available, fall back to the direct strand.
    const bool wantsComplement = (config.strand != FindAlgorithmStrand_Direct);
    if (wantsComplement) {
        QList<DNATranslation*> complTranslations = AppContext::getDNATranslationRegistry()->
            lookupTranslation(seq.alphabet, DNATranslationType_NUCL_2_COMPLNUCL);
        if (complTranslations.isEmpty()) {
            config.strand = FindAlgorithmStrand_Direct;
        } else {
            config.complementTT = complTranslations.first();
        }
    }

    // Optional search in the amino translation of the sequence.
    if (actor->getParameter(AMINO_ATTR)->getAttributeValue<bool>()) {
        DNATranslationType translationType = DNATranslationType_RAW_2_AMINO;
        if (seq.alphabet->getType() == DNAAlphabet_NUCL) {
            translationType = DNATranslationType_NUCL_2_AMINO;
        }
        QList<DNATranslation*> aminoTranslations =
            AppContext::getDNATranslationRegistry()->lookupTranslation(seq.alphabet, translationType);
        if (!aminoTranslations.isEmpty()) {
            config.proteinTT = aminoTranslations.first(); //FIXME let user choose or use hints ?
        }
    }

    // Search inside the supplied regions when an annotation table came with
    // the message; otherwise search the whole sequence.
    Task* searchTask = NULL;
    if (data.contains(BioActorLibrary::FEATURE_TABLE_SLOT_ID)) {
        const QList<SharedAnnotationData>& regionTable =
            qVariantValue<QList<SharedAnnotationData> >(data.value(BioActorLibrary::FEATURE_TABLE_SLOT_ID));
        searchTask = new FindAllRegionsTask(config, regionTable);
    } else {
        config.searchRegion.len = seq.length();
        searchTask = new FindAlgorithmTask(config);
    }
    connect(new TaskSignalMapper(searchTask), SIGNAL(si_taskFinished(Task*)), SLOT(sl_taskFinished(Task*)));
    return searchTask;
}

// Slot invoked when a search task started by tick() has finished.
//
// Collects the results from either a FindAlgorithmTask or a
// FindAllRegionsTask, converts them to an annotation table named
// `resultName`, puts it on the output channel and marks the output as ended
// once the input has ended.
//
// t - the finished task; expected to be one of the two task types above.
void FindWorker::sl_taskFinished(Task* t) {
    FindAlgorithmTask* ft = qobject_cast<FindAlgorithmTask*>(t);
    FindAllRegionsTask* at = qobject_cast<FindAllRegionsTask*>(t);
    assert(ft || at);
    if (ft == NULL && at == NULL) {
        // The assert above is compiled out when NDEBUG is defined; without
        // this guard the result lookup below would dereference a NULL pointer.
        log.error(tr("Unexpected task type passed to FindWorker"));
        return;
    }
    QList<FindAlgorithmResult> res = ft ? ft->popResults() : at->getResult();
    if (output) {
        QVariant v = qVariantFromValue<QList<SharedAnnotationData> >(FindAlgorithmResult::toTable(res, resultName));
        output->put(Message(BioDataTypes::ANNOTATION_TABLE_TYPE(), v));
        if (input->isEnded()) {
            output->setEnded();
        }
        log.info(tr("Found %1 matches of pattern '%2'").arg(res.size()).arg(QString(cfg.pattern)));
    }
}

// Returns true when there is no input channel or the input channel has ended.
bool FindWorker::isDone() {
    if (!input) {
        return true;
    }
    return input->isEnded();
}

// Intentionally empty: this worker performs no cleanup.
void FindWorker::cleanup()
{
}


/***************************
 * FindAllRegionsTask
 ***************************/
// Stores a copy of the search settings `s` and of the annotation list `l`
// whose locations will be searched. The task is created with
// TaskFlag_NoRun; the per-region searches are added as subtasks in prepare().
FindAllRegionsTask::FindAllRegionsTask(const FindAlgorithmTaskSettings& s, const QList<SharedAnnotationData>& l)
    : Task(tr("FindAllRegionsTask"), TaskFlag_NoRun),
      cfg(s),
      regions(l)
{
}

// Adds one FindAlgorithmTask subtask for every location region of every
// stored annotation. Each subtask receives the shared settings with
// searchRegion set to that region.
void FindAllRegionsTask::prepare() {
    foreach (SharedAnnotationData annotation, regions) {
        foreach (const LRegion& region, annotation->location) {
            // cfg is reused as a template; only the region changes per subtask.
            cfg.searchRegion = region;
            FindAlgorithmTask* regionSearch = new FindAlgorithmTask(cfg);
            addSubTask(regionSearch);
        }
    }
}

// Gathers the results of all subtasks into a single list, in subtask order.
// Results are taken with popResults() on each subtask.
// The cast result is not checked: every subtask is assumed to be a
// FindAlgorithmTask, which is the only kind prepare() adds.
QList<FindAlgorithmResult> FindAllRegionsTask::getResult() {
    QList<FindAlgorithmResult> collected;
    foreach (Task* subtask, getSubtasks()) {
        FindAlgorithmTask* regionSearch = qobject_cast<FindAlgorithmTask*>(subtask);
        collected << regionSearch->popResults();
    }
    return collected;
}

} //namespace LocalWorkflow
} //namespace GB2
