/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "StockholmFormat.h"

#include "DocumentFormatUtils.h"

#include <core_api/Task.h>
#include <core_api/IOAdapter.h>
#include <gobjects/GObjectTypes.h>
#include <gobjects/MAlignmentObject.h>
#include <util_text/TextUtils.h>
#include <core_api/DNAAlphabet.h>

#include <memory>

namespace {

// Size of the scratch buffer used when reading lines through IOAdapter::readUntil.
const int  BUF_SZ = 1024;
// Size of the scratch buffer used when probing for the end-of-alignment marker / end of file.
const int  SMALL_BUF_SZ = 128;
// Fill character for freshly allocated read buffers.
const char TERM_SYM = '\0';
// Value returned by the read calls when nothing was read.
const int  NO_BYTES = 0;
// Character counted by blockEnded() and appended to every written sequence row.
const char NEW_LINE = '\n';

// Header written by save() at the start of every alignment.
const char* HEADER = "# STOCKHOLM 1.0\n\n";
// Prefix that checkHeader() requires at the start of the data.
const char* HEADER_MIN = "# STOCKHOLM 1.";
// checkHeader() rejects data shorter than this.
// NOTE(review): this is one more than the length of HEADER_MIN, presumably so that
// a minor version digit must follow the prefix - confirm.
const int HEADER_SZ_MIN = 15;
// Line that terminates one alignment (compared against a trimmed line in eofMsa()).
const char* EOF_STR = "//";

// First character of comment and markup lines.
const char COMMENT_OR_MARKUP_LINE = '#';
// Used by loadOneMsa() to detect an empty sequence name.
const char* EMPTY_STR = "";

// Maximum number of alignment columns written per block by save().
const int WRITE_BLOCK_LENGTH = 50;

using namespace GB2;

// Prefix of the only "#=GF" file annotation that getAnnotation() recognises.
const QByteArray FILE_ANNOTATION_ID = "#=GF ID";
// Marker line written by save(); a file containing it is reported as "uni" by isUniFile().
const QByteArray UNI_ANNOTATION_MARK = "# UNIMARK";

//Tags of the annotations understood by this reader; other tags are not supported.
//NO_TAG marks an annotation that carries no tag at all.
enum AnnotationTag {
    NO_TAG = -1,
    ID     = 0
};
//Kinds of annotation lines understood by this reader; other types are not supported.
enum AnnotationType {
    FILE_ANNOTATION = 0,
    UNI_ANNOTATION  = 1
};

//Base record for one parsed annotation line: its kind, its tag and its text value.
//The destructor is virtual because AnnotationBank deletes instances through Annotation*.
struct Annotation {
    AnnotationType type;
    AnnotationTag tag;
    QString val;

    Annotation( AnnotationType ty, AnnotationTag t, QString v )
        : type( ty ), tag( t ), val( v ) {
    }

    virtual ~Annotation() {
    }
};
//Annotation created from a "#=GF" line; adds nothing to the base record.
struct FileAnnotation : public Annotation {
    FileAnnotation( AnnotationType ty, AnnotationTag t, QString v )
        : Annotation( ty, t, v ) {
    }
};
//Annotation created from the Unipro UGENE marker line; adds nothing to the base record.
struct UniAnnotation : public Annotation {
    UniAnnotation( AnnotationType ty, AnnotationTag t, QString v )
        : Annotation( ty, t, v ) {
    }
};

//Owning container for annotations: put every annotation here after creation.
//The bank deletes all stored annotations in its destructor, so it must never be
//copied (a copy would delete the same pointers a second time). Copying is
//therefore disabled; pass the bank by reference.
struct AnnotationBank {
    QList<Annotation*> ann_list;

    AnnotationBank() {
    }

    //Takes ownership of ann; NULL pointers are ignored.
    void addAnnotation( Annotation* ann ) {
        if ( NULL != ann ) {
            ann_list.append( ann );
        }
    }

    ~AnnotationBank() {
        foreach( Annotation* p_ann, ann_list ) {
            delete p_ann;
        }
    }

private:
    //declared but not defined: copying an owning container is forbidden
    AnnotationBank( const AnnotationBank& );
    AnnotationBank& operator=( const AnnotationBank& );
};

//Turns one comment/markup line into an annotation object.
//Only two kinds of lines are recognised:
//  - a line starting with FILE_ANNOTATION_ID and followed by a non-empty value,
//  - a line that is exactly UNI_ANNOTATION_MARK (after trimming).
//Returns a newly allocated annotation (the caller owns it) or NULL for any other line.
Annotation* getAnnotation( const QByteArray& l ) {
    const QByteArray trimmed_line = l.trimmed();

    if ( trimmed_line.startsWith( FILE_ANNOTATION_ID ) ) {
        const QByteArray id_value = trimmed_line.mid( FILE_ANNOTATION_ID.size() ).trimmed();
        if ( id_value.isEmpty() ) {
            return NULL;
        }
        return new FileAnnotation( FILE_ANNOTATION, ID, id_value );
    }
    if ( UNI_ANNOTATION_MARK == trimmed_line ) {
        return new UniAnnotation( UNI_ANNOTATION, NO_TAG, trimmed_line );
    }
    return NULL;
}

//Returns the value of the first file annotation tagged ID found in the bank,
//or a null QString when the bank holds no such annotation.
QString getMsaName( const AnnotationBank& ann_bank ) {
    const int count = ann_bank.ann_list.size();
    for ( int i = 0; i < count; ++i ) {
        const Annotation* current = ann_bank.ann_list.at( i );
        assert( NULL != current );
        const bool is_file_id = ( FILE_ANNOTATION == current->type ) && ( ID == current->tag );
        if ( is_file_id ) {
            return current->val;
        }
    }
    return QString::null;
}

bool isUniFile( const AnnotationBank& ann_bank ) {
	foreach( Annotation* ann, ann_bank.ann_list ) {
		assert( NULL != ann );
		if ( UNI_ANNOTATION == ann->type && UNI_ANNOTATION_MARK == ann->val ) {
			return true;
		}
	}
	return false;
}

//Throws a copy of e unless ( val1 == val2 ) equals expected.
//  expected == true  : val1 and val2 must be equal, otherwise e is thrown;
//  expected == false : val1 and val2 must differ, otherwise e is thrown.
//The exception type E is deduced from the argument so that the thrown object
//keeps its real type. Taking the exception as a reference to a base class and
//writing "throw e" would throw a sliced copy of the base class instead.
//E must be copy-constructible. Callers may still specify only T explicitly.
template<class T, class E>
void checkValThrowException( bool expected, T val1, T val2, const E& e ) {
    const bool are_equal = ( val1 == val2 );
    if ( expected != are_equal ) {
        throw e;
    }
}

//Returns true when the first sz bytes of data are at least HEADER_SZ_MIN long
//and begin with HEADER_MIN.
bool checkHeader( const char* data, int sz ) {
    assert( NULL != data && 0 <= sz );

    const bool long_enough = ( HEADER_SZ_MIN <= sz );
    return long_enough && QByteArray( data, sz ).startsWith( HEADER_MIN );
}

//Reads the next line; if it starts with COMMENT_OR_MARKUP_LINE the whole line is
//consumed, handed to getAnnotation() and the result stored in ann_bank.
//Otherwise the bytes just read are given back with io->skip() and nothing is consumed.
//Returns true if the line was skipped. Throws ReadError when a read returns -1.
bool skipCommentOrMarkup( IOAdapter* io, AnnotationBank& ann_bank ) {
    assert( NULL != io );

    QByteArray chunk( BUF_SZ, TERM_SYM );
    bool line_break_found = false;
    int n_read = io->readUntil( chunk.data(), BUF_SZ, TextUtils::LINE_BREAKS, IOAdapter::Term_Exclude, &line_break_found );
    checkValThrowException<int>( false, -1, n_read, StockholmFormat::ReadError() );

    //not a comment/markup line: rewind what was read and report it
    if ( COMMENT_OR_MARKUP_LINE != chunk.at( 0 ) ) {
        io->skip( -n_read );
        return false;
    }

    //the line may be longer than the buffer: keep reading until the line break
    QByteArray whole_line( chunk.data(), n_read );
    while ( !line_break_found ) {
        n_read = io->readUntil( chunk.data(), BUF_SZ, TextUtils::LINE_BREAKS, IOAdapter::Term_Exclude, &line_break_found );
        checkValThrowException<int>( false, -1, n_read, StockholmFormat::ReadError() );
        if ( NO_BYTES == n_read ) {
            break;
        }
        whole_line.append( QByteArray( chunk.data(), n_read ) );
    }
    ann_bank.addAnnotation( getAnnotation( whole_line ) );
    return true;
}

//Consumes characters while they are line breaks or whitespace and steps one
//character back once another character is met. Returns silently when no more
//bytes can be read. When lines is given, every consumed line-break character is
//appended to it. Throws ReadError when a read returns -1.
void skipBlankLines( IOAdapter* io, QByteArray* lines = NULL ) {
    assert( NULL != io );

    char ch = 0;
    for ( ;; ) {
        const int n_read = io->readBlock( &ch, 1 );
        checkValThrowException<int>( false, -1, n_read, StockholmFormat::ReadError() );
        if ( NO_BYTES == n_read ) {
            return;
        }
        const bool is_break = TextUtils::LINE_BREAKS[(uchar)ch];
        if ( NULL != lines && is_break ) {
            lines->append( ch );
        }
        if ( !is_break && !TextUtils::WHITES[(uchar)ch] ) {
            break;
        }
    }
    //give back the first non-blank character
    io->skip( -1 );
}

//Skips every comment/markup line and every run of blank characters until a
//character that starts something else is met; that character is left unread.
//Annotations found on the skipped lines are stored in ann_bank.
//Throws ReadError when a character cannot be read.
void skipMany( IOAdapter* io, AnnotationBank& ann_bank ) {
    assert( NULL != io );

    for ( ;; ) {
        char ch = 0;
        const bool got_char = io->getChar( &ch );
        checkValThrowException<bool>( false, false, got_char, StockholmFormat::ReadError() );

        const bool is_markup = ( COMMENT_OR_MARKUP_LINE == ch );
        const bool is_blank = TextUtils::LINE_BREAKS[(uchar)ch] || TextUtils::WHITES[(uchar)ch];

        if ( !is_markup && !is_blank ) {
            //start of real data: put the character back and stop
            io->skip( -1 );
            return;
        }
        if ( is_markup ) {
            //the line parser wants to see the leading character itself
            io->skip( -1 );
            skipCommentOrMarkup( io, ann_bank );
        }
        else {
            skipBlankLines( io );
        }
    }
}

//Peeks at the next line (at most SMALL_BUF_SZ bytes) without consuming it and
//returns true when, after trimming, it equals EOF_STR.
//Throws ReadError when the read returns -1.
bool eofMsa( IOAdapter* io ) {
    assert( NULL != io );

    QByteArray probe( SMALL_BUF_SZ, TERM_SYM );
    const int n_read = io->readUntil( probe.data(), SMALL_BUF_SZ, TextUtils::LINE_BREAKS, IOAdapter::Term_Include );
    checkValThrowException( false, -1, n_read, StockholmFormat::ReadError() );
    //rewind: this function only looks ahead
    io->skip( -n_read );

    const QByteArray next_line = QByteArray( probe.data(), n_read ).trimmed();
    return EOF_STR == next_line;
}

//Consumes the end-of-alignment line. The caller is expected to have verified
//with eofMsa() that this line comes next (checked by the assert).
//Throws ReadError when the read returns -1.
void readEofMsa( IOAdapter* io ) {
    assert( eofMsa( io ) );

    QByteArray scratch( SMALL_BUF_SZ, TERM_SYM );
    const int n_read = io->readUntil( scratch.data(), SMALL_BUF_SZ, TextUtils::LINE_BREAKS, IOAdapter::Term_Include );
    checkValThrowException( false, -1, n_read, StockholmFormat::ReadError() );
}

//Appends the next line (without its line break) to `to`, reading it in
//BUF_SZ-sized pieces. Returns the end of the sequence name in the line, i.e. the
//index of the first whitespace character in `to`, or to.size() when there is none.
//Throws ReadError when a read returns -1.
int getLine( IOAdapter* io, QByteArray& to ) {
    assert( NULL != io );

    QByteArray chunk( BUF_SZ, TERM_SYM );
    bool line_break_found = false;
    do {
        const int n_read = io->readUntil( chunk.data(), BUF_SZ, TextUtils::LINE_BREAKS, IOAdapter::Term_Exclude, &line_break_found );
        checkValThrowException<int>( false, -1, n_read, StockholmFormat::ReadError() );
        if ( NO_BYTES == n_read ) {
            break;
        }
        to.append( QByteArray( chunk.data(), n_read ) );
    } while ( !line_break_found );

    const int total = to.size();
    int name_end = 0;
    while ( name_end < total && !TextUtils::WHITES[(uchar)to.at( name_end )] ) {
        ++name_end;
    }
    return name_end;
}

//Skips the blank characters that follow a line and tells whether the current
//block is over: either the end-of-alignment line comes next, or more than one
//NEW_LINE character was skipped.
bool blockEnded( IOAdapter* io ) {
    assert( NULL != io );

    QByteArray skipped_breaks;
    skipBlankLines( io, &skipped_breaks );
    if ( eofMsa( io ) ) {
        return true;
    }
    return 1 < skipped_breaks.count( NEW_LINE );
}

//Called after reading a block: returns true when the alignment has at least one
//sequence and all sequences are of the same length as the first one.
bool checkSeqLength( const MAlignment& msa ) {
    const int row_count = msa.alignedSeqs.size();
    if ( 0 == row_count ) {
        return false;
    }

    const int expected_len = msa.alignedSeqs[0].sequence.size();
    for ( int row = 1; row < row_count; ++row ) {
        if ( expected_len != msa.alignedSeqs[row].sequence.size() ) {
            return false;
        }
    }
    return true;
}

//Returns true when the alignment already holds a sequence called `name`.
bool nameWasBefore( const MAlignment& msa, const QString& name ) {
    const int row_count = msa.getNumSequences();
    for ( int row = 0; row < row_count; ++row ) {
        if ( msa.alignedSeqs[row].name == name ) {
            return true;
        }
    }
    return false;
}

//Replaces every '.' in the sequence with '-', in place.
void changeGaps( QByteArray& seq ) {
    seq.replace( '.', '-' );
}

//Reads one alignment from io into msa and collects its annotations in ann_bank.
//Steps:
//  1. skip leading blank lines and verify the header line with checkHeader();
//  2. read blocks until the end-of-alignment line. Rows of the first block
//     create the sequences; rows of every later block are appended, in order,
//     to the sequences created by the first block;
//  3. after every block verify that all sequences have the same length;
//  4. assign an alphabet to the finished alignment.
//tsi.progress is updated from io->getProgress() after every processed line.
//Throws ReadError when a read fails and BadFileData when the content is invalid.
void loadOneMsa( IOAdapter* io, TaskStateInfo& tsi, MAlignment& msa, AnnotationBank& ann_bank ) {
    assert( NULL != io );

    QByteArray buf( BUF_SZ, TERM_SYM );
    int ret = 0;

    //skip header
    skipBlankLines( io );
    ret = io->readUntil( buf.data(), BUF_SZ, TextUtils::LINE_BREAKS, IOAdapter::Term_Exclude );
    checkValThrowException<int>( false, -1, ret, StockholmFormat::ReadError() );
    if ( !checkHeader( buf.data(), ret ) ) {
        throw StockholmFormat::BadFileData( StockholmFormat::tr( "invalid file: bad header line" ) );
    }
    //read blocks
    bool firstBlock = true;
    while ( 1 ) {
        skipMany( io, ann_bank );
        if ( eofMsa( io ) ) {
            break;
        }

        bool hasSeqs = true;
        int seq_ind = 0;
        while ( hasSeqs ) {
            QByteArray line;
            int name_end = getLine( io, line );
            QByteArray name = line.left( name_end );
            QByteArray seq  = line.mid( name_end + 1 ).trimmed();

            //markup lines inside a block carry no sequence data
            if ( name.startsWith( COMMENT_OR_MARKUP_LINE ) ) {
                ann_bank.addAnnotation( getAnnotation( line ) );
                hasSeqs = !blockEnded( io );
                tsi.progress = io->getProgress();
                continue;
            }
            changeGaps( seq );
            if ( firstBlock ) {
                if ( EMPTY_STR == name ) {
                    throw StockholmFormat::BadFileData( StockholmFormat::tr( "invalid file: empty sequence name" ) );
                }
                if ( nameWasBefore( msa, QString( name.data() ) ) ) {
                    throw StockholmFormat::BadFileData( StockholmFormat::tr( "invalid file: equal sequence names in one block" ) );
                }
                msa.alignedSeqs.append( MAlignmentItem( name.data(), seq ) );
            }
            else {
                //a later block must not have more rows than the first one:
                //indexing past the end of alignedSeqs would read out of range
                if ( msa.alignedSeqs.size() <= seq_ind ) {
                    throw StockholmFormat::BadFileData( StockholmFormat::tr( "invalid file: sequence names are not equal in blocks" ) );
                }
                MAlignmentItem& item = msa.alignedSeqs[seq_ind];
                if( name != item.name ) {
                    throw StockholmFormat::BadFileData( StockholmFormat::tr( "invalid file: sequence names are not equal in blocks" ) );
                }
                item.sequence.append( seq );
            }
            seq_ind++;
            hasSeqs = !blockEnded( io );
            tsi.progress = io->getProgress();
        }
        firstBlock = false;
        //check sequence length after every block
        if ( !checkSeqLength( msa ) ) {
            throw StockholmFormat::BadFileData( StockholmFormat::tr( "invalid file: sequences in block are not of equal size" ) );
        }
    }// while( 1 )

    if ( !msa.getNumSequences() ) {
        throw StockholmFormat::BadFileData( StockholmFormat::tr( "invalid file: empty sequence alignment" ) );
    }

    DocumentFormatUtils::assignAlphabet(msa);
    if  (msa.alphabet == NULL) {
        throw StockholmFormat::BadFileData( StockholmFormat::tr( "invalid file: unknown alphabet" ) );
    }
}

//Tells whether the input is exhausted. When the end-of-alignment line comes
//next, that line and the blank characters after it are consumed, and the result
//is true only if nothing else can be read (the probe read is rewound).
//When another line comes next, nothing is consumed and false is returned.
//Throws ReadError when a read returns -1.
bool eof( IOAdapter* io ) {
    assert( NULL != io );

    if ( eofMsa( io ) ) {
        readEofMsa( io );
        skipBlankLines( io );

        QByteArray probe( SMALL_BUF_SZ, TERM_SYM );
        const int n_read = io->readBlock( probe.data(), SMALL_BUF_SZ );
        checkValThrowException<int>( false, -1, n_read, StockholmFormat::ReadError() );
        io->skip( -n_read );
        return 0 == n_read;
    }
    return false;
}

//Reads alignments from io until eof() reports the end and appends one new
//MAlignmentObject per alignment to l. An object is named after the alignment's
//ID annotation; when there is none, or the name was already used, the name is
//MA_OBJECT_NAME + "_" + the current size of l.
//uni_file is set to true as soon as one alignment carries the UGENE marker.
void load( IOAdapter* io, QList<GObject*>& l, TaskStateInfo& tsi, bool& uni_file ) {
    assert( NULL != io );

    QStringList used_names;
    while( !eof( io ) ) {
        MAlignment msa;
        AnnotationBank ann_bank;
        loadOneMsa( io, tsi, msa, ann_bank );

        if ( !uni_file ) {
            uni_file = isUniFile( ann_bank );
        }

        QString obj_name = getMsaName( ann_bank );
        if ( QString::null == obj_name || used_names.contains( obj_name ) ) {
            obj_name = QString( MA_OBJECT_NAME ) + "_" + QString::number( l.size() );
        }
        used_names.append( obj_name );

        l.append( new MAlignmentObject( msa, obj_name ) );
    }
}

int getMaxNameLen( const MAlignment& msa ) {
    assert( msa.getNumSequences() );
    int sz = msa.getNumSequences();
    int max_len = msa.alignedSeqs[0].name.size();
	
    for( int i = 0; i < sz; ++i ) {
        int name_len =  msa.alignedSeqs[i].name.size();
        max_len = ( max_len < name_len )? name_len: max_len;
    }
    return max_len;
}
//Returns the gap between name and sequence in a block: four spaces plus
//`diff` more spaces.
QByteArray getNameSeqGap( int diff ) {
    assert( 0 <= diff );

    QByteArray gap( "    " );
    if ( 0 < diff ) {
        gap.append( QByteArray( diff, ' ' ) );
    }
    return gap;
}

void save( IOAdapter* io, const MAlignment& msa ) {
    assert( NULL != io );
    assert( checkSeqLength( msa ) && msa.getNumSequences() );
    int ret = 0;
	
    QByteArray header( HEADER );
    ret = io->writeBlock( header );
	checkValThrowException<int>( true, header.size(), ret, StockholmFormat::WriteError() );
	QByteArray unimark = UNI_ANNOTATION_MARK + "\n\n";
	ret = io->writeBlock( unimark );
	checkValThrowException<int>( true, unimark.size(), ret, StockholmFormat::WriteError() );
	
	//write sequences
    int name_max_len = getMaxNameLen( msa );
    int seq_len = msa.getLength();
    int cur_seq_pos = 0;
    while ( 0 < seq_len ) {
        int block_len = ( WRITE_BLOCK_LENGTH >= seq_len )? seq_len: WRITE_BLOCK_LENGTH;
		
        //write block
        int seq_num = msa.getNumSequences();
        for( int i = 0; i < seq_num; ++i ) {
            QByteArray name = msa.alignedSeqs[i].name.toAscii();
            TextUtils::replace(name.data(), name.length(), TextUtils::WHITES, '_');
            name += getNameSeqGap( name_max_len - msa.alignedSeqs[i].name.size() );
            ret = io->writeBlock( name );
			checkValThrowException<int>( true, name.size(), ret, StockholmFormat::WriteError() );
			QByteArray seq = msa.alignedSeqs[i].sequence.mid( cur_seq_pos, block_len ) + NEW_LINE;
            ret = io->writeBlock( seq );
			checkValThrowException<int>( true, seq.size(), ret, StockholmFormat::WriteError() );
		}
        ret = io->writeBlock( QByteArray( "\n\n" ) );
		checkValThrowException<int>( true, 2, ret, StockholmFormat::WriteError() );
		seq_len -= block_len;
        cur_seq_pos += block_len;
    }
    //write eof
    ret = io->writeBlock( QByteArray( "//\n" ) );
	checkValThrowException<int>( true, 3, ret, StockholmFormat::WriteError() );
}

} // namespace

namespace GB2 {
//Creates the format object and sets its translated display name.
StockholmFormat::StockholmFormat( QObject *obj )
    : DocumentFormat( obj )
{
    const QString display_name = tr( "Stockholm" );
    format_name = display_name;
}

//Returns the list of supported file extensions, which holds "sto" only.
QStringList StockholmFormat::getSupportedDocumentFileExtensions() {
    return QStringList() << "sto";
}

//Loads all alignments from `url` and returns a new Document holding them.
//A document whose file does not carry the UGENE marker is created with the
//CREATED_NOT_BY_UGENE write-lock reason.
//On any failure tsi.error is set, already created objects are deleted and
//NULL is returned.
Document* StockholmFormat::loadExistingDocument( IOAdapterFactory *io_factory, const QString &url, TaskStateInfo &tsi, const QVariantMap &fs ) {
    assert( NULL != io_factory );

    std::auto_ptr<IOAdapter> adapter( io_factory->createIOAdapter() );
    if ( NULL == adapter.get() ) {
        tsi.error = tr( "Internal error: Can't create IOAdapter for file '%1'" ).arg( url );
        return NULL;
    }
    if( !adapter->open( url, IOAdapterMode_Read ) ) {
        tsi.error = tr( "Can't open file for reading: '%1'" ).arg( url );
        return NULL;
    }

    QList<GObject*> loaded_objects;
    try {
        bool made_by_ugene = false;
        load( adapter.get(), loaded_objects, tsi, made_by_ugene );
        adapter->close();

        QString lock_reason;
        if ( !made_by_ugene ) {
            lock_reason = DocumentFormat::CREATED_NOT_BY_UGENE;
        }
        return new Document( this, io_factory, url, loaded_objects, fs, lock_reason );
    }
    catch ( const StockholmBaseException& e ) {
        tsi.error = e.msg;
    }
    catch ( ... ) {
        tsi.error = tr( "unknown error occurred" );
    }

    //loading failed: nobody owns the objects created so far
    foreach( GObject* stale, loaded_objects ) {
        delete stale;
    }
    return NULL;
}

//Writes every alignment object of doc to newDocURL, or to the document's own
//URL when newDocURL is empty. When io_factory is NULL the document's factory
//is used. On failure tsi.error is set and the function returns.
void StockholmFormat::storeDocument( Document* doc, TaskStateInfo& tsi, IOAdapterFactory* io_factory, const QString& newDocURL) {
    assert( NULL != doc );
    io_factory = ( io_factory )? io_factory: doc->getIOAdapterFactory();
    QString url = ( newDocURL.isEmpty() )? doc->getURL(): newDocURL;

    std::auto_ptr<IOAdapter> io( io_factory->createIOAdapter() );
    //the factory may fail to create an adapter: report it instead of
    //dereferencing NULL (same check as in loadExistingDocument)
    if ( NULL == io.get() ) {
        tsi.error = tr( "Internal error: Can't create IOAdapter for file '%1'" ).arg( url );
        return;
    }

    if ( !io->open( url, IOAdapterMode_Write ) ) {
        tsi.error = tr( "can't open file for writing" );
        return;
    }
    try {
        foreach( GObject* p_obj, doc->getObjects() ) {
            const MAlignmentObject* aln_obj = qobject_cast<const MAlignmentObject*>( p_obj );
            assert( NULL != aln_obj );
            save( io.get(), aln_obj->getMAlignment() );
        }
        io->close();
    }
    catch ( const StockholmBaseException& ex ) {
        tsi.error = ex.msg;
    }
    catch ( ... ) {
        tsi.error = tr( "unknown error occurred" );
    }
}

//Raw data is accepted when it passes checkHeader().
bool StockholmFormat::isDataFormatSupported( const char* data, int size ) const {
    const bool header_ok = checkHeader( data, size );
    return header_ok;
}

//Every operation is allowed for multiple alignment objects and none for any
//other object type; the operation itself is not inspected.
//(A disabled earlier variant allowed only DocObjectOp_Add, and only for a
//document without objects.)
bool StockholmFormat::isObjectOpSupported( const Document *doc, DocObjectOp op, GObjectType t ) const {
    Q_UNUSED( op );
    assert( NULL != doc );

    const bool is_alignment = !( GObjectTypes::MULTIPLE_ALIGNMENT != t );
    return is_alignment;
}

//The constraints are satisfied when every requested object type is
//MULTIPLE_ALIGNMENT and, if raw data has to be checked, the raw data passes
//isDataFormatSupported().
bool StockholmFormat::checkConstraints( const DocumentFormatConstraints &c ) const {
    foreach ( GObjectType requested, c.supportedObjectTypes ) {
        if ( GObjectTypes::MULTIPLE_ALIGNMENT != requested ) {
            return false;
        }
    }
    if ( !c.checkRawData ) {
        return true;
    }
    return isDataFormatSupported( c.rawData.constData(), c.rawData.size() );
}
	
} //GB2
