/***************************************************************************
 *   Copyright (C) 2005 by Roberto Virga                                   *
 *   rvirga@users.sf.net                                                   *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/

#include <qregexp.h>

#include "predictordata.h"

// Three-letter residue codes. parseAminoAcid() turns a match at index i into
// PredictorAminoAcid(i), so the order here has to follow that enum (declared
// outside this file). The trailing "???" is a placeholder entry.
const QString PredictorAminoAcidName[] =
  {"GLY", "ALA", "SER", "CYS", "VAL", "THR", "ILE", "PRO", "MET", "ASP",
   "ASN", "LEU", "LYS", "GLU", "GLN", "ARG", "HIS", "PHE", "TYR", "TRP",
   "???"};

// One-letter residue codes, in the same order as PredictorAminoAcidName.
// Indexed by resName in the toString() methods of this file; the trailing
// '?' is the placeholder entry.
const char PredictorAminoAcidAbbrev[] =
  {'G', 'A', 'S', 'C', 'V', 'T', 'I', 'P', 'M', 'D',
   'N', 'L', 'K', 'E', 'Q', 'R', 'H', 'F', 'Y', 'W',
   '?'};

// Per-residue atom counts, in the same order as PredictorAminoAcidName;
// summed by PredictorMonssterSeq::parse(). The placeholder entry counts 0.
// NOTE(review): the values look like non-hydrogen atom counts -- confirm.
const unsigned PredictorAminoAcidAtoms[] =
  {4, 5, 6, 6, 7, 7, 8, 7, 8, 8, 8, 8, 9, 9, 9, 11, 10, 11, 12, 14, 0};

/*
const double PredictorAminoAcidMass[] =
  { 75.07, 89.09, 105.09, 121.16, 117.15, 119.12, 131.17, 115.13, 149.21, 133.10,
    132.12, 131.17, 146.19, 147.13, 146.15, 174.20, 155.16, 165.19, 181.19, 204.23,
    0.0};
*/

// Looks up a three-letter residue code in PredictorAminoAcidName.
// On a match the corresponding PredictorAminoAcid is stored in aa and true is
// returned; otherwise aa is left untouched and false is returned. Only the
// first AminoAcids entries of the table are searched.
bool parseAminoAcid(const QString &name, PredictorAminoAcid &aa)
{
  unsigned index = 0;
  while(index < AminoAcids && PredictorAminoAcidName[index] != name)
    ++index;

  // ran off the end of the searched range: unknown residue code
  if(index >= AminoAcids) return false;

  aa = PredictorAminoAcid(index);
  return true;
}

// Splits text on spaces and converts every token to an unsigned integer
// (base 10). Conversion failures are not reported: the list receives whatever
// toUInt() yields for the token.
QValueList<uint> parseUIntList(const QString &text)
{
  const QStringList tokens = QStringList::split(" ", text);

  QValueList<uint> numbers;
  QStringList::const_iterator token = tokens.constBegin();
  while(token != tokens.constEnd())
  {
    numbers.append((*token).toUInt(0, 10));
    ++token;
  }

  return numbers;
}

// Splits text on spaces and converts every token to a double. Conversion
// failures are not reported: the list receives whatever toDouble() yields
// for the token.
QValueList<double> parseDoubleList(const QString &text)
{
  const QStringList tokens = QStringList::split(" ", text);

  QValueList<double> numbers;
  QStringList::const_iterator token = tokens.constBegin();
  while(token != tokens.constEnd())
  {
    numbers.append((*token).toDouble());
    ++token;
  }

  return numbers;
}

bool PredictorBurials::parse(const QStringList &lines)
{
  QStringList::const_iterator line = lines.constBegin();
  
  if(lines.constEnd() == line
  || !(*line).contains("Average percentage of burial")) return false;
  ++line;
  
  // skip header
  if(lines.constEnd() == line) return false;
  ++line;
  
  // parse table
  for(unsigned row = 0; row <= AminoAcids; ++row)
  {
    if(lines.constEnd() == line) return false;
    
    QValueList<double> values = parseDoubleList((*line).mid(4));
    if(values.count() != AminoAcids+1)  return false;
     
    for(unsigned column = 0; column <= AminoAcids; ++column)
      avg[row][column] = values[column];
                
    ++line;
  }
  
  if(lines.constEnd() == line 
  || !(*line).contains("Average standard deviation of burial")) return false;
  ++line;
  
  // skip header
  if(lines.constEnd() == line) return false;
  ++line;
  
  // parse table
  for(unsigned row = 0; row <= AminoAcids; ++row)
  {
    if(lines.constEnd() == line) return false;
    
    QValueList<double> values = parseDoubleList((*line).mid(4));
    if(values.count() != AminoAcids+1)  return false;
        
    for(unsigned column = 0; column <= AminoAcids; ++column)
      sdev[row][column] = values[column];
                
    ++line;
  }
    
  if(lines.constEnd() == line 
  || !(*line).contains("Number of pairs used")) return false;
  ++line; if(lines.constEnd() == line) return false;
  
  // skip header
  ++line; if(lines.constEnd() == line) return false;
  
  // parse table
  for(unsigned row = 0; row <= AminoAcids; ++row)
  {
    QValueList<unsigned> values = parseUIntList((*line).mid(4));
    if(values.count() != AminoAcids+1)  return false;
        
    for(unsigned column = 0; column <= AminoAcids; ++column)
      pairs[row][column] = values[column];
                
    ++line; if(lines.constEnd() == line) return false;
  }
  
  return true;
}

bool PredictorECovers24::parse(const QStringList &lines)
{
  QStringList::const_iterator line = lines.constBegin();
  
  // skip header
  if(lines.constEnd() == line) return false;
  ++line;
  
  // parse table
  for(unsigned row = 0; row < AminoAcids; ++row)
  {
    if(lines.constEnd() == line) return false;
    
    QValueList<double> values = parseDoubleList((*line).mid(4));
    if(values.count() != 25)  return false;
     
    for(unsigned column = 0; column < 25; ++column)
      value[row][column] = values[column];
                
    ++line;
  }
  
  return true;
}
  
bool PredictorProfile3::parse(const QStringList &lines)
{
  QStringList::const_iterator line = lines.constBegin();
  
  for(unsigned aa = 0; aa < AminoAcids; ++aa)
  {
    // skip header
    if(lines.constEnd() == line) return false;
    ++line;

    for(unsigned set = 0; set < 5; ++set)
      for(unsigned row = 0; row < 5; ++row)
      {
        if(lines.constEnd() == line) return false;
        sscanf(*line, "%lf %lf %lf %lf %lf",
               &value[aa][set][row][0],
               &value[aa][set][row][1],
               &value[aa][set][row][2],
               &value[aa][set][row][3],
               &value[aa][set][row][4]);
        ++line;
      }
  }
  
  return true;
}

bool PredictorQuasi3::parse(const QStringList &lines)
{
  unsigned npar = 0, nmid = 0, nant = 0;
  QStringList::const_iterator line = lines.constBegin();
  
  while(lines.constEnd() != line)
  {
    if((*line).startsWith("PAR")) {
      line++;
      
      for(unsigned row = 0; row < AminoAcids; ++row)
      {
        if(lines.constEnd() == line) return false;
    
        QValueList<double> values = parseDoubleList((*line).mid(4));
        if(values.count() != AminoAcids)  return false;
     
        for(unsigned column = 0; column < AminoAcids; ++column)
          par[npar][row][column] = values[column];
                
        ++line; 
      }
      
      npar++;
    }
    else if((*line).startsWith("MID"))
    {
      line++;
      
      for(unsigned row = 0; row < AminoAcids; ++row)
      {
        if(lines.constEnd() == line) return false;
    
        QValueList<double> values = parseDoubleList((*line).mid(4));
        if(values.count() != AminoAcids)  return false;
     
        for(unsigned column = 0; column < AminoAcids; ++column)
          mid[nmid][row][column] = values[column];
                
        ++line; 
      }
      
      nmid++;
    }
    else if((*line).startsWith("ANT"))
    {
      line++;
      
      for(unsigned row = 0; row < AminoAcids; ++row)
      {
        if(lines.constEnd() == line) return false;
    
        QValueList<double> values = parseDoubleList((*line).mid(4));
        if(values.count() != AminoAcids)  return false;
     
        for(unsigned column = 0; column < AminoAcids; ++column)
          ant[nant][row][column] = values[column];
                
        ++line; 
      }
      
      nant++;
    }
    else
      return false;
  }
  
  return true;
}

// Parses one line made of three residue codes at character offsets 0, 4 and
// 8 (three characters each), followed from offset 12 by three unsigned
// counts and one floating-point value.
// Returns false when any residue code is unknown or when fewer than four
// numbers could be read; in the latter case count[] and value may be only
// partially updated.
bool PredictorScale3B::parse(const QString &line)
{
  if(!parseAminoAcid(line.mid(0, 3), aa[0])) return false;
  if(!parseAminoAcid(line.mid(4, 3), aa[1])) return false;
  if(!parseAminoAcid(line.mid(8, 3), aa[2])) return false;

  // sscanf() reports how many fields it converted; anything short of all
  // four means the line is malformed.
  const int converted = sscanf(line.mid(12), "%u %u %u %lf",
                               &count[0], &count[1], &count[2],
                               &value);

  return converted == 4;
}

bool PredictorS1234::parse(const QStringList &lines)
{
  QStringList::const_iterator line = lines.constBegin();
  
  while(lines.constEnd() != line)
  {
    if((*line).startsWith("##### R1.2")) {
      line++;
      
      for(unsigned row = 0; row < AminoAcids; ++row)
        for(unsigned column = 0; column < AminoAcids; ++column)
        {
          // skip header
          if(lines.constEnd() == line) return false;
          line++;
          
          if(lines.constEnd() == line) return false;
          sscanf(*line, "%lf %lf %lf",
                 &r1_2[row][column][0],
                 &r1_2[row][column][1],
                 &r1_2[row][column][2]);

          line++;
        }
    } else if((*line).startsWith("##### R1.3")) {
      line++;
      
      for(unsigned row = 0; row < AminoAcids; ++row)
        for(unsigned column = 0; column < AminoAcids; ++column)
        {
          // skip header
          if(lines.constEnd() == line) return false;
          line++;
          
          if(lines.constEnd() == line) return false;
          sscanf(*line, "%lf %lf %lf %lf",
                 &r1_3[row][column][0],
                 &r1_3[row][column][1],
                 &r1_3[row][column][2],
                 &r1_3[row][column][3]);

          line++;
        }
    } else if((*line).startsWith("##### R1.4")) {
      line++;
      
      for(unsigned row = 0; row < AminoAcids; ++row)
        for(unsigned column = 0; column < AminoAcids; ++column)
        {
          // skip header
          if(lines.constEnd() == line) return false;
          line++;
          
          if(lines.constEnd() == line) return false;
          sscanf(*line, "%lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf",
                 &r1_4[row][column][0],
                 &r1_4[row][column][1],
                 &r1_4[row][column][2],
                 &r1_4[row][column][3],
                 &r1_4[row][column][4],
                 &r1_4[row][column][5],
                 &r1_4[row][column][6],
                 &r1_4[row][column][7],
                 &r1_4[row][column][8],
                 &r1_4[row][column][9],
                 &r1_4[row][column][10],
                 &r1_4[row][column][11],
                 &r1_4[row][column][12],
                 &r1_4[row][column][13]);

          line++;
        }
    } else if((*line).startsWith("##### R1.5")) {
      line++;
      
      for(unsigned row = 0; row < AminoAcids; ++row)
        for(unsigned column = 0; column < AminoAcids; ++column)
        {
          // skip header
          if(lines.constEnd() == line) return false;
          line++;
          
          if(lines.constEnd() == line) return false;
          sscanf(*line, "%lf %lf %lf %lf %lf %lf %lf",
                 &r1_5[row][column][0],
                 &r1_5[row][column][1],
                 &r1_5[row][column][2],
                 &r1_5[row][column][3],
                 &r1_5[row][column][4],
                 &r1_5[row][column][5],
                 &r1_5[row][column][6]);

          line++;
        }
    } else
      return false;
  }
  
  return true;
}

// Reads the three unsigned coordinates x, y and z from line.
// Returns true only when all three were converted; on failure the members
// may be partially updated.
bool PredictorMonssterAtom::parse(const QString &line)
{
  // sscanf() returns the number of converted fields (or EOF), which is the
  // only indication that the line was malformed.
  return sscanf(line, "%u %u %u", &x, &y, &z) == 3;
}

bool PredictorMonssterInput::parse(const QStringList &lines)
{
  QStringList::const_iterator line = lines.constBegin();

  if(lines.constEnd() == line) return false;
  sscanf(*line, "%u %u %u %u", &random, &ncycle, &icycle, &tsteps);
  ++line;
         
  if(lines.constEnd() == line) return false;
  sscanf(*line, "%u %u", &resmin, &resmax);
  ++line;
  
  if(lines.constEnd() == line) return false;
  ++line;
  
  if(lines.constEnd() == line) return false;
  sscanf(*line, "%lf %lf %lf %lf", &temp[0], &temp[1], &softcore, &central);
  ++line;
  
  if(lines.constEnd() == line) return false;
  sscanf(*line, "%lf %lf %lf %lf %lf", &stiff, &pair, &kdcore, &hbond, &shrt);
  ++line;
    
  if(lines.constEnd() == line) return false;
  sscanf(*line, "%lf %lf %lf", &burial, &multibody, &threebody);
  ++line;
  
  return true;
}

// Parses one residue line: the sequence number in the first five characters,
// the three-letter residue code at character offset 8, and two unsigned
// counts from offset 12 onwards.
// Returns false when the residue code is unknown or when fewer than two
// counts could be read. A sequence number that fails to convert is not
// treated as an error; resSeq then holds whatever toUInt() returned.
bool PredictorMonssterResidue::parse(const QString &line)
{
  resSeq = line.left(5).toUInt(0, 10);

  if(!parseAminoAcid(line.mid(8, 3), resName)) return false;

  // sscanf() reports how many fields it converted; both counts are required.
  return sscanf(line.mid(12), "%u %u", &count[0], &count[1]) == 2;
}

bool PredictorMonssterSeq::parse(const QStringList &lines)
{
  atoms = 0;
  groups.clear();
  QStringList::const_iterator line = lines.constBegin();

  while(lines.constEnd() != line)
  {
    PredictorMonssterResidue item;
    if(!item.parse(*line)) return false;
    
    atoms += PredictorAminoAcidAtoms[item.resName];
    groups << item;
    ++line;
  }

  return true;
}

// Returns the sequence as one-letter residue codes, with a newline inserted
// after every 60 residues (never at the very start or the very end).
QString PredictorMonssterSeq::toString() const
{
  QString sequence;
  unsigned emitted = 0;

  QValueList<PredictorMonssterResidue>::const_iterator residue = groups.begin();
  while(residue != groups.end())
  {
    // start a new row once the current one holds 60 codes
    const bool rowFull = (emitted > 0) && (emitted % 60 == 0);
    if(rowFull) sequence.append('\n');

    sequence.append(PredictorAminoAcidAbbrev[(*residue).resName]);

    ++emitted;
    ++residue;
  }

  return sequence;
}

// Reads an unsigned number into num and a floating-point number into value.
// Returns true only when both were converted; on failure the members may be
// partially updated.
bool PredictorMonssterRestraint::parse(const QString &line)
{
  // sscanf() returns the number of converted fields (or EOF), which is the
  // only indication that the line was malformed.
  return sscanf(line, "%u %lf", &num, &value) == 2;
}

bool PredictorAtomPDB::parse(const QString &line)
{
  const unsigned length = line.length();
  
  if(length < 5 || line.left(6) != "ATOM  ") return false;
  
  serial = (length > 6) ? line.mid(6, 5).toUInt(0, 10) : 0;
  
  if(length > 12)
  {
    element = line.mid(12, 2).stripWhiteSpace();
    if(element.startsWith("H")) element = "H";
    
    name.remoteness = 0;
    const QString greek = " ABGDEZHT";
    while(line.at(14) != greek.at(name.remoteness))
      if(++name.remoteness >= greek.length())
        break;
    
    name.branch = (line.at(15) == ' ') ? 0 : (line.at(15) - '1');
    name.iupac = line.mid(12, 4).stripWhiteSpace();
  }
  else
  {
    element = name.iupac = QString::null;
    name.remoteness = name.branch = 0;
  }
  
  altLoc = (length > 16) ? line.at(16) : QChar(' ');
  
  if(length > 17) {
    if(!parseAminoAcid(line.mid(17, 3), resName)) return false;
  } else
    resName = AminoAcids;
  
  chainID = (length > 21) ? line.at(21) : QChar(' ');
  
  resSeq = (length > 22) ? line.mid(22, 4).toUInt(0, 10) : 0;
  
  iCode = (length > 26) ? line.at(26) : QChar(' ');
  
  x = (length > 30) ? line.mid(30, 8).toDouble() : 0.0;
  
  y = (length > 38) ? line.mid(38, 8).toDouble() : 0.0;
  
  z = (length > 46) ? line.mid(46, 8).toDouble() : 0.0;
  
  occupancy = (length > 54) ? line.mid(54, 6).toDouble() : 0.0;
  
  tempFactor = (length > 60) ? line.mid(60, 6).toDouble() : 0.0;
  
  segID = (length > 72) ? line.mid(72, 4).stripWhiteSpace() : QString::null;
  
  if(length > 76) element = line.mid(76, 2).stripWhiteSpace();
  
  charge = (length > 78) ? line.mid(78, 2).stripWhiteSpace() : QString::null;
  
  return true;
}

// Orders atoms by ascending serial number; used by qHeapSort() in
// PredictorProteinPDB::parse().
bool operator<(const PredictorAtomPDB &a1, const PredictorAtomPDB &a2)
{
  return a2.serial > a1.serial;
}

bool PredictorProteinPDB::parse(const QStringList &lines)
{
  atoms.clear();
  groups = 0;

  for(QStringList::const_iterator line = lines.constBegin();
      line != lines.constEnd(); ++line)
    if((*line).startsWith("ATOM "))
    {
      PredictorAtomPDB item;
      if(!item.parse(*line)) return false;
      
      atoms << item;
      if(item.name.iupac == "CA") groups++;
    }
    else if((*line).startsWith("END"))
      break;
  
  qHeapSort(atoms);
  
  return true;
}

// Returns the sequence as one-letter residue codes, emitting one code for
// every atom whose IUPAC name is "CA" and inserting a newline after every 60
// codes (never at the very start or the very end).
QString PredictorProteinPDB::toString() const
{
  QString sequence;
  unsigned emitted = 0;

  QValueList<PredictorAtomPDB>::const_iterator atom = atoms.begin();
  while(atom != atoms.end())
  {
    if((*atom).name.iupac == "CA")
    {
      // start a new row once the current one holds 60 codes
      const bool rowFull = (emitted > 0) && (emitted % 60 == 0);
      if(rowFull) sequence.append('\n');

      sequence.append(PredictorAminoAcidAbbrev[(*atom).resName]);
      ++emitted;
    }

    ++atom;
  }

  return sequence;
}

bool PredictorProteinNOE::parse(const QString &line)
{
  QStringList values = QStringList::split(" ", line);
  if(values.count() != 21) return false;
  
  select[0].index = values[4].toUInt(0, 10);
  select[0].name = values[5];
  
  select[1].index = values[10].toUInt(0, 10);
  select[1].name = values[11];
  
  kmin = values[14].toDouble();
  rmin = values[16].toDouble();
  kmax = values[18].toDouble();
  rmax = values[20].toDouble();
  
  return true;
}

bool PredictorCharmmInp::parse(const QStringList &lines)
{
  ntemps = nsteps = t.low = t.high = 0;
  
  for(QStringList::const_iterator line = lines.begin(); lines.constEnd() != line; ++line)
  {
    if((*line).stripWhiteSpace().startsWith("!")) continue;
    
    int start = (*line).find(QRegExp("set \\w+ = "));
    if(start < 0) continue;
    start += 4;
    
    int end = (*line).find('=', start);
    if(end < 0) continue;
    
    const QString key = (*line).mid(start, end - start).stripWhiteSpace(),
                  value = (*line).mid(end+1).stripWhiteSpace();
    
    if(key == "ntemps")
      ntemps = value.toUInt(0, 10);
    else if(key == "nsteps")
      nsteps = value.toUInt(0, 10);
    else if(key == "thigh")
      t.high = value.toUInt(0, 10);
    else if(key == "tlow")
      t.low = value.toUInt(0, 10);
  }
  
  return true;
}

bool PredictorMonssterRestart::parse(const QStringList &lines)
{
  QStringList::const_iterator line = lines.constBegin();

  if(lines.constEnd() == line) return false;
  sscanf(*line, "%u %u %lf %lf %lf %lf %lf",
         &line1a[0], &line1a[1],
         &line1b[0], &line1b[1], &line1b[2], &line1b[3], &line1b[4]);
  ++line;
  
  if(lines.constEnd() == line) return false;
  sscanf(*line, "%u %u %u", &line2[0], &line2[1], &line2[2]);
  ++line;
  
  if(lines.constEnd() == line) return false;
  sscanf(*line, "%lf %lf", &line3[0], &line3[1]);
  ++line;
  
  unsigned count = 0;
  
  if(lines.constEnd() == line) return false;
  sscanf(*line, "%u", &count);
  ++line;
  
  chain.clear();
  for(unsigned i = 0; i < count; ++i)
  {
    PredictorMonssterAtom item;
    
    if(lines.constEnd() == line || !item.parse(*line)) return false;
    ++line;
    
    chain << item;
  }
  
  qDebug("...parse OK");
  
  return true;
}
