//
// =================================================================
//
//  Example: how to match a query graph against the small molecule
//           database
//
// =================================================================
//

#ifndef  __STRING_H
#include <string.h>
#endif

#ifndef  __MMDB_Graph__
#include "mmdb_graph.h"
#endif

#ifndef  __MMDB_SBase__
#include "mmdb_sbase.h"
#endif

#ifndef  __MMDB_Tables__
#include "mmdb_tables.h"
#endif


//  MatchGraphs() compares graphs G1 and G2 and prints every common
//  subgraph found that covers at least 75% of the vertices of both
//  graphs.
//
//    G1, G2   graphs to compare; they are only queried here and are
//             not deleted by this function.
//
//  All output goes to stdout. If the graph sizes make a 75% match
//  impossible, no matching is attempted and the results obtained from
//  the (unused) matcher are reported.
void MatchGraphs ( PCGraph G1, PCGraph G2 )  {
PCVertex     V1,V2;
PCGraphMatch U;
realtype     p1,p2;
ivector      F1,F2;
int          n1,n2,m1,m2, minMatch,nMatches,nv, i,j;

  //  1.  Compare graphs; minMatch is the minimal number of atoms
  //      to match. The greater minMatch, the faster execution.
  //      In this example, we require to have at least 75% of
  //      atoms in G1 and G2 matched.

  U  = new CGraphMatch();
  n1 = G1->GetNofVertices();
  n2 = G2->GetNofVertices();
  m1 = (3*n1)/4;   // 75% of G1 vertices (integer division)
  m2 = (3*n2)/4;   // 75% of G2 vertices (integer division)
  if ((n2>=m1) && (n1>=m2))  {
    // a 75%-match might be there: each graph is large enough to
    // hold 75% of the other one
    minMatch = IMax(m1,m2);
    U->MatchGraphs ( G1,G2,minMatch );
  }


  //  2.  Output the results

  nMatches = U->GetNofMatches();
  printf ( "\n\n ======  %s against %s\n",G1->GetName(),G2->GetName() );
  if (nMatches<=0)  {
    printf ( " No matches found.\n" );
  } else  {
    printf ( " %i matches found:\n\n",nMatches );
    for (i=0;i<nMatches;i++)  {
      // When working with MMDB, all pointers must be either NULL or
      // allocated, so reset the output arguments before every call;
      // this also gives defined values if GetMatch() leaves any of
      // them untouched.
      F1 = NULL;
      F2 = NULL;
      nv = 0;
      p1 = 0.0;
      p2 = 0.0;
      U->GetMatch ( i,F1,F2,nv,p1,p2 );
      printf (
      "\n"
      " ----------------------------------------------------------------\n"
      " Match #%i : %6.2f%% (%i/%i) <-> %6.2f%% (%i/%i)\n\n",
      i+1, p1*100.0,nv,n1,
           p2*100.0,nv,n2 );
      if ((!F1) || (!F2))  {
        // nothing to index; do not dereference NULL match vectors
        if (nv>0)
          printf ( " can't get match vectors.\n"
                   " should not be so; report as a bug.\n" );
        continue;
      }
      // the match vectors are indexed from 1 to nv
      for (j=1;j<=nv;j++)  {
        V1 = G1->GetVertex ( F1[j] );
        V2 = G2->GetVertex ( F2[j] );
        if ((!V1) || (!V2))  {
          printf ( " can't get vertices.\n"
                   " id1=%i, id2=%i\n should not be so; "
                   "report as a bug.\n",F1[j],F2[j] );
        } else  {
          printf ( " %4i.  [%4s] <-> [%4s]\n",
                   j,V1->GetName(),V2->GetName() );
        }
      }
    }
  }


  // F1 and F2 are not disposed of here (the original code never
  // freed them either); NOTE(review): presumably they are owned by
  // the matcher -- confirm against the MMDB documentation.
  delete U;

}


//  main() builds a query graph of two 6-membered rings joined by a
//  bond, then matches it against every graph extracted from the
//  small molecule database (SBASE), printing the results to stdout.
//
//  The command-line arguments and the environment are not used.
//
//  Returns:
//    0   normal completion
//    1   the SBASE index could not be loaded
//    2   the SBASE graph file could not be obtained
int main ( int argc, char ** argv, char ** env )  {
PCSBase SBase;
PCGraph G1,G2;
PCFile  graphFile;
int     rc,nStructures,Hflag,i;


  //  1.  Make routine initializations, which must always be done
  //      before working with MMDB
  InitMatType();

  //  2.  Initialize the small molecule database
  SBase = new CSBase;
  rc = SBase->LoadIndex ( "" );
  if (rc!=SBASE_Ok)  {
    printf ( "\n SBASE index file not found.\n" );
    delete SBase;
    return 1;
  }


  //  3.  Make query graph representing two 6-rings.

  G1 = new CGraph();

  //  first ring: C1..C5,N6; second ring: C7..C11,N12
  G1->AddVertex ( new CVertex(getElementNo("C"),"C1") );
  G1->AddVertex ( new CVertex(getElementNo("C"),"C2") );
  G1->AddVertex ( new CVertex(getElementNo("C"),"C3") );
  G1->AddVertex ( new CVertex(getElementNo("C"),"C4") );
  G1->AddVertex ( new CVertex(getElementNo("C"),"C5") );
  G1->AddVertex ( new CVertex(getElementNo("N"),"N6") );
  G1->AddVertex ( new CVertex(getElementNo("C"),"C7") );
  G1->AddVertex ( new CVertex(getElementNo("C"),"C8") );
  G1->AddVertex ( new CVertex(getElementNo("C"),"C9") );
  G1->AddVertex ( new CVertex(getElementNo("C"),"C10") );
  G1->AddVertex ( new CVertex(getElementNo("C"),"C11") );
  G1->AddVertex ( new CVertex(getElementNo("N"),"N12") );

  //  bonds closing the first ring
  G1->AddEdge ( new CEdge(1,2,BOND_SINGLE) );
  G1->AddEdge ( new CEdge(2,3,BOND_SINGLE) );
  G1->AddEdge ( new CEdge(3,4,BOND_SINGLE) );
  G1->AddEdge ( new CEdge(4,5,BOND_SINGLE) );
  G1->AddEdge ( new CEdge(5,6,BOND_SINGLE) );
  G1->AddEdge ( new CEdge(6,1,BOND_SINGLE) );

  //  bonds closing the second ring
  G1->AddEdge ( new CEdge( 7, 8,BOND_SINGLE) );
  G1->AddEdge ( new CEdge( 8, 9,BOND_SINGLE) );
  G1->AddEdge ( new CEdge( 9,10,BOND_SINGLE) );
  G1->AddEdge ( new CEdge(10,11,BOND_SINGLE) );
  G1->AddEdge ( new CEdge(11,12,BOND_SINGLE) );
  G1->AddEdge ( new CEdge(12, 7,BOND_SINGLE) );

  //  bond N6-N12 joining the two rings
  G1->AddEdge ( new CEdge(6,12,BOND_SINGLE) );

  G1->SetName ( "*QUERY*" );

  G1->Build   ( False );


  //  4.  Run the query through the whole database

  //  There are several methods for retrieving graphs
  //  from the sbase, here we use one most convenient
  //  for serial extractions.
  graphFile = SBase->GetGraphFile();
  if (!graphFile)  {
    printf ( "\n SBASE graph file not found.\n" );
    delete G1;     // the query graph is already built; do not leak it
    delete SBase;
    return 2;
  }

  G2 = NULL;  // when working with MMDB, _all_ pointers
              // must be either NULL or allocated

  Hflag = 1;  // neglect hydrogens

  nStructures = SBase->GetNofStructures();
  printf ( "  Total %i structures in SBASE\n",nStructures );
  for (i=0;i<nStructures;i++)  {
    //  the same G2 pointer is handed to GetGraph() on every pass;
    //  a NULL G2 after the call is treated as extraction failure
    rc = SBase->GetGraph ( graphFile,G2,Hflag );
    if (!G2)
      printf ( "\n %ith graph failed to extract.\n"
               "   Return code %i.\n",i,rc );
    else
      MatchGraphs ( G1,G2 );
  }

  //  5.  Release everything that was allocated; deleting a NULL
  //      pointer is a no-op, so no guards are needed

  graphFile->shut();
  delete graphFile;

  delete G1;
  delete G2;
  delete SBase;  // the database object was previously left allocated

  return 0;

}

