///###////////////////////////////////////////////////////////////////////////
//
// Burton Computer Corporation
// http://www.burton-computer.com
// http://www.cooldevtools.com
// $Id: BNRTokenSelector.cc 80 2004-11-07 04:01:42Z brian $
//
// Copyright (C) 2000 Burton Computer Corporation
// ALL RIGHTS RESERVED
//
// This program is open source software; you can redistribute it
// and/or modify it under the terms of the Q Public License (QPL)
// version 1.0. Use of this software in whole or in part, including
// linking it (modified or unmodified) into other programs is
// subject to the terms of the QPL.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// Q Public License for more details.
//
// You should have received a copy of the Q Public License
// along with this program; see the file LICENSE.txt.  If not, visit
// the Burton Computer Corporation or CoolDevTools web site
// QPL pages at:
//
//    http://www.burton-computer.com/qpl.html
//    http://www.cooldevtools.com/qpl.html
//

#include <set>
#include "Message.h"
#include "Token.h"
#include "BNRTokenSelector.h"

// Minimum span, measured as a difference of token indices, that a dubbing
// run must cover in order to count.  processToken() compares
// (m_nextIndex - m_dubStart) against this value when a run ends: a shorter
// run causes the scan position to be moved back to the run's start, while a
// run at least this long is added to m_numDubbed.
const int DUB_THRESHOLD = 5;

// Constructs a selector in its idle state: nothing dubbed, no dubbing run
// open (-1 is the "no run" marker tested by isDubbingOn()), and the scan
// position at zero.  These are the same values the scan methods assign
// before they start work; setting them here as well guarantees the members
// never hold indeterminate values, e.g. if isDubbingOn() is consulted before
// the first call to selectTokens().
BNRTokenSelector::BNRTokenSelector()
{
  m_numDubbed = 0;
  m_dubStart = -1;
  m_nextIndex = 0;
}

// Empty destructor: no cleanup is performed here.
BNRTokenSelector::~BNRTokenSelector()
{
}

// Bounds-checked lookup into tokens.
//
// Returns tokens[index] when index lies in [0, tokens.size()), and 0 for any
// other index.  Callers rely on the 0 result to represent "no neighbour"
// when asking for the element before the first or after the last token.
Token *BNRTokenSelector::getToken(vector<Token *> &tokens,
                                  int index)
{
  const bool in_range =
    (index >= 0) && (index < static_cast<int>(tokens.size()));
  return in_range ? tokens[index] : 0;
}

void BNRTokenSelector::selectTokens(const Message &msg,
                                    vector<Token *> &tokens)
{
  m_numDubbed = 0;
  m_dubStart = -1;

  vector<Token *> temp_tokens;
  getBaseTokens(msg, temp_tokens);
  selectTokensWithFlag(msg, temp_tokens, Token::FLAG_NORMAL);
  selectTokensWithFlag(msg, temp_tokens, Token::FLAG_PHRASE);

  tokens.clear();
  if (((double)m_numDubbed / (double)msg.getInOrderTokenCount()) > 0.775) {
    cerr << "TOO MANY DUBBED" << endl;
    getTokensWithFlag(msg, tokens, Token::FLAG_ANY);
  } else {
    set<string> added;
    for (vector<Token *>::const_iterator i = temp_tokens.begin(); i != temp_tokens.end(); ++i) {
      Token *tok = *i;
      if (added.find(tok->getWord()) == added.end()) {
        added.insert(tok->getWord());
        tokens.push_back(tok);
        //cerr << "KEPT: " << tok->getWord() << endl;
      }
    }
  }
}

void BNRTokenSelector::getTokensWithFlag(const Message &msg,
                                         vector<Token *> &tokens,
                                         int flag)
{
  for (int i = 0; i < msg.getInOrderTokenCount(); ++i) {
    Token *tok = msg.getInOrderToken(i);
    if (tok->getFlags() & flag) {
      tokens.push_back(tok);
    }
  }
}

void BNRTokenSelector::getBaseTokens(const Message &msg,
                                     vector<Token *> &tokens)
{
  for (int i = 0; i < msg.getInOrderTokenCount(); ++i) {
    Token *tok = msg.getInOrderToken(i);
    if (tok->getFlags() & Token::FLAG_DERIVED || is_upper(tok->getWord()[0])) {
      tokens.push_back(tok);
    }
  }
}

void BNRTokenSelector::selectTokensWithFlag(const Message &msg,
                                            vector<Token *> &selected_tokens,
                                            int flag)
{
  vector<Token *> tokens;
  getTokensWithFlag(msg, tokens, flag);

  m_nextIndex = 0; 
  while (m_nextIndex < static_cast<int>(tokens.size())) {
    Token *prev_tok = getToken(tokens, m_nextIndex - 1);
    Token *tok = getToken(tokens, m_nextIndex);
    Token *next_tok = getToken(tokens, m_nextIndex + 1);
    assert(tok);

    /*
    cerr << m_nextIndex 
         << ": " << isInteresting(tok)
         << "-" << isHighOrder(tok)
         << "-" << isLowOrder(tok)
         << "  " << tok->getWord()
         << endl;
    */
    if (is_upper(tok->getWord()[0]) || processToken(tok, prev_tok, next_tok)) {
      selected_tokens.push_back(tok);
    }
    ++m_nextIndex;
  }
}

// True when tok is non-null and is neither low order nor high order.
// A null tok is never neutral.
bool BNRTokenSelector::isNeutral(Token *tok)
{
  if (tok == 0) {
    return false;
  }
  return !(isLowOrder(tok) || isHighOrder(tok));
}

// True when tok is non-null and not neutral, i.e. it is low order or high
// order.  A null tok is never interesting.
bool BNRTokenSelector::isInteresting(Token *tok)
{
  if (tok == 0) {
    return false;
  }
  return !isNeutral(tok);
}

// "Innocent" is an alias for low order: the answer is whatever isLowOrder()
// says about tok, including false for a null tok.
bool BNRTokenSelector::isInnocent(Token *tok)
{
  const bool low_order = isLowOrder(tok);
  return low_order;
}

// True when tok is non-null and its score is strictly greater than 0.7.
bool BNRTokenSelector::isHighOrder(Token *tok)
{
  if (tok == 0) {
    return false;
  }
  return tok->getScore() > 0.7;
}

// True when tok is non-null and its score is strictly less than 0.3.
bool BNRTokenSelector::isLowOrder(Token *tok)
{
  if (tok == 0) {
    return false;
  }
  return tok->getScore() < 0.3;
}

// Reports whether a dubbing run is currently open.  m_dubStart holds the
// index at which the open run began, or a negative value (-1 is what the
// rest of this file stores) when there is none.
bool BNRTokenSelector::isDubbingOn()
{
  const bool no_run_open = (m_dubStart < 0);
  return !no_run_open;
}

// Decides whether tok, the token at position m_nextIndex, is kept, and
// advances the dubbing state.  prev_tok and next_tok are tok's neighbours in
// the list being scanned, or 0 where there is none.  Returns true when tok
// should be kept.
//
// While a dubbing run is open, the run ends if tok is high order, if tok and
// next_tok are both low order, or if there is no next_tok.  When a run ends:
//   - if it spans fewer than DUB_THRESHOLD positions, m_nextIndex is moved
//     back to the run's start so that the caller, after its own increment,
//     rescans from the token following that start;
//   - otherwise its length is added to m_numDubbed.
// In both cases the run is closed and tok is kept.
//
// In every other situation (no run open, or the open run does not end here):
//   - an interesting token with at least one neutral neighbour is dropped,
//     and if it is also innocent (low order) the current position becomes
//     the start of a dubbing run, replacing any run already open;
//   - any other token is kept only when no run is open.
bool BNRTokenSelector::processToken(Token *tok,
                                    Token *prev_tok,
                                    Token *next_tok)
{
  if (isDubbingOn()) {
    const bool run_ends = isHighOrder(tok)
                          || (isLowOrder(tok) && isLowOrder(next_tok))
                          || (next_tok == 0);
    if (run_ends) {
      if (m_nextIndex - m_dubStart < DUB_THRESHOLD) {
        // Too short to count: rewind so the skipped tokens get another look.
        m_nextIndex = m_dubStart;
      } else {
        m_numDubbed += (m_nextIndex - m_dubStart);
      }
      m_dubStart = -1;
      return true;
    }
  }

  const bool has_neutral_neighbour = isNeutral(prev_tok) || isNeutral(next_tok);
  if (isInteresting(tok) && has_neutral_neighbour) {
    if (isInnocent(tok)) {
      m_dubStart = m_nextIndex;
    }
    return false;
  }

  return !isDubbingOn();
}
