#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include <fstream>
#include <iostream>
#include <list>
#include <map>
#include <string>

// Custom Headers
#include "../isr.h"
#include "dw_frequency.h"

// Defines
#define STOP_LIST  "computing_text_stop.lst"

// Save some typing: pull the names we use into scope.
using namespace std;
using ISR::cDictionary;

typedef map<std::string, double> DoubleMap;

// Read the stop list from the file named by STOP_LIST and add every word to
// `stop` via AddRef().
//
// The file is expected to hold one word per line; because extraction uses
// operator>>, any whitespace (not only newlines) separates words.
//
// stop - dictionary that receives the stop words.
//
// If the file cannot be opened, a warning is written to stderr and `stop`
// is left untouched.
void ReadStopList(cDictionary &stop)
{
   std::ifstream file(STOP_LIST); // opened for reading; closed by the destructor

   if ( !file )
   {
      // Without this guard the read loop would have nothing to stop it on a
      // stream that never reaches EOF.
      std::cerr << "Warning: could not open stop list \"" << STOP_LIST
                << "\"; no words will be ignored\n";
      return;
   }

   std::string str;

   // Test the extraction itself rather than eof(): eof() only becomes true
   // after a read has already failed, which would add the last word twice
   // when the file ends with a newline.
   while ( file >> str )
      stop.AddRef( str ); // add it
}

int main( void )
{
  cDictionary StopDictionary;            // holds our words to ignore
  cDWF DocWordFreq;                      // holds the master dictionary and DocWordFreq
  DoubleMap InverseDocFreq;              // holds the inverse document frequency for all words in the collection
  cDictionary *ptr = new cDictionary;    // holds the dictionary we are currently processing
  list<cDictionary *> DictionaryList;    // a list of all dictionaries
  bool NewDictionary = false;            // do we need to make a new dictionry?
  int DocumentsScanned = 1;              // number of documents analyzed
  ISR::WordMap::const_iterator x;        // utility iterator
  
  ReadStopList(StopDictionary);
  
  DictionaryList.push_back(ptr);
  
  // loop until we break
  while ( true )
  {
    if ( cin.eof() )
     break;
        
    NewDictionary = ptr->PopulateCheckFile(StopDictionary); // populate with words
                                                            // omit stop file words
   
    if ( NewDictionary ) // new dictionary?
    {       
      // allocate new dictionary and add to list
      ptr = new cDictionary;
      DictionaryList.push_back(ptr);

      // reset flag
      NewDictionary = false;
      
      // increase # of documents read
      DocumentsScanned++;
    }
    else // end of input
     break;
  }

  // run through all dictionaries, add words to DWF and delete
  while ( !DictionaryList.empty() )
  {       
     // grab first element and pop from list
     ptr = DictionaryList.front();
     DictionaryList.pop_front();   
      
     for ( x = ptr->BeginDict(); x != ptr->EndDict(); x++ )
     {
        DocWordFreq.ChangeRef( x->first, x->second ); // put the word in
        DocWordFreq.AddOccur( x->first );
     }
    
     // delete dictionary
     delete ptr;
  }

  // Calculate Inverse Document Frequency for each term
  for ( x = DocWordFreq.BeginDict(); x != DocWordFreq.EndDict(); x++ )
    InverseDocFreq[x->first] = log( (double)DocumentsScanned/DocWordFreq.GetOccur(x->first) );
    
  // Print out data
  for ( DoubleMap::const_iterator z = InverseDocFreq.begin(); z != InverseDocFreq.end(); z++ )
   printf( "%f %s\n", z->second, z->first.c_str() );

  printf( "\n\nTotal files read: %d\n", DocumentsScanned );
  printf( "Total vocabulary: %d\n", DocWordFreq.Size() );
  printf( "Stop list size:   %d\n", StopDictionary.Size() );

  return EXIT_SUCCESS;
}