#include <math.h>
#include <iostream>
#include <map>
#include <list>
#include <fstream>
#include "../isr.h"
#include "dw_frequency.h"
// Path of the stop-word file opened by ReadStopList(); relative, so it is
// resolved against the process's current working directory.
#define STOP_LIST "computing_text_stop.lst"
using namespace std;
using ISR::cDictionary;
// Word -> floating-point score table; main() uses it to hold the per-word
// log-ratio values before printing them.
typedef map<std::string, double> DoubleMap;
/**
 * Reads whitespace-separated tokens from the STOP_LIST file and passes each
 * one to stop.AddRef().
 *
 * @param stop  Dictionary that receives one AddRef() call per token read.
 *
 * If STOP_LIST cannot be opened, a warning is written to stderr and `stop`
 * is left untouched.
 */
void ReadStopList(cDictionary &stop)
{
    ifstream file(STOP_LIST, ifstream::in);
    if ( !file )
    {
        // Without this guard a failed open would never reach EOF, and the
        // former `while (!file.eof())` loop would spin forever.
        cerr << "Warning: cannot open stop list '" << STOP_LIST << "'" << endl;
        return;
    }

    string word;
    // Test the extraction itself, not eof(): eof() only turns true after a
    // read has already failed, which used to make the final (failed) read
    // register the previous token a second time -- or an empty string when
    // the file held no tokens at all.
    while ( file >> word )
    {
        stop.AddRef( word );
    }
    // The stream is closed by ifstream's destructor on return.
}
int main( void )
{
cDictionary StopDictionary; cDWF DocWordFreq; DoubleMap InverseDocFreq; cDictionary *ptr = new cDictionary; list<cDictionary *> DictionaryList; bool NewDictionary = false; int DocumentsScanned = 1; ISR::WordMap::const_iterator x;
ReadStopList(StopDictionary);
DictionaryList.push_back(ptr);
while ( true )
{
if ( cin.eof() )
break;
NewDictionary = ptr->PopulateCheckFile(StopDictionary);
if ( NewDictionary ) {
ptr = new cDictionary;
DictionaryList.push_back(ptr);
NewDictionary = false;
DocumentsScanned++;
}
else break;
}
while ( !DictionaryList.empty() )
{
ptr = DictionaryList.front();
DictionaryList.pop_front();
for ( x = ptr->BeginDict(); x != ptr->EndDict(); x++ )
{
DocWordFreq.ChangeRef( x->first, x->second ); DocWordFreq.AddOccur( x->first );
}
delete ptr;
}
for ( x = DocWordFreq.BeginDict(); x != DocWordFreq.EndDict(); x++ )
InverseDocFreq[x->first] = log( (double)DocumentsScanned/DocWordFreq.GetOccur(x->first) );
for ( DoubleMap::const_iterator z = InverseDocFreq.begin(); z != InverseDocFreq.end(); z++ )
printf( "%f %s\n", z->second, z->first.c_str() );
printf( "\n\nTotal files read: %d\n", DocumentsScanned );
printf( "Total vocabulary: %d\n", DocWordFreq.Size() );
printf( "Stop list size: %d\n", StopDictionary.Size() );
return EXIT_SUCCESS;
}