static const char _cvsid[] =
"$Id: Resample0.cc,v 1.15 2004/06/18 20:23:25 maciek Exp $";
static const char _copyright[] = 
"Copyright 2004, University of Wisconsin Space Science & Engr. Center."; 

/**
 \file Resample0.cc
 
 \author R.K.Garcia <rayg@ssec.wisc.edu>

 This is a first-order implementation of a linear spectral resampling algorithm
 appropriate for use with the GIFTS and other interferometers. It is specified as
 an operator to be embedded into a pluggable processing stage capable of operating
 in more than one testing or deployment framework. 
 
 Its reference implementation is David Tobin's resample2.m 20040227,
 which follows the matrix multiplication approach as defined in the 
 BOMEM technical memo BOM-TECH-XXXX(Draft)-Spectral Resampling(work-02).doc
 
Dependencies:
 Blitz++ 0.6 open source matrix template library.
 BLAS basic linear algebra library, vendor implementation (e.g. Apple veclib), or ATLAS.
 
 
Unit test builds (to be moved to makefile):
 c++ -framework veclib -DTEST=1 -o test1 -I/System/Library/Frameworks/vecLib.framework/Headers -I/sw/include -L/sw/lib Resample0.cc -lstdc++ -lblitz
 c++ -framework veclib -DTEST=2 -o test2 -I/System/Library/Frameworks/vecLib.framework/Headers -I/sw/include -L/sw/lib Resample0.cc -lstdc++ -lblitz 
 c++ -framework veclib -DTEST=3 -o test3 -I/System/Library/Frameworks/vecLib.framework/Headers -I/sw/include -L/sw/lib Resample0.cc -lstdc++ -lblitz 


 */



/* C includes */
#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

/* STL includes */
#include <map>
#include <string>

/* BLITZ includes */
#include <blitz/array.h>

/* BLAS includes */
extern "C" {
#include <cblas.h>
};

#ifdef PROFILING
// clok.cc and clok.h from cvs TOOLS/dev/librkg/time/ 
#include "clok.h"
#endif

/* interface specification */
#include "SincResampler.h"

using namespace blitz;


/** two-dimensional matched-precision matrix is our F-matrix type.
 *  Its element type is data_t, which is declared outside this file
 *  (presumably in SincResampler.h -- TODO confirm). applyFMatrix() hands its
 *  storage to cblas_sgemv when USE_SINGLE_PRECISION is defined and to
 *  cblas_dgemv otherwise, so data_t is expected to be float or double
 *  accordingly.
 */
typedef Array<data_t,2> ResamplingMatrix;

/** intermediate double-precision matrix used during F-matrix generation.
 *  genFMatrix() evaluates the sinc arguments (and, in single-precision builds,
 *  the matrix itself) in double before narrowing to data_t.
 */
typedef Array<double,2> IntermediateMatrix;


/** F matrix cache 
We assume for the moment that it is more expensive to load these from disk or network
than it is to recompute them. Thus, we build a memory cache and flush when the 
executable exits. If our upstream control logic is sufficiently clever,
it keeps sending us the same group so that we don't spend a lot of time
building F matrices. 

The cache maps a detector index to a raw pointer to a heap-allocated matrix.
In this file the pointers are created with `new` in
SincResamplerInternals::fetchFMatrix() and released with `delete` in
SincResampler::referenceDatabaseHasChanged(); the map itself never frees them.
std::less<DetectorIndex_t> is std::map's default ordering, spelled out explicitly.
*/
typedef std::map< DetectorIndex_t, ResamplingMatrix *, std::less<DetectorIndex_t> > FMatrixCache;
/* FIXME: this needs to be set up to properly free the cache content as needed.
 */




/* prototypes */
/* sinc(x): sin(pi*x)/(pi*x), with sinc(0) == 1. Defined below. */
inline double sinc( double x );
/* Fills F with the src_npoints x src_npoints resampling matrix for the given
   source/target laser wavenumbers. Defined below; file-local. */
static int genFMatrix( ResamplingMatrix &F, unsigned src_npoints, double src_vlaser, double tgt_vlaser );


#ifdef TEST

/**
 * Dump the element storage of F, as exposed by F.data(), to a flat binary
 * file (only compiled in TEST builds).
 *
 * The file receives F.size() elements of sizeof(data_t) bytes each, with no
 * header. Any failure to open, write or close the file is reported on stderr
 * and terminates the program with abort(); unlike assert(), these checks stay
 * active when NDEBUG is defined. An empty matrix produces an empty file.
 *
 * \param filename  path of the file to create or truncate
 * \param F         matrix whose storage is written
 */
void writeFBF( const char *filename, ResamplingMatrix &F )
{
    FILE *fpo = fopen( filename, "wb" );
    if (fpo == NULL)
    {
        perror( filename );
        abort();
    }

    // Count elements rather than writing one big item, so a zero-sized
    // matrix is not mistaken for a failed write.
    const size_t nElements = size_t( F.size() );
    const size_t nWritten = fwrite( F.data(), sizeof(data_t), nElements, fpo );
    const bool wroteAll = (nWritten == nElements);
    if (!wroteAll)
        perror( filename );

    // Close even after a short write so the handle is not leaked.
    const bool closedCleanly = (fclose( fpo ) == 0);
    if (wroteAll && !closedCleanly)
        perror( filename );

    if (!wroteAll || !closedCleanly)
        abort();
}

#endif


/**
 * Normalized sinc function: sin(pi*x) / (pi*x).
 * The removable singularity at x == 0 is filled in with the limit value 1.
 */
inline double sinc( double x ) 
{
#ifndef M_PI
    static const double M_PI = atan(1.0) * 4.0; // FIXME: place constants centrally 
#endif
    if (x == 0.0)
    {
        return 1.0;
    }
    const double phase = M_PI * x;
    return sin( phase ) / phase;
}

/* Blitz++ macro: makes the scalar sinc() usable element-wise inside array
   expressions (genFMatrix applies it to a whole matrix at once). */
BZ_DECLARE_FUNCTION(sinc)  /* builds vectorization of a scalar function */

#ifdef USE_SINGLE_PRECISION
/** Narrow a double-precision value to single precision. */
inline float floated( double x )
{
    return static_cast<float>( x );
}

/* Blitz++ macro: lets floated() be applied element-wise in array expressions;
   genFMatrix uses it to narrow the double-precision matrix to data_t. */
BZ_DECLARE_FUNCTION(floated)
#endif

/**
 F matrix generator. This is an alternative to the zero-fill-linear-interpolate 
 which is currently being tested. 
 The F matrix generator may be subject to later complications of its generation algorithm. 
 The src_vlaser entries vary with the off-axis angle of the pixels. 
 The tgt_vlaser is picked as nominal for the instrument.
 Caching of these matrices is potentially disk-expensive. 
 This implies that there will be a certain affinity of pixels for given CPUs, in 
 order that the caches of the F-matrices can be kept to a reasonable size per machine.
 Alternately, we can dilute the accuracy by reducing the number of distinct src_vlaser values,
 and/or reducing the F matrix sizes, treating them as sparse or tridiagonal, zeroing out the 
 corners. This reduces the computation load as well as the memory load at the cost of a bite
 out of any error budget.
 
 FIXMEs: use the quality reduction factor
 
 MATLAB equivalent code (from DCT) as converted to I/J notation for Blitz++: 
 (refer to fmatrix.m)
 
 N is number of OPDs (2048)
 output matrix is N/2+1 in size
 W is N/2+1 (number of wavenumbers) (1025)
 double N = double(W-1)*2.0;
 
 I is row index, J is column index
 vlaser_l is source laser wavenumber
 vlaser_b is target laser wavenumber

 >> k = (N/2:N)';  % results in column vector of height w, starting with w-1
 double wm1 = double(W-1);
 // k is I+wm1

 >> k2 = repmat(k,1,N/2+1); % W copies of k
 // k2 is k is I+wm1

 >> ratio = vlaser_l / vlaser_b;
 double ratio = src_vlaser / tgt_vlaser;

 >> x = k2'*ratio - k2;
 // k2' is (J+wm1) therefore
 x = (J+wm1)*ratio - (I+wm1);
 
>> F2 = ratio * sinc(x)./sinc(x/N);
 double oon=1.0/double(N);
 F = sinc(x) / sinc( x * oon ) * ratio;
 
>> out.r3 = F2 * in.r2;
  
 */
// Summary of the contract (derivation is in the comment block above):
//   W = src_npoints (wavenumbers), N = 2*(W-1) (OPDs), ratio = src_vlaser/tgt_vlaser
//   x(I,J) = (J + W-1) * ratio - (I + W-1)
//   F(I,J) = ratio * sinc(x) / sinc(x / N)
// On success F is resized to W x W (previous contents are discarded) and 0 is
// returned. Returns -1, leaving F untouched, when the inputs cannot yield a
// usable matrix:
//   - src_npoints < 2: N would be 0 (1/N is infinite, entries become NaN), or
//     would wrap around as an unsigned value when src_npoints == 0;
//   - tgt_vlaser == 0: the ratio would be infinite or NaN.
static int genFMatrix( ResamplingMatrix &F, unsigned src_npoints, double src_vlaser, double tgt_vlaser )
{
    if (src_npoints < 2 || tgt_vlaser == 0.0)
        return -1; // FIXME: define error return codes in an enumeration

    firstIndex I;   // Blitz index placeholder: row index
    secondIndex J;  // Blitz index placeholder: column index

    const unsigned W = src_npoints; // wnums
    const unsigned N = (W-1)*2;     // OPDs; W >= 2 here, so N >= 2
    const double wm1 = double(W-1);
    const double ratio = src_vlaser / tgt_vlaser;

    // Argument of the sinc kernel for every (row, column) pair.
    IntermediateMatrix x( W,W );
    x = (wm1+J)*ratio - (wm1+I);
    #ifdef TEST
        cout << ratio << endl;
        if (W<16)
            cout << x << endl;
    #endif

    const double oon = 1.0/double(N);
    F.resize(W,W);
#ifdef USE_SINGLE_PRECISION
    // Evaluate in double precision first, then narrow element-wise to float.
    IntermediateMatrix f(W,W);
    f = sinc(x) / sinc( x * oon ) * ratio;
    F = floated(f);
#else
    F = sinc(x) / sinc( x * oon ) * ratio;
#endif
    #ifdef TEST
        if (W<16)
            cout << F << endl;
    #endif
    
    return 0;
}


/**
 * Multiply the F-matrix "F" by the spectrum vector "X" through CBLAS (ATLAS or a
 * vendor implementation such as Apple veclib), storing the product in "Y": Y = F * X.
 *
 * e.g. F is a real 1025 Row X 1025 Col matrix for a 1025-wavenumber spectrum generated
 * from a 2048-value interferogram.
 *
 * Design intent: the real matrix is not promoted to complex type (which may cost
 * significantly more floating point operations per element multiply). Instead the
 * complex vector, viewed as alternating real and imaginary components, is to be
 * multiplied as a pair of interleaved real multiplies.
 *
 * NOTE(review): only ONE GEMV call is issued below, with unit strides for both
 * vectors, under a "real part" label; no second multiply for the imaginary part
 * is visible in this file. If CalibratedRadianceSpectrum really stores interleaved
 * complex values, strides of 2 plus a second call offset by one element would be
 * expected -- confirm against the definition of CalibratedRadianceSpectrum.
 *
 * Currently assumes row-major Blitz matrices, i.e. rows are contiguous
 * (fortranArray Blitz types are column-major).
 *
 * Later, we may want to explore using DGEMM instead of DGEMV and handle a cached
 * collection of spectra of one given kind in one swipe by building a matrix out of
 * K spectra.
 *
 * \return always 0.
 */
inline static
int applyFMatrix( CalibratedRadianceSpectrum &Y, const ResamplingMatrix &F, const CalibratedRadianceSpectrum &X )
{ 
    const int nRows = F.rows();     /* M */
    const int nCols = F.columns();  /* N, and also lda: the row stride of F */

    /* real part */
#ifdef USE_SINGLE_PRECISION
    const float *pSource = (const float *)X.data();
    float *pTarget = (float *)Y.data();
    cblas_sgemv( CblasRowMajor /* rows are contig in memory */, CblasNoTrans,
                 nRows, nCols,
                 1.0 /*alpha*/, F.data(), nCols,
                 pSource, 1 /* stride of X */,
                 0.0 /*beta*/, pTarget, 1 /* stride of Y */ );
#else
    const double *pSource = (const double *)X.data();
    double *pTarget = (double *)Y.data();
    cblas_dgemv( CblasRowMajor /* rows are contig in memory */, CblasNoTrans,
                 nRows, nCols,
                 1.0 /*alpha*/, F.data(), nCols,
                 pSource, 1 /* stride of X */,
                 0.0 /*beta*/, pTarget, 1 /* stride of Y */ );
#endif
    // FIXME: assert dimensional and stride compatibility of blitz matrix being used within BLAS!
    // FIXME: quality metrics?
    return 0;
}


/************************************
* Implementation of the operator interface.
*/

/** \struct SincResamplerInternals
* \brief Private data structures and utility functions used by the operator public interface.
*/
struct SincResamplerInternals
{
    /** Cache of F matrices, maintained on the heap.
    */
    FMatrixCache cacheFs;
    
    ReferenceDatabaseForSincResampler &refdb;
    AuditingServiceForSincResampler &audit;
    MonitoringServiceForSincResampler &monitor;
    
    SincResamplerInternals(   ReferenceDatabaseForSincResampler &_refdb, 
                              AuditingServiceForSincResampler &_audit, 
                              MonitoringServiceForSincResampler &_monitor ):
        refdb( _refdb ), audit( _audit ), monitor( _monitor ) 
    { 
    }
    
    void flushCache()
    {
        // FIXME
    }
    
    ~SincResamplerInternals()
    {
        flushCache();
    }
        
    
    /* FIXME: return code in case there's an error .. */
    inline int fetchFMatrix( ResamplingMatrix *&pF, const DetectorIndex_t key )
    {
        FMatrixCache::iterator f = cacheFs.find( key );
        /* FIXME: consider verifying that it is, in fact, the same settings as were used to generate the cache contents */
        
        if (f==cacheFs.end()) // not found
        {
            ResamplingMatrix *pNewF = new ResamplingMatrix(1,1); // FIXME: allocate a better size
            const ResamplingSettings_t *pRefData = refdb[ key ];
            if (pRefData==NULL)
            {
                monitor.referenceResamplingSettingsNotFound( key );
                return -1; // FIXME: define error return codes in an enumeration
            }
            int rc = genFMatrix( *pNewF, 
                                 pRefData->inputPointCount, 
                                 pRefData->sourceLaserWavenumber, 
                                 pRefData->targetLaserWavenumber );
            assert(rc==0);
            monitor.addingEntryToResamplingCache( key, 0 ); 
            // FIXME: compute new size of cache
            // FIXME: flush a least-recently-used or (at least) random cache element if cache is too large
            // monitor.removingEntryFromResamplingCache( removed_key, new_cache_size )
            cacheFs[ key ] = pNewF;
            
            pF = pNewF;
            return 0; 
        }
        pF = f->second;
        return 0;
    }
};



/**
 * Construct the resampling operator.
 * Allocates the private SincResamplerInternals on the heap, passing the three
 * service references through to it, and binds the member `my` to that object.
 * The matching `delete` is in the destructor.
 */
SincResampler::SincResampler( ReferenceDatabaseForSincResampler &_refdb, 
                              AuditingServiceForSincResampler &_audit, 
                              MonitoringServiceForSincResampler &_monitor ):
    my( *( new SincResamplerInternals( _refdb, _audit, _monitor ) ) )
{
}


/**
 * Destroy the operator, deleting the SincResamplerInternals object that the
 * constructor allocated and bound to `my`.
 * NOTE(review): if SincResampler is copyable, two copies would delete the same
 * object -- the class declaration is not visible here; confirm in SincResampler.h.
 */
SincResampler::~SincResampler( )
{
    delete &my;
}


/**
 * Placeholder: not implemented yet.
 * Calling this terminates the program via abort(). The statements after the
 * abort() call are never reached; they only give the function a return value.
 */
const std::string &SincResampler::errorString()
{
    abort( ); // FIXME: UNIMPLEMENTED
    static const std::string NYI( "Not Yet Implemented" );
    return NYI;
}

void SincResampler::referenceDatabaseHasChanged()
{
    // iterate throught the cache, deleting matrices
    FMatrixCache::iterator i;
    for (i = my.cacheFs.begin(); i != my.cacheFs.end(); ++i) {
        delete i->second;
    }
    my.cacheFs.clear();
    my.monitor.resamplingCacheFlushed();
}

/** Sentinel cache size: unsigned(-1), i.e. the largest value an unsigned can hold.
 *  It is the value maxCacheSize() currently returns. */
const unsigned SincResampler::CACHE_SIZE_UNLIMITED=unsigned(-1);

/** Placeholder: not implemented yet.
 *  The (unnamed) size argument is ignored and the call terminates the program
 *  via abort().
 */
void SincResampler::setMaxCacheSize( unsigned )
{
    abort(); // FIXME: UNIMPLEMENTED
}

/** Read max cache size in bytes.
 *  Currently always returns CACHE_SIZE_UNLIMITED; no limit is stored anywhere.
*/
unsigned SincResampler::maxCacheSize( ) const
{
    return CACHE_SIZE_UNLIMITED; // FIXME
}


/**
 * Resample one spectrum.
 *
 * Fetches (or generates) the F matrix for `pixel`, then applies it to `input`,
 * writing the result to `output`. The monitoring service is told whether the
 * resampling completed or was aborted; in PROFILING builds it also receives the
 * time spent obtaining the matrix and, if that succeeded, the time spent
 * applying it.
 *
 * \return 0 on success, otherwise the non-zero status of the step that failed.
 */
int SincResampler::operator()(  CalibratedRadianceSpectrum &output, 
                                const CalibratedRadianceSpectrum &input, 
                                DetectorIndex_t pixel )
{
    ResamplingMatrix *pMatrix = NULL;

    /* step 1: obtain the F matrix for this detector */
    #ifdef PROFILING
        clok stopwatch;
        stopwatch.start();
    #endif
    int status = my.fetchFMatrix( pMatrix, pixel );
    #ifdef PROFILING
        stopwatch.stop();
        double user, sys, wall;
        int N;
        stopwatch.result( N, wall, user, sys );
        my.monitor.timeCostOfFMatrix( wall, user, sys );
        stopwatch.reset();
    #endif

    /* step 2: apply it, but only if step 1 succeeded */
    if (status == 0)
    {
        #ifdef PROFILING
            stopwatch.start();
        #endif
        status = applyFMatrix( output, *pMatrix, input );
        #ifdef PROFILING
            stopwatch.stop();
            stopwatch.result( N, wall, user, sys );
            my.monitor.timeCostOfResampling( wall, user, sys );
        #endif
    }

    /* report the outcome */
    if (status != 0)
    {
        my.monitor.resamplingAborted();
        return status;
    }
    my.monitor.resamplingCompleted();
    return 0;
}




#if TEST==1

/**
 * Unit test 1: generate a 17x17 F matrix for an arbitrary laser ratio and
 * print it, so the output can be compared with stored results.
 *
 * \param argc  unused
 * \param argv  unused
 * \return the status returned by genFMatrix (0 on success).
 */
int test1( int argc, char **argv )
{
    (void)argc; // command-line arguments are not used by this test
    (void)argv;

    ResamplingMatrix F;
    /* test pattern matching stored results using arbitrary ratio src/dst = 1.1 */
    const int rc = genFMatrix( F, 17, 1.1, 1.0 ); 
    cout << F;
    // The original fell off the end of a non-void function (undefined behaviour).
    return rc;
}


/** Entry point of the TEST==1 build: runs test1 and uses its result as the exit status. */
int main( int argc, char **argv )
{
    return test1( argc, argv );
}
#endif

#if TEST==2
/**
 * Entry point of the TEST==2 build: checks calling CBLAS on Blitz storage.
 * Multiplies a = diag(2,1,3) by b = [1 2 3; 4 5 6; 7 8 9] with cblas_dgemm and
 * prints the product c, which should be [2 4 6; 4 5 6; 21 24 27].
 * NOTE(review): cblas_dgemm takes double pointers, so this test presumably only
 * builds when data_t is double (USE_SINGLE_PRECISION not defined) -- confirm.
 */
int main( int argc, char **argv )
{
    ResamplingMatrix a(3,3);
    a = 2,0,0,
        0,1,0,
        0,0,3;
    ResamplingMatrix b(3,3);
    b = 1,2,3,
        4,5,6,
        7,8,9;
    ResamplingMatrix c(3,3);
    cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, 3 /*M*/, 3 /*N*/, 3 /*K*/,
                 1.0 /*alpha*/, a.data(), 3 /*lda, row stride of a*/, 
                 b.data(), 3 /* ldb, row stride of b */, 0.0, c.data(), 3 /* ldc, row stride of c */ ); 
    //cblas_dgemv( CblasRowMajor, CblasNoTrans, 3 /*M*/, 3 /*N*/, 1.0 /*alpha*/, a.data(), 3 /*lda, row stride of a*/, 
    //             b.data(), 1 /* stride of b */, 0.0, c.data(), 1 /* stride of c */ ); 
    //c = product(a,b);
    cout << c << endl;
}
#endif

#if TEST==3
/**
 * Generate an F matrix and write it to a flat binary file. 
 * parameters: filename width src_vlaser dst_vlaser
 */

#include <iomanip>

/**
 * Entry point of the TEST==3 build: generate an F matrix and write its
 * transpose to a flat binary file (width*width elements of data_t, no header).
 *
 * Command line: filename width src_vlaser dst_vlaser
 *
 * \return 0 on success; 1 on a usage error, a non-positive width, a matrix
 *         generation failure, or any file I/O failure. Failures are reported
 *         explicitly instead of through assert(), so they are also caught
 *         when NDEBUG is defined.
 */
int main( int argc, char **argv )
{
    if (argc!=5) 
    {
        cout << "usage: " << argv[0] << " filename width src_vlaser dst_vlaser\n";
        return 1; // do not touch argv[1..4] when they are not all present
    }
    cout.precision(5);

    const char *filename = argv[1];
    const int N = atoi( argv[2] );
    const double src_vlaser = atof( argv[3] );
    const double dst_vlaser = atof( argv[4] );
    if (N <= 0)
    {
        fprintf( stderr, "%s: width must be a positive integer, got '%s'\n", argv[0], argv[2] );
        return 1;
    }
    cout << src_vlaser << endl << dst_vlaser << endl;

    ResamplingMatrix F(N,N);
    if (genFMatrix( F, N, src_vlaser, dst_vlaser ) != 0)
    {
        fprintf( stderr, "%s: could not generate the F matrix\n", argv[0] );
        return 1;
    }

    // Store the transpose, so Fprime(i,j) == F(j,i) in the dumped storage.
    ResamplingMatrix Fprime(N,N);
    Fprime = F.transpose( blitz::secondDim, blitz::firstDim ).copy();
    //rc = fwrite( F.data(), sizeof(double)*F.size(), 1, fpo );

    FILE *fpo = fopen( filename, "wb" );
    if (fpo == NULL)
    {
        perror( filename );
        return 1;
    }
    const size_t nElements = size_t( Fprime.size() );
    const size_t nWritten = fwrite( Fprime.data(), sizeof(data_t), nElements, fpo );
    bool ok = (nWritten == nElements);
    if (!ok)
        perror( filename );
    // Always close, and treat a failed close (unflushed data) as a failure too.
    if (fclose( fpo ) != 0)
    {
        if (ok)
            perror( filename );
        ok = false;
    }
    return ok ? 0 : 1;
}
#endif
