map.hpp
#include "omp.h"
#include <vector>
#include <iostream>
 
using namespace std; 
 
template <class InTask, class OutTask> class OmpMap {
 
private:
  OutTask (*fun) (InTask t); 
  int       nw; 
public:
 
  OmpMap(OutTask (*fun) (InTask t), int nw):fun(fun),nw(nw)  {}
 
  // map over std:vector
  std::vector<OutTask> compute(std::vector<InTask> v) {
    int i, n = v.size();
    vector<OutTask> * r = new vector<OutTask>(n);
#pragma omp parallel for num_threads(nw) private(i)
    for(i=0; i<n; i++) {
      (*r)[i] = fun(v[i]);
    }
    return(*r);
  };     
 
  // map inplace
  void compute_inplace(std::vector<InTask> &v) {
    int i, n = v.size();
#pragma omp parallel for num_threads(nw) private(i)
    for(i=0; i<n; i++) {
      v[i] = fun(v[i]);
    }
    return;
  };     
};
 
 
#define OMPTIMESTART {double __omp_t0_s321 = omp_get_wtime(); 
#define OMPTIMEEND    double __omp_t1_s321 = omp_get_wtime();		\
  std::cout << "==> " << __omp_t1_s321 - __omp_t0_s321 << " msecs" << std::endl;}
 
 
template <class InTask, class OutTask> class OmpStencil_1d {
private:
  std::vector<int> stencil;
  OutTask (*fun)(std::vector<InTask> v);
  int nw; 
 
  OutTask computeStencil(std::vector<InTask> v, int i) {
    std::vector<float> thisStencil(stencil.size() + 1); 
    for(int j=0; j<stencil.size(); j++) {
 
      int k = (i + stencil[j]);
      if(k<0) 
	k = v.size() + k;
      if(k>=v.size()) {
	// std::cout << "k " << k << " v.size() " << v.size() ;
	k = (k) - v.size();
	// std::cout << " k' = " << k << std::endl;
      }
      thisStencil[j] = v[k];
    }
    thisStencil[stencil.size()] = v[i];
 
    std::cout << "==::> " ; 
    for(int s=0; s<thisStencil.size(); s++) 
      std::cout << thisStencil[s] << "\t"; 
    std::cout << " <::==" << std::endl; 
 
    OutTask res = fun(thisStencil); 
    return(res);
  }
 
public:
  OmpStencil_1d(float (*fun)(std::vector<float> v), std::vector<int> stencil, int nw) 
    :fun(fun), stencil(stencil), nw(nw) {}
 
  std::vector<OutTask> * compute(std::vector<InTask> v) {
    std::vector<OutTask> * res = new std::vector<float>(v.size());
#pragma omp parallel for num_threads(nw)
    for(int i=0; i<v.size(); i++) {
      (*res)[i] = computeStencil(v,i);
    }
    return(res);
  }
};