Index: trunk/tests/test_OSDE1D.cc =================================================================== --- trunk/tests/test_OSDE1D.cc (revision 0) +++ trunk/tests/test_OSDE1D.cc (revision 706) @@ -0,0 +1,81 @@ +#include +#include + +#include "UnitTest++.h" +#include "test_utils.hh" + +#include "npstat/nm/ClassicalOrthoPolys1D.hh" +#include "npstat/nm/GaussLegendreQuadrature.hh" + +#include "npstat/stat/OSDE1D.hh" + +using namespace npstat; + +namespace { + class Integrator1 : public Functor1 + { + public: + inline Integrator1(const OSDE1D& osde, unsigned deg, unsigned p) + : osde_(osde), deg_(deg), p_(p) {} + + inline double operator()(const double& x) const + {return powl(osde_.poly(deg_, x), p_);} + + private: + const OSDE1D& osde_; + unsigned deg_; + unsigned p_; + }; + + class Integrator2 : public Functor1 + { + public: + inline Integrator2(const OSDE1D& osde, const unsigned* degrees, + const unsigned nDegrees) + : osde_(osde), degs_(degrees), nDegs_(nDegrees) {assert(degrees);} + + inline double operator()(const double& x) const + { + long double prod = 1.0L; + for (unsigned i=0; i #include #include #include #include "npstat/nm/LinearMapper1d.hh" #include "npstat/stat/OSDE1D.hh" namespace { void validate_sample_size(const std::string& where, const double expectedSampleSize) { if (expectedSampleSize <= 0.0) throw std::invalid_argument( "In npstat::OSDE1D::" + where + ": expected sample size must be positive"); } void require_identical_support(const std::string& where, const npstat::OSDE1D& osde, const npstat::AbsDistribution1D& distro) { if (osde.xmin() != distro.quantile(0.0) || osde.xmax() != distro.quantile(1.0)) throw std::invalid_argument( "In npstat::OSDE1D::" + where + ": incompatible density support"); } class OSDE1DHlp1 : public npstat::Functor1 { public: inline OSDE1DHlp1(const npstat::OSDE1D& p, const unsigned d1) : poly_(p), deg1_(d1) {} inline double operator()(const double& x) const {return poly_.poly(deg1_, x)*poly_.weight(x);} private: const npstat::OSDE1D& poly_; unsigned deg1_; }; class OSDE1DHlp2 : public npstat::Functor1 { public: inline OSDE1DHlp2(const npstat::OSDE1D& p, const unsigned d1, const unsigned d2, const double c1, const double c2) : poly_(p), deg1_(d1), deg2_(d2), c1_(c1), c2_(c2) {} inline double operator()(const double& x) const { const double w = poly_.weight(x); const std::pair& p = poly_.twopoly(deg1_, deg2_, x); return (w*p.first - c1_)*(w*p.second - c2_); } private: const npstat::OSDE1D& poly_; unsigned deg1_, deg2_; double c1_, c2_; }; } namespace npstat { OSDE1D::OSDE1D(const AbsClassicalOrthoPoly1D& poly1d, const double xlo, const double xhi) : poly_(poly1d.clone()), xmin_(xlo), xmax_(xhi) { if (xmin_ == xmax_) throw std::invalid_argument( "In npstat::OSDE1D constructor: " "density estimation interval has zero length"); if (xmin_ > xmax_) std::swap(xmin_, xmax_); const double polyWidth = poly_->xmax() - poly_->xmin(); scale_ = (xmax_ - xmin_)/polyWidth; shift_ = (poly_->xmax()*xmin_ - poly_->xmin()*xmax_)/polyWidth; } OSDE1D::OSDE1D(const OSDE1D& r) : poly_(r.poly_->clone()), xmin_(r.xmin_), xmax_(r.xmax_), shift_(r.shift_), scale_(r.scale_) { } OSDE1D& OSDE1D::operator=(const OSDE1D& r) { if (&r != this) { delete poly_; poly_ = 0; poly_ = r.poly_->clone(); xmin_ = r.xmin_; xmax_ = r.xmax_; shift_ = r.shift_; scale_ = r.scale_; } return *this; } OSDE1D::~OSDE1D() { delete poly_; } std::pair OSDE1D::twopoly( const unsigned deg1, const unsigned deg2, const double x) const { const std::pair& p = poly_->twopoly(deg1, deg2, (x - shift_)/scale_); return std::pair(p.first, p.second); } double OSDE1D::series(const double *coeffs, const unsigned maxdeg, const double x) const { return poly_->series(coeffs, maxdeg, (x - shift_)/scale_); } - double OSDE1D::integrateSeries(const double *coeffs, - const unsigned maxdeg) const - { - return scale_*poly_->integrateSeries(coeffs, maxdeg); - } - double OSDE1D::weight(const double x) const { return poly_->weight((x - shift_)/scale_)/scale_; } void OSDE1D::densityCoeffs(const AbsDistribution1D& distro, const unsigned nIntegrationPoints, double* coeffs, const unsigned maxdeg) const { assert(coeffs); require_identical_support("densityCoeffs", *this, distro); for (unsigned deg=0; deg<=maxdeg; ++deg) { OSDE1DHlp1 hlp(*this, deg); coeffs[deg] = distro.expectation(hlp, nIntegrationPoints); } } void OSDE1D::densityCoeffsVariance(const AbsDistribution1D& distro, const unsigned nIntegrationPoints, const double* coeffs, const unsigned maxdeg, const double expectedSampleSize, double* variances) const { assert(coeffs); assert(variances); validate_sample_size("densityCoeffsVariance", expectedSampleSize); require_identical_support("densityCoeffsVariance", *this, distro); for (unsigned deg=0; deg<=maxdeg; ++deg) { OSDE1DHlp2 hlp(*this, deg, deg, coeffs[deg], coeffs[deg]); variances[deg] = distro.expectation(hlp, nIntegrationPoints)/expectedSampleSize; } } npstat::Matrix OSDE1D::densityCoeffsCovariance(const AbsDistribution1D& distro, const unsigned nIntegrationPoints, const double* coeffs, const unsigned maxdeg, const double expectedSampleSize) const { assert(coeffs); validate_sample_size("densityCoeffsCovariance", expectedSampleSize); require_identical_support("densityCoeffsCovariance", *this, distro); npstat::Matrix result(maxdeg+1U, maxdeg+1U); for (unsigned deg=0; deg<=maxdeg; ++deg) for (unsigned k=0; k<=deg; ++k) { OSDE1DHlp2 hlp(*this, deg, k, coeffs[deg], coeffs[k]); result[deg][k] = distro.expectation(hlp, nIntegrationPoints); } for (unsigned deg=0; deg<=maxdeg; ++deg) for (unsigned k=deg+1; k<=maxdeg; ++k) result[deg][k] = result[k][deg]; result /= expectedSampleSize; return result; } unsigned OSDE1D::optimalDegreeHart(const double* coeffs, const double* variances, const unsigned maxdeg, const double k) { assert(coeffs); assert(variances); long double sum = 0.0L, bestSum = DBL_MAX; unsigned bestM = 0; for (unsigned i=1U; i<=maxdeg; ++i) { sum += k*variances[i] - coeffs[i]*coeffs[i]; if (sum < bestSum) { bestSum = sum; bestM = i; } } return bestM; } std::pair OSDE1D::supportRegion( const AbsClassicalOrthoPoly1D& poly1d, const double leftLimit, const bool leftIsFromSample, const double rightLimit, const bool rightIsFromSample, const unsigned long nPoints) { if (leftLimit >= rightLimit) throw std::invalid_argument( "In npstat::OSDE1D::supportRegion: " "the left limit value must be less than the right limit value"); if (!(leftIsFromSample || rightIsFromSample)) return std::pair(leftLimit, rightLimit); const unsigned nMin = leftIsFromSample && rightIsFromSample ? 2U : 1U; if (nPoints < nMin) throw std::invalid_argument( "In npstat::OSDE1D::supportRegion: insufficient sample size"); const double xmin = poly1d.xmin(); const double xmax = poly1d.xmax(); const double halfstep = (xmax - xmin)/2.0/nPoints; if (leftIsFromSample && rightIsFromSample) { LinearMapper1d mp(xmin + halfstep, leftLimit, xmax - halfstep, rightLimit); return std::pair(mp(xmin), mp(xmax)); } else if (leftIsFromSample) { LinearMapper1d mp(xmin + halfstep, leftLimit, xmax, rightLimit); return std::pair(mp(xmin), rightLimit); } else { LinearMapper1d mp(xmin, leftLimit, xmax - halfstep, rightLimit); return std::pair(leftLimit, mp(xmax)); } } } Index: trunk/npstat/stat/OSDE1D.hh =================================================================== --- trunk/npstat/stat/OSDE1D.hh (revision 705) +++ trunk/npstat/stat/OSDE1D.hh (revision 706) @@ -1,434 +1,446 @@ #ifndef NPSTAT_OSDE1D_HH_ #define NPSTAT_OSDE1D_HH_ /*! // \file OSDE1D.hh // // \brief Orthogonal Series Density Estimation (OSDE) in one dimension // // Author: I. Volobouev // // May 2017 */ #include #include #include "npstat/nm/AbsClassicalOrthoPoly1D.hh" #include "npstat/stat/AbsDistribution1D.hh" #ifdef SWIG #include #include #include "npstat/wrap/arrayNDToNumpy.hh" #include "npstat/wrap/numpyArrayUtils.hh" #endif // SWIG namespace npstat { class OSDE1D { public: /** // Main constructor. The arguments are as follows: // // poly1d -- classical orthogonal polynomial system to use // // xmin -- lower bound of the density support region // // xmax -- upper bound of the density support region */ OSDE1D(const AbsClassicalOrthoPoly1D& poly1d, double xmin, double xmax); OSDE1D(const OSDE1D&); OSDE1D& operator=(const OSDE1D&); ~OSDE1D(); //@{ /** Basic inspection of object properties */ inline const AbsClassicalOrthoPoly1D& clpoly() const {return *poly_;} inline double xmin() const {return xmin_;} inline double xmax() const {return xmax_;} double weight(double x) const; //@} /** Polynomial values */ inline double poly(const unsigned deg, const double x) const {return poly_->poly(deg, (x - shift_)/scale_);} /** // A pair of polynomial values. Faster than // calling "poly" two times. */ std::pair twopoly( unsigned deg1, unsigned deg2, double x) const; /** Polynomial series representing the fitted density */ double series(const double* coeffs, unsigned maxdeg, double x) const; /** Integral of the series */ - double integrateSeries(const double* coeffs, unsigned maxdeg) const; + inline double integrateSeries(const double* coeffs, + const unsigned maxdeg) const + {return scale_*poly_->integrateSeries(coeffs, maxdeg);} + + /** Unweighted polynomial integral */ + inline double integratePoly(const unsigned degree, + const unsigned power) const + {return scale_*poly_->integratePoly(degree, power);} + + /** Unweighted integral of a product of multiple polynomials */ + inline double jointIntegral(const unsigned* degrees, + const unsigned nDegrees) const + {return scale_*poly_->jointIntegral(degrees, nDegrees);} /** // Build the coefficients of the orthogonal polynomial series // for the given function. The length of the array "coeffs" // should be at least maxdeg + 1. Note that the coefficients // are returned in the order of increasing degree (same order // is used by the "series" function). */ template void calculateCoeffs(const Functor& fcn, const Quadrature& quad, double* coeffs, unsigned maxdeg) const; /** // Build the coefficients of the orthogonal polynomial series // for the given density using Gauss-Legendre quadratures. // The length of the array "coeffs" should be at least maxdeg + 1. */ void densityCoeffs(const AbsDistribution1D& distro, unsigned nIntegrationPoints, double* coeffs, unsigned maxdeg) const; /* // Expected variances of the coefficients obtained previously // with the "densityCoeffs" method. The length of the array // "variances" should be at least maxdeg + 1. */ void densityCoeffsVariance(const AbsDistribution1D& distro, unsigned nIntegrationPoints, const double* coeffs, unsigned maxdeg, double expectedSampleSize, double* variances) const; /* // Expected covariances of the coefficients obtained previously // with the "densityCoeffs" method. */ npstat::Matrix densityCoeffsCovariance(const AbsDistribution1D& distro, unsigned nIntegrationPoints, const double* coeffs, unsigned maxdeg, double expectedSampleSize) const; /** // Build the coefficients of the orthogonal polynomial series // for the given sample of points (empirical density function). // The length of the array "coeffs" should be at least maxdeg + 1. // Note that the coefficients are returned in the order of // increasing degree (same order is used by the "series" function). */ template void sampleCoeffs(const Numeric* coords, unsigned long lenCoords, double* coeffs, unsigned maxdeg) const; /* // Estimate variances of the coefficients obtained previously // with the "sampleCoeffs" method. */ template void sampleCoeffsVariance(const Numeric* coords, unsigned long lenCoords, const double* coeffs, unsigned maxdeg, double* variances) const; /* // Estimate covariances of the coefficients obtained previously // with the "sampleCoeffs" method. */ template npstat::Matrix sampleCoeffsCovariance(const Numeric* coords, unsigned long lenCoords, const double* coeffs, unsigned maxdeg) const; /** // Build the coefficients of the orthogonal polynomial series for // the given sample of weighted points (empirical density function). // The first element of the pair will be treated as the coordinate // and the second element will be treated as weight. Weights must // not be negative. The length of the array "coeffs" should be // at least maxdeg + 1. Note that the coefficients are returned // in the order of increasing degree (same order is used by the // "series" function). */ template void weightedSampleCoeffs(const Pair* points, unsigned long numPoints, double* coeffs, unsigned maxdeg) const; /* // Estimate variances of the coefficients obtained previously // with the "weightedSampleCoeffs" method. This code assumes that // weights and coordinates are statistically independent from // each other. */ template void weightedSampleCoeffsVariance(const Pair* points, unsigned long nPoints, const double* coeffs, unsigned maxdeg, double* variances) const; /* // Estimate covariances of the coefficients obtained previously // with the "weightedSampleCoeffs" method. This code assumes that // weights and coordinates are statistically independent from // each other. */ template npstat::Matrix weightedSampleCoeffsCovariance(const Pair* points, unsigned long nPoints, const double* coeffs, unsigned maxdeg) const; /** // Integrated squared error between the given function and the // polynomial series */ template double integratedSquaredError(const Functor& fcn, const Quadrature& quad, const double* coeffs, unsigned maxdeg) const; /** // Integrated squared error between the given function and the // polynomial series on an arbitrary interval. If the given // interval has regions beyond xmin() and xmax(), the integration // will assume that on those regions the value of the series is 0, // and the quadrature will be carried out there separately. */ template double integratedSquaredError(const Functor& fcn, const Quadrature& quad, const double* coeffs, unsigned maxdeg, double xmin, double xmax) const; /** // A helper function for choosing the density support interval if // it is unknown and has to be estimated from the sample. If the // limit is not from the sample (as indicated by the corresponding // boolean argument), it is left unchanged. The first element of // the returned pair will correspond to the lower limit of the // interval and the second element to the upper. */ static std::pair supportRegion( const AbsClassicalOrthoPoly1D& poly1d, double leftLimit, bool leftIsFromSample, double rightLimit, bool rightIsFromSample, unsigned long nPoints); /** // Optimal OSDE truncation degree according to the Hart scheme. // Minimizes Sum_{i=0}^M (k*variances[i] - coeffs[i]^2) over M. // The default value of k = 2 corresponds to the original scheme. // Argument arrays "coeffs" and "variances" should have at least // maxdeg + 1 elements and should be calculated in advance // with "sampleCoeffs"/"sampleCoeffVars" or with // "weightedSampleCoeffs"/"weightedSampleCoeffVars" methods. */ static unsigned optimalDegreeHart(const double* coeffs, const double* variances, unsigned maxdeg, double k = 2.0); private: const AbsClassicalOrthoPoly1D* poly_; double xmin_; double xmax_; double shift_; double scale_; #ifdef SWIG public: inline double series_2(const double* coeffs, unsigned nCoeffs, const double x) const { assert(nCoeffs); return this->series(coeffs, nCoeffs-1U, x); } inline PyObject* seriesScan(const double* coeffs, unsigned nCoeffs, const unsigned nOut) const { assert(nCoeffs); const unsigned maxdeg = nCoeffs - 1U; const int typenum = NumpyTypecode::code; npy_intp sh = nOut; PyObject* xarr = PyArray_SimpleNew(1, &sh, typenum); PyObject* yarr = PyArray_SimpleNew(1, &sh, typenum); if (xarr && yarr) { if (nOut) { try { double* x = (double*)PyArray_DATA((PyArrayObject*)xarr); double* y = (double*)PyArray_DATA((PyArrayObject*)yarr); const double dx = (xmax_ - xmin_)/nOut; for (unsigned i=0; iseries(coeffs, maxdeg, x[i]); } } catch (const std::exception& e) { Py_DECREF(xarr); Py_DECREF(yarr); throw e; } } return Py_BuildValue("(OO)", xarr, yarr); } else { Py_XDECREF(xarr); Py_XDECREF(yarr); return 0; } } inline void densityCoeffs_2(const AbsDistribution1D& distro, unsigned nIntegrationPoints, double* coeffsOut, unsigned nCoeffsOut) const { assert(nCoeffsOut); this->densityCoeffs(distro, nIntegrationPoints, coeffsOut, nCoeffsOut-1); } inline void densityCoeffsVariance_2(const AbsDistribution1D& distro, unsigned nIntegrationPoints, const double* coeffs, unsigned nCoeffs, double expectedSampleSize, double* coeffsOut, unsigned nCoeffsOut) const { assert(nCoeffs); if (!(nCoeffs == nCoeffsOut)) throw std::invalid_argument( "In npstat::OSDE1D::densityCoeffsVariance_2: incompatible output array"); this->densityCoeffsVariance(distro, nIntegrationPoints, coeffs, nCoeffs-1U, expectedSampleSize, coeffsOut); } inline npstat::Matrix densityCoeffsCovariance_2(const AbsDistribution1D& distro, unsigned nIntegrationPoints, const double* coeffs, unsigned nCoeffs, double expectedSampleSize) const { assert(nCoeffs); return this->densityCoeffsCovariance(distro, nIntegrationPoints, coeffs, nCoeffs-1, expectedSampleSize); } template inline void sampleCoeffs_2(const Vec& sample, double* coeffsOut, unsigned nCoeffsOut) const { const unsigned long sz = sample.size(); const typename Vec::value_type *in = 0; if (sz) in = &sample[0]; assert(nCoeffsOut); this->sampleCoeffs(in, sz, coeffsOut, nCoeffsOut-1U); } template inline void sampleCoeffsVariance_2(const Vec& sample, const double *coeffs, unsigned nCoeffs, double* coeffsOut, unsigned nCoeffsOut) const { const unsigned long sz = sample.size(); const typename Vec::value_type *in = 0; if (sz) in = &sample[0]; assert(nCoeffs); if (!(nCoeffs == nCoeffsOut)) throw std::invalid_argument( "In npstat::OSDE1D::sampleCoeffsVariance_2: incompatible output array"); this->sampleCoeffsVariance(in, sz, coeffs, nCoeffs-1U, coeffsOut); } template inline npstat::Matrix sampleCoeffsCovariance_2( const Vec& sample, const double *coeffs, unsigned nCoeffs) const { const unsigned long sz = sample.size(); const typename Vec::value_type *in = 0; if (sz) in = &sample[0]; assert(nCoeffs); return this->sampleCoeffsCovariance(in, sz, coeffs, nCoeffs-1U); } template inline void calculateCoeffs_2(const AbsDistribution1D& d, const Quadrature& quad, double* coeffsOut, unsigned nCoeffsOut) const { DensityFunctor1D fcn(d); assert(nCoeffsOut); return this->calculateCoeffs(fcn, quad, coeffsOut, nCoeffsOut-1U); } template inline double integratedSquaredError_2(const AbsDistribution1D& d, const Quadrature& quad, const double* coeffs, unsigned nCoeffs) const { DensityFunctor1D fcn(d); assert(nCoeffs); return this->integratedSquaredError(fcn, quad, coeffs, nCoeffs-1U); } template inline void weightedSampleCoeffs_2(const Vec& sample, double* coeffsOut, unsigned nCoeffsOut) const { const unsigned long sz = sample.size(); const typename Vec::value_type *in = 0; if (sz) in = &sample[0]; assert(nCoeffsOut); this->weightedSampleCoeffs(in, sz, coeffsOut, nCoeffsOut-1U); } template inline npstat::Matrix weightedSampleCoeffsCovariance_2( const Vec& sample, const double *coeffs, unsigned nCoeffs) const { const unsigned long sz = sample.size(); const typename Vec::value_type *in = 0; if (sz) in = &sample[0]; assert(nCoeffs); return this->weightedSampleCoeffsCovariance(in, sz, coeffs, nCoeffs-1U); } template inline void weightedSampleCoeffsVariance_2(const Vec& sample, const double *coeffs, unsigned nCoeffs, double* coeffsOut, unsigned nCoeffsOut) const { const unsigned long sz = sample.size(); const typename Vec::value_type *in = 0; if (sz) in = &sample[0]; assert(nCoeffs); if (!(nCoeffs == nCoeffsOut)) throw std::invalid_argument( "In npstat::OSDE1D::weightedSampleCoeffsVariance_2: incompatible output array"); this->weightedSampleCoeffsVariance(in, sz, coeffs, nCoeffs-1U, coeffsOut); } inline static unsigned optimalDegreeHart_2(const double* coeffs, unsigned nCoeffs, const double* variances, unsigned nVariances, double k = 2.0) { assert(nCoeffs); if (!(nCoeffs == nVariances)) throw std::invalid_argument( "In npstat::OSDE1D::optimalDegreeHart_2: incompatible input arrays"); return optimalDegreeHart(coeffs, variances, nCoeffs-1U, k); } #endif // SWIG }; } #include "npstat/stat/OSDE1D.icc" #endif // NPSTAT_OSDE1D_HH_