#include "Python.h"
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include "arrayobject.h"
#include <iostream>
#include <iomanip>
#include <cmath>
#include <cfloat>
#include "mkl_rci.h"
#include "mkl_types.h"
#include "mkl_service.h"
using namespace std;
// Data passed through the solver's RCI loop to the residual callback.
struct argstruct {
    double *xval;
    double *yval;
};

// Model function: p[0] = peak position, p[1] = width factor, p[2] = amplitude.
double lorentz(double *p, double x) {
    return p[2] / (1.0 + std::pow(x / p[0] - 1.0, 4) * std::pow(p[1], 2));
}
// Residual callback in the form expected by djacobix: fills
// F[i] = y_i - model(p, x_i) for the m data points (n is unused here
// but required by the callback signature).
void residuals(MKL_INT *m, MKL_INT *n, double *p, double *F, void *data) {
    struct argstruct *bdata = (struct argstruct *) data;
    double x, y;
    for (MKL_INT i = 0; i < (*m); i++) {
        x = bdata->xval[i];
        y = bdata->yval[i];
        F[i] = y - lorentz(p, x);
    }
}
static PyObject *mkloptimizer(PyObject *self, PyObject *args) {
    PyArrayObject *xval, *yval, *eps0, *p0;
    argstruct adata;
    double *fvec, *fjac, *p, *eps, r1, r2, res, rs = 0.25;
    int count = 0;
    MKL_INT iter1 = 100000;   // maximum number of iterations
    MKL_INT iter2 = 100000;   // maximum number of trial-step iterations
    MKL_INT RCI_Request, successful, info[6], iter, st_cr, n, m, i;
    _TRNSP_HANDLE_t handle;
    // Expect four 1-D double arrays: x data, y data, the six dtrnlsp
    // stopping criteria, and the initial parameter guess.
    if (!PyArg_ParseTuple(args, "O!O!O!O!", &PyArray_Type, &xval, &PyArray_Type, &yval,
                          &PyArray_Type, &eps0, &PyArray_Type, &p0)) {
        return NULL;
    }
    m = PyArray_SHAPE(xval)[0];   // number of data points
    n = PyArray_SHAPE(p0)[0];     // number of fit parameters
    adata.xval = (double *) PyArray_DATA(xval);
    adata.yval = (double *) PyArray_DATA(yval);
    eps = (double *) PyArray_DATA(eps0);
    // The solver updates p in place, so p0 also holds the solution on return.
    p = (double *) PyArray_DATA(p0);
    // Initialize fvec and fjac arrays
    fvec = new double[m];
    fjac = new double[m * n];
    for (i = 0; i < m; i++) {
        fvec[i] = 0.0;
    }
    for (i = 0; i < m * n; i++) {
        fjac[i] = 0.0;
    }
PySys_WriteStdout("Initializing Intel's MKL optimizer ... ");
if (dtrnlsp_init(&handle, &n, &m, p, eps, &iter1, &iter2, &rs) != TR_SUCCESS) {
PySys_WriteStdout("FAILED\n");
MKL_Free_Buffers();
PyErr_SetString(PyExc_AssertionError, "Initialization error\n");
return NULL;
} else {
PySys_WriteStdout("SUCCESS\n");
}
PySys_WriteStdout("Checking optimizer inputs ... ");
if (dtrnlsp_check(&handle, &n, &m, fjac, fvec, eps, info) != TR_SUCCESS) {
PySys_WriteStdout("FAILED\n");
MKL_Free_Buffers();
PyErr_SetString(PyExc_AssertionError, "Check optimizer inputs.\n");
return NULL;
} else {
PySys_WriteStdout("SUCCESS\n");
}
    RCI_Request = 0;
    successful = 0;
    PySys_WriteStdout("\nBegin nonlinear least squares algorithm\n");
    PySys_WriteStdout(" Iter     p[0]     p[1]     p[2]  Residual\n");
    while (successful == 0) {
        if (dtrnlsp_solve(&handle, fvec, fjac, &RCI_Request) != TR_SUCCESS) {
            PyErr_SetString(PyExc_AssertionError, "Error encountered while searching for solution");
            MKL_Free_Buffers();
            delete [] fjac;
            delete [] fvec;
            return NULL;
        }
        if (RCI_Request < 0) {
            // Negative request codes mean a stopping criterion was satisfied.
            PySys_WriteStdout("Algorithm terminated\n");
            successful = 1;
        }
        if (RCI_Request == 1) {
            // Solver requests the residual vector at the current p.
            residuals(&m, &n, p, fvec, (void *) &adata);
            res = 0.0;
            for (i = 0; i < m; i++) {
                res += std::pow(fvec[i], 2);
            }
            PySys_WriteStdout("%5d  % 2.4f  % 2.4f  % 2.4f  %2.6f\n", count, p[0], p[1], p[2], std::sqrt(res));
            count += 1;
        }
        if (RCI_Request == 2) {
            // Solver requests the Jacobian; djacobix approximates it by
            // central differences, reading a single precision value (eps[0]).
            if (djacobix(residuals, &n, &m, fjac, p, eps, (void *) &adata) != TR_SUCCESS) {
                PyErr_SetString(PyExc_AssertionError, "Error encountered in calculation of the Jacobian\n");
                MKL_Free_Buffers();
                delete [] fjac;
                delete [] fvec;
                return NULL;
            }
        }
    }
PySys_WriteStdout("\nRetrieving solver's results:\n");
if (dtrnlsp_get(&handle, &iter, &st_cr, &r1, &r2) != TR_SUCCESS) {
PyErr_SetString(PyExc_AssertionError,"Error encountered in retrieving results\n");
MKL_Free_Buffers ();
return NULL;
} else {
PySys_WriteStdout("Number of iterations:\t\t%4d\n", iter);
PySys_WriteStdout("Initial residual:\t\t%2.18f\n", r1);
PySys_WriteStdout("Final residual:\t\t%2.18f\n", r2);
PySys_WriteStdout("Parameter values:\t\t(%2.6f, %2.6f, %2.6f)\n",p[0],p[1],p[2]);
}
delete [] fjac;
delete [] fvec;
return Py_BuildValue("(d,d,d)", p[0],p[1],p[2]);
}
static PyMethodDef pyMKLoptmethods[] = {
    {"mkloptimizer", mkloptimizer, METH_VARARGS,
     "mkloptimizer(x, y, eps, p0): least squares fit via MKL's trust-region solver."},
    {NULL, NULL, 0, NULL}
};

PyMODINIT_FUNC initpyMKLopt(void) {
    Py_InitModule("pyMKLopt", pyMKLoptmethods);
    import_array();
}
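To build the extension you can use a Visual Studio project directly, or drive the same compile-and-link step from a distutils script. The sketch below is an assumption about a typical setup rather than the build used here: the MKL include/library paths are placeholders, and linking against the single mkl_rt library is one of several possible link lines.
# setup.py -- a minimal sketch; the MKL paths below are placeholders and the
# mkl_rt link line is an assumption that depends on your MKL version/platform.
from distutils.core import setup, Extension
import numpy as np

ext = Extension('pyMKLopt',
                sources=['pyMKLopt.cpp'],
                include_dirs=[np.get_include(), r'C:\path\to\mkl\include'],
                library_dirs=[r'C:\path\to\mkl\lib\intel64'],
                libraries=['mkl_rt'])

setup(name='pyMKLopt', ext_modules=[ext])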
Once compiled (in Visual Studio, or with a script like the sketch above), you can import the resulting module into Python as follows:
import numpy as np
import pyMKLopt

np.random.seed(22)
DBL_EPSILON = np.spacing(1.0)

def lorentz(p, x):
    return p[2] / (1.0 + (x / p[0] - 1.0)**4 * p[1]**2)

p = np.array([0.5, 0.25, 1.0], dtype=np.double)   # True parameters
x = np.linspace(-1.5, 2.5, num=30, endpoint=True)
noisyy = lorentz(p, x) + (np.random.randn(30) * 0.05)
p0 = np.array([1.0, 1.0, 1.0], dtype=np.double)   # Initial guess (updated in place)
# The six stopping criteria required by MKL's dtrnlsp solver
eps = np.array([1e-8, 1e-20, DBL_EPSILON, 1e-20, 1e-20, DBL_EPSILON], dtype=np.double)
solution = pyMKLopt.mkloptimizer(x, noisyy, eps, p0)
print '\n\n', solution
The output of this script (displayed in the console) is:
Initializing Intel's MKL optimizer ... SUCCESS
Checking optimizer inputs ... SUCCESS
Begin nonlinear least squares algorithm
 Iter     p[0]     p[1]     p[2]  Residual
    0   1.0000   1.0000   1.0000  1.521464
    1   0.5798  -0.1432   0.8439  1.151126
    2   0.4988  -0.2203   0.9402  0.354709
    3   0.5090  -0.2644   1.0182  0.248959
    4   0.5071  -0.2661   1.0216  0.248171
    5   0.5071  -0.2662   1.0217  0.248171
    6   0.5071  -0.2662   1.0217  0.248171
    7   0.5071  -0.2662   1.0217  0.248171
    8   0.5071  -0.2662   1.0217  0.248171
Algorithm terminated
Retrieving solver's results:
Number of iterations: 8
Initial residual: 1.521463582532435900
Final residual: 0.248170645865868120
Parameter values: (0.507083, -0.266186, 1.021697)
(0.5070833881680623, -0.26618567803609183, 1.0216970905916452)
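As a quick sanity check, the returned parameters can be plugged back into the model to reproduce the solver's final residual (this snippet reuses the lorentz function and arrays from the script above):
fitted = np.array(solution)
residual = np.sqrt(np.sum((noisyy - lorentz(fitted, x))**2))
print residual   # ~0.248171, matching the solver's final residual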
As you can see, it yields the same results (don't worry about the negative sign on p[1]; its value is squared). I want to compare this optimizer (which uses a trust-region algorithm) against the levmar routine. In practice I have found that levmar works quite well, but there are scenarios involving boundary conditions where the optimizer gets "stuck". I get around this by applying a transformation that removes the need for boundary conditions (a sketch of the idea is below). I will test this MKL optimizer to see how it performs in those situations.
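For reference, here is a minimal sketch of one such transformation (my assumption of the general idea, not necessarily the exact transform used in my fits): a sin^2 change of variables maps an unconstrained internal variable q onto a bounded parameter p in [lo, hi], so the optimizer can run bound-free on q.
import numpy as np

def to_bounded(q, lo, hi):
    # Any real q maps into [lo, hi]; the optimizer never sees the bounds.
    return lo + (hi - lo) * np.sin(q)**2

def to_unbounded(p, lo, hi):
    # Inverse map, used once to convert the initial guess.
    return np.arcsin(np.sqrt((p - lo) / (hi - lo)))

# Example: keep a width parameter inside [0, 2] during the fit.
q0 = to_unbounded(0.25, 0.0, 2.0)   # unconstrained starting value
assert abs(to_bounded(q0, 0.0, 2.0) - 0.25) < 1e-12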