Thursday, October 10, 2013

Adding Intel's MKL Nonlinear Optimizer to Python as a Module

I was perusing the documentation for Intel's MKL (Math Kernel Library) when I stumbled across its nonlinear least squares optimizer. I wanted to test this optimizer against the levmar routine implemented in a previous post. To do this I have again added it to Python as an extension module, as follows:
#include "Python.h"
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include "arrayobject.h"
#include <iostream>
#include <iomanip>
#include <cmath>
#include <cfloat>
#include "mkl_rci.h"
#include "mkl_types.h"
#include "mkl_service.h"
using namespace std;

// Measured data passed through to the residual callback
struct argstruct {
 double *xval;
 double *yval;
};

// Lorentzian-style model: p[2] / (1 + (x/p[0] - 1)^4 * p[1]^2)
double lorentz(double *p, double x) {
 return p[2] / (1.0 + pow((x / p[0] - 1.0), 4) * pow(p[1], 2));
}

// Residual callback in the form required by dtrnlsp_solve and djacobix:
// m = number of observations, n = number of parameters, p = current
// parameter vector, F = output residuals, data = user data (argstruct)
void residuals(MKL_INT *m, MKL_INT *n, double *p, double *F, void *data) {
 MKL_INT i=0;
 struct argstruct *bdata;
 double x, y;
 bdata = (struct argstruct *) data;
 for (i=0; i<(*m); i++) {
  x = bdata->xval[i];
  y = bdata->yval[i];
  F[i] = y - lorentz(p, x);
 }
}

static PyObject *mkloptimizer(PyObject *self, PyObject *args) {
 PyArrayObject *xval, *yval, *eps0, *p0;
 argstruct adata;
 double *fvec, *fjac, *p, *eps, r1, r2, res, rs = 0.25; // rs: initial trust-region step bound
 int count = 0;
 MKL_INT iter1 = 100000; // maximum number of iterations
 MKL_INT iter2 = 100000; // maximum number of trial-step iterations
 MKL_INT RCI_Request, successful, info[6], iter, st_cr, n, m, i;
 _TRNSP_HANDLE_t handle;

 if (!PyArg_ParseTuple(args, "O!O!O!O!", &PyArray_Type, &xval, &PyArray_Type, &yval, &PyArray_Type, &eps0, &PyArray_Type, &p0)) {
  return NULL;
 }
 m = PyArray_SHAPE(xval)[0];
 n = PyArray_SHAPE(p0)[0];
 // Note: this assumes the inputs are contiguous C-double arrays; a more
 // defensive version would convert them with PyArray_FROM_OTF first
 adata.xval = (double *) PyArray_DATA(xval);
 adata.yval = (double *) PyArray_DATA(yval);
 eps = (double *) PyArray_DATA(eps0);
 p = (double *) PyArray_DATA(p0);
 // Allocate and zero the residual vector and the (dense) Jacobian
 fvec = new double [m];
 fjac = new double [m*n];
 for (i=0; i<m; i++) fvec[i] = 0.0;
 for (i=0; i<(m*n); i++) fjac[i] = 0.0;
 PySys_WriteStdout("Initializing Intel's MKL optimizer ... ");
 if (dtrnlsp_init(&handle, &n, &m, p, eps, &iter1, &iter2, &rs) != TR_SUCCESS) {
  PySys_WriteStdout("FAILED\n");
  MKL_Free_Buffers();
  delete [] fjac;
  delete [] fvec;
  PyErr_SetString(PyExc_AssertionError, "Initialization error");
  return NULL;
 } else {
  PySys_WriteStdout("SUCCESS\n");
 }
 PySys_WriteStdout("Checking optimizer inputs          ... ");
 // info[] receives the result of each individual input check (0 = passed)
 if (dtrnlsp_check(&handle, &n, &m, fjac, fvec, eps, info) != TR_SUCCESS) {
  PySys_WriteStdout("FAILED\n");
  dtrnlsp_delete(&handle);
  MKL_Free_Buffers();
  delete [] fjac;
  delete [] fvec;
  PyErr_SetString(PyExc_AssertionError, "Check optimizer inputs.");
  return NULL;
 } else {
  PySys_WriteStdout("SUCCESS\n");
 }
 RCI_Request = 0;
 successful = 0;
 PySys_WriteStdout("\nBegin nonlinear least squares algorithm\n");
 PySys_WriteStdout(" Iter      p[0]      p[1]      p[2]    Residual\n");
 while (successful == 0) {
  if (dtrnlsp_solve(&handle, fvec, fjac, &RCI_Request) != TR_SUCCESS) {
   PyErr_SetString(PyExc_AssertionError, "Error encountered while searching for solution");
   dtrnlsp_delete(&handle);
   MKL_Free_Buffers();
   delete [] fjac;
   delete [] fvec;
   return NULL;
  }
  if (RCI_Request < 0) {
   PySys_WriteStdout("Algorithm terminated\n");
   successful = 1;
  }
  if (RCI_Request == 1) {
   residuals(&m, &n, p, fvec, (void *) &adata);
   res = 0.0;
   for (i=0; i<m; i++) {
    res += pow(fvec[i], 2);
   }
   PySys_WriteStdout("%5d   % 2.4f   % 2.4f   % 2.4f    %2.6f\n", count, p[0], p[1], p[2], sqrt(res));
   count += 1;
  }
  if (RCI_Request == 2) {
   // Approximate the Jacobian by central differences via djacobix,
   // which forwards the user-data pointer to the residual callback
   if (djacobix(residuals, &n, &m, fjac, p, eps, (void *) &adata) != TR_SUCCESS) {
    PyErr_SetString(PyExc_AssertionError, "Error encountered in calculation of the Jacobian");
    dtrnlsp_delete(&handle);
    MKL_Free_Buffers();
    delete [] fjac;
    delete [] fvec;
    return NULL;
   }
  }
 }
 PySys_WriteStdout("\nRetrieving solver's results:\n");
 if (dtrnlsp_get(&handle, &iter, &st_cr, &r1, &r2) != TR_SUCCESS) {
  PyErr_SetString(PyExc_AssertionError, "Error encountered in retrieving results");
  dtrnlsp_delete(&handle);
  MKL_Free_Buffers();
  delete [] fjac;
  delete [] fvec;
  return NULL;
 } else {
  PySys_WriteStdout("Number of iterations:\t\t%4d\n", (int) iter);
  PySys_WriteStdout("Initial residual:\t\t%2.18f\n", r1);
  PySys_WriteStdout("Final residual:\t\t%2.18f\n", r2);
  PySys_WriteStdout("Parameter values:\t\t(%2.6f, %2.6f, %2.6f)\n", p[0], p[1], p[2]);
 }


 // Release the solver handle and MKL's internal buffers before returning
 dtrnlsp_delete(&handle);
 MKL_Free_Buffers();
 delete [] fjac;
 delete [] fvec;
 return Py_BuildValue("(d,d,d)", p[0], p[1], p[2]);
}

static PyMethodDef pyMKLoptmethods[] = {
 {"mkloptimizer", mkloptimizer, METH_VARARGS, "Documentation:"},
 {NULL, NULL, 0, NULL}
};

PyMODINIT_FUNC initpyMKLopt(void) {
 Py_InitModule("pyMKLopt", pyMKLoptmethods);
 import_array();
}
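For reference, here is a minimal sketch of a distutils setup script that could drive the same build outside Visual Studio. The source file name, the MKL include/library paths, and the single mkl_rt link library are assumptions that depend on your MKL version and install layout:

# setup.py -- a sketch only; adjust paths and libraries for your MKL install
from distutils.core import setup, Extension
import numpy as np

module = Extension('pyMKLopt',
    sources=['pyMKLopt.cpp'],  # hypothetical source file name
    include_dirs=[np.get_include(),
                  r'C:\Program Files\Intel\MKL\include'],      # assumed
    library_dirs=[r'C:\Program Files\Intel\MKL\lib\intel64'],  # assumed
    libraries=['mkl_rt'])  # the single-dynamic-library link model

setup(name='pyMKLopt', version='0.1', ext_modules=[module])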

Once compiled (in Visual Studio, or with a setup script like the sketch above), you can import the resulting module into Python as follows:
import numpy as np
import pyMKLopt
np.random.seed(22)

DBL_EPSILON = np.spacing(1.0)  # machine epsilon for a double

def lorentz(p,x):
    return p[2] / (1.0 + (x / p[0] - 1.0)**4 * p[1]**2)
   
p = np.array([0.5, 0.25, 1.0], dtype=np.double)
x = np.linspace(-1.5, 2.5, num=30, endpoint=True)
noisyy = lorentz(p,x) + (np.random.randn(30) * 0.05)
p0 = np.array([1.0, 1.0, 1.0], dtype=np.double) #Initial guess


# Six stopping tolerances for the dtrnlsp solver; eps[0] also serves as the
# finite-difference step because the whole array is passed to djacobix
eps = np.array([1e-8, 1e-20, DBL_EPSILON, 1e-20, 1e-20, DBL_EPSILON], dtype=np.double)
solution = pyMKLopt.mkloptimizer(x, noisyy, eps, p0)
print '\n\n',solution

The output of this script (displayed in the console) is:
Initializing Intel's MKL optimizer ... SUCCESS
Checking optimizer inputs          ... SUCCESS

Begin nonlinear least squares algorithm
 Iter      p[0]      p[1]      p[2]    Residual
    0    1.0000    1.0000    1.0000    1.521464
    1    0.5798   -0.1432    0.8439    1.151126
    2    0.4988   -0.2203    0.9402    0.354709
    3    0.5090   -0.2644    1.0182    0.248959
    4    0.5071   -0.2661    1.0216    0.248171
    5    0.5071   -0.2662    1.0217    0.248171
    6    0.5071   -0.2662    1.0217    0.248171
    7    0.5071   -0.2662    1.0217    0.248171
    8    0.5071   -0.2662    1.0217    0.248171
Algorithm terminated

Retrieving solver's results:
Number of iterations: 8
Initial residual: 1.521463582532435900
Final residual: 0.248170645865868120
Parameter values: (0.507083, -0.266186, 1.021697)


(0.5070833881680623, -0.26618567803609183, 1.0216970905916452)

As you can see, it yields the same results (don't worry about the negative sign on p[1]; its value is squared). I want to compare this optimizer (which uses a trust-region algorithm) against the levmar routine. In practice I have found that the levmar routine works quite well, but there are scenarios involving boundary conditions where the optimizer gets "stuck". I get around this by performing a transformation that removes the need for boundary conditions. I will test this MKL optimizer to see how it performs in those situations.
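To make that concrete, here is a minimal sketch of the kind of transformation I mean, assuming simple box bounds [lo, hi]; the helper names are mine, not part of either library. The optimizer works on an unconstrained internal variable q, which the residual function maps back into the bounded parameter, so no explicit boundary conditions are needed:

import numpy as np

# MINUIT-style sine transform: q is unconstrained, p stays inside [lo, hi]
def to_bounded(q, lo, hi):
    return lo + (hi - lo) * (np.sin(q) + 1.0) / 2.0

def to_unbounded(p, lo, hi):  # inverse mapping, for the initial guess
    return np.arcsin(2.0 * (p - lo) / (hi - lo) - 1.0)

# Example: keep a parameter in [0.1, 2.0] while the solver only sees q
lo, hi = 0.1, 2.0
q0 = to_unbounded(0.5, lo, hi)  # transformed initial guess
assert abs(to_bounded(q0, lo, hi) - 0.5) < 1e-12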