#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <mex.h>
#include <math.h>

/* Linear index of voxel (ix,iy,iz) in an array whose fastest-varying index is iy.
   Both index macros expect variables named iNy (and iNxy for X) to be in scope
   at the point of use. */
#define X(ix,iy,iz) ((iz)*iNxy+(ix)*iNy+(iy))
#define Xxy(ix,iy)  ((ix)*iNy+(iy))

/* Arithmetic helpers. Every argument and every expansion is parenthesised so the
   macros compose safely inside larger expressions; arguments may still be
   evaluated more than once, so do not pass expressions with side effects. */
#define MAX(a,b) ( (a) > (b) ? (a) : (b) )
#define SIGN(x) ( (x) >= 0.0 ? 1.0 : -1.0 )
#define ABS(x) ( (x) > 0.0 ? (x) : -(x) )
#define SQR(x) ((x)*(x))


/*
 * Fast approximate square root.
 *
 * Computes an approximation y of 1/sqrt(number) from the float's bit pattern
 * (magic constant 0x5f3759df), refines it with two Newton-Raphson steps and
 * returns number*y.  SQRT(0) returns 0.  The result is only meaningful for
 * number >= 0.
 *
 * The bit pattern is moved between float and a 32-bit integer with memcpy:
 * reading the float through a `long *` touches 8 bytes on platforms where
 * long is 64 bits wide and breaks the strict-aliasing rule.
 */
float SQRT(float number) {
    const float f = 1.5F;
    const float x = number * 0.5F;
    int32_t i;
    float y = number;

    memcpy(&i, &y, sizeof i);            /* float bits -> integer */
    i  = 0x5f3759df - ( i >> 1 );        /* initial guess for 1/sqrt(number) */
    memcpy(&y, &i, sizeof y);            /* integer bits -> float */
    y  = y * ( f - ( x * y * y ) );      /* Newton-Raphson step 1 */
    y  = y * ( f - ( x * y * y ) );      /* Newton-Raphson step 2 */
    return number * y;
}

/**********************************************/
/************* SOLVER HELPERS *****************/
/**********************************************/

/* Grid size, constants and buffers shared by the solver helpers below.
   All arrays hold iNy*iNx*iNz floats indexed with X(ix,iy,iz). */
typedef struct
{
    int          iNy, iNx, iNz;     /* grid size */
    int          iNxy;              /* iNx*iNy, stride of iz in X() */
    float        fEta;              /* penalty parameter (pfVecParameters[4]) */
    float        fInvEta2;          /* (fLambda/fEta)^2, squared shrinkage threshold */
    float        fct1,  fct2;       /* 1/4, 1/(4 fEta): pixel with 4 in-plane neighbours */
    float        fct1b, fct2b;      /* 1/3, 1/(3 fEta): pixel with 3 in-plane neighbours */
    float        fct1c, fct2c;      /* 1/2, 1/(2 fEta): pixel with 2 in-plane neighbours */
    const float *pfDk;              /* input D_k (read only) */
    float       *pfv;               /* unknown v, updated in place */
    float       *pfdx, *pfdy;       /* auxiliary variable d */
    float       *pfbx, *pfby;       /* Bregman variable b */
} tSplitBregman;

/* x-part of the backward difference of (d - b) at (ix,iy,iz).
   At ix == 0 there is no previous sample, so only the local term is used. */
static float sbBackDiffX(const tSplitBregman *ps, int ix, int iy, int iz)
{
    const int iNy = ps->iNy, iNxy = ps->iNxy;
    const int iX = X(ix,iy,iz);
    int iPrev;

    if (ix > 0)
    {
        iPrev = X(ix-1,iy,iz);
        return ps->pfdx[iPrev] - ps->pfdx[iX] - ps->pfbx[iPrev] + ps->pfbx[iX];
    }
    return - ps->pfdx[iX] + ps->pfbx[iX];
}

/* y-part of the backward difference of (d - b) at (ix,iy,iz).
   At iy == 0 there is no previous sample, so only the local term is used. */
static float sbBackDiffY(const tSplitBregman *ps, int ix, int iy, int iz)
{
    const int iNy = ps->iNy, iNxy = ps->iNxy;
    const int iX = X(ix,iy,iz);
    int iPrev;

    if (iy > 0)
    {
        iPrev = X(ix,iy-1,iz);
        return ps->pfdy[iPrev] - ps->pfdy[iX] - ps->pfby[iPrev] + ps->pfby[iX];
    }
    return - ps->pfdy[iX] + ps->pfby[iX];
}

/* Gauss-Seidel update of all iNz layers of v at the in-plane position (ix,iy).
   1. The multiplier is the mean over iz of fEta*(backward differences) - Dk.
   2. Each layer is set to the average of its existing in-plane neighbours plus
      the (d - b) terms, minus the data/multiplier term, clamped to [0,1].
   3. The layers are rescaled so that they sum to 1 over iz.
   Requires iNx >= 2 and iNy >= 2 so that every pixel has 2 to 4 neighbours. */
static void sbUpdatePixel(const tSplitBregman *ps, int ix, int iy)
{
    const int iNy = ps->iNy, iNxy = ps->iNxy;
    const int bXp = (ix < ps->iNx-1);   /* neighbour at ix+1 exists */
    const int bXm = (ix > 0);           /* neighbour at ix-1 exists */
    const int bYp = (iy < ps->iNy-1);   /* neighbour at iy+1 exists */
    const int bYm = (iy > 0);           /* neighbour at iy-1 exists */
    const int iNb = bXp + bXm + bYp + bYm;
    float fAvg, fData, fLam, fG, fSum;
    int iz, iX;

    if (iNb == 4)      { fAvg = ps->fct1;  fData = ps->fct2;  }
    else if (iNb == 3) { fAvg = ps->fct1b; fData = ps->fct2b; }
    else               { fAvg = ps->fct1c; fData = ps->fct2c; }

    /* Multiplier shared by all layers of this pixel. It depends only on d, b
       and Dk, none of which change during a Gauss-Seidel sweep. */
    fLam = 0.0f;
    for (iz=0; iz< ps->iNz; iz++)
    {
        fLam += ps->fEta * sbBackDiffX(ps,ix,iy,iz);
        fLam += ps->fEta * sbBackDiffY(ps,ix,iy,iz);
        fLam -= ps->pfDk[X(ix,iy,iz)];
    }
    fLam = fLam / (float) ps->iNz;

    fSum = 0.0f;
    for (iz=0; iz< ps->iNz; iz++)
    {
        iX = X(ix,iy,iz);
        /* Neighbours are accumulated in the order ix+1, ix-1, iy+1, iy-1. */
        fG = 0.0f;
        if (bXp) fG += ps->pfv[X(ix+1,iy,iz)];
        if (bXm) fG += ps->pfv[X(ix-1,iy,iz)];
        if (bYp) fG += ps->pfv[X(ix,iy+1,iz)];
        if (bYm) fG += ps->pfv[X(ix,iy-1,iz)];
        fG += sbBackDiffX(ps,ix,iy,iz);
        fG += sbBackDiffY(ps,ix,iy,iz);
        fG *= fAvg;
        fG -= fData * (ps->pfDk[iX] + fLam);
        if (fG>1.0) fG=1.0;
        else if (fG<0.0) fG=0.0;
        ps->pfv[iX] = fG;
        fSum += fG;
    }

    /* Rescale so the layers sum to 1. When every layer was clamped to 0 the
       sum is 0 and dividing would store 0/0 = NaN, which then spreads to the
       neighbouring pixels; in that case the zeros are left untouched. */
    if (fSum > 0.0f && ABS(fSum-1.0)>1e-6)
    {
        for (iz=0; iz< ps->iNz; iz++)
            ps->pfv[X(ix,iy,iz)] /= fSum;
    }
}

/* One Gauss-Seidel sweep over the whole grid. Updates are in place, so the
   visiting order matters: interior, then the lines ix=0, ix=iNx-1, iy=0,
   iy=iNy-1 (without their end points), then the four corners. */
static void sbGaussSeidelSweep(const tSplitBregman *ps)
{
    const int iXLast = ps->iNx-1, iYLast = ps->iNy-1;
    int ix, iy;

    /* Center */
    for (ix=1; ix< iXLast; ix++)
        for (iy=1; iy< iYLast; iy++)
            sbUpdatePixel(ps, ix, iy);

    /* Borders */
    for (iy=1; iy< iYLast; iy++) sbUpdatePixel(ps, 0, iy);
    for (iy=1; iy< iYLast; iy++) sbUpdatePixel(ps, iXLast, iy);
    for (ix=1; ix< iXLast; ix++) sbUpdatePixel(ps, ix, 0);
    for (ix=1; ix< iXLast; ix++) sbUpdatePixel(ps, ix, iYLast);

    /* Corners */
    sbUpdatePixel(ps, 0, 0);
    sbUpdatePixel(ps, iXLast, 0);
    sbUpdatePixel(ps, 0, iYLast);
    sbUpdatePixel(ps, iXLast, iYLast);
}

/* Compute d^{k+1} (Soft-Thresholding) and b^{k+1} (Bregman function):
     d^k+1 = (grad u^k+1 + b^k)/ |grad u^k+1 + b^k| max(|grad u^k+1 + b^k|-1/mu,0)
     b^k+1 = b^k + grad u^k+1 - d^k+1
   grad u uses forward differences; the x-difference is 0 on the last ix line
   and the y-difference is 0 on the last iy line. Every voxel only reads v and
   its own d and b entries, so the loop order does not affect the result. */
static void sbUpdateDandB(const tSplitBregman *ps)
{
    const int iNy = ps->iNy, iNxy = ps->iNxy;
    float fDxu, fDyu, f1, f2, fs, fctST, fTemp;
    int ix, iy, iz, iX;

    for (iz=0; iz< ps->iNz; iz++)
        for (ix=0; ix< ps->iNx; ix++)
            for (iy=0; iy< ps->iNy; iy++)
            {
                iX = X(ix,iy,iz);
                /* d */
                fDxu = 0.0;
                fDyu = 0.0;
                if (ix < ps->iNx-1) fDxu = ps->pfv[X(ix+1,iy,iz)] - ps->pfv[iX];
                if (iy < ps->iNy-1) fDyu = ps->pfv[X(ix,iy+1,iz)] - ps->pfv[iX];
                f1 = fDxu+ps->pfbx[iX];
                f2 = fDyu+ps->pfby[iX];
                fs = SQR(f1)+SQR(f2);
                fctST = ps->fInvEta2;
                if ( fs<fctST ) { ps->pfdx[iX]=0.0; ps->pfdy[iX]=0.0; }
                else
                {
                    fs = SQRT(fs);
                    fctST = SQRT(fctST);
                    fTemp = fs-fctST; fTemp /= fs;
                    ps->pfdx[iX] = fTemp* f1;
                    ps->pfdy[iX] = fTemp* f2;
                }
                /* b */
                ps->pfbx[iX] += fDxu - ps->pfdx[iX];
                ps->pfby[iX] += fDyu - ps->pfdy[iX];
            }
}

/**********************************************/
/************** MAIN FUNCTION *****************/
/**********************************************/

/****************************************/
/* MEX gateway.

   iNbOut: number of outputs
   pmxOut: array of pointers to output arguments
   iNbIn:  number of inputs
   pmxIn:  array of pointers to input arguments

   Inputs (all read through float pointers; the class of the inputs is not
   checked, so they are assumed to be single precision):
     pmxIn[0]  dual variable D_k, iNy*iNx*iNz values
     pmxIn[1]  parameter vector [iNy, iNx, iNz, fLambda, fEta]
     pmxIn[2]  initial v, iNy*iNx*iNz values
   Output:
     pmxOut[0] single array of size iNy x iNx x iNz holding v after at most
               5 outer iterations, each with at most 2 Gauss-Seidel sweeps.

   Iterative minimization scheme
   See: T. Goldstein, X. Bresson, and S. Osher, Geometric Applications of the
   Split Bregman Method: Segmentation and Surface Reconstruction, CAM Report 09-06, 2009
   Iterative scheme:
   (u^k+1,d^k+1) = arg min int g_b |d| + lambda h_r u + mu/2 |d - grad u - b^k|^2
   b^k+1 = b^k + grad u^k+1 - d^k+1 */
extern void mexFunction(int iNbOut, mxArray *pmxOut[],
int iNbIn, const mxArray *pmxIn[])
{
    tSplitBregman s;
    const float *pfVecParameters, *pfv_initial;
    float   *pfvOld, *pfvOld2;
    float   fLambda, fInvEta, fNyxz, fStopThres;
    float   fSumDiff, fSumU, fSumUold;
    float   fDiffNew, fDiffOld, fDiffNew2, fDiffFirst2, fError;
    int     iNdim, iDim[3], iN, i;
    int     iNI, iGS;
    size_t  sBytes;

    (void) iNbOut;

    if (iNbIn < 3)
    {
        mexPrintf("Three inputs are required: Dk, parameter vector, initial v\n");
        return;
    }

    /* Inputs */
    s.pfDk          = mxGetData(pmxIn[0]); /* Dual variable D_k */
    pfVecParameters = mxGetData(pmxIn[1]); /* Vector of parameters */
    pfv_initial     = mxGetData(pmxIn[2]);

    /* Size */
    s.iNy = (int) pfVecParameters[0];
    s.iNx = (int) pfVecParameters[1];
    s.iNz = (int) pfVecParameters[2];
    /* Parameters */
    fLambda = pfVecParameters[3];
    s.fEta  = pfVecParameters[4];

    /* The stencils read ix+1 / iy+1 at the low corners, so each in-plane
       dimension needs at least two samples. */
    if (s.iNx < 2 || s.iNy < 2 || s.iNz < 1)
    {
        mexPrintf("Grid must be at least 2 x 2 x 1\n");
        return;
    }
    s.iNxy = s.iNx*s.iNy;
    iN     = s.iNy*s.iNx*s.iNz;
    sBytes = (size_t) iN * sizeof(float);

    /* Outputs */
    /* NOTE(review): current MATLAB headers declare the dims argument as
       const mwSize*; confirm this file is built with an int-sized mwSize
       (e.g. mex -compatibleArrayDims) or switch iDim to mwSize. */
    iNdim = 3;
    iDim[0] = s.iNy;
    iDim[1] = s.iNx;
    iDim[2] = s.iNz;
    pmxOut[0] = mxCreateNumericArray(iNdim,(const int*)iDim,mxSINGLE_CLASS,mxREAL);
    s.pfv = mxGetData(pmxOut[0]);

    /* Compute init function v. Done before the work buffers are allocated so
       that an allocation failure returns the unmodified initial v. */
    memcpy(s.pfv, pfv_initial, sBytes);

    /* Memory allocation (zero-initialised) */
    s.pfdx  = calloc( (size_t) iN, sizeof(float) );
    s.pfdy  = calloc( (size_t) iN, sizeof(float) );
    s.pfbx  = calloc( (size_t) iN, sizeof(float) );
    s.pfby  = calloc( (size_t) iN, sizeof(float) );
    pfvOld  = calloc( (size_t) iN, sizeof(float) );
    pfvOld2 = calloc( (size_t) iN, sizeof(float) );
    if (!s.pfdx || !s.pfdy || !s.pfbx || !s.pfby || !pfvOld || !pfvOld2)
    {
        mexPrintf("Memory allocation failure\n");
        free( s.pfdx );
        free( s.pfdy );
        free( s.pfbx );
        free( s.pfby );
        free( pfvOld );
        free( pfvOld2 );
        return;
    }

    /* Constants */
    fInvEta = fLambda/ s.fEta;
    s.fInvEta2 = SQR(fInvEta);

    s.fct1  = 1./4.;
    s.fct2  = 1./(4.0*s.fEta);
    s.fct1b = 1./3.;
    s.fct2b = 1./(3.0*s.fEta);
    s.fct1c = 1./2.;
    s.fct2c = 1./(2.0*s.fEta);
    fNyxz = (float) iN;
    fStopThres = 1e-6;

    fDiffOld = 1e10; fDiffNew = 1e11;
    fDiffFirst2 = 0.0;
    iNI=0; /* number of iterations (outer iterations) */
    while ( ABS(fDiffNew-fDiffOld)>fStopThres && iNI<5 )
    {
        /* Store u^old for outer iterations */
        memcpy(pfvOld, s.pfv, sBytes);

        /* Compute u^{k+1} with Gauss-Seidel */
        /* Solve u^k+1 = arg min int lambda h_r u + mu/2 |d - grad u - b^k|^2 */
        /* Euler-Lagrange is  mu Laplacian u = lambda hr + mu div (b^k-d^k), u in [0,1] */
        iGS=0; /* number of iterations for Gauss-Seidel (inner iterations) */
        fError = 1e10;
        while ( fError>1e-2 && iGS<2 )
        {
            /* Store u^old for inner iterations (Gauss-Seidel) */
            memcpy(pfvOld2, s.pfv, sBytes);

            sbGaussSeidelSweep(&s);

            /* Compute diff ( u - uold ): mean squared change of this sweep */
            fSumDiff = 0.0;
            for (i=0; i< iN; i++)
                fSumDiff += SQR(s.pfv[i]-pfvOld2[i]);
            fDiffNew2 = fSumDiff/ fNyxz;
            if ( iGS==0 )
            {
                /* The first sweep only records the reference change. */
                fDiffFirst2 = fDiffNew2;
                fError = 1e10;
            }
            else
                fError = 1.0 - ABS(fDiffNew2-fDiffFirst2)/fDiffFirst2;
            iGS++;
        }

        sbUpdateDandB(&s);

        /* Change of v over this outer iteration, scaled by the product of the
           squared norms of the new and old v. */
        fSumDiff = 0.0;
        fSumU = 0.0;
        fSumUold = 0.0;
        for (i=0; i< iN; i++)
        {
            fSumDiff += SQR(s.pfv[i]-pfvOld[i]);
            fSumU += SQR(s.pfv[i]);
            fSumUold += SQR(pfvOld[i]);
        }
        fDiffOld = fDiffNew;
        fDiffNew = fSumDiff/ (fSumU*fSumUold);
        iNI++;
    }

    /* Outputs (see above) */

    /* Free memory */
    free( s.pfdx );
    free( s.pfdy );
    free( s.pfbx );
    free( s.pfby );
    free( pfvOld );
    free( pfvOld2 );
}





