/***************************************************************************/
/***************************************************************************/
/*
Computing NL weights W(x,y,x',y') with library CImg. 
Input: 
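    iNy: image height.
	iNx: image width.
	im: patch size (patches are im*im).
	iw: search window size (windows are iw*iw).
	fh: parameter of the Gaussian weighting kernel.
	iNbNeigh: number of neighbors for which weights are kept.
Output:
    W1: iNx*iNy*4*iNbNeigh array. When z is even, W1(x,y,z)=W(x,y,x',y');
	when z is odd, W1(x,y,z)=sqrt(W(x,y,x',y')).
	Y1: iNx*iNy*2*iNbNeigh. When z is even, Y1(x,y,z)=x';
    when z is odd, Y1(x,y,z)=y'.
	SY: iNx*iNy array. SY(x,y) is the largest z value among the non-zero entries of W1(x,y,:)
	(reconstructed from a mis-encoded comment; confirm against the usage loop below).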

	How to use this function?
    e.g. Non-local means.
	start_time2=clock();
	CImg<float> res(iNx,iNy,1,1,0);
	cimg_forXY(res,x,y)
	{   
		float sumW=0.0f;
		for (i=0;i<SY(x,y);i++)
		{
			res(x,y)+=Img0(Y1(x,y,2*i),Y1(x,y,2*i+1))*W1(x,y,2*i);
		    sumW+=W1(x,y,2*i);
		}
		res(x,y)/=sumW;
	 }
	end_time2=clock();
	mexPrintf("time=%.3f\n",difftime(end_time2,start_time2)/1000);
    CImg<>diffimg=Img0-res;
     Img0.get_append(res,'x').display();
	 diffimg.display();
*/
/*************************************************************************************/


#include"NLMethods.h"

#include <cstdint>
#include <cstring>


/************************************************************************/
/*
 * Fast approximate square root.
 *
 * Builds a first estimate of 1/sqrt(number) from the bit pattern of the
 * float (magic constant 0x5f3759df), refines it with two Newton-Raphson
 * steps, and returns number * (1/sqrt(number)).
 *
 * The bit pattern is moved between float and integer with memcpy on a
 * fixed-width 32-bit integer. The previous pointer-cast through `long`
 * read 8 bytes from a 4-byte float wherever `long` is 64-bit, and broke
 * the strict-aliasing rule everywhere.
 *
 * number : value to take the root of. Expected to be finite and >= 0;
 *          the bit trick assumes IEEE-754 single precision and the result
 *          is meaningless for negative or non-finite input.
 * returns: approximation of sqrt(number); exactly 0 when number is 0.
 */
float SQRT(float number) {
    static_assert(sizeof(float) == sizeof(std::uint32_t),
                  "SQRT requires a 32-bit float");

    const float threeHalves = 1.5F;
    const float halfNumber = number * 0.5F;

    /* Reinterpret the float as its raw 32-bit pattern. */
    std::uint32_t bits;
    std::memcpy(&bits, &number, sizeof bits);

    /* Initial guess for 1/sqrt(number). */
    bits = 0x5f3759dfU - (bits >> 1);

    float invRoot;
    std::memcpy(&invRoot, &bits, sizeof invRoot);

    /* Two Newton-Raphson refinements of the inverse square root. */
    invRoot = invRoot * ( threeHalves - ( halfNumber * invRoot * invRoot ) );
    invRoot = invRoot * ( threeHalves - ( halfNumber * invRoot * invRoot ) );

    /* sqrt(n) = n * (1/sqrt(n)) */
    return number * invRoot;
}

      
/*************************************************************************/
template<typename T>
 void NLWeights(CImg<T> &Img0, CImgList<T> &W1_Y1_SY, const int im=5,  const int iw=15, 
	           int iNbNeigh=20, const float fh=0.25*0.25, int iIncludeCloseNeigh=1)
{ 
    
    float   *pfIm0, *pfVecGeneralParameters, *pfW;
    float   *pfW2, *pfW2b;
    float   *pfSdx, *pfIm0e, fDif, fDist, *pfWe;
    float   fcurrent, fTmp, *pfWmat;
    
    int     iNy, iNx, iNdim, iDim[3], iDisplay;
    int     iN3, im2, iw2, ic1;
    int     iy, ix, i, iye, ixe, i2, ixt, iyt, iNbBestNeigh;
    int     *piY, iNyx, iNxe, iNye, ik, idx, idy, ip;
    int     adr, adr1, adr2, adr3, adr4, iyp1, iyp2, ixp1, ixp2, iNyxw;
    int     iNxe2, iNye2, iNyxe, iX, iX1;
    int     istart, iend, imiddle, icurrent, isqw2, iTmp, i3;
    int     *pidx, *pidy, *pidxb, *pidyb, iStartCpt, iXd, iY, iXsqw2; 
    int     iXNeigh2, iYNeigh2, iXNeigh4, iYNeigh4;
    int     iXinv, iYinv, i1a, i1b, i2a, i2b, iNbNeigh2, iNbNeigh4;
    int     *piSizeNeigh, *piYmat,j2, j3;
    int     iNbNeighToSort, iIsolatedPt;
    
    short   *psId1, *psId2, *psId3;
    
    time_t  start_time, end_time;
    time_t  start_time2, end_time2;
    
    start_time = clock();
    
	mexPrintf("\nStart NL-Weights\n");
    
    
	//Img0.normalize(0,1.0f);
	pfIm0=Img0.data(); 
	iNy=Img0.height();
	iNx=Img0.width();
    mexPrintf("iNy= %i, iNx= %i, im= %i, iw= %i, fh= %.3f\n",iNy,iNx,im,iw,fh);
    
    
    im2 = (im-1)/2;
    iw2 = (iw-1)/2;
    ic1 = im2+iw2;
    mexPrintf("iNbNeigh= %i, im2= %i, iw2= %i, ic1= %i\n",iNbNeigh,im2,iw2,ic1);
    
    
    if (iNbNeigh>iw*iw-4)
    {
        iNbNeigh = iw*iw;
        iNbBestNeigh = iNbNeigh;
        iIncludeCloseNeigh = NO;
    }
    else
    {
        iNbBestNeigh = iNbNeigh;
        if ( iIncludeCloseNeigh==YES ) iNbNeigh += 4;
    }
    mexPrintf("iNbNeigh= %i, iNbBestNeigh= %i, iIncludeCloseNeigh= %i\n",iNbNeigh,iNbBestNeigh,iIncludeCloseNeigh);

    
    iNyx = iNy* iNx;
    
    
    
    iNyxw = iNy*iNx*iw;
    
    
    iNxe = iNx + 2* iw2;
    iNye = iNy + 2* iw2;
    
    iNxe2 = 2*iNxe-2;
    iNye2 = 2*iNye-2;
    
    isqw2 = iw*iw;
    
    
    iNyxe = iNxe*iNye;
    
    iNbNeigh2 = 2*iNbNeigh;
    iNbNeigh4 = 4*iNbNeigh;
    
    
    iN3 = iNbNeigh* 2* 2;
    iNdim = 3;
    iDim[0] = iNx;
    iDim[1] = iNy;
    iDim[2] = iN3;
	CImg<float>W(iNx,iNy,iN3,1,0);
    pfW=W.data();
    
    iN3 =iNbNeigh* 2* 2;
    iNdim = 3;
    iDim[0] = iNx;
    iDim[1] = iNy;
    iDim[2] = iN3;
    CImg<int>Y(iNx,iNy,iN3,1,0);
	piY=Y.data();
    
    
    iNdim = 2;
    iDim[0] = iNx;
    iDim[1] = iNy;
	CImg<int>SY(iNx,iNy,1,1,0);
	piSizeNeigh=SY.data();
    
    

    iN3 =iNbNeigh* 2* 2;
    iNdim = 3;
    iDim[0] = iNx;
    iDim[1] = iNy;
    iDim[2] = iN3;
    
    CImg<float> W1(iNx,iNy,iN3,1,0);
	CImg<int> Y1(iNx,iNy,iN3,1,0);
	pfWmat=W1.data();
	piYmat=Y1.data();
    
    
    
    
    pfW2 = (float *) calloc( (unsigned)(iw*iw), sizeof(float) );
    if (!pfW2)
        mexPrintf("Memory allocation failure\n");
    
    pfW2b = (float *) calloc( (unsigned)(iNbNeigh), sizeof(float) );
    if (!pfW2b)
        mexPrintf("Memory allocation failure\n");
    
    pidx = (int *) calloc( (unsigned)(iw*iw), sizeof(int) );
    if (!pidx)
        mexPrintf("Memory allocation failure\n");
    
    pidy = (int *) calloc( (unsigned)(iw*iw), sizeof(int) );
    if (!pidy)
        mexPrintf("Memory allocation failure\n");
    
    pidxb = (int *) calloc( (unsigned)(iNbNeigh), sizeof(int) );
    if (!pidxb)
        mexPrintf("Memory allocation failure\n");
    
    pidyb = (int *) calloc( (unsigned)(iNbNeigh), sizeof(int) );
    if (!pidyb)
        mexPrintf("Memory allocation failure\n");
    
    psId1 = (short *) calloc( (unsigned)(iNx*iNy*iw*iw), sizeof(short) );
    if (!psId1)
        mexPrintf("Memory allocation failure\n");
     
    psId2 = (short *) calloc( (unsigned)(iNx*iNy*iNbNeigh2), sizeof(short) );
    if (!psId2)
        mexPrintf("Memory allocation failure\n");
    
    psId3 = (short *) calloc( (unsigned)(iw*iw), sizeof(short) );
    if (!psId3)
        mexPrintf("Memory allocation failure\n");
    
    pfSdx = (float *) calloc( (unsigned)(iNxe*iNye), sizeof(float) );
    if (!pfSdx)
        mexPrintf("Memory allocation failure\n");
    
    pfIm0e = (float *) calloc( (unsigned)(iNxe*iNye), sizeof(float) );
    if (!pfIm0e)
        mexPrintf("Memory allocation failure\n");
    
    pfWe = (float *) calloc( (unsigned)(iNx*iNy*iw*iw), sizeof(float) );
    if (!pfWe)
        mexPrintf("Memory allocation failure\n");
    

    /* init */
	
    start_time2 = clock();
    for (i=0; i< iNyx; i++)
    {
        j2 = i*isqw2;
        j3 = i*iNbNeigh2;
        for (i2=0; i2< isqw2; i2++) psId1[j2+i2] = 0;
        for (i2=0; i2< iNbNeigh2; i2++) psId2[j3+i2] = 0;
        piSizeNeigh[i] = 0;
    }
    end_time2 = clock();
    mexPrintf("Step #1 (init): Time= %.3f sec",difftime(end_time2,start_time2)/1000);
    

 
    
    /* compute extended image: reflective boundary */
    for (iye=0,ik=0;iye<iNye;iye++)
    {
        if (iye<iw2) iy=iw2-iye;
        else if (iye>iNy+iw2-1) iy=2*iNy+iw2-iye-2;
        else iy=iye-iw2;
        
        for (ixe=0;ixe<iNxe;ixe++,ik++)
        {
            if (ixe<iw2)  ix=iw2-ixe;
            else if (ixe>iNx+iw2-1) ix=2*iNx+iw2-ixe-2;
            else ix=ixe-iw2;
            
            pfIm0e[ik]=pfIm0[X(ix,iy)];
        }
    }
  
    /* compute differences between patches*/
    start_time2 = clock();
    for (idy=-iw2;idy<=iw2;idy++)
        for (idx=-iw2;idx<=iw2;idx++)
    {
        
        /* compute translate image S_{dx,dy}(ixe,iye)
         clear image */
        for (ip=0;ip<iNyxe;ip++) pfSdx[ip]=0.0;
        /**/
        ixe=0; iye=0;
        ixt=ixe+idx;
        iyt=iye+idy;
        if (ixt<0)ixt=-ixt; if (ixt>=iNxe) ixt=iNxe2-ixt;
        if (iyt<0)iyt=-iyt; if (iyt>=iNye) iyt=iNye2-iyt;
        fDif = pfIm0e[Xe(ixe,iye)]-pfIm0e[Xe(ixt,iyt)];
        pfSdx[Xe(ixe,iye)] = fDif*fDif;
        /**/
        ixe=0;
        for (iye=1;iye<iNye;iye++)
        {
            ixt=ixe+idx;
            iyt=iye+idy;
            if (ixt<0)ixt=-ixt; if (ixt>=iNxe) ixt=iNxe2-ixt;
            if (iyt<0)iyt=-iyt; if (iyt>=iNye) iyt=iNye2-iyt;
            fDif = pfIm0e[Xe(ixe,iye)]-pfIm0e[Xe(ixt,iyt)];
            pfSdx[Xe(ixe,iye)] = pfSdx[Xe(ixe,iye-1)] + fDif*fDif;
        }
        /**/
        iye=0;
        for (ixe=1;ixe<iNxe;ixe++)
        {
            ixt=ixe+idx;
            iyt=iye+idy;
            if (ixt<0)ixt=-ixt; if (ixt>=iNxe) ixt=iNxe2-ixt;
            if (iyt<0)iyt=-iyt; if (iyt>=iNye) iyt=iNye2-iyt;
            fDif = pfIm0e[Xe(ixe,iye)]-pfIm0e[Xe(ixt,iyt)];
            pfSdx[Xe(ixe,iye)] = pfSdx[Xe(ixe-1,iye)] + fDif*fDif;
        }
        /**/
        for (ixe=1;ixe<iNxe;ixe++)
            for (iye=1;iye<iNye;iye++)
        {
            ixt=ixe+idx;
            iyt=iye+idy;
            if (ixt<0)ixt=-ixt; if (ixt>=iNxe) ixt=2*iNxe-ixt-2;
            if (iyt<0)iyt=-iyt; if (iyt>=iNye) iyt=2*iNye-iyt-2;
            fDif = pfIm0e[Xe(ixe,iye)]-pfIm0e[Xe(ixt,iyt)];
            pfSdx[Xe(ixe,iye)] = pfSdx[Xe(ixe-1,iye)] + pfSdx[Xe(ixe,iye-1)] - pfSdx[Xe(ixe-1,iye-1)] + fDif*fDif;
        }
        
        /* Original Image zone */
        for (iye=iw2;iye<iNye-iw2;iye++)
            for (ixe=iw2;ixe<iNxe-iw2;ixe++)       
        {
            /* output image position */
            iy=iye-iw2;
            ix=ixe-iw2;
            adr=X(ix,iy);
            /* Sdx(iy-s,ix-s)-Sdx(iy+s,ix+s) to get the patch difference */
            iyp1=iye-im2;
            ixp1=ixe-im2;
            iyp2=iye+im2;
            ixp2=ixe+im2;
            /* adr */
            adr1=Xe(ixp1,iyp1); /*(iy-s,ix-s)*/
            adr2=Xe(ixp2,iyp2); /*(iy+s,ix+s)*/
            adr3=Xe(ixp1,iyp2); /*(iy-s,ix+s)*/
            adr4=Xe(ixp2,iyp1); /*(iy+s,ix-s)*/
            /* dist */
            fDist = pfSdx[adr2] - pfSdx[adr3] - pfSdx[adr4] + pfSdx[adr1];
            pfWe[XWe(ix,iy,idx+iw2,idy+iw2)] = fDist;
            }
        
        }
    end_time2 = clock();
    mexPrintf("\nStep #2 (distance between patches): Time= %.3f sec\n",difftime(end_time2,start_time2)/1000);
    
    
    
    
        
    /* CENTER*/
    start_time2 = clock();
    for (iy=ic1; iy< iNy-ic1; iy++)
        for(ix=ic1; ix< iNx-ic1; ix++)
    {
        iX = X(ix,iy);
        iXsqw2 = iX*isqw2;
        for (idy=-iw2; idy<= iw2; idy++)
            for(idx=-iw2; idx<= iw2; idx++)
        {
                iXd = Xd(idx,idy);
                pfW2[iXd] = pfWe[XWe2(iXsqw2,iXd)];
                pidx[iXd] = idx; 
                pidy[iXd] = idy;
            }
        
        /* 4 neighbors */
        if ( iIncludeCloseNeigh==YES )
        {
            idx = 1; idy = 0; pfW2[Xd(idx,idy)] = 0.0;
            idx = -1; idy = 0; pfW2[Xd(idx,idy)] = 0.0;
            idx = 0; idy = 1; pfW2[Xd(idx,idy)] = 0.0;
            idx = 0; idy = -1; pfW2[Xd(idx,idy)] = 0.0;
            iNbNeighToSort = iNbNeigh;
        }
        else        
            iNbNeighToSort = iNbBestNeigh;
        
        
        for (i2=0; i2< iNbNeighToSort; i2++)  pfW2b[i2] = 1024.0;
        for (i2=0; i2< isqw2; i2++)
        {
            fcurrent = pfW2[i2];
            if ( fcurrent<pfW2b[iNbNeighToSort-1] )
            {
                /* dichotomy*/
                istart = 0;
                iend = iNbNeighToSort-1;
                while ( iend-istart>1 )
                {
                    imiddle = (iend-istart)/2;
                    if (pfW2b[istart+imiddle] > fcurrent)
                        iend = istart+imiddle;
                    else
                        istart = istart+imiddle;
                }
                if (pfW2b[istart] > fcurrent)
                    icurrent = istart;
                else
                    icurrent = iend;
                
                /* shifting */
                for(i3=iNbNeighToSort-2; i3>=icurrent; i3--)
                {
                    SWAP(pfW2b[i3],pfW2b[i3+1],fTmp);
                    SWAP(pidxb[i3],pidxb[i3+1],iTmp);
                    SWAP(pidyb[i3],pidyb[i3+1],iTmp);
                }
                
                /* new value */
                pfW2b[icurrent] = fcurrent;
                pidxb[icurrent] = pidx[i2];
                pidyb[icurrent] = pidy[i2];
                
            }
        } /* end for (i2=0; i2< isqw2; i2++) */
        
        
        
        /* 4 neighbors */
        if ( iIncludeCloseNeigh==YES )
        {
            icurrent = 0;
            idx = 0; idy = 0; pfW2b[icurrent] = pfW2b[1+4]; /* give same weight to x,y and Yx,Yy*/
            pidxb[icurrent] = idx; pidyb[icurrent] = idy; icurrent++;
            idx = 1; idy = 0; pfW2b[icurrent] = pfWe[XWe2(iXsqw2,Xd(idx,idy))];
            pidxb[icurrent] = idx; pidyb[icurrent] = idy; icurrent++;
            idx = -1; idy = 0; pfW2b[icurrent] = pfWe[XWe2(iXsqw2,Xd(idx,idy))];
            pidxb[icurrent] = idx; pidyb[icurrent] = idy; icurrent++;
            idx = 0; idy = 1; pfW2b[icurrent] = pfWe[XWe2(iXsqw2,Xd(idx,idy))];
            pidxb[icurrent] = idx; pidyb[icurrent] = idy; icurrent++;
            idx = 0; idy = -1; pfW2b[icurrent] = pfWe[XWe2(iXsqw2,Xd(idx,idy))];
            pidxb[icurrent] = idx; pidyb[icurrent] = idy;
        }
        else
            pfW2b[0] = pfW2b[1]; /* give same weight to x,y and Yx,Yy*/
        
        
        /* compute symmetric W*/
		 if ( exp(-pfW2b[0]/fh)>0.2 ) iIsolatedPt = 0; else iIsolatedPt = 1;
        iXNeigh2 = iX*iNbNeigh2;
        iXNeigh4 = iX*iNbNeigh4;
        iStartCpt = 0;
        for (i=0; i< iNbNeigh; i++)
        {
            idx = pidxb[i];
            idy = pidyb[i];
            iXd = Xd(idx,idy);
            
            if ( psId1[Xid1(iXsqw2,iXd)]==0 )
            {
                iY = X(ix+idx,iy+idy);
                iYNeigh2 = iY* iNbNeigh2;
                iYNeigh4 = iY* iNbNeigh4;
                for (i2=iStartCpt; i2< iNbNeigh2; i2++)
                    if ( psId2[Xid2(iXNeigh2,i2)]==0 && psId2[Xid2(iYNeigh2,i2)]==0 )
                {
                    i1a = X4(iXNeigh4,2*i2);
                    i1b = i1a + 1;
                    i2a = X4(iYNeigh4,2*i2);
                    i2b = i2a + 1;
                    
                    if (iIsolatedPt==0) pfW[i1a] = exp(-pfW2b[i]/fh); else pfW[i1a] = 0.2;
                    pfW[i1b] = SQRT(pfW[i1a]);
                    pfW[i2a] = pfW[i1a];
                    pfW[i2b] = pfW[i1b];
                    piY[i1a] = ix+idx;
                    piY[i1b] = iy+idy;
                    piY[i2a] = ix;
                    piY[i2b] = iy;

                    psId2[Xid2(iXNeigh2,i2)] = 1;
                    psId2[Xid2(iYNeigh2,i2)] = 1;
                    psId1[Xid1(iXsqw2,iXd)] = 1;
                    psId1[Xid1(iY*isqw2,Xd(-idx,-idy))] = 1;
                    if ( i2==iStartCpt+1 ) iStartCpt++;
                    
                    if ( piSizeNeigh[iX]<i2 ) piSizeNeigh[iX]=i2;
                    if ( piSizeNeigh[iY]<i2 ) piSizeNeigh[iY]=i2;
                    
                    i2 = iNbNeigh2;
                    }  
                
            } /* end if ( piId1[Xid1(iX,iXd)]==0 )*/
            
        } /* end for (i=0; i< iNbNeigh; i++)*/
        
        
        } /* END */
    end_time2 = clock();
    mexPrintf("Step #3 (sort m best values): Time= %.3f sec\n",difftime(end_time2,start_time2)/1000000);
   
    
    
    
    
    
    
    
    
    
    /* BORDERS*/
    /* BORDER #1*/
    start_time2 = clock();
    for (iy=0; iy< ic1; iy++)
        for(ix=0; ix< iNx; ix++)
    {
        iX = X(ix,iy);
        iXsqw2 = iX*isqw2;
        for (idy=-iw2; idy<= iw2; idy++)
            for(idx=-iw2; idx<= iw2; idx++)
                if ( ix+idx>=0 && ix+idx<iNx && iy+idy>=0 && iy+idy<iNy )
        {
            iXd = Xd(idx,idy);
            pfW2[iXd] = pfWe[XWe2(iXsqw2,iXd)];
            pidx[iXd] = idx;
            pidy[iXd] = idy;
            psId3[iXd] = 0;
                }
                else
        {
            iXd = Xd(idx,idy);
            pfW2[Xd(idx,idy)] = 1024.0;
            pidx[iXd] = idx;
            pidy[iXd] = idy;
            psId3[iXd] = 1;
                }
        
        
        /* 4 neighbors */
        if ( iIncludeCloseNeigh==YES )
        {
            idx = 1; idy = 0; pfW2[Xd(idx,idy)] = 0.0;
            idx = -1; idy = 0; pfW2[Xd(idx,idy)] = 0.0;
            idx = 0; idy = 1; pfW2[Xd(idx,idy)] = 0.0;
            idx = 0; idy = -1; pfW2[Xd(idx,idy)] = 0.0;
            iNbNeighToSort = iNbNeigh;
        }
        else        
            iNbNeighToSort = iNbBestNeigh;
        
        
        for (i2=0; i2< iNbNeighToSort; i2++)  pfW2b[i2] = 1024.0;
        for (i2=0; i2< isqw2; i2++)
        {
            fcurrent = pfW2[i2];
            if ( fcurrent<pfW2b[iNbNeighToSort-1] )
            {
                /* dichotomy */
                istart = 0;
                iend = iNbNeighToSort-1;
                while ( iend-istart>1 )
                {
                    imiddle = (iend-istart)/2;
                    if (pfW2b[istart+imiddle] > fcurrent)
                        iend = istart+imiddle;
                    else
                        istart = istart+imiddle;
                }
                if (pfW2b[istart] > fcurrent)
                    icurrent = istart;
                else
                    icurrent = iend;
                
                /* shifting */
                for(i3=iNbNeighToSort-2; i3>=icurrent; i3--)
                {
                    SWAP(pfW2b[i3],pfW2b[i3+1],fTmp);
                    SWAP(pidxb[i3],pidxb[i3+1],iTmp);
                    SWAP(pidyb[i3],pidyb[i3+1],iTmp);
                }
                
                /* new value */
                pfW2b[icurrent] = fcurrent;
                pidxb[icurrent] = pidx[i2];
                pidyb[icurrent] = pidy[i2];
                
            }
        } /* end for (i2=0; i2< isqw2; i2++) */
        
        
        /* 4 neighbors*/
        if ( iIncludeCloseNeigh==YES )
        {
            icurrent = 0;
            idx = 0; idy = 0; pfW2b[icurrent] = pfW2b[1+4];
            pidxb[icurrent] = idx; pidyb[icurrent] = idy; icurrent++;
            idx = 1; idy = 0;
            if ( ix+idx>=0 && ix+idx<iNx && iy+idy>=0 && iy+idy<iNy )
            { pfW2b[icurrent] = pfWe[XWe2(iXsqw2,Xd(idx,idy))]; pidxb[icurrent] = idx; pidyb[icurrent] = idy; icurrent++;}
            idx = -1; idy = 0;
            if ( ix+idx>=0 && ix+idx<iNx && iy+idy>=0 && iy+idy<iNy )
            { pfW2b[icurrent] = pfWe[XWe2(iXsqw2,Xd(idx,idy))]; pidxb[icurrent] = idx; pidyb[icurrent] = idy; icurrent++;}
            idx = 0; idy = 1;
            if ( ix+idx>=0 && ix+idx<iNx && iy+idy>=0 && iy+idy<iNy )
            { pfW2b[icurrent] = pfWe[XWe2(iXsqw2,Xd(idx,idy))]; pidxb[icurrent] = idx; pidyb[icurrent] = idy; icurrent++;}
            idx = 0; idy = -1;
            if ( ix+idx>=0 && ix+idx<iNx && iy+idy>=0 && iy+idy<iNy )
            { pfW2b[icurrent] = pfWe[XWe2(iXsqw2,Xd(idx,idy))]; pidxb[icurrent] = idx; pidyb[icurrent] = idy;}
        }
        else
            pfW2b[0] = pfW2b[1];
        
        /* compute symmetric W */
        if ( exp(-pfW2b[0]/fh)>0.2 ) iIsolatedPt = 0; else iIsolatedPt = 1;
        iXNeigh2 = iX*iNbNeigh2;
        iXNeigh4 = iX*iNbNeigh4;
        iStartCpt = 0;
        for (i=0; i< iNbNeigh; i++)
        {
            idx = pidxb[i];
            idy = pidyb[i];
            iXd = Xd(idx,idy);
            if ( psId3[iXd]==0 )
            {
                if ( psId1[Xid1(iXsqw2,iXd)]==0 )
                {
                    iY = X(ix+idx,iy+idy);
                    iYNeigh2 = iY* iNbNeigh2;
                    iYNeigh4 = iY* iNbNeigh4;
                    for (i2=iStartCpt; i2< iNbNeigh2; i2++)
                        if ( psId2[Xid2(iXNeigh2,i2)]==0 && psId2[Xid2(iYNeigh2,i2)]==0 )
                    {
                        i1a = X4(iXNeigh4,2*i2);
                        i1b = i1a + 1;
                        i2a = X4(iYNeigh4,2*i2);
                        i2b = i2a + 1;
                        if (iIsolatedPt==0) pfW[i1a] = exp(-pfW2b[i]/fh); else pfW[i1a] = 0.2;
                        pfW[i1b] = SQRT(pfW[i1a]);
                        pfW[i2a] = pfW[i1a];
                        pfW[i2b] = pfW[i1b];
                        piY[i1a] = ix+idx;
                        piY[i1b] = iy+idy;
                        piY[i2a] = ix;
                        piY[i2b] = iy;
                        psId2[Xid2(iXNeigh2,i2)] = 1;
                        psId2[Xid2(iYNeigh2,i2)] = 1;
                        psId1[Xid1(iXsqw2,iXd)] = 1;
                        psId1[Xid1(iY*isqw2,Xd(-idx,-idy))] = 1;
                        if ( i2==iStartCpt+1 ) iStartCpt++;
                        if ( piSizeNeigh[iX]<i2 ) piSizeNeigh[iX]=i2;
                        if ( piSizeNeigh[iY]<i2 ) piSizeNeigh[iY]=i2;
                        i2 = iNbNeigh2;
                        }
                } /* end if ( piId1[Xid1(iX,iXd)]==0 )*/
            } /* end for (i=0; i< iNbNeigh; i++)*/
        }
        } /* END*/
    
    
    /* BORDER #2 */
    for (iy=iNy-ic1; iy< iNy; iy++)
        for(ix=0; ix< iNx; ix++)
    {
        iX = X(ix,iy);
        iXsqw2 = iX*isqw2;
        for (idy=-iw2; idy<= iw2; idy++)
            for(idx=-iw2; idx<= iw2; idx++)
                if ( ix+idx>=0 && ix+idx<iNx && iy+idy>=0 && iy+idy<iNy )
        {
            iXd = Xd(idx,idy);
            pfW2[iXd] = pfWe[XWe2(iXsqw2,iXd)];
            pidx[iXd] = idx;
            pidy[iXd] = idy;
            psId3[iXd] = 0;
                }
                else
        {
            iXd = Xd(idx,idy);
            pfW2[Xd(idx,idy)] = 1024.0;
            pidx[iXd] = idx;
            pidy[iXd] = idy;
            psId3[iXd] = 1;
                }
        
        
        /* 4 neighbors */
        if ( iIncludeCloseNeigh==YES )
        {
            idx = 1; idy = 0; pfW2[Xd(idx,idy)] = 0.0;
            idx = -1; idy = 0; pfW2[Xd(idx,idy)] = 0.0;
            idx = 0; idy = 1; pfW2[Xd(idx,idy)] = 0.0;
            idx = 0; idy = -1; pfW2[Xd(idx,idy)] = 0.0;
            iNbNeighToSort = iNbNeigh;
        }
        else        
            iNbNeighToSort = iNbBestNeigh;
        
        
        for (i2=0; i2< iNbNeighToSort; i2++)  pfW2b[i2] = 1024.0;
        for (i2=0; i2< isqw2; i2++)
        {
            fcurrent = pfW2[i2];
            if ( fcurrent<pfW2b[iNbNeighToSort-1] )
            {
                /* dichotomy */
                istart = 0;
                iend = iNbNeighToSort-1;
                while ( iend-istart>1 )
                {
                    imiddle = (iend-istart)/2;
                    if (pfW2b[istart+imiddle] > fcurrent)
                        iend = istart+imiddle;
                    else
                        istart = istart+imiddle;
                }
                if (pfW2b[istart] > fcurrent)
                    icurrent = istart;
                else
                    icurrent = iend;
                
                /* shifting */
                for(i3=iNbNeighToSort-2; i3>=icurrent; i3--)
                {
                    SWAP(pfW2b[i3],pfW2b[i3+1],fTmp);
                    SWAP(pidxb[i3],pidxb[i3+1],iTmp);
                    SWAP(pidyb[i3],pidyb[i3+1],iTmp);
                }
                
                /* new value*/
                pfW2b[icurrent] = fcurrent;
                pidxb[icurrent] = pidx[i2];
                pidyb[icurrent] = pidy[i2];
                
            }
        } /* end for (i2=0; i2< isqw2; i2++) */
        
        
        /* 4 neighbors */
        if ( iIncludeCloseNeigh==YES )
        {
            icurrent = 0;
            idx = 0; idy = 0; pfW2b[icurrent] = pfW2b[1+4];
            pidxb[icurrent] = idx; pidyb[icurrent] = idy; icurrent++;
            idx = 1; idy = 0;
            if ( ix+idx>=0 && ix+idx<iNx && iy+idy>=0 && iy+idy<iNy )
            { pfW2b[icurrent] = pfWe[XWe2(iXsqw2,Xd(idx,idy))]; pidxb[icurrent] = idx; pidyb[icurrent] = idy; icurrent++;}
            idx = -1; idy = 0;
            if ( ix+idx>=0 && ix+idx<iNx && iy+idy>=0 && iy+idy<iNy )
            { pfW2b[icurrent] = pfWe[XWe2(iXsqw2,Xd(idx,idy))]; pidxb[icurrent] = idx; pidyb[icurrent] = idy; icurrent++;}
            idx = 0; idy = 1;
            if ( ix+idx>=0 && ix+idx<iNx && iy+idy>=0 && iy+idy<iNy )
            { pfW2b[icurrent] = pfWe[XWe2(iXsqw2,Xd(idx,idy))]; pidxb[icurrent] = idx; pidyb[icurrent] = idy; icurrent++;}
            idx = 0; idy = -1;
            if ( ix+idx>=0 && ix+idx<iNx && iy+idy>=0 && iy+idy<iNy )
            { pfW2b[icurrent] = pfWe[XWe2(iXsqw2,Xd(idx,idy))]; pidxb[icurrent] = idx; pidyb[icurrent] = idy;}
            else
            { pfW2b[icurrent] = 1024.0; pidxb[icurrent] = 0; pidyb[icurrent] = 0;}
        }
        else
            pfW2b[0] = pfW2b[1];
        
        /* compute symmetric W */
		 if ( exp(-pfW2b[0]/fh)>0.2 ) iIsolatedPt = 0; else iIsolatedPt = 1;
        iXNeigh2 = iX*iNbNeigh2;
        iXNeigh4 = iX*iNbNeigh4;
        iStartCpt = 0;
        for (i=0; i< iNbNeigh; i++)
        {
            idx = pidxb[i];
            idy = pidyb[i];
            iXd = Xd(idx,idy);
            if ( psId3[iXd]==0 )
            {
                if ( psId1[Xid1(iXsqw2,iXd)]==0 )
                {
                    iY = X(ix+idx,iy+idy);
                    iYNeigh2 = iY* iNbNeigh2;
                    iYNeigh4 = iY* iNbNeigh4;
                    for (i2=iStartCpt; i2< iNbNeigh2; i2++)
                        if ( psId2[Xid2(iXNeigh2,i2)]==0 && psId2[Xid2(iYNeigh2,i2)]==0 )
                    {
                        i1a = X4(iXNeigh4,2*i2);
                        i1b = i1a + 1;
                        i2a = X4(iYNeigh4,2*i2);
                        i2b = i2a + 1;
                        if (iIsolatedPt==0) pfW[i1a] = exp(-pfW2b[i]/fh); else pfW[i1a] = 0.2;
                        pfW[i1b] = SQRT(pfW[i1a]);
                        pfW[i2a] = pfW[i1a];
                        pfW[i2b] = pfW[i1b];
                        piY[i1a] = ix+idx;
                        piY[i1b] = iy+idy;
                        piY[i2a] = ix;
                        piY[i2b] = iy;
                        psId2[Xid2(iXNeigh2,i2)] = 1;
                        psId2[Xid2(iYNeigh2,i2)] = 1;
                        psId1[Xid1(iXsqw2,iXd)] = 1;
                        psId1[Xid1(iY*isqw2,Xd(-idx,-idy))] = 1;
                        if ( i2==iStartCpt+1 ) iStartCpt++;
                        if ( piSizeNeigh[iX]<i2 ) piSizeNeigh[iX]=i2;
                        if ( piSizeNeigh[iY]<i2 ) piSizeNeigh[iY]=i2;
                        i2 = iNbNeigh2;
                        }
                } /* end if ( piId1[Xid1(iX,iXd)]==0 )*/
            } /* end for (i=0; i< iNbNeigh; i++)*/
        }
        } /* END*/
    
    
    /* BORDER #3*/
    for (iy=ic1; iy< iNy-ic1; iy++)
        for(ix=0; ix< ic1; ix++)
    { 
        iX = X(ix,iy);
        iXsqw2 = iX*isqw2;
        for (idy=-iw2; idy<= iw2; idy++)
            for(idx=-iw2; idx<= iw2; idx++)
                if ( ix+idx>=0 && ix+idx<iNx && iy+idy>=0 && iy+idy<iNy )
        {
            iXd = Xd(idx,idy);
            pfW2[iXd] = pfWe[XWe2(iXsqw2,iXd)];
            pidx[iXd] = idx;
            pidy[iXd] = idy;
            psId3[iXd] = 0;
                }
                else
        {
            iXd = Xd(idx,idy);
            pfW2[Xd(idx,idy)] = 1024.0;
            pidx[iXd] = idx;
            pidy[iXd] = idy;
            psId3[iXd] = 1;
                }
        
        
        /* 4 neighbors */
        if ( iIncludeCloseNeigh==YES )
        {
            idx = 1; idy = 0; pfW2[Xd(idx,idy)] = 0.0;
            idx = -1; idy = 0; pfW2[Xd(idx,idy)] = 0.0;
            idx = 0; idy = 1; pfW2[Xd(idx,idy)] = 0.0;
            idx = 0; idy = -1; pfW2[Xd(idx,idy)] = 0.0;
            iNbNeighToSort = iNbNeigh;
        }
        else        
            iNbNeighToSort = iNbBestNeigh;
        
        
        for (i2=0; i2< iNbNeighToSort; i2++)  pfW2b[i2] = 1024.0;
        for (i2=0; i2< isqw2; i2++)
        {
            fcurrent = pfW2[i2];
            if ( fcurrent<pfW2b[iNbNeighToSort-1] )
            {
                /* dichotomy */
                istart = 0;
                iend = iNbNeighToSort-1;
                while ( iend-istart>1 )
                {
                    imiddle = (iend-istart)/2;
                    if (pfW2b[istart+imiddle] > fcurrent)
                        iend = istart+imiddle;
                    else
                        istart = istart+imiddle;
                }
                if (pfW2b[istart] > fcurrent)
                    icurrent = istart;
                else
                    icurrent = iend;
                
                /* shifting */
                for(i3=iNbNeighToSort-2; i3>=icurrent; i3--)
                {
                    SWAP(pfW2b[i3],pfW2b[i3+1],fTmp);
                    SWAP(pidxb[i3],pidxb[i3+1],iTmp);
                    SWAP(pidyb[i3],pidyb[i3+1],iTmp);
                }
                
                /* new value */
                pfW2b[icurrent] = fcurrent;
                pidxb[icurrent] = pidx[i2];
                pidyb[icurrent] = pidy[i2];
                
            }
        } /* end for (i2=0; i2< isqw2; i2++)*/
        
        
        /* 4 neighbors */
        if ( iIncludeCloseNeigh==YES )
        {
            icurrent = 0;
            idx = 0; idy = 0; pfW2b[icurrent] = pfW2b[1+4];
            pidxb[icurrent] = idx; pidyb[icurrent] = idy; icurrent++;
            idx = 1; idy = 0;
            if ( ix+idx>=0 && ix+idx<iNx && iy+idy>=0 && iy+idy<iNy )
            { pfW2b[icurrent] = pfWe[XWe2(iXsqw2,Xd(idx,idy))]; pidxb[icurrent] = idx; pidyb[icurrent] = idy; icurrent++;}
            idx = -1; idy = 0;
            if ( ix+idx>=0 && ix+idx<iNx && iy+idy>=0 && iy+idy<iNy )
            { pfW2b[icurrent] = pfWe[XWe2(iXsqw2,Xd(idx,idy))]; pidxb[icurrent] = idx; pidyb[icurrent] = idy; icurrent++;}
            idx = 0; idy = 1;
            if ( ix+idx>=0 && ix+idx<iNx && iy+idy>=0 && iy+idy<iNy )
            { pfW2b[icurrent] = pfWe[XWe2(iXsqw2,Xd(idx,idy))]; pidxb[icurrent] = idx; pidyb[icurrent] = idy; icurrent++;}
            idx = 0; idy = -1;
            if ( ix+idx>=0 && ix+idx<iNx && iy+idy>=0 && iy+idy<iNy )
            { pfW2b[icurrent] = pfWe[XWe2(iXsqw2,Xd(idx,idy))]; pidxb[icurrent] = idx; pidyb[icurrent] = idy;}
        }
        else
            pfW2b[0] = pfW2b[1];
        
        /* compute symmetric W*/
        if ( exp(-pfW2b[0]/fh)>0.2 ) iIsolatedPt = 0; else iIsolatedPt = 1;
        iXNeigh2 = iX*iNbNeigh2;
        iXNeigh4 = iX*iNbNeigh4;
        iStartCpt = 0;
        for (i=0; i< iNbNeigh; i++)
        {
            idx = pidxb[i];
            idy = pidyb[i];
            iXd = Xd(idx,idy);
            if ( psId3[iXd]==0 )
            {
                if ( psId1[Xid1(iXsqw2,iXd)]==0 )
                {
                    iY = X(ix+idx,iy+idy);
                    iYNeigh2 = iY* iNbNeigh2;
                    iYNeigh4 = iY* iNbNeigh4;
                    for (i2=iStartCpt; i2< iNbNeigh2; i2++)
                        if ( psId2[Xid2(iXNeigh2,i2)]==0 && psId2[Xid2(iYNeigh2,i2)]==0 )
                    {
                        i1a = X4(iXNeigh4,2*i2);
                        i1b = i1a + 1;
                        i2a = X4(iYNeigh4,2*i2);
                        i2b = i2a + 1;
                        if (iIsolatedPt==0) pfW[i1a] = exp(-pfW2b[i]/fh); else pfW[i1a] = 0.2;
                        pfW[i1b] = SQRT(pfW[i1a]);
                        pfW[i2a] = pfW[i1a];
                        pfW[i2b] = pfW[i1b];
                        piY[i1a] = ix+idx;
                        piY[i1b] = iy+idy;
                        piY[i2a] = ix;
                        piY[i2b] = iy;
                        psId2[Xid2(iXNeigh2,i2)] = 1;
                        psId2[Xid2(iYNeigh2,i2)] = 1;
                        psId1[Xid1(iXsqw2,iXd)] = 1;
                        psId1[Xid1(iY*isqw2,Xd(-idx,-idy))] = 1;
                        if ( i2==iStartCpt+1 ) iStartCpt++;
                        if ( piSizeNeigh[iX]<i2 ) piSizeNeigh[iX]=i2;
                        if ( piSizeNeigh[iY]<i2 ) piSizeNeigh[iY]=i2;
                        i2 = iNbNeigh2;
                        }
                } /* end if ( piId1[Xid1(iX,iXd)]==0 )*/
            } /* end for (i=0; i< iNbNeigh; i++)*/
        }
        } /* END*/
    
    
    /* BORDER #4*/
    for (iy=ic1; iy< iNy-ic1; iy++)
        for(ix=iNx-ic1; ix< iNx; ix++)
    { 
        iX = X(ix,iy);
        iXsqw2 = iX*isqw2;
        for (idy=-iw2; idy<= iw2; idy++)
            for(idx=-iw2; idx<= iw2; idx++)
                if ( ix+idx>=0 && ix+idx<iNx && iy+idy>=0 && iy+idy<iNy )
        {
            iXd = Xd(idx,idy);
            pfW2[iXd] = pfWe[XWe2(iXsqw2,iXd)];
            pidx[iXd] = idx;
            pidy[iXd] = idy;
            psId3[iXd] = 0;
                }
                else
        {
            iXd = Xd(idx,idy);
            pfW2[Xd(idx,idy)] = 1024.0;
            pidx[iXd] = idx;
            pidy[iXd] = idy;
            psId3[iXd] = 1;
                }
        
        
        /* 4 neighbors */
        if ( iIncludeCloseNeigh==YES )
        {
            idx = 1; idy = 0; pfW2[Xd(idx,idy)] = 0.0;
            idx = -1; idy = 0; pfW2[Xd(idx,idy)] = 0.0;
            idx = 0; idy = 1; pfW2[Xd(idx,idy)] = 0.0;
            idx = 0; idy = -1; pfW2[Xd(idx,idy)] = 0.0;
            iNbNeighToSort = iNbNeigh;
        }
        else        
            iNbNeighToSort = iNbBestNeigh;
        
        
        for (i2=0; i2< iNbNeighToSort; i2++)  pfW2b[i2] = 1024.0;
        for (i2=0; i2< isqw2; i2++)
        {
            fcurrent = pfW2[i2];
            if ( fcurrent<pfW2b[iNbNeighToSort-1] )
            {
                /* dichotomy */
                istart = 0;
                iend = iNbNeighToSort-1;
                while ( iend-istart>1 )
                {
                    imiddle = (iend-istart)/2;
                    if (pfW2b[istart+imiddle] > fcurrent)
                        iend = istart+imiddle;
                    else
                        istart = istart+imiddle;
                }
                if (pfW2b[istart] > fcurrent)
                    icurrent = istart;
                else
                    icurrent = iend;
                
                /* shifting */
                for(i3=iNbNeighToSort-2; i3>=icurrent; i3--)
                {
                    SWAP(pfW2b[i3],pfW2b[i3+1],fTmp);
                    SWAP(pidxb[i3],pidxb[i3+1],iTmp);
                    SWAP(pidyb[i3],pidyb[i3+1],iTmp);
                }
                
                /* new value */
                pfW2b[icurrent] = fcurrent;
                pidxb[icurrent] = pidx[i2];
                pidyb[icurrent] = pidy[i2];
                
            }
        } /* end for (i2=0; i2< isqw2; i2++) */
        
        
        /* 4 neighbors */
        if ( iIncludeCloseNeigh==YES )
        {
            icurrent = 0;
            idx = 0; idy = 0; pfW2b[icurrent] = pfW2b[1+4];
            pidxb[icurrent] = idx; pidyb[icurrent] = idy; icurrent++;
            idx = 1; idy = 0;
            if ( ix+idx>=0 && ix+idx<iNx && iy+idy>=0 && iy+idy<iNy )
            { pfW2b[icurrent] = pfWe[XWe2(iXsqw2,Xd(idx,idy))]; pidxb[icurrent] = idx; pidyb[icurrent] = idy; icurrent++;}
            idx = -1; idy = 0;
            if ( ix+idx>=0 && ix+idx<iNx && iy+idy>=0 && iy+idy<iNy )
            { pfW2b[icurrent] = pfWe[XWe2(iXsqw2,Xd(idx,idy))]; pidxb[icurrent] = idx; pidyb[icurrent] = idy; icurrent++;}
            idx = 0; idy = 1;
            if ( ix+idx>=0 && ix+idx<iNx && iy+idy>=0 && iy+idy<iNy )
            { pfW2b[icurrent] = pfWe[XWe2(iXsqw2,Xd(idx,idy))]; pidxb[icurrent] = idx; pidyb[icurrent] = idy; icurrent++;}
            idx = 0; idy = -1;
            if ( ix+idx>=0 && ix+idx<iNx && iy+idy>=0 && iy+idy<iNy )
            { pfW2b[icurrent] = pfWe[XWe2(iXsqw2,Xd(idx,idy))]; pidxb[icurrent] = idx; pidyb[icurrent] = idy;}
            else
            { pfW2b[icurrent] = 1024.0; pidxb[icurrent] = 0; pidyb[icurrent] = 0;}
        }
        else
            pfW2b[0] = pfW2b[1];
        
        /* compute symmetric W */
        if ( exp(-pfW2b[0]/fh)>0.2 ) iIsolatedPt = 0; else iIsolatedPt = 1;
        iXNeigh2 = iX*iNbNeigh2;
        iXNeigh4 = iX*iNbNeigh4;
        iStartCpt = 0;
        for (i=0; i< iNbNeigh; i++)
        {
            idx = pidxb[i];
            idy = pidyb[i];
            iXd = Xd(idx,idy);
            if ( psId3[iXd]==0 )
            {
                if ( psId1[Xid1(iXsqw2,iXd)]==0 )
                {
                    iY = X(ix+idx,iy+idy);
                    iYNeigh2 = iY* iNbNeigh2;
                    iYNeigh4 = iY* iNbNeigh4;
                    for (i2=iStartCpt; i2< iNbNeigh2; i2++)
                        if ( psId2[Xid2(iXNeigh2,i2)]==0 && psId2[Xid2(iYNeigh2,i2)]==0 )
                    {
                        i1a = X4(iXNeigh4,2*i2);
                        i1b = i1a + 1;
                        i2a = X4(iYNeigh4,2*i2);
                        i2b = i2a + 1;
                        if (iIsolatedPt==0) pfW[i1a] = exp(-pfW2b[i]/fh); else pfW[i1a] = 0.2;
                        pfW[i1b] = SQRT(pfW[i1a]);
                        pfW[i2a] = pfW[i1a];
                        pfW[i2b] = pfW[i1b];
                        piY[i1a] = ix+idx;
                        piY[i1b] = iy+idy;
                        piY[i2a] = ix;
                        piY[i2b] = iy;
                        psId2[Xid2(iXNeigh2,i2)] = 1;
                        psId2[Xid2(iYNeigh2,i2)] = 1;
                        psId1[Xid1(iXsqw2,iXd)] = 1;
                        psId1[Xid1(iY*isqw2,Xd(-idx,-idy))] = 1;
                        if ( i2==iStartCpt+1 ) iStartCpt++;
                        if ( piSizeNeigh[iX]<i2 ) piSizeNeigh[iX]=i2;
                        if ( piSizeNeigh[iY]<i2 ) piSizeNeigh[iY]=i2;
                        i2 = iNbNeigh2;
                        }
                } /* end if ( piId1[Xid1(iX,iXd)]==0 ) */
            } /* end for (i=0; i< iNbNeigh; i++) */
        }
        } /* END */
    end_time2 = clock();
    mexPrintf("Step #4 (compute along border): Time= %.3f sec\n",difftime(end_time2,start_time2)/1000000);

    
     
    /* add 1 to matrix of the number of neighbors */
    for (i=0; i< iNyx; i++) piSizeNeigh[i]++;
    
    
    
    
   
    
    
    /* matrices that can be easily read under matlab */
    start_time2 = clock();
    for (iy=0; iy< iNy; iy++)
        for(ix=0; ix< iNx; ix++)
    {
        iX = X(ix,iy);
        iXNeigh4 = iX*iNbNeigh4;
        for (i=0; i<piSizeNeigh[iX]; i++)
        {
            piYmat[X4b(iX,2*i)] = piY[X4(iXNeigh4,2*i)]; 
            piYmat[X4b(iX,2*i+1)] = piY[X4(iXNeigh4,2*i+1)]; 
            pfWmat[X4b(iX,2*i)] = pfW[X4(iXNeigh4,2*i)]; 
            pfWmat[X4b(iX,2*i+1)] = pfW[X4(iXNeigh4,2*i+1)]; 
        }
        }
    end_time2 = clock();
    mexPrintf("Step #5 (matrices for CImg): Time= %.3f sec\n",difftime(end_time2,start_time2)/1000000);
    
	
	
      
    free( (float *) pfW2 );
    free( (float *) pfW2b );
    free( (int *) pidx );
    free( (int *) pidy );
    free( (int *) pidxb );
    free( (int *) pidyb );
    free( (short *) psId1 );
    free( (short *) psId2 );
    free( (float *) pfSdx );
    free( (float *) pfIm0e );
    free( (float *) pfWe );
    end_time = clock();
    mexPrintf("Total computing Time for NL-Weights= %.3f sec\n \n",difftime(end_time,start_time)/1000000);
	W1_Y1_SY.assign(W1,Y1,SY); 
    return;	
}

/****************************************/
/**************************************** ****************************************/

/**********************************************/
/************** SUB FUNCTIONS *****************/
/* 
*/
/**********************************************/


/****************************************/
/* SEGMENTATION MODEL FOR SMOOTH/NON-TEXTURE IMAGES (CHAN-VESE MODEL) */
/* See: T.F.Chan, L.A.Vese, Active contours without edges, IEEE
Transactions on Image Processing. 10:2, pp. 266-277, 2001. */
/****************************************/
void vComputeChanVese (
  float  *pfIm0, 
  float  *pfu,
  int    iNx,
  int    iNy,
  float  *pfHr,
  int    iNbItersUpdateHr,
  float  *pfMeanIn,
  float  *pfMeanOut,
  int    iNI
  )
 
{
    /*
     * Chan-Vese region term.
     *   pfIm0            : input image samples, addressed through X(ix,iy)
     *   pfu              : membership function u, same addressing
     *   iNx, iNy         : image extents (X() is a file-level macro)
     *   pfHr             : output, h_r = (c_in - I)^2 - (c_out - I)^2
     *   iNbItersUpdateHr : refresh period, in outer iterations
     *   pfMeanIn/Out     : outputs, u-weighted and (1-u)-weighted image means
     *   iNI              : current outer iteration index
     * Outputs are written only when iNI is 0 or a multiple of the period.
     */
    float  fSumIn, fSumOut, fMassIn, fMassOut, fI, fU;
    int    iCol, iRow, iPix;


    /* Not a refresh iteration: leave pfHr and both means untouched */
    if ( (iNI!=0) && (iNI%iNbItersUpdateHr!=0) )
        return;


    /* Accumulate the weighted intensity sums and the corresponding masses */
    fSumIn = 0.0;  fMassIn = 0.0;
    fSumOut = 0.0; fMassOut = 0.0;

    for (iCol=0; iCol< iNx; iCol++)
        for (iRow=0; iRow< iNy; iRow++)
        {
            iPix = X(iCol,iRow);
            fI = pfIm0[iPix];
            fU = pfu[iPix];
            fSumIn += fI*fU;
            fMassIn += fU;
            fSumOut += fI*(1.0-fU);
            fMassOut += 1.0-fU;
        }

    /* Turn the sums into means; an empty region keeps its raw (zero-mass) sum */
    if (fMassIn>0.0) fSumIn = fSumIn/ fMassIn;
    if (fMassOut>0.0) fSumOut = fSumOut/ fMassOut;


    /* Region term h_r = (c1-I)^2 - (c2-I)^2 */
    for (iCol=0; iCol< iNx; iCol++)
        for (iRow=0; iRow< iNy; iRow++)
        {
            iPix = X(iCol,iRow);
            fI = pfIm0[iPix];
            pfHr[iPix] = ( SQR(fSumIn-fI) - SQR(fSumOut-fI) );
        }

    *pfMeanIn = fSumIn;
    *pfMeanOut = fSumOut;
}
/****************************************/

/****************************************/
/*CHAN-VESE MODEL SEGMENTATION WITH SPLIT BREGMAN ITERATION */
/*Model:
    u^*=arg min \int G_b |gradient u|+fLambda/2 (\int u(Im0-c_in)^2 + \int (1-u)(Im0-c_out)^2), where 0<=u<=1,
	G_b is an edge detection function.
*/
/****************************************/

template<typename T>
void CVSeg_SBI(CImg<T>& Im0,CImg<T> & u, CImg<T> & Gb, float fMu=1.0e2, float fLambda=1.0f)
{
    float   *pfIm0, *pfdx, *pfdy, *pfu, *pfuOld, *pfGb, *pfGb2;
    float   *pfbx, *pfby;
    float   fct1, fct2, fctST, fG, fDxu, fDyu, fs, fTemp, fSumDiff;
    float   fct1b, fct2b, fct1c, fct2c, fInvMu, fInvMu2, f1, f2, fMeanIn, fMeanOut;
    float   *pfHr, fMaxImRef;
    float   *pfrIn, *pfrOut, *pfuOld2;
    float   fRangeHr, fMinHr, fMaxHr, fNyx;
    float   fDiffNew, fDiffOld, fDiffNew2, fSumU, fError;
    float   fSumUold, fDiffFirst2, fStopThres;
    int     iNy, iNx, iNdim, iDim[3], ix, iy;
    int     iNI, iX, iGS, i;
    int     iNbItersUpdateHr, iMeanGS, iCptGS;
    time_t  start_time, end_time;
    
    
    start_time = clock();
    
    //CImg<float> Im0("cam1.bmp");
	//Im0.channel(0).normalize(0,1);
	iNx=Im0.width();
	iNy=Im0.height();
	pfIm0=Im0.data();
	pfGb=Gb.data();
    pfu=u.data(); 
   
    
  
    /* Memory allocation */
    pfdx = (float *) calloc( (unsigned)(iNy*iNx), sizeof(float) );
    if (!pfdx)
        mexPrintf("Memory allocation failure\n");
    
    pfdy = (float *) calloc( (unsigned)(iNy*iNx), sizeof(float) );
    if (!pfdy)
        mexPrintf("Memory allocation failure\n");
    
    pfbx = (float *) calloc( (unsigned)(iNy*iNx), sizeof(float) );
    if (!pfbx)
        mexPrintf("Memory allocation failure\n");
    
    pfby = (float *) calloc( (unsigned)(iNy*iNx), sizeof(float) );
    if (!pfby)
        mexPrintf("Memory allocation failure\n");
    
    pfuOld = (float *) calloc( (unsigned)(iNy*iNx), sizeof(float) );
    if (!pfuOld)
        mexPrintf("Memory allocation failure\n");
    
    pfGb2 = (float *) calloc( (unsigned)(iNy*iNx), sizeof(float) );
    if (!pfGb2)
        mexPrintf("Memory allocation failure\n");
    
    pfHr = (float *) calloc( (unsigned)(iNy*iNx), sizeof(float) );
    if (!pfHr)
        mexPrintf("Memory allocation failure\n");
    
    pfuOld = (float *) calloc( (unsigned)(iNy*iNx), sizeof(float) );
    if (!pfuOld)
        mexPrintf("Memory allocation failure\n");
    
    pfuOld2 = (float *) calloc( (unsigned)(iNy*iNx), sizeof(float) );
    if (!pfuOld2)
        mexPrintf("Memory allocation failure\n");
    
    
    
    
    /* Compute square of Edge Detector function (speed up computations) */
    for (ix=0; ix< iNx; ix++)
        for (iy=0; iy< iNy; iy++)
            pfGb2[X(ix,iy)] = SQR(pfGb[X(ix,iy)]); 
    
    
    
  
    
    /* Compute init function u (any non-zero function is fine) */
    fMaxImRef = 0.0;
    for (ix=0; ix< iNx; ix++)
        for (iy=0; iy< iNy; iy++)
            if (ABS(pfIm0[X(ix,iy)])>fMaxImRef) fMaxImRef= ABS(pfIm0[X(ix,iy)]);
    for (ix=0; ix< iNx; ix++)
        for (iy=0; iy< iNy; iy++)
            pfu[X(ix,iy)] = pfIm0[X(ix,iy)]/fMaxImRef;
    
    
    /* Parameters for the segmentation code */
   
    
    
    
    
    
    /* Normalize lambda value for the computation of Gauss-Seidel iterations */
    /* Estimate of g_r */
    /* CHAN-VESE MODEL */
    vComputeChanVese(pfIm0,pfu,iNx,iNy,pfHr,iNbItersUpdateHr,&fMeanIn,&fMeanOut,0);
 

    /* Estimate of the range of g_r */
    fMinHr = 1e10;
    fMaxHr = -1e10;
    for (ix=1; ix< iNx-1; ix++)
        for (iy=1; iy< iNy-1; iy++)
    {
        if ( pfHr[X(ix,iy)]>fMaxHr ) fMaxHr = pfHr[X(ix,iy)];
        if ( pfHr[X(ix,iy)]<fMinHr ) fMinHr = pfHr[X(ix,iy)];
        }
    fRangeHr = fMaxHr-fMinHr;
        
    /* Normalize lambda with respect to the range of hr */
    fLambda /= fRangeHr;     
    
    
    
    
    /* Display parameters */
    mexPrintf("Image size: Height= %i and Width= %i\n",iNy,iNx);
    mexPrintf("Lambda= %.5f, Mu= %.5f\n",fLambda,fMu);
   
    
    /* Constants */
    iNbItersUpdateHr = 1;  /* number of iterations to update the region function g_r */
    
    fInvMu = 1./ fMu;
    fInvMu2 = SQR(fInvMu);
    
    fct1 = 1./4.;
    fct2 = fLambda/(4.0*fMu);
    fct1b = 1./3.;
    fct2b = fLambda/(3.0*fMu);
    fct1c = 1./2.;
    fct2c = fLambda/(2.0*fMu);

    fNyx = (float)(iNy*iNx);
    fStopThres = 1e-6;
    
        
    
    /* Iterative minimization scheme
    See: T. Goldstein, X. Bresson, and S. Osher, Geometric Applications of the 
    Split Bregman Method: Segmentation and Surface Reconstruction, CAM Report 09-06, 2009
    Iterative scheme:
    (u^k+1,d^k+1) = arg min int g_b |d| + lambda h_r u + mu/2 |d - grad u - b^k|^2
    b^k+1 = b^k + grad u^k+1 - d^k+1 */
    
    fDiffOld = 1e10; fDiffNew = 1e11;
    iMeanGS = 0; iCptGS = 0;
    iNI=0; /* number of iterations (outer iterations) */
    while ( ABS(fDiffNew-fDiffOld)>fStopThres && iNI<30 ) 
    {
        
        /* Store u^old for outer iterations */
        for (ix=0; ix< iNx; ix++)
            for (iy=0; iy< iNy; iy++)
                pfuOld[X(ix,iy)] = pfu[X(ix,iy)];
        
        
        /* Update region function hr */
       /* CHAN-VESE MODEL */
      vComputeChanVese(pfIm0,pfu,iNx,iNy,pfHr,iNbItersUpdateHr,&fMeanIn,&fMeanOut,iNI);
        
        
        
        /* Compute u^{k+1} with Gauss-Seidel */
        /* Solve u^k+1 = arg min int lambda h_r u + mu/2 |d - grad u - b^k|^2 */
        /* Euler-Lagrange is  mu Laplacian u = lambda hr + mu div (b^k-d^k), u in [0,1] */
        iGS=0; /* number of iterations for Gauss-Seidel (inner iterations) */
        fError = 1e10;
        while ( fError>1e-2 && iGS<50 )   
        {
            
            /* Store u^old for inner iterations (Gauss-Seidel) */
            for (ix=0; ix< iNx; ix++)
                for (iy=0; iy< iNy; iy++)
                    pfuOld2[X(ix,iy)] = pfu[X(ix,iy)];
            
            /* Center */
            for (ix=1; ix< iNx-1; ix++)
                for (iy=1; iy< iNy-1; iy++)
            {
                iX = X(ix,iy);
                fG = pfu[X(ix+1,iy)] + pfu[X(ix-1,iy)] + pfu[X(ix,iy+1)] + pfu[X(ix,iy-1)];
                fG += pfdx[X(ix-1,iy)] - pfdx[iX] - pfbx[X(ix-1,iy)] + pfbx[iX];
                fG += pfdy[X(ix,iy-1)] - pfdy[iX] - pfby[X(ix,iy-1)] + pfby[iX];
                fG *= fct1;
                fG -= fct2* pfHr[iX];
                if(fG>1.0) fG=1.0; else if(fG<0.0) fG=0.0;
                pfu[iX] = fG;
                }
            
            /* Borders */
            ix=0;
            for (iy=1; iy< iNy-1; iy++)
            {
                iX = X(ix,iy);
                fG = pfu[X(ix+1,iy)] + pfu[X(ix,iy+1)] + pfu[X(ix,iy-1)];
                fG += - pfdx[iX] + pfbx[iX];
                fG += pfdy[X(ix,iy-1)] - pfdy[iX] - pfby[X(ix,iy-1)] + pfby[iX];
                fG *= fct1b;
                fG -= fct2b* pfHr[iX];
                if(fG>1.0) fG=1.0; else if(fG<0.0) fG=0.0;
                pfu[iX] = fG;
            }
            
            ix=iNx-1;
            for (iy=1; iy< iNy-1; iy++)
            {
                iX = X(ix,iy);
                fG = pfu[X(ix-1,iy)] + pfu[X(ix,iy+1)] + pfu[X(ix,iy-1)];
                fG += pfdx[X(ix-1,iy)] - pfdx[iX] - pfbx[X(ix-1,iy)] + pfbx[iX];
                fG += pfdy[X(ix,iy-1)] - pfdy[iX] - pfby[X(ix,iy-1)] + pfby[iX];
                fG *= fct1b;
                fG -= fct2b* pfHr[iX];
                if(fG>1.0) fG=1.0; else if(fG<0.0) fG=0.0;
                pfu[iX] = fG;
            }
            
            iy=0;
            for (ix=1; ix< iNx-1; ix++)
            {
                iX = X(ix,iy);
                fG = pfu[X(ix+1,iy)] + pfu[X(ix-1,iy)] + pfu[X(ix,iy+1)];
                fG += pfdx[X(ix-1,iy)] - pfdx[iX] - pfbx[X(ix-1,iy)] + pfbx[iX];
                fG += - pfdy[iX] + pfby[iX];
                fG *= fct1b;
                fG -= fct2b* pfHr[iX];
                if(fG>1.0) fG=1.0; else if(fG<0.0) fG=0.0;
                pfu[iX] = fG;
            }
            
            iy=iNy-1;
            for (ix=1; ix< iNx-1; ix++)
            {
                iX = X(ix,iy);
                fG = pfu[X(ix+1,iy)] + pfu[X(ix-1,iy)] + pfu[X(ix,iy-1)];
                fG += pfdx[X(ix-1,iy)] - pfdx[iX] - pfbx[X(ix-1,iy)] + pfbx[iX];
                fG += pfdy[X(ix,iy-1)] - pfdy[iX] - pfby[X(ix,iy-1)] + pfby[iX];
                fG *= fct1b;
                fG -= fct2b* pfHr[iX];
                if(fG>1.0) fG=1.0; else if(fG<0.0) fG=0.0;
                pfu[iX] = fG;
            }
            
            ix=0; iy=0;
            iX = X(ix,iy);
            fG = pfu[X(ix+1,iy)] + pfu[X(ix,iy+1)];
            fG += - pfdx[iX] + pfbx[iX];
            fG += - pfdy[iX] + pfby[iX];
            fG *= fct1c;
            fG -= fct2c* pfHr[iX];
            if(fG>1.0) fG=1.0; else if(fG<0.0) fG=0.0;
            pfu[iX] = fG;
            
            ix=iNx-1; iy=0;
            iX = X(ix,iy);
            fG = pfu[X(ix-1,iy)] + pfu[X(ix,iy+1)];
            fG += pfdx[X(ix-1,iy)] - pfdx[iX] - pfbx[X(ix-1,iy)] + pfbx[iX];
            fG += - pfdy[iX] + pfby[iX];
            fG *= fct1c;
            fG -= fct2c* pfHr[iX];
            if(fG>1.0) fG=1.0; else if(fG<0.0) fG=0.0;
            pfu[iX] = fG;
            
            ix=0; iy=iNy-1;
            iX = X(ix,iy);
            fG = pfu[X(ix-1,iy)] + pfu[X(ix,iy+1)];
            fG += pfdx[X(ix-1,iy)] - pfdx[iX] - pfbx[X(ix-1,iy)] + pfbx[iX];
            fG += - pfdy[iX] + pfby[iX];
            fG *= fct1c;
            fG -= fct2c* pfHr[iX];
            if(fG>1.0) fG=1.0; else if(fG<0.0) fG=0.0;
            pfu[iX] = fG;
            
            ix=iNx-1; iy=iNy-1;
            iX = X(ix,iy);
            fG = pfu[X(ix-1,iy)] + pfu[X(ix,iy-1)];
            fG += pfdx[X(ix-1,iy)] - pfdx[iX] - pfbx[X(ix-1,iy)] + pfbx[iX];
            fG += pfdy[X(ix,iy-1)] - pfdy[iX] - pfby[X(ix,iy-1)] + pfby[iX];
            fG *= fct1c;
            fG -= fct2c* pfHr[iX];
            if(fG>1.0) fG=1.0; else if(fG<0.0) fG=0.0;
            pfu[iX] = fG;
            /* end Borders */
            
            
            /* Compute diff ( u - uold ) */
            fSumDiff = 0.0;
            fSumU = 0.0;
            fSumUold = 0.0;
            for (ix=0; ix< iNx; ix++)
                for (iy=0; iy< iNy; iy++)
                    fSumDiff += SQR(pfu[X(ix,iy)]-pfuOld2[X(ix,iy)]);
            fDiffNew2 = fSumDiff/ fNyx;
            if ( iGS==0 ) 
            {
                fDiffFirst2 = fDiffNew2;
                fDiffNew2 = 1e10;
                fError = 1e10;
            }
            else
                fError = 1.0 - ABS(fDiffNew2-fDiffFirst2)/fDiffFirst2;
            iGS++;
            
        }
        
        
        iMeanGS += iGS;
        iCptGS++;
        

        /* Compute d^{k+1} (Soft-Thresholding) and b^{k+1} (Bregman function) */
        /* d^k+1 = arg min int g_b |d| + mu/2 |d - grad u - b^k|^2 */
        /* d^k+1 = (grad u^k+1 + b^k)/ |grad u^k+1 + b^k| max(|grad u^k+1 + b^k|-1/mu,0) */
        /* b^k+1 = b^k + grad u^k+1 - d^k+1 */
        /* Center */
        for (ix=0; ix< iNx-1; ix++)
            for (iy=0; iy< iNy-1; iy++)
        {
            iX = X(ix,iy);
            /* d */
            fDxu = pfu[X(ix+1,iy)] - pfu[iX];
            fDyu = pfu[X(ix,iy+1)] - pfu[iX];
            f1 = fDxu+pfbx[iX];
            f2 = fDyu+pfby[iX];
            fs = SQR(f1)+SQR(f2);
            fctST = fInvMu2* pfGb2[iX];
            if ( fs<fctST ) { pfdx[iX]=0.0; pfdy[iX]=0.0; }
            else {
                fs = SQRT(fs);
                fctST = SQRT(fctST);
                fTemp = fs-fctST; fTemp /= fs;
                pfdx[iX] = fTemp* f1;
                pfdy[iX] = fTemp* f2; }
            /* b */
            pfbx[iX] += fDxu - pfdx[iX];
            pfby[iX] += fDyu - pfdy[iX];
            }
        
        /* Borders */
        ix=iNx-1;
        for (iy=1; iy< iNy-1; iy++)
        {
            iX = X(ix,iy);
            fDxu = 0.0;
            fDyu = pfu[X(ix,iy+1)] - pfu[iX];
            f1 = fDxu+pfbx[iX]; f2 = fDyu+pfby[iX];
            fs = SQR(f1)+SQR(f2); fctST = fInvMu2* pfGb2[iX];
            if ( fs<fctST ) { pfdx[iX]=0.0; pfdy[iX]=0.0; }
            else {
                fs = SQRT(fs); fctST = SQRT(fctST);
                fTemp = fs-fctST; fTemp /= fs;
                pfdx[iX] = fTemp* f1;
                pfdy[iX] = fTemp* f2; }
            pfbx[iX] += fDxu - pfdx[iX];
            pfby[iX] += fDyu - pfdy[iX];
        }
        
        iy=iNy-1;
        for (ix=1; ix< iNx-1; ix++)
        {
            iX = X(ix,iy);
            fDxu = pfu[X(ix+1,iy)] - pfu[iX];
            fDyu = 0.0;
            f1 = fDxu+pfbx[iX]; f2 = fDyu+pfby[iX];
            fs = SQR(f1)+SQR(f2); fctST = fInvMu2* pfGb2[iX];
            if ( fs<fctST ) { pfdx[iX]=0.0; pfdy[iX]=0.0; }
            else {
                fs = SQRT(fs); fctST = SQRT(fctST);
                fTemp = fs-fctST; fTemp /= fs;
                pfdx[iX] = fTemp* f1;
                pfdy[iX] = fTemp* f2; }
            pfbx[iX] += fDxu - pfdx[iX];
            pfby[iX] += fDyu - pfdy[iX];
        }
        
        ix=iNx-1; iy=0;
        iX = X(ix,iy);
        fDxu = 0.0;
        fDyu = pfu[X(ix,iy+1)] - pfu[iX];
        f1 = fDxu+pfbx[iX]; f2 = fDyu+pfby[iX];
        fs = SQR(f1)+SQR(f2); fctST = fInvMu2* pfGb2[iX];
        if ( fs<fctST ) { pfdx[iX]=0.0; pfdy[iX]=0.0; }
        else {
            fs = SQRT(fs); fctST = SQRT(fctST);
            fTemp = fs-fctST; fTemp /= fs;
            pfdx[iX] = fTemp* f1;
            pfdy[iX] = fTemp* f2; }
        pfbx[iX] += fDxu - pfdx[iX];
        pfby[iX] += fDyu - pfdy[iX];
        
        ix=0; iy=iNy-1;
        iX = X(ix,iy);
        fDxu = pfu[X(ix+1,iy)] - pfu[iX];
        fDyu = 0.0;
        f1 = fDxu+pfbx[iX]; f2 = fDyu+pfby[iX];
        fs = SQR(f1)+SQR(f2); fctST = fInvMu2* pfGb2[iX];
        if ( fs<fctST ) { pfdx[iX]=0.0; pfdy[iX]=0.0; }
        else {
            fs = SQRT(fs); fctST = SQRT(fctST);
            fTemp = fs-fctST; fTemp /= fs;
            pfdx[iX] = fTemp* f1;
            pfdy[iX] = fTemp* f2; }
        pfbx[iX] += fDxu - pfdx[iX];
        pfby[iX] += fDyu - pfdy[iX];
        
        ix=iNx-1; iy=iNy-1;
        iX = X(ix,iy);
        fDxu = 0.0;
        fDyu = 0.0;
        f1 = fDxu+pfbx[iX]; f2 = fDyu+pfby[iX];
        fs = SQR(f1)+SQR(f2); fctST = fInvMu2* pfGb2[iX];
        if ( fs<fctST ) { pfdx[iX]=0.0; pfdy[iX]=0.0; }
        else {
            fs = SQRT(fs); fctST = SQRT(fctST);
            fTemp = fs-fctST; fTemp /= fs;
            pfdx[iX] = fTemp* f1;
            pfdy[iX] = fTemp* f2; }
        pfbx[iX] += fDxu - pfdx[iX];
        pfby[iX] += fDyu - pfdy[iX];
        /* Borders */
        
        
        
        fSumDiff = 0.0;
        fSumU = 0.0;
        fSumUold = 0.0;
        for (ix=0; ix< iNx; ix++)
            for (iy=0; iy< iNy; iy++)
        {
            fSumDiff += SQR(pfu[X(ix,iy)]-pfuOld[X(ix,iy)]);
            fSumU += SQR(pfu[X(ix,iy)]);
            fSumUold += SQR(pfuOld[X(ix,iy)]);
            }
        fDiffOld = fDiffNew;
        fDiffNew = fSumDiff/ (fSumU*fSumUold);
        iNI++;
        
    }
    
    mexPrintf("number of iterations (outer iterations)= %i\n",iNI);
    mexPrintf("mean number of iterations for Gauss-Seidel (inner iterations)= %.1f\n",((float)iMeanGS)/((float)iCptGS));
    
    /* Outputs (see above) */
    /* Free memory */
    free( (float *) pfdx );
    free( (float *) pfdy );
    free( (float *) pfbx );
    free( (float *) pfby );
    free( (float *) pfGb2 );
    free( (float *) pfHr );
    free( (float *) pfuOld );
    free( (float *) pfuOld2 );
  
    end_time = clock();
    mexPrintf("\nComputing Time for Active Contour Segmentation= %.4f sec\n \n",difftime(end_time,start_time)/1000);
	/*
	cimg_forXY(u,x,y)
	{
		if (u(x,y)>0.5)
			u(x,y)=1.0;
		else
			u(x,y)=0.0;
	}
	*/
	return;
}

/************************************************************************************/
/***************************************************************************************/

/* Uniform local weights over a square window, in the W/Y/SY layout described
   in the file header.
   Img0     : image whose width and height define the grid
   W1_Y1_SY : output list {W, Y, SY}
                W  (Nx,Ny,2*n) : every entry is 1/n
                Y  (Nx,Ny,2*n) : Y(x,y,2k)=x', Y(x,y,2k+1)=y' of the k-th neighbor
                SY (Nx,Ny)     : number of neighbors n
   im       : window side. The window actually used is (2*halfim+1) wide with
              halfim=(im-1)/2, so n=(2*halfim+1)^2; this equals im*im for odd im.
   fh       : not used by this function
   Neighbors outside the image are mirrored about the border and then clamped. */
template<typename T>
 void LocalWeights(CImg<T> &Img0, CImgList<T> &W1_Y1_SY, const int im=7,const float fh=0.25*0.25)
 {
   const int Nx=Img0.width(),Ny=Img0.height();
   int halfim= (int)((im-1.0f)/2.0f);
   if (halfim<0) halfim=0;

   /* Size the outputs from the window that is really visited, so that no
      entry of Y is left unwritten when im is even. */
   const int side=2*halfim+1;
   const int wnum=side*side;
   const float vmean=1.0f/(float) (wnum);
   
   CImg<float> W(Nx,Ny,2*wnum,1,vmean);
   CImg<int> Y(Nx,Ny,2*wnum),SY(Nx,Ny,1,1,wnum);
   cimg_forXY(Img0,x,y)
   {
     int ii=0;
     for (int j=-halfim;j<=halfim;j++)
		 for (int i=-halfim;i<=halfim;i++)
	      {   
			  int ix=x+i;
			  int iy=y+j;
			  /* mirror about the image border */
			  if (ix<0) ix=-ix;
			  else if (ix>Nx-1) ix=2*(Nx-1)-ix;
			  if (iy<0) iy=-iy;
			  else if (iy>Ny-1) iy=2*(Ny-1)-iy;
			  /* a window wider than the image can still land outside: clamp */
			  if (ix<0) ix=0;
			  else if (ix>Nx-1) ix=Nx-1;
			  if (iy<0) iy=0;
			  else if (iy>Ny-1) iy=Ny-1;
              Y(x,y,2*ii)=ix;
			  Y(x,y,2*ii+1)=iy;
			  ii++;
	      }
   }

   W1_Y1_SY.assign(W,Y,SY);
   return;
 }
/************************************************************************************************/
 /************************************************************************************************/
 /* Marks the pixels of U whose value differs from their right or lower
    neighbor: dest(x,y) is set to 1 there, other pixels of dest are untouched.
    img  : not used by this function
    U    : image whose value discontinuities are detected
    dest : output mask; must be at least as large as U
    The former loop "for k=0..max(U)" never used k and simply repeated this
    identical pass, so one pass is run under the same condition (max(U) >= 0).
    NOTE(review): this assumes dest does not share its buffer with U. */
 template<typename T>
void get_level(CImg<T> &img,CImg<T> &U, CImg<T> &dest) 
{ 
  float m,M=U.max_min(m);
  if (M>=0)
  {
    CImg_2x2(I,T); Inn = 0;
    cimg_for2x2(U,x,y,0,0,I,T) if (Icc!=Inc || Icc!=Icn) dest(x,y) = 1.0f;
  }
  return ;
}

/******************************************************************************************************/
/******************************************************************************************************/
/* Non-local edge detector.
   Im0  : input image
   Edge : output, must already have the size of Im0; it is normalized to [0,1]
   W1   : list {W, Y, SY} in the layout described in the file header:
          W(x,y,2k) weight, Y(x,y,2k)/Y(x,y,2k+1) coordinates of the k-th
          neighbor, SY(x,y) number of neighbors
   For each pixel the weighted averages G11, G22, G12 of the gradient products
   over its neighbors are formed and
       Edge = 1 / ( (1+G11)(1+G22) - G12^2 ).
   ComputeTextureFeature is the fixed-window counterpart of this function. */
template<typename T>
void ComputeNLEdgeDetector(CImg<T> & Im0, CImg<T> & Edge,CImgList<T> W1)

{
    /* Refer to the images inside the by-value list instead of copying them again */
    CImg<T> &W=W1[0], &Y=W1[1], &SY=W1[2];
    float   fG11, fG22, fG12;
    int iNx=Im0.width(),iNy=Im0.height();

	CImgList<> Ixy=Im0.get_gradient(0,1);
    
    // Compute Texture Feature 
     for (int y=0;y<iNy;y++)
      for (int x=0;x<iNx;x++)
    {
        fG11 = 0.0;
        fG22 = 0.0;
        fG12 = 0.0;
		float sumw=0.0f;
		const int nbNeigh=(int)SY(x,y);
		for (int ii=0;ii<nbNeigh;ii++)
		{
			int ix=(int)Y(x,y,2*ii),iy=(int)Y(x,y,2*ii+1);
			// An entry pointing at (0,0) is skipped everywhere except at pixel (0,0) itself
		    if (ix!=0 || iy!=0 || (x==0 && y==0))
			{  
			   float wei=(float)W(x,y,2*ii);
			   float gx=Ixy(0,ix,iy), gy=Ixy(1,ix,iy);
			   sumw+=wei;
               fG11 += wei*SQR(gx);
			   fG22 += wei*SQR(gy);
			   fG12 += wei*gx*gy;
			}
					   
		}
		// Without any contributing neighbor the sums are all zero; dividing
		// would give 0/0 and the NaN would spoil the normalization below.
		if (sumw!=0.0f)
		{
			fG11/=sumw;
			fG22/=sumw;
			fG12/=sumw;
		}
		Edge(x,y) =1.0/( (1.0+fG11)*(1.0+fG22) - SQR(fG12) );
	  }

    // Normalize Texture Feature 

    Edge.normalize(0,1.0);
   
    
}
/*******************************************************************************************************/
/*******************************************************************************************************/

/* Windowed texture/edge feature.
   Im0      : input image
   TextFeat : output, must already have the size of Im0; normalized to [0,1]
   ip       : window side; the half-width is ip2=(ip-1)/2
   The gradient images are extended by ip2 pixels on every side with mirror
   conditions, the gradient products are summed over the window
   [-ip2, ip2] x [-ip2, ip2] around each pixel, and
       TextFeat = 1 / ( (1+G11)(1+G22) - G12^2 ). */
template<typename T>
void ComputeTextureFeature(CImg<T> & Im0, CImg<T> & TextFeat, int ip)

{
    int    ix,iy, iNxe, iNye, ip2, ixe, iye, ik;
    int    iyp2, ixp2, iy2, ix2;
    float  fG11, fG22, fG12;
    int iNx=Im0.width(),iNy=Im0.height();

    if (iNx<=0 || iNy<=0) return;

	CImgList<> Ixy=Im0.get_gradient(0,1);
    
    // Compute extended Ix, Iy with mirror conditions 
    ip2 = (ip-1)/2;
    if (ip2<0) ip2 = 0;
    iNxe = iNx + 2* ip2;
    iNye = iNy + 2* ip2;
	
	CImg<> Ixe(iNxe,iNye),Iye(iNxe,iNye);
    
    for (iye=0,ik=0;iye<iNye;iye++)
     {
        if (iye<ip2)  iy=ip2-iye;
        else if (iye>iNy+ip2-1) iy=2*iNy+ip2-iye-2;
        else iy=iye-ip2;
        // a half-window wider than the image can still reflect outside: clamp
        if (iy<0) iy=0; else if (iy>iNy-1) iy=iNy-1;
        for (ixe=0;ixe<iNxe;ixe++,ik++)
          {
             if (ixe<ip2) ix=ip2-ixe;
             else if (ixe>iNx+ip2-1) ix=2*iNx+ip2-ixe-2;
             else ix=ixe-ip2;       
             if (ix<0) ix=0; else if (ix>iNx-1) ix=iNx-1;
             // ik runs over the extended images in their storage order (x fastest)
             Ixe[ik]=Ixy(0,ix,iy);
             Iye[ik]=Ixy(1,ix,iy);
          }
     }
    
    
    // Compute Texture Feature 
    for (iye=ip2;iye<iNye-ip2;iye++)
        for (ixe=ip2;ixe<iNxe-ip2;ixe++)
    {
        fG11 = 0.0;
        fG22 = 0.0;
        fG12 = 0.0;
        // Symmetric window, bounds inclusive: the padding is ip2 on both
        // sides, and an exclusive upper bound would leave the window empty
        // for ip=1 and one sample short otherwise.
        for (iyp2=-ip2;iyp2<=ip2;iyp2++)
            for (ixp2=-ip2;ixp2<=ip2;ixp2++)
        {
            iy2 = iye+iyp2;
            ix2 = ixe+ixp2;
            fG11 += SQR(Ixe(ix2,iy2));
            fG22 += SQR(Iye(ix2,iy2));
            fG12 += Ixe(ix2,iy2)*Iye(ix2,iy2);
            }
        
        // back to coordinates of the original image
        iy=iye-ip2;
        ix=ixe-ip2;
		TextFeat(ix,iy) =1.0/( (1.0+fG11)*(1.0+fG22) - SQR(fG12) );
        }
 
    // Normalize Texture Feature 

    TextFeat.normalize(0,1.0);
   
    
}
