function [IOut,output, result] = denoiseImageKSVD(Image,K,Thelta,u,IMin0)
%==========================================================================
%   P E R F O R M   D E N O I S I N G   U S I N G   A  D I C T  I O N A R Y
%                  T R A I N E D   O N   N O I S Y   I M A G E
%==========================================================================
% function [IOut,output,result] = denoiseImageKSVD(Image,K,Thelta,u,IMin0)
%
% Denoises an image by sparsely representing each block over a dictionary
% trained on the noisy image itself (weighted K-SVD, via WKSVD), and by
% averaging the overlapping block reconstructions. The procedure is
% repeated for 20 outer iterations; after each one the per-pixel weight
% map u, the two noise variances and the two mixing proportions are
% re-estimated from the current residuals and used by the next iteration.
%
% INPUT ARGUMENTS :
%   Image  - the noisy image (2-D array).
%   K      - the number of atoms in the trained dictionary. The initial
%            dictionary is the first K columns of a separable overcomplete
%            DCT with ceil(sqrt(K))^2 atoms.
%   Thelta - struct holding the initial noise-model parameters:
%              Thelta.sigma2(1:2) - variances of the two noise components
%              Thelta.r(1:2)      - proportions of the two components
%   u      - initial weighting function, indexed like Image. The per-pixel
%            weight is W = sqrt((1-u)/sigma2(1) + u/sigma2(2)).
%   IMin0  - reference image used only to compute the PSNR of each
%            iteration (a peak value of 255 is assumed).
%
% FIXED INTERNAL SETTINGS (not configurable through the argument list):
%   block size (bb) ............ 8 (square blocks)
%   error goal (C) ............. 1.15, passed unscaled to WKSVD and OMPerr
%   maxBlocksToConsider ........ 260000; if the image holds more blocks,
%                                the sliding distance between processed
%                                blocks is increased until it does not.
%   maxNumBlocksToTrainOn ...... 5000; if the image holds more blocks, a
%                                random subset (fixed seed) is used for
%                                training.
%   number of K-SVD iterations . 10 if the current sigma > 5, 5 otherwise.
%   displayFlag / waitBarOn .... 1 (progress messages and waitbar shown).
%
% OUTPUT ARGUMENTS :
%   IOut   - array of the same size as Image holding the cleaned image of
%            the last outer iteration.
%   output - second output of WKSVD for the last outer iteration, with the
%            trained dictionary added as output.D.
%   result - history over the outer iterations:
%              result.PSNR   - PSNR of IOut against IMin0, per iteration
%              result.sigma2 - 2-by-N re-estimated variances
%              result.r      - 2-by-N re-estimated proportions
%              result.IOut   - stack of the denoised images (3rd dim)
%
% SIDE EFFECTS : opens a waitbar, draws into figures 3 (PSNR curve) and 4
% (current estimate), and prints per-step timings.
%
% This code is written based on KSVD algorithm.
% =========================================================================

% ---- settings that do not change between outer iterations ---------------
numOuterIters = 20;
reduceDC = 1;
waitBarOn = 1;
displayFlag = 1;
C = 1.15;
maxBlocksToConsider = 260000;
maxNumBlocksToTrainOn = 5000;
bb = 8;
chunkSize = 30000;            % number of blocks sparse-coded per batch
[NN1,NN2] = size(Image);

% Sliding distance depends only on the image size, so it is found once:
% grow it until the number of processed blocks fits maxBlocksToConsider.
slidingDis = 1;
while (prod(floor((size(Image)-bb)/slidingDis)+1)>maxBlocksToConsider)
    slidingDis = slidingDis+1;
end

% ---- initial noise-model parameters --------------------------------------
sigma2(1)=Thelta.sigma2(1);
sigma2(2)=Thelta.sigma2(2);
r(1)=Thelta.r(1);
r(2)=Thelta.r(2);

for iteration=1:numOuterIters
    % effective noise level of the mixture and per-pixel weights
    sigma=sqrt(r(1)*sigma2(1)+(1-r(1))*sigma2(2));
    W=sqrt((1-u)./(sigma2(1))+u./(sigma2(2)));
    if (sigma > 5)
        numIterOfKsvd = 10;
    else
        numIterOfKsvd = 5;
    end

    % ================= step 1a: train the dictionary ======================
    % collect training blocks (and the matching weight blocks)
    if(prod([NN1,NN2]-bb+1)> maxNumBlocksToTrainOn)
        % The generator is re-seeded on every outer iteration on purpose:
        % the same training positions are drawn each time.
        rand('seed',2000);
        randPermutation =  randperm(prod([NN1,NN2]-bb+1));
        selectedBlocks = randPermutation(1:maxNumBlocksToTrainOn);

        blkMatrix = zeros(bb^2,maxNumBlocksToTrainOn);
        blkWeight= zeros(bb^2,maxNumBlocksToTrainOn);
        for i = 1:maxNumBlocksToTrainOn
            [row,col] = ind2sub(size(Image)-bb+1,selectedBlocks(i));
            currBlock = Image(row:row+bb-1,col:col+bb-1);
            blkMatrix(:,i) = currBlock(:);
            currWeiBlock = W(row:row+bb-1,col:col+bb-1);
            blkWeight(:,i) = currWeiBlock(:);
        end
    else
        blkMatrix = im2col(Image,[bb,bb],'sliding');
        blkWeight = im2col(W,[bb,bb],'sliding');
    end

    param.K = K;
    param.numIteration = numIterOfKsvd ;
    % decompose signals until a certain error is reached; do not use a
    % fixed number of coefficients.
    param.errorFlag = 1;
    param.L=1;
    param.errorGoal = C;
    param.preserveDCAtom = 0;

    if (iteration==1)
        % first pass: start from a separable overcomplete DCT dictionary
        Pn=ceil(sqrt(K));
        DCT=zeros(bb,Pn);
        for k=0:1:Pn-1
            V=cos((0:1:bb-1)'*k*pi/Pn);
            if k>0, V=V-mean(V); end
            DCT(:,k+1)=V/norm(V);
        end
        DCT=kron(DCT,DCT);
        param.initialDictionary = DCT(:,1:param.K );
    else
        % later passes: warm-start from the previously trained dictionary
        param.initialDictionary= Dictionary;
    end
    param.InitializationMethod =  'GivenMatrix';

    if (reduceDC)
        % remove the mean of every training block
        vecOfMeans = mean(blkMatrix);
        blkMatrix = blkMatrix-ones(size(blkMatrix,1),1)*vecOfMeans;
    end

    if (waitBarOn)
        counterForWaitBar = param.numIteration+1;
        h = waitbar(0,'Denoising In Process ...');
        param.waitBarHandle = h;
        param.counterForWaitBar = counterForWaitBar;
    end

    tic;
    param.displayProgress = displayFlag;
    [Dictionary,output] = WKSVD(blkMatrix,blkWeight,param);
    output.D = Dictionary;
    testtime1=toc;
    fprintf('step1-Learning time =%f\n',testtime1);

    if (displayFlag)
        disp('finished Trainning dictionary');
    end

    % ============ step 1b: sparse-code all processed blocks ===============
    tic;
    errT=C;
    [blocks,idx] = my_im2col(Image,[bb,bb],slidingDis);
    [Weightblocks,idx] = my_im2col(W,[bb,bb],slidingDis);

    if (waitBarOn)
        newCounterForWaitBar = (param.numIteration+1)*size(blocks,2);
    end

    % process the blocks in batches of chunkSize columns
    for jj = 1:chunkSize:size(blocks,2)
        if (waitBarOn)
            waitbar(((param.numIteration*size(blocks,2))+jj)/newCounterForWaitBar);
        end
        jumpSize = min(jj+chunkSize-1,size(blocks,2));
        if (reduceDC)
            vecOfMeans = mean(blocks(:,jj:jumpSize));
            blocks(:,jj:jumpSize) = blocks(:,jj:jumpSize) - repmat(vecOfMeans,size(blocks,1),1);
        end

        Coefs = OMPerr(Dictionary,blocks(:,jj:jumpSize),Weightblocks(:,jj:jumpSize),errT);

        % overwrite the noisy blocks with their reconstructions
        if (reduceDC)
            blocks(:,jj:jumpSize)= Dictionary*Coefs + ones(size(blocks,1),1) * vecOfMeans;
        else
            blocks(:,jj:jumpSize)= Dictionary*Coefs ;
        end
    end
    testtime1=toc;
    fprintf('step1-sparse time =%f\n',testtime1);

    % ========== step 2: weighted averaging of the block estimates =========
    tic;
    Weight = zeros(NN1,NN2);
    IMout = zeros(NN1,NN2);
    [rows,cols] = ind2sub(size(Image)-bb+1,idx);
    for i  = 1:length(cols)
        rr = rows(i):rows(i)+bb-1;
        cc = cols(i):cols(i)+bb-1;
        w2 = reshape(Weightblocks(:,i).^2,[bb,bb]);
        block = reshape(blocks(:,i),[bb,bb]).*w2;
        IMout(rr,cc)=IMout(rr,cc)+block;
        Weight(rr,cc)=Weight(rr,cc)+w2;
    end

    if (waitBarOn)
        close(h);
    end
    lambda=0.034*sigma;

    % blend the noisy image with the accumulated block estimates
    IOut = (W.^2.*Image+lambda.*IMout)./(W.^2+lambda.*Weight);
    testtime1=toc;
    fprintf('step2 time =%f\n',testtime1);
    figure(4),imshow(IOut,[]),title(num2str(iteration));

    % ======= step 3: residual energy and update of the weight map u =======
    tic;
    M = zeros(NN1,NN2);              % number of blocks covering each pixel
    estnoise2blk=zeros(NN1,NN2);     % summed squared block residuals
    for i  = 1:length(cols)
        rr = rows(i):rows(i)+bb-1;
        cc = cols(i):cols(i)+bb-1;
        block =(reshape(blocks(:,i),[bb,bb])-Image(rr,cc)).^2;
        estnoise2blk(rr,cc)=estnoise2blk(rr,cc)+block;
        M(rr,cc)=M(rr,cc)+ones(bb);
    end

    PSNR(iteration) = 20*log10(255/sqrt(mean((IMin0(:)-IOut(:)).^2)));
    figure(3);
    plot(PSNR);

    x=(Image-IOut).^2;
    estnoise2=(x+lambda.*estnoise2blk)./(1+lambda.*M);

    % relative weight of component 2 under the current variances and
    % proportions (eps guards against a zero denominator)
    for i=1:2
        temp(:,:,i)=(r(i)/sqrt(sigma2(i)))*exp(-(estnoise2./(2*sigma2(i))));
    end
    u=temp(:,:,2)./(sum(temp,3)+eps);
    testtime1=toc;
    fprintf('step3 time =%f\n',testtime1);

    % ======= step 4: re-estimate the variances and the proportions ========
    tic;
    sumblkuf1=zeros(NN1,NN2);
    sumblkuf2=zeros(NN1,NN2);
    sumblku1=zeros(NN1,NN2);
    sumblku2=zeros(NN1,NN2);
    for i  = 1:length(cols)
        rr = rows(i):rows(i)+bb-1;
        cc = cols(i):cols(i)+bb-1;
        block =(reshape(blocks(:,i),[bb,bb])-Image(rr,cc)).^2.*u(rr,cc);
        sumblkuf2(rr,cc)=sumblkuf2(rr,cc)+block;
        block1 =(reshape(blocks(:,i),[bb,bb])-Image(rr,cc)).^2.*(1-u(rr,cc));
        sumblkuf1(rr,cc)=sumblkuf1(rr,cc)+block1;
        sumblku1(rr,cc)=sumblku1(rr,cc)+(1-u(rr,cc));
        sumblku2(rr,cc)=sumblku2(rr,cc)+u(rr,cc);
    end

    sigma2(1)=(sum(sum(x.*(1-u),1),2)+sum(sum(lambda.*sumblkuf1)))/(sum(sum(1-u,1),2)+sum(sum(lambda.*sumblku1)));
    sigma2(2)=(sum(sum(x.*u,1),2)+sum(sum(lambda.*sumblkuf2)))/(sum(sum(u,1),2)+sum(sum(lambda.*sumblku2)));
    r(2)=(sum(u(:))+sum(sum(lambda.*sumblku2)))/(numel(u)+sum(sum(lambda.*M)));
    r(1)=1-r(2);

    % keep the history of this outer iteration
    ressigma2(1,iteration)=sigma2(1);
    ressigma2(2,iteration)=sigma2(2);
    resr(1,iteration)=r(1);
    resr(2,iteration)=r(2);
    resIOut(:,:,iteration)=IOut;

    % Report step 4 inside the loop: the timer is restarted at the top of
    % the next iteration, so reading it after the loop would only ever
    % report the final iteration.
    testtime1=toc;
    fprintf('step4 time =%f\n',testtime1);
end

result.PSNR=PSNR;
result.sigma2=ressigma2;
result.r=resr;
result.IOut=resIOut;

