"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "rtengine/dirpyr_equalizer.cc" between
rawtherapee-5.6.tar.xz and rawtherapee-5.7.tar.xz

About: RawTherapee is a powerful cross-platform raw image processing program.

dirpyr_equalizer.cc  (rawtherapee-5.6.tar.xz):dirpyr_equalizer.cc  (rawtherapee-5.7.tar.xz)
skipping to change at line 15 skipping to change at line 15
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or * the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version. * (at your option) any later version.
* *
* RawTherapee is distributed in the hope that it will be useful, * RawTherapee is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of * but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details. * GNU General Public License for more details.
* *
* You should have received a copy of the GNU General Public License * You should have received a copy of the GNU General Public License
* along with RawTherapee. If not, see <http://www.gnu.org/licenses/>. * along with RawTherapee. If not, see <https://www.gnu.org/licenses/>.
* *
* (C) 2010 Emil Martinec <ejmartin@uchicago.edu> * (C) 2010 Emil Martinec <ejmartin@uchicago.edu>
* *
*/ */
#include <cstddef> #include <cstddef>
#include <cmath> #include <cmath>
#include "improcfun.h" #include "improcfun.h"
#include "array2D.h" #include "array2D.h"
#include "rt_math.h" #include "rt_math.h"
#include "opthelper.h" #include "opthelper.h"
#define RANGEFN(i) ((1000.0f / (i + 1000.0f))) namespace {
#define DIRWT(i1,j1,i,j) ( domker[(i1-i)/scale+halfwin][(j1-j)/scale+halfwin] *
RANGEFN(fabsf((data_fine[i1][j1]-data_fine[i][j]))) )
namespace rtengine float rangeFn(float i) {
{ return 1.f / (i + 1000.f);
// Number of pyramid levels; sizes the per-level arrays (scales, multi, dirpyrlo).
constexpr int maxlevel = 6;
// Base noise amplitude; combined with dirpyrThreshold and the level number to
// derive the noisehi/noiselo bounds when the range LUT is filled.
constexpr float noise = 2000;
// Sequence of scales: per-level neighbour spacing. Each entry is divided by
// scaleprev and clamped to at least 1 before it is handed to dirpyr_channel().
constexpr int scales[maxlevel] = {1, 2, 4, 8, 16, 32};
// Global settings object defined elsewhere; this file reads level0_cbdl,
// level123_cbdl and verbose from it.
extern const Settings* settings;
//sequence of scales
void ImProcFunctions :: dirpyr_equalizer(float ** src, float ** dst, int srcwidt
h, int srcheight, float ** l_a, float ** l_b, const double * mult, const double
dirpyrThreshold, const double skinprot, float b_l, float t_l, float t_r, int sca
leprev)
{
int lastlevel = maxlevel;
float atten123 = (float) settings->level123_cbdl;
if(atten123 > 50.f) {
atten123 = 50.f;
}
if(atten123 < 0.f) {
atten123 = 0.f;
}
float atten0 = (float) settings->level0_cbdl;
if(atten0 > 40.f) {
atten123 = 40.f;
}
if(atten0 < 0.f) {
atten0 = 0.f;
}
if((t_r - t_l) < 0.55f) {
t_l = t_r + 0.55f; //avoid too small range
}
while (lastlevel > 0 && fabs(mult[lastlevel - 1] - 1) < 0.001) {
lastlevel--;
//printf("last level to process %d \n",lastlevel);
}
if (lastlevel == 0) {
return;
}
int level;
float multi[maxlevel] = {1.f, 1.f, 1.f, 1.f, 1.f, 1.f};
float scalefl[maxlevel];
for(int lv = 0; lv < maxlevel; lv++) {
scalefl[lv] = ((float) scales[lv]) / (float) scaleprev;
if(lv >= 1) {
if(scalefl[lv] < 1.f) {
multi[lv] = (atten123 * ((float) mult[lv] - 1.f) / 100.f) + 1.f;
//modulate action if zoom < 100%
} else {
multi[lv] = (float) mult[lv];
}
} else {
if(scalefl[lv] < 1.f) {
multi[lv] = (atten0 * ((float) mult[lv] - 1.f) / 100.f) + 1.f;
//modulate action if zoom < 100%
} else {
multi[lv] = (float) mult[lv];
}
}
}
multi_array2D<float, maxlevel> dirpyrlo (srcwidth, srcheight);
level = 0;
//int thresh = 100 * mult[5];
int scale = (int)(scales[level]) / scaleprev;
if(scale < 1) {
scale = 1;
}
dirpyr_channel(src, dirpyrlo[0], srcwidth, srcheight, 0, scale);
level = 1;
while(level < lastlevel) {
scale = (int)(scales[level]) / scaleprev;
if(scale < 1) {
scale = 1;
}
dirpyr_channel(dirpyrlo[level - 1], dirpyrlo[level], srcwidth, srcheight
, level, scale);
level ++;
}
float **tmpHue = nullptr, **tmpChr = nullptr;
if(skinprot != 0.f) {
// precalculate hue and chroma, use SSE, if available
// by precalculating these values we can greatly reduce the number of ca
lculations in idirpyr_eq_channel()
// but we need two additional buffers for this preprocessing
tmpHue = new float*[srcheight];
for (int i = 0; i < srcheight; i++) {
tmpHue[i] = new float[srcwidth];
}
#ifdef __SSE2__
#ifdef _OPENMP
#pragma omp parallel for
#endif
for(int i = 0; i < srcheight; i++) {
int j;
for(j = 0; j < srcwidth - 3; j += 4) {
_mm_storeu_ps(&tmpHue[i][j], xatan2f(LVFU(l_b[i][j]), LVFU(l_a[i
][j])));
}
for(; j < srcwidth; j++) {
tmpHue[i][j] = xatan2f(l_b[i][j], l_a[i][j]);
}
}
#else
#ifdef _OPENMP
#pragma omp parallel for
#endif
for(int i = 0; i < srcheight; i++) {
for(int j = 0; j < srcwidth; j++) {
tmpHue[i][j] = xatan2f(l_b[i][j], l_a[i][j]);
}
}
#endif
tmpChr = new float*[srcheight];
for (int i = 0; i < srcheight; i++) {
tmpChr[i] = new float[srcwidth];
}
#ifdef __SSE2__
#ifdef _OPENMP
#pragma omp parallel
#endif
{
__m128 div = _mm_set1_ps(327.68f);
#ifdef _OPENMP
#pragma omp for
#endif
for(int i = 0; i < srcheight; i++) {
int j;
for(j = 0; j < srcwidth - 3; j += 4) {
_mm_storeu_ps(&tmpChr[i][j], vsqrtf(SQRV(LVFU(l_b[i][j])) +
SQRV(LVFU(l_a[i][j]))) / div);
}
for(; j < srcwidth; j++) {
tmpChr[i][j] = sqrtf(SQR((l_b[i][j])) + SQR((l_a[i][j]))) /
327.68f;
}
}
}
#else
#ifdef _OPENMP
#pragma omp parallel for
#endif
for(int i = 0; i < srcheight; i++) {
for(int j = 0; j < srcwidth; j++) {
tmpChr[i][j] = sqrtf(SQR((l_b[i][j])) + SQR((l_a[i][j]))) / 327.
68f;
}
}
#endif
}
// with the current implementation of idirpyr_eq_channel we can safely use t
he buffer from last level as buffer, saves some memory
float ** buffer = dirpyrlo[lastlevel - 1];
for(int level = lastlevel - 1; level > 0; level--) {
idirpyr_eq_channel(dirpyrlo[level], dirpyrlo[level - 1], buffer, srcwidt
h, srcheight, level, multi, dirpyrThreshold, tmpHue, tmpChr, skinprot, b_l, t_l,
t_r);
}
scale = scales[0];
idirpyr_eq_channel(dirpyrlo[0], dst, buffer, srcwidth, srcheight, 0, multi,
dirpyrThreshold, tmpHue, tmpChr, skinprot, b_l, t_l, t_r);
if(skinprot != 0.f) {
for (int i = 0; i < srcheight; i++) {
delete [] tmpChr[i];
}
delete [] tmpChr;
for (int i = 0; i < srcheight; i++) {
delete [] tmpHue[i];
}
delete [] tmpHue;
}
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int i = 0; i < srcheight; i++)
for (int j = 0; j < srcwidth; j++) {
dst[i][j] = /*CLIP*/(buffer[i][j]); // TODO: Really a clip necessar
y?
}
}
// Directional-pyramid equalizer, CIECAM variant.
//
// Builds up to `maxlevel` successively low-passed copies of `src` with
// dirpyr_channel(), then walks the levels from coarse to fine and lets
// idirpyr_eq_channelcam() accumulate the re-weighted detail of each level into
// a shared buffer, which is finally written to `dst`.
//
// ncie            : only ncie->J_p is read, and only when execdir is true.
// src, dst        : input / output planes, srcheight rows of srcwidth floats.
// h_p, C_p        : planes forwarded to idirpyr_eq_channelcam().
// mult            : maxlevel per-level multipliers. Trailing levels whose
//                   multiplier is within 0.001 of 1 are not processed.
// dirpyrThreshold : forwarded to idirpyr_eq_channelcam().
// skinprot        : forwarded to idirpyr_eq_channelcam().
// execdir         : when true, the result is applied only where
//                   8 < J_p < 92; other pixels are copied from src.
// b_l, t_l, t_r   : forwarded to idirpyr_eq_channelcam().
// scaleprev       : divisor applied to `scales`; levels whose scale is smaller
//                   than scaleprev get their multiplier attenuated.
//
// Returns without touching `dst` when every multiplier is (almost) 1.
void ImProcFunctions :: dirpyr_equalizercam (CieImage *ncie, float ** src, float ** dst, int srcwidth, int srcheight, float ** h_p, float ** C_p, const double * mult, const double dirpyrThreshold, const double skinprot, bool execdir, float b_l, float t_l, float t_r, int scaleprev)
{
    if (settings->verbose) {
        printf("CAM dirpyr scaleprev=%i\n", scaleprev);
    }

    // Attenuation (in percent) for levels 1..5, limited to [0, 50].
    float atten123 = (float) settings->level123_cbdl;

    if (atten123 > 50.f) {
        atten123 = 50.f;
    } else if (atten123 < 0.f) {
        atten123 = 0.f;
    }

    // Attenuation (in percent) for level 0, limited to [0, 40].
    float atten0 = (float) settings->level0_cbdl;

    if (atten0 > 40.f) {
        atten0 = 40.f; // fixed: the upper clamp used to overwrite atten123 instead
    } else if (atten0 < 0.f) {
        atten0 = 0.f;
    }

    if ((t_r - t_l) < 0.55f) {
        // NOTE(review): this leaves t_r - t_l at -0.55, i.e. still below the
        // tested limit; kept unchanged, confirm the intended adjustment.
        t_l = t_r + 0.55f; //avoid too small range
    }

    // Skip trailing levels whose multiplier is neutral. The bound is tested
    // first so that mult[-1] is never read when all multipliers are neutral.
    int lastlevel = maxlevel;

    while (lastlevel > 0 && fabs(mult[lastlevel - 1] - 1) < 0.001) {
        lastlevel--;
    }

    if (lastlevel == 0) {
        return;
    }

    // Effective multipliers: attenuated towards 1 when the level's scale is
    // smaller than scaleprev.
    float multi[maxlevel] = {1.f, 1.f, 1.f, 1.f, 1.f, 1.f};

    for (int lv = 0; lv < maxlevel; lv++) {
        const float scalefl = ((float) scales[lv]) / (float) scaleprev;

        if (scalefl < 1.f) {
            const float atten = lv >= 1 ? atten123 : atten0;
            multi[lv] = (atten * ((float) mult[lv] - 1.f) / 100.f) + 1.f;
        } else {
            multi[lv] = (float) mult[lv];
        }
    }

    if (settings->verbose) {
        printf("CAM CbDL mult0=%f 1=%f 2=%f 3=%f 4=%f 5=%f\n", multi[0], multi[1], multi[2], multi[3], multi[4], multi[5]);
    }

    multi_array2D<float, maxlevel> dirpyrlo (srcwidth, srcheight);

    // Level 0 is filtered from the source, every further level from its predecessor.
    int scale = scales[0] / scaleprev;

    if (scale < 1) {
        scale = 1;
    }

    dirpyr_channel(src, dirpyrlo[0], srcwidth, srcheight, 0, scale);

    for (int level = 1; level < lastlevel; level++) {
        scale = scales[level] / scaleprev;

        if (scale < 1) {
            scale = 1;
        }

        dirpyr_channel(dirpyrlo[level - 1], dirpyrlo[level], srcwidth, srcheight, level, scale);
    }

    // with the current implementation of idirpyr_eq_channel we can safely use the buffer from last level as buffer, saves some memory
    float ** buffer = dirpyrlo[lastlevel - 1];

    for (int level = lastlevel - 1; level > 0; level--) {
        idirpyr_eq_channelcam(dirpyrlo[level], dirpyrlo[level - 1], buffer, srcwidth, srcheight, level, multi, dirpyrThreshold, h_p, C_p, skinprot, b_l, t_l, t_r);
    }

    // NOTE(review): dst (not src) is passed as the fine plane for level 0;
    // presumably callers pass the same plane for both - confirm.
    idirpyr_eq_channelcam(dirpyrlo[0], dst, buffer, srcwidth, srcheight, 0, multi, dirpyrThreshold, h_p, C_p, skinprot, b_l, t_l, t_r);

    if (execdir) {
#ifdef _OPENMP
        #pragma omp parallel for schedule(dynamic,16)
#endif

        for (int i = 0; i < srcheight; i++)
            for (int j = 0; j < srcwidth; j++) {
                // apply the result only inside the 8 < J < 92 window
                if (ncie->J_p[i][j] > 8.f && ncie->J_p[i][j] < 92.f) {
                    dst[i][j] = /*CLIP*/( buffer[i][j] );  // TODO: Really a clip necessary?
                } else {
                    dst[i][j] = src[i][j];
                }
            }
    } else {
        for (int i = 0; i < srcheight; i++)
            for (int j = 0; j < srcwidth; j++) {
                dst[i][j] = /*CLIP*/( buffer[i][j] );  // TODO: Really a clip necessary?
            }
    }
}
void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** data_coarse, i nt width, int height, int level, int scale) void dirpyr_channel(const float * const * data_fine, float ** data_coarse, int w idth, int height, int level, int scale)
{ {
// scale is spacing of directional averaging weights // scale is spacing of directional averaging weights
// calculate weights, compute directionally weighted average // calculate weights, compute directionally weighted average
if(level > 1) { if (level > 1) {
//generate domain kernel //generate domain kernel
int domker[5][5] = {{1, 1, 1, 1, 1}, {1, 2, 2, 2, 1}, {1, 2, 2, 2, 1}, { // multiplied each value of domker by 1000 to avoid multiplication by 1
1, 2, 2, 2, 1}, {1, 1, 1, 1, 1}}; 000 inside the loop
// int domker[5][5] = {{1,1,1,1,1},{1,1,1,1,1},{1,1,1,1,1},{1,1,1,1,1}, #ifdef __SSE2__
{1,1,1,1,1}}; const float domkerv[5][5][4] ALIGNED16 = {{{1000, 1000, 1000, 1000}, {10
static const int halfwin = 2; 00, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}, {1000
const int scalewin = halfwin * scale; , 1000, 1000, 1000}},
{{1000, 1000, 1000, 1000}, {20
00, 2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {1000
, 1000, 1000, 1000}},
{{1000, 1000, 1000, 1000}, {20
00, 2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {1000
, 1000, 1000, 1000}},
{{1000, 1000, 1000, 1000}, {20
00, 2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {1000
, 1000, 1000, 1000}},
{{1000, 1000, 1000, 1000}, {10
00, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}, {1000
, 1000, 1000, 1000}}};
#endif
const float domker[5][5] = {{1000, 1000, 1000, 1000, 1000},
{1000, 2000, 2000, 2000, 1000},
{1000, 2000, 2000, 2000, 1000},
{1000, 2000, 2000, 2000, 1000},
{1000, 1000, 1000, 1000, 1000}};
constexpr int halfwin = 2;
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel #pragma omp parallel
#endif #endif
{ {
const int scalewin = halfwin * scale;
#ifdef __SSE2__ #ifdef __SSE2__
__m128 thousandv = _mm_set1_ps( 1000.0f ); const vfloat thousandv = F2V(1000.f);
__m128 dirwtv, valv, normv, dftemp1v, dftemp2v; #endif
// multiplied each value of domkerv by 1000 to avoid multiplication by 1000 ins
ide the loop
float domkerv[5][5][4] ALIGNED16 = {{{1000, 1000, 1000, 1000}, {1000
, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}, {1000,
1000, 1000, 1000}}, {{1000, 1000, 1000, 1000}, {2000, 2000, 2000, 2000}, {2000,
2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {1000, 1000, 1000, 1000}}, {{1000,
1000, 1000, 1000}, {2000, 2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {2000, 20
00, 2000, 2000}, {1000, 1000, 1000, 1000}}, {{1000, 1000, 1000, 1000}, {2000, 20
00, 2000, 2000}, {2000, 2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {1000, 1000
, 1000, 1000}}, {{1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}, {1000, 1000
, 1000, 1000}, {1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}}};
#endif // __SSE2__
int j;
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp for //schedule (dynamic,8) #pragma omp for
#endif #endif
for(int i = 0; i < height; i++) { for (int i = 0; i < height; i++) {
float dirwt; int j;
for (j = 0; j < scalewin; j++) {
for(j = 0; j < scalewin; j++) {
float val = 0.f; float val = 0.f;
float norm = 0.f; float norm = 0.f;
for(int inbr = max(0, i - scalewin); inbr <= min(height - 1, i + scalewin); inbr += scale) { for (int inbr = max(0, i - scalewin); inbr <= min(height - 1 , i + scalewin); inbr += scale) {
for (int jnbr = max(0, j - scalewin); jnbr <= j + scalew in; jnbr += scale) { for (int jnbr = max(0, j - scalewin); jnbr <= j + scalew in; jnbr += scale) {
//printf("i=%d ",(inbr-i)/scale+halfwin); const float dirwt = domker[(inbr - i) / scale + half
dirwt = DIRWT(inbr, jnbr, i, j); win][(jnbr - j)/ scale + halfwin] * rangeFn(fabsf(data_fine[inbr][jnbr] - data_f
ine[i][j]));
val += dirwt * data_fine[inbr][jnbr]; val += dirwt * data_fine[inbr][jnbr];
norm += dirwt; norm += dirwt;
} }
} }
data_coarse[i][j] = val / norm; //low pass filter data_coarse[i][j] = val / norm; //low pass filter
} }
#ifdef __SSE2__ #ifdef __SSE2__
for(; j < width - scalewin - 3; j += 4) { for (; j < width - scalewin - 3; j += 4) {
valv = _mm_setzero_ps(); vfloat valv = ZEROV;
normv = _mm_setzero_ps(); vfloat normv = ZEROV;
dftemp1v = LVFU(data_fine[i][j]); const vfloat dftemp1v = LVFU(data_fine[i][j]);
for(int inbr = MAX(0, i - scalewin); inbr <= MIN(height - 1, for (int inbr = MAX(0, i - scalewin); inbr <= MIN(height - 1
i + scalewin); inbr += scale) { , i + scalewin); inbr += scale) {
int indexihlp = (inbr - i) / scale + halfwin; const int indexihlp = (inbr - i) / scale + halfwin;
for (int jnbr = j - scalewin, indexjhlp = 0; jnbr <= j +
for (int jnbr = j - scalewin, indexjhlp = 0; jnbr <= j + scalewin; jnbr += scale, ++indexjhlp) {
scalewin; jnbr += scale, indexjhlp++) { const vfloat dftemp2v = LVFU(data_fine[inbr][jnbr]);
dftemp2v = LVFU(data_fine[inbr][jnbr]); const vfloat dirwtv = LVF(domkerv[indexihlp][indexjh
dirwtv = LVF(domkerv[indexihlp][indexjhlp]) / (vabsf lp]) / (vabsf(dftemp1v - dftemp2v) + thousandv);
(dftemp1v - dftemp2v) + thousandv);
valv += dirwtv * dftemp2v; valv += dirwtv * dftemp2v;
normv += dirwtv; normv += dirwtv;
} }
} }
STVFU(data_coarse[i][j], valv / normv); //low pass filter
_mm_storeu_ps( &data_coarse[i][j], valv / normv); //low pass
filter
}
for(; j < width - scalewin; j++) {
float val = 0.f;
float norm = 0.f;
for(int inbr = max(0, i - scalewin); inbr <= min(height - 1,
i + scalewin); inbr += scale) {
for (int jnbr = j - scalewin; jnbr <= j + scalewin; jnbr
+= scale) {
dirwt = DIRWT(inbr, jnbr, i, j);
val += dirwt * data_fine[inbr][jnbr];
norm += dirwt;
}
}
data_coarse[i][j] = val / norm; //low pass filter
} }
#endif
#else for (; j < width - scalewin; j++) {
for(; j < width - scalewin; j++) {
float val = 0.f; float val = 0.f;
float norm = 0.f; float norm = 0.f;
for(int inbr = max(0, i - scalewin); inbr <= min(height - 1, i + scalewin); inbr += scale) { for (int inbr = max(0, i - scalewin); inbr <= min(height - 1 , i + scalewin); inbr += scale) {
for (int jnbr = j - scalewin; jnbr <= j + scalewin; jnbr += scale) { for (int jnbr = j - scalewin; jnbr <= j + scalewin; jnbr += scale) {
dirwt = DIRWT(inbr, jnbr, i, j); const float dirwt = domker[(inbr - i) / scale + half win][(jnbr - j)/ scale + halfwin] * rangeFn(fabsf(data_fine[inbr][jnbr] - data_f ine[i][j]));
val += dirwt * data_fine[inbr][jnbr]; val += dirwt * data_fine[inbr][jnbr];
norm += dirwt; norm += dirwt;
} }
} }
data_coarse[i][j] = val / norm; //low pass filter data_coarse[i][j] = val / norm; //low pass filter
} }
#endif for (; j < width; j++) {
for(; j < width; j++) {
float val = 0.f; float val = 0.f;
float norm = 0.f; float norm = 0.f;
for(int inbr = max(0, i - scalewin); inbr <= min(height - 1, i + scalewin); inbr += scale) { for (int inbr = max(0, i - scalewin); inbr <= min(height - 1 , i + scalewin); inbr += scale) {
for (int jnbr = j - scalewin; jnbr <= min(width - 1, j + scalewin); jnbr += scale) { for (int jnbr = j - scalewin; jnbr <= min(width - 1, j + scalewin); jnbr += scale) {
dirwt = DIRWT(inbr, jnbr, i, j); const float dirwt = domker[(inbr - i) / scale + half win][(jnbr - j)/ scale + halfwin] * rangeFn(fabsf(data_fine[inbr][jnbr] - data_f ine[i][j]));
val += dirwt * data_fine[inbr][jnbr]; val += dirwt * data_fine[inbr][jnbr];
norm += dirwt; norm += dirwt;
} }
} }
data_coarse[i][j] = val / norm; //low pass filter data_coarse[i][j] = val / norm; //low pass filter
} }
} }
} }
} else { // level <=1 means that all values of domker would be 1.0f, so n o need for multiplication } else { // level <=1 means that all values of domker would be 1.0f, so n o need for multiplication
// const int scalewin = scale;
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel #pragma omp parallel
#endif #endif
{ {
#ifdef __SSE2__ #ifdef __SSE2__
__m128 thousandv = _mm_set1_ps( 1000.0f ); const vfloat thousandv = F2V(1000.0f);
__m128 dirwtv, valv, normv, dftemp1v, dftemp2v; #endif
#endif // __SSE2__
int j;
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp for schedule(dynamic,16) #pragma omp for schedule(dynamic,16)
#endif #endif
for(int i = 0; i < height; i++) for (int i = 0; i < height; i++)
{ {
float dirwt; int j = 0;
for (; j < scale; j++) {
for(j = 0; j < scale; j++) {
float val = 0.f; float val = 0.f;
float norm = 0.f; float norm = 0.f;
for(int inbr = max(0, i - scale); inbr <= min(height - 1, i + scale); inbr += scale) { for (int inbr = max(0, i - scale); inbr <= min(height - 1, i + scale); inbr += scale) {
for (int jnbr = max(0, j - scale); jnbr <= j + scale; jn br += scale) { for (int jnbr = max(0, j - scale); jnbr <= j + scale; jn br += scale) {
dirwt = RANGEFN(fabsf(data_fine[inbr][jnbr] - data_f ine[i][j])); const float dirwt = rangeFn(fabsf(data_fine[inbr][jn br] - data_fine[i][j]));
val += dirwt * data_fine[inbr][jnbr]; val += dirwt * data_fine[inbr][jnbr];
norm += dirwt; norm += dirwt;
} }
} }
data_coarse[i][j] = val / norm; //low pass filter data_coarse[i][j] = val / norm; //low pass filter
} }
#ifdef __SSE2__ #ifdef __SSE2__
for(; j < width - scale - 3; j += 4) { for (; j < width - scale - 3; j += 4) {
valv = _mm_setzero_ps(); vfloat valv = ZEROV;
normv = _mm_setzero_ps(); vfloat normv = ZEROV;
dftemp1v = LVFU(data_fine[i][j]); const vfloat dftemp1v = LVFU(data_fine[i][j]);
for(int inbr = MAX(0, i - scale); inbr <= MIN(height - 1, i + scale); inbr += scale) { for (int inbr = MAX(0, i - scale); inbr <= MIN(height - 1, i + scale); inbr += scale) {
for (int jnbr = j - scale; jnbr <= j + scale; jnbr += sc ale) { for (int jnbr = j - scale; jnbr <= j + scale; jnbr += sc ale) {
dftemp2v = LVFU(data_fine[inbr][jnbr]); const vfloat dftemp2v = LVFU(data_fine[inbr][jnbr]);
dirwtv = thousandv / (vabsf(dftemp2v - dftemp1v) + t const vfloat dirwtv = thousandv / (vabsf(dftemp2v -
housandv); dftemp1v) + thousandv);
valv += dirwtv * dftemp2v; valv += dirwtv * dftemp2v;
normv += dirwtv; normv += dirwtv;
} }
} }
STVFU(data_coarse[i][j], valv / normv); //low pass filter
_mm_storeu_ps( &data_coarse[i][j], valv / normv); //low pass
filter
}
for(; j < width - scale; j++) {
float val = 0.f;
float norm = 0.f;
for(int inbr = max(0, i - scale); inbr <= min(height - 1, i
+ scale); inbr += scale) {
for (int jnbr = j - scale; jnbr <= j + scale; jnbr += sc
ale) {
dirwt = RANGEFN(fabsf(data_fine[inbr][jnbr] - data_f
ine[i][j]));
val += dirwt * data_fine[inbr][jnbr];
norm += dirwt;
}
}
data_coarse[i][j] = val / norm; //low pass filter
} }
#endif
#else for (; j < width - scale; j++) {
for(; j < width - scale; j++) {
float val = 0.f; float val = 0.f;
float norm = 0.f; float norm = 0.f;
for(int inbr = max(0, i - scale); inbr <= min(height - 1, i + scale); inbr += scale) { for (int inbr = max(0, i - scale); inbr <= min(height - 1, i + scale); inbr += scale) {
for (int jnbr = j - scale; jnbr <= j + scale; jnbr += sc ale) { for (int jnbr = j - scale; jnbr <= j + scale; jnbr += sc ale) {
dirwt = RANGEFN(fabsf(data_fine[inbr][jnbr] - data_f ine[i][j])); const float dirwt = rangeFn(fabsf(data_fine[inbr][jn br] - data_fine[i][j]));
val += dirwt * data_fine[inbr][jnbr]; val += dirwt * data_fine[inbr][jnbr];
norm += dirwt; norm += dirwt;
} }
} }
data_coarse[i][j] = val / norm; //low pass filter data_coarse[i][j] = val / norm; //low pass filter
} }
#endif for (; j < width; j++) {
for(; j < width; j++) {
float val = 0.f; float val = 0.f;
float norm = 0.f; float norm = 0.f;
for(int inbr = max(0, i - scale); inbr <= min(height - 1, i + scale); inbr += scale) { for (int inbr = max(0, i - scale); inbr <= min(height - 1, i + scale); inbr += scale) {
for (int jnbr = j - scale; jnbr <= min(width - 1, j + sc ale); jnbr += scale) { for (int jnbr = j - scale; jnbr <= min(width - 1, j + sc ale); jnbr += scale) {
dirwt = RANGEFN(fabsf(data_fine[inbr][jnbr] - data_f ine[i][j])); const float dirwt = rangeFn(fabsf(data_fine[inbr][jn br] - data_fine[i][j]));
val += dirwt * data_fine[inbr][jnbr]; val += dirwt * data_fine[inbr][jnbr];
norm += dirwt; norm += dirwt;
} }
} }
data_coarse[i][j] = val / norm; //low pass filter data_coarse[i][j] = val / norm; //low pass filter
} }
} }
} }
} }
} }
void ImProcFunctions::idirpyr_eq_channel(float ** data_coarse, float ** data_fin void fillLut(LUTf &irangefn, int level, double dirpyrThreshold, float mult, floa
e, float ** buffer, int width, int height, int level, float mult[maxlevel], cons t skinprot) {
t double dirpyrThreshold, float ** hue, float ** chrom, const double skinprot, f
loat b_l, float t_l, float t_r)
{
const float skinprotneg = -skinprot;
const float factorHard = (1.f - skinprotneg / 100.f);
float offs;
if(skinprot == 0.f) { float multbis;
offs = 0.f; if (level == 4 && mult > 1.f) {
multbis = 1.f + 0.65f * (mult - 1.f);
} else if (level == 5 && mult > 1.f) {
multbis = 1.f + 0.45f * (mult - 1.f);
} else { } else {
offs = -1.f; multbis = mult; //multbis to reduce artifacts for high values mult
}
float multbis[maxlevel];
multbis[level] = mult[level]; //multbis to reduce artifacts for high values
mult
if(level == 4 && mult[level] > 1.f) {
multbis[level] = 1.f + 0.65f * (mult[level] - 1.f);
}
if(level == 5 && mult[level] > 1.f) {
multbis[level] = 1.f + 0.45f * (mult[level] - 1.f);
} }
LUTf irangefn (0x20000); const float offs = skinprot == 0.f ? 0.f : -1.f;
{ constexpr float noise = 2000.f;
const float noisehi = 1.33f * noise * dirpyrThreshold / expf(level * log const float noisehi = 1.33f * noise * dirpyrThreshold / expf(level * log(3.0
(3.0)), noiselo = 0.66f * noise * dirpyrThreshold / expf(level * log(3.0)); )), noiselo = 0.66f * noise * dirpyrThreshold / expf(level * log(3.0));
//printf("level=%i multlev=%f noisehi=%f noiselo=%f skinprot=%f\n",level
,mult[level], noisehi, noiselo, skinprot);
for (int i = 0; i < 0x20000; i++) { for (int i = 0; i < 0x20000; i++) {
if (abs(i - 0x10000) > noisehi || multbis[level] < 1.0) { if (abs(i - 0x10000) > noisehi || multbis < 1.0) {
irangefn[i] = multbis[level] + offs; irangefn[i] = multbis + offs;
} else {
if (abs(i - 0x10000) < noiselo) {
irangefn[i] = 1.f + offs;
} else { } else {
if (abs(i - 0x10000) < noiselo) { irangefn[i] = 1.f + offs + (multbis - 1.f) * (noisehi - abs(i -
irangefn[i] = 1.f + offs ; 0x10000)) / (noisehi - noiselo + 0.01f);
} else {
irangefn[i] = 1.f + offs + (multbis[level] - 1.f) * (noisehi
- abs(i - 0x10000)) / (noisehi - noiselo + 0.01f) ;
}
} }
} }
} }
}
if(skinprot == 0.f) void idirpyr_eq_channel(const float * const * data_coarse, const float * const *
data_fine, float ** buffer, int width, int height, int level, float mult, const
double dirpyrThreshold, const float * const * hue, const float * const * chrom,
const double skinprot, float b_l, float t_l, float t_r)
{
const float skinprotneg = -skinprot;
const float factorHard = (1.f - skinprotneg / 100.f);
LUTf irangefn(0x20000);
fillLut(irangefn, level, dirpyrThreshold, mult, skinprot);
if (!skinprot) {
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,16) #pragma omp parallel for schedule(dynamic,16)
#endif #endif
for(int i = 0; i < height; i++) { for (int i = 0; i < height; i++) {
for(int j = 0; j < width; j++) { for (int j = 0; j < width; j++) {
float hipass = (data_fine[i][j] - data_coarse[i][j]); const float hipass = data_fine[i][j] - data_coarse[i][j];
buffer[i][j] += irangefn[hipass + 0x10000] * hipass; buffer[i][j] += irangefn[hipass + 0x10000] * hipass;
} }
} }
else if(skinprot > 0.f) } else if (skinprot > 0.f) {
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,16) #pragma omp parallel for schedule(dynamic,16)
#endif #endif
for(int i = 0; i < height; i++) { for (int i = 0; i < height; i++) {
for(int j = 0; j < width; j++) { for (int j = 0; j < width; j++) {
float scale = 1.f; float scale = 1.f;
float hipass = (data_fine[i][j] - data_coarse[i][j]); const float hipass = data_fine[i][j] - data_coarse[i][j];
// These values are precalculated now rtengine::Color::SkinSatCbdl(data_fine[i][j] / 327.68f, hue[i][j
float modhue = hue[i][j]; ], chrom[i][j], skinprot, scale, true, b_l, t_l, t_r);
float modchro = chrom[i][j]; buffer[i][j] += (1.f + (irangefn[hipass + 0x10000]) * scale) * h
Color::SkinSatCbdl ((data_fine[i][j]) / 327.68f, modhue, modchro ipass;
, skinprot, scale, true, b_l, t_l, t_r);
buffer[i][j] += (1.f + (irangefn[hipass + 0x10000]) * scale) * h
ipass ;
} }
} }
else } else {
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,16) #pragma omp parallel for schedule(dynamic,16)
#endif #endif
for(int i = 0; i < height; i++) { for (int i = 0; i < height; i++) {
for(int j = 0; j < width; j++) { for (int j = 0; j < width; j++) {
float scale = 1.f; float scale = 1.f;
float hipass = (data_fine[i][j] - data_coarse[i][j]); const float hipass = data_fine[i][j] - data_coarse[i][j];
// These values are precalculated now rtengine::Color::SkinSatCbdl(data_fine[i][j] / 327.68f, hue[i][j
float modhue = hue[i][j]; ], chrom[i][j], skinprotneg, scale, false, b_l, t_l, t_r);
float modchro = chrom[i][j]; const float correct = irangefn[hipass + 0x10000];
Color::SkinSatCbdl ((data_fine[i][j]) / 327.68f, modhue, modchro
, skinprotneg, scale, false, b_l, t_l, t_r);
float correct = irangefn[hipass + 0x10000];
if (scale == 1.f) {//image hard if (scale == 1.f) {//image hard
buffer[i][j] += (1.f + (correct) * (factorHard)) * hipass ; buffer[i][j] += (1.f + correct * factorHard) * hipass;
} else { //image soft with scale < 1 ==> skin } else { //image soft with scale < 1 ==> skin
buffer[i][j] += (1.f + (correct)) * hipass ; buffer[i][j] += (1.f + correct) * hipass;
} }
} }
} }
}
} }
void ImProcFunctions::idirpyr_eq_channelcam(float ** data_coarse, float ** data_ fine, float ** buffer, int width, int height, int level, float mult[maxlevel], c onst double dirpyrThreshold, float ** l_a_h, float ** l_b_c, const double skinpr ot, float b_l, float t_l, float t_r) void idirpyr_eq_channelcam(const float * const * data_coarse, const float * cons t * data_fine, float ** buffer, int width, int height, int level, float mult, co nst double dirpyrThreshold, const float * const * h_p, const float * const * C_p , const double skinprot, float b_l, float t_l, float t_r)
{ {
const float skinprotneg = -skinprot; const float skinprotneg = -skinprot;
const float factorHard = (1.f - skinprotneg / 100.f); const float factorHard = 1.f - skinprotneg / 100.f;
float offs; LUTf irangefn(0x20000);
fillLut(irangefn, level, dirpyrThreshold, mult, skinprot);
if(skinprot == 0.f) { if (!skinprot) {
offs = 0.f; #ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,16)
#endif
for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) {
const float hipass = data_fine[i][j] - data_coarse[i][j];
buffer[i][j] += irangefn[hipass + 0x10000] * hipass;
}
}
} else if (skinprot > 0.f) {
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,16)
#endif
for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) {
const float hipass = data_fine[i][j] - data_coarse[i][j];
float scale = 1.f;
rtengine::Color::SkinSatCbdlCam(data_fine[i][j] / 327.68f, h_p[i
][j] , C_p[i][j], skinprot, scale, true, b_l, t_l, t_r);
buffer[i][j] += (1.f + (irangefn[hipass + 0x10000]) * scale) * h
ipass;
}
}
} else { } else {
offs = -1.f; #ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,16)
#endif
for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) {
const float hipass = data_fine[i][j] - data_coarse[i][j];
float scale = 1.f;
const float correct = irangefn[hipass + 0x10000];
rtengine::Color::SkinSatCbdlCam(data_fine[i][j] / 327.68f, h_p[i
][j], C_p[i][j], skinprotneg, scale, false, b_l, t_l, t_r);
if (scale == 1.f) {//image hard
buffer[i][j] += (1.f + correct * factorHard) * hipass;
} else { //image soft
buffer[i][j] += (1.f + correct) * hipass;
}
}
}
} }
}
}
namespace rtengine
{
float multbis[maxlevel]; extern const Settings* settings;
multbis[level] = mult[level]; //multbis to reduce artifacts for high values void ImProcFunctions::dirpyr_equalizer(const float * const * src, float ** dst,
mult int srcwidth, int srcheight, const float * const * l_a, const float * const * l_
b, const double * mult, const double dirpyrThreshold, const double skinprot, flo
at b_l, float t_l, float t_r, int scaleprev)
{
//sequence of scales
constexpr int maxlevel = 6;
constexpr int scales[maxlevel] = {1, 2, 4, 8, 16, 32};
const float atten123 = rtengine::LIM<float>(settings->level123_cbdl, 0.f, 50
.f);
const float atten0 = rtengine::LIM<float>(settings->level0_cbdl, 0.f, 40.f);
if(level == 4 && mult[level] > 1.f) { int lastlevel = maxlevel;
multbis[level] = 1.f + 0.65f * (mult[level] - 1.f); while (lastlevel > 0 && fabs(mult[lastlevel - 1] - 1) < 0.001) {
--lastlevel;
} }
if(level == 5 && mult[level] > 1.f) { if (lastlevel == 0) {
multbis[level] = 1.f + 0.45f * (mult[level] - 1.f); return;
} }
LUTf irangefn (0x20000); float multi[maxlevel];
{
const float noisehi = 1.33f * noise * dirpyrThreshold / expf(level * log
(3.0)), noiselo = 0.66f * noise * dirpyrThreshold / expf(level * log(3.0));
//printf("level=%i multlev=%f noisehi=%f noiselo=%f skinprot=%f\n",level for (int lv = 0; lv < maxlevel; ++lv) {
,mult[level], noisehi, noiselo, skinprot); if (scales[lv] < scaleprev) {
for (int i = 0; i < 0x20000; i++) { const float factor = lv >= 1 ? atten123 : atten0;
if (abs(i - 0x10000) > noisehi || multbis[level] < 1.0) { multi[lv] = (factor * ((float) mult[lv] - 1.f) / 100.f) + 1.f; //
irangefn[i] = multbis[level] + offs; modulate action if zoom < 100%
} else { } else {
if (abs(i - 0x10000) < noiselo) { multi[lv] = mult[lv];
irangefn[i] = 1.f + offs ;
} else {
irangefn[i] = 1.f + offs + (multbis[level] - 1.f) * (noisehi
- abs(i - 0x10000)) / (noisehi - noiselo + 0.01f) ;
}
}
} }
} }
if(skinprot == 0.f) multi_array2D<float, maxlevel> dirpyrlo (srcwidth, srcheight);
dirpyr_channel(src, dirpyrlo[0], srcwidth, srcheight, 0, std::max(scales[0]
/ scaleprev, 1));
for (int level = 1; level < lastlevel; ++level) {
dirpyr_channel(dirpyrlo[level - 1], dirpyrlo[level], srcwidth, srcheight
, level, std::max(scales[level] / scaleprev, 1));
}
array2D<float> tmpHue, tmpChr;
if (skinprot) {
// precalculate hue and chroma, use SSE, if available
// by precalculating these values we can greatly reduce the number of ca
lculations in idirpyr_eq_channel()
// but we need two additional buffers for this preprocessing
tmpHue(srcwidth, srcheight);
tmpChr(srcwidth, srcheight);
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,16) #pragma omp parallel
#endif
{
#ifdef __SSE2__
const vfloat div = F2V(327.68f);
#endif #endif
for(int i = 0; i < height; i++) {
for(int j = 0; j < width; j++) {
float hipass = (data_fine[i][j] - data_coarse[i][j]);
buffer[i][j] += irangefn[hipass + 0x10000] * hipass ;
}
}
else if(skinprot > 0.f)
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,16) #pragma omp for
#endif #endif
for(int i = 0; i < height; i++) {
for(int j = 0; j < width; j++) { for (int i = 0; i < srcheight; i++) {
float hipass = (data_fine[i][j] - data_coarse[i][j]); int j = 0;
float scale = 1.f; #ifdef __SSE2__
Color::SkinSatCbdlCam ((data_fine[i][j]) / 327.68f, l_a_h[i][j] for (; j < srcwidth - 3; j += 4) {
, l_b_c[i][j], skinprot, scale, true, b_l, t_l, t_r); const vfloat lav = LVFU(l_a[i][j]);
buffer[i][j] += (1.f + (irangefn[hipass + 0x10000]) * scale) * h const vfloat lbv = LVFU(l_b[i][j]);
ipass ; STVFU(tmpHue[i][j], xatan2f(lbv, lav));
STVFU(tmpChr[i][j], vsqrtf(SQRV(lbv) + SQRV(lav)) / div);
}
#endif
for (; j < srcwidth; j++) {
tmpHue[i][j] = xatan2f(l_b[i][j], l_a[i][j]);
tmpChr[i][j] = sqrtf(SQR((l_b[i][j])) + SQR((l_a[i][j]))) /
327.68f;
}
} }
} }
else }
// with the current implementation of idirpyr_eq_channel we can safely use t
he buffer from last level as buffer, saves some memory
float** buffer = dirpyrlo[lastlevel - 1];
for (int level = lastlevel - 1; level > 0; --level) {
idirpyr_eq_channel(dirpyrlo[level], dirpyrlo[level - 1], buffer, srcwidt
h, srcheight, level, multi[level], dirpyrThreshold, tmpHue, tmpChr, skinprot, b_
l, t_l, t_r);
}
idirpyr_eq_channel(dirpyrlo[0], dst, buffer, srcwidth, srcheight, 0, multi[0
], dirpyrThreshold, tmpHue, tmpChr, skinprot, b_l, t_l, t_r);
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,16) #pragma omp parallel for
#endif #endif
for(int i = 0; i < height; i++) {
for(int j = 0; j < width; j++) {
float hipass = (data_fine[i][j] - data_coarse[i][j]);
float scale = 1.f;
float correct;
correct = irangefn[hipass + 0x10000];
Color::SkinSatCbdlCam ((data_fine[i][j]) / 327.68f, l_a_h[i][j],
l_b_c[i][j] , skinprotneg, scale, false, b_l, t_l, t_r);
if (scale == 1.f) {//image hard for (int i = 0; i < srcheight; i++) {
buffer[i][j] += (1.f + (correct) * factorHard) * hipass ; for (int j = 0; j < srcwidth; j++) {
dst[i][j] = buffer[i][j];
}
}
}
} else { //image soft void ImProcFunctions::dirpyr_equalizercam(const CieImage *ncie, float ** src, fl
buffer[i][j] += (1.f + (correct)) * hipass ; oat ** dst, int srcwidth, int srcheight, const float * const * h_p, const float
} * const * C_p, const double * mult, const double dirpyrThreshold, const double s
} kinprot, float b_l, float t_l, float t_r, int scaleprev)
{
//sequence of scales
constexpr int maxlevel = 6;
constexpr int scales[maxlevel] = {1, 2, 4, 8, 16, 32};
const float atten123 = rtengine::LIM<float>(settings->level123_cbdl, 0.f, 50
.f);
const float atten0 = rtengine::LIM<float>(settings->level0_cbdl, 0.f, 40.f);
// Skip trailing levels whose multiplier is within 0.001 of 1: afterwards
// lastlevel is one past the highest level that actually changes the image.
int lastlevel = maxlevel;
// Check lastlevel > 0 first: short-circuit evaluation then guarantees that
// mult[lastlevel - 1] is never read at index -1 when every multiplier is
// neutral (same operand order as the loop in dirpyr_equalizer).
while (lastlevel > 0 && fabs(mult[lastlevel - 1] - 1) < 0.001) {
    --lastlevel;
}
// Every level is neutral: return before any pyramid buffer is allocated.
if (lastlevel == 0) {
    return;
}
float multi[maxlevel];
for (int lv = 0; lv < maxlevel; lv++) {
if (scales[lv] < scaleprev) {
const float factor = lv >= 1 ? atten123 : atten0;
multi[lv] = (factor * ((float) mult[lv] - 1.f) / 100.f) + 1.f;
} else {
multi[lv] = mult[lv];
} }
}
// if(gamutlab) { multi_array2D<float, maxlevel> dirpyrlo (srcwidth, srcheight);
// ImProcFunctions::badpixcam (buffer[i][j], 6.0, 10, 2);//for bad pixels
// } dirpyr_channel(src, dirpyrlo[0], srcwidth, srcheight, 0, std::max(scales[0]
/ scaleprev, 1));
/* if(gamutlab) {//disabled
float Lprov1=(buffer[i][j])/327.68f;
float R,G,B;
#ifdef _DEBUG
bool neg=false;
bool more_rgb=false;
//gamut control : Lab values are in gamut
Color::gamutLchonly(modhue,Lprov1,modchro, R, G, B, wip, highlig
ht, 0.15f, 0.96f, neg, more_rgb);
#else
//gamut control : Lab values are in gamut
Color::gamutLchonly(modhue,Lprov1,modchro, R, G, B, wip, highlig
ht, 0.15f, 0.96f);
#endif
// Color::gamutLchonly(modhue,Lprov1,modchro, R, G, B, wip, highlight,
0.15f, 0.96f);//gamut control in Lab mode ..not in CIECAM
buffer[i][j]=Lprov1*327.68f;
float2 sincosval = xsincosf(modhue);
l_a_h[i][j]=327.68f*modchro*sincosval.y;
l_b_c[i][j]=327.68f*modchro*sincosval.x;
}
*/
}
// float hipass = (data_fine[i][j]-data_coarse[i][j]); for (int level = 1; level < lastlevel; ++level) {
// buffer[i][j] += irangefn[hipass+0x10000] * hipass ; dirpyr_channel(dirpyrlo[level - 1], dirpyrlo[level], srcwidth, srcheight
, level, std::max(scales[level] / scaleprev, 1));
}
#undef DIRWT_L // with the current implementation of idirpyr_eq_channel we can safely use t
#undef DIRWT_AB he buffer from last level as buffer, saves some memory
float ** buffer = dirpyrlo[lastlevel - 1];
#undef NRWT_L for (int level = lastlevel - 1; level > 0; --level) {
#undef NRWT_AB idirpyr_eq_channelcam(dirpyrlo[level], dirpyrlo[level - 1], buffer, srcw
idth, srcheight, level, multi[level], dirpyrThreshold , h_p, C_p, skinprot, b_l,
t_l, t_r);
}
idirpyr_eq_channelcam(dirpyrlo[0], dst, buffer, srcwidth, srcheight, 0, mult
i[0], dirpyrThreshold, h_p, C_p, skinprot, b_l, t_l, t_r);
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,16)
#endif
for (int i = 0; i < srcheight; i++) {
for (int j = 0; j < srcwidth; j++) {
if (ncie->J_p[i][j] > 8.f && ncie->J_p[i][j] < 92.f) {
dst[i][j] = buffer[i][j];
} else {
dst[i][j] = src[i][j];
}
}
}
}
} }
 End of changes. 89 change blocks. 
672 lines changed or deleted 347 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)