[Rawstudio-commit] r3161 - trunk/plugins/dcp

Klaus Post klauspost at gmail.com
Thu Feb 4 21:54:52 CET 2010


Author: post
Date: 2010-02-04 21:54:52 +0100 (Thu, 04 Feb 2010)
New Revision: 3161

Modified:
   trunk/plugins/dcp/dcp-sse2.c
   trunk/plugins/dcp/dcp.c
Log:
Reduce the curve table to 256 entries and linearly interpolate between adjacent entries instead of doing a direct lookup. This gives a significant speedup and avoids posterizing the image.

Modified: trunk/plugins/dcp/dcp-sse2.c
===================================================================
--- trunk/plugins/dcp/dcp-sse2.c	2010-02-04 19:31:47 UTC (rev 3160)
+++ trunk/plugins/dcp/dcp-sse2.c	2010-02-04 20:54:52 UTC (rev 3161)
@@ -550,6 +550,7 @@
 static gfloat _rgb_div_ps[4] __attribute__ ((aligned (16))) = {1.0/65535.0, 1.0/65535.0, 1.0/65535.0, 1.0/65535.0};
 static gint _15_bit_epi32[4] __attribute__ ((aligned (16))) = { 32768, 32768, 32768, 32768};
 static guint _16_bit_sign[4] __attribute__ ((aligned (16))) = {0x80008000,0x80008000,0x80008000,0x80008000};
+static gfloat _twofiftysix_ps[4] __attribute__ ((aligned (16))) = {255.9999f,255.9999f,255.9999f,255.9999f};
 
 #define SETFLOAT4(N, A, B, C, D) float N[4] __attribute__ ((aligned (16))); \
 N[0] = D; N[1] = C; N[2] = B; N[3] = A;
@@ -761,20 +762,25 @@
 
 			if (!dcp->curve_is_flat)			
 			{
-				/* Convert v to lookup values */
-				/* TODO: Use 8 bit fraction as interpolation, for interpolating
-				* a more precise lookup using linear interpolation. Maybe use less than
-				* 16 bits for lookup for speed, 10 bits with interpolation should be enough */
-				__m128 v_mul = _mm_load_ps(_16_bit_ps);
-				v = _mm_mul_ps(v, v_mul);
-				__m128i lookup = _mm_cvtps_epi32(v);
-				gfloat* v_p = (gfloat*)&v;
+				/* Convert v to lookup values and interpolate */
+				__m128 v_mul = _mm_mul_ps(v, _mm_load_ps(_twofiftysix_ps));
+				__m128i lookup = _mm_cvtps_epi32(v_mul);
 				_mm_store_si128((__m128i*)&xfer[0], lookup);
 
-				v_p[0] = dcp->curve_samples[xfer[0]];
-				v_p[1] = dcp->curve_samples[xfer[1]];
-				v_p[2] = dcp->curve_samples[xfer[2]];
-				v_p[3] = dcp->curve_samples[xfer[3]];
+				/* Calculate fractions */
+				__m128 frac = _mm_sub_ps(v_mul, _mm_floor_positive_ps(v_mul));
+				__m128 inv_frac = _mm_sub_ps(_mm_load_ps(_ones_ps), frac);
+				
+				/* Load two adjacent curve values and interpolate between them */
+				__m128 p0p1 = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)&dcp->curve_samples[xfer[0]]));
+				__m128 p2p3 = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)&dcp->curve_samples[xfer[2]]));
+				p0p1 = _mm_loadh_pi(p0p1, (__m64*)&dcp->curve_samples[xfer[1]]);
+				p2p3 = _mm_loadh_pi(p2p3, (__m64*)&dcp->curve_samples[xfer[3]]);
+				
+				/* Pack all lower values in v0, high in v1 and interpolate */
+				__m128 v0 = _mm_shuffle_ps(p0p1, p2p3, _MM_SHUFFLE(2,0,2,0));
+				__m128 v1 = _mm_shuffle_ps(p0p1, p2p3, _MM_SHUFFLE(3,1,3,1));
+				v = _mm_add_ps(_mm_mul_ps(inv_frac, v0), _mm_mul_ps(frac, v1));
 			}
 
 			/* Apply looktable */

Modified: trunk/plugins/dcp/dcp.c
===================================================================
--- trunk/plugins/dcp/dcp.c	2010-02-04 19:31:47 UTC (rev 3160)
+++ trunk/plugins/dcp/dcp.c	2010-02-04 20:54:52 UTC (rev 3161)
@@ -197,13 +197,13 @@
 					g_object_unref(spline);
 					/* Create extra entry */
 					sampled[65536] = sampled[65535];
-					for (i = 0; i < 65536; i++)
+					for (i = 0; i < 256; i++)
 					{
-						gfloat value = (gfloat)i * (1.0 / 65535.0f);
+						gfloat value = (gfloat)i * (1.0 / 255.0f);
 						/* Gamma correct value */
 						value = powf(value, 1.0f / 2.2f);
 						
-						/*Lookup curve corrected value */
+						/* Lookup curve corrected value */
 						gfloat lookup = (int)(value * 65535.0f);
 						gfloat v0 = sampled[(int)lookup];
 						gfloat v1 = sampled[(int)lookup+1];
@@ -216,6 +216,7 @@
 						/* Store in table */
 						dcp->curve_samples[i] = value;
 					}
+					dcp->curve_samples[256] = dcp->curve_samples[255];
 				}
 			}
 			if (knots)
@@ -224,7 +225,7 @@
 		else
 			dcp->curve_is_flat = TRUE;
 
-		for(i=0;i<65536;i++)
+		for(i=0;i<257;i++)
 			dcp->curve_samples[i] = MIN(1.0f, MAX(0.0f, dcp->curve_samples[i]));
 
 		changed = TRUE;
@@ -267,7 +268,7 @@
 {
 	RSDcpClass *klass = RS_DCP_GET_CLASS(dcp);
 
-	dcp->curve_samples = g_new(gfloat, 65536);
+	dcp->curve_samples = g_new(gfloat, 257);
 	dcp->huesatmap_interpolated = NULL;
 	dcp->use_profile = FALSE;
 	dcp->curve_is_flat = TRUE;
@@ -921,7 +922,13 @@
 
 			/* Curve */
 			if (!dcp->curve_is_flat)
-				v = dcp->curve_samples[_S(v)];
+			{
+				gfloat lookup = CLAMP(v * 256.0f, 0.0f, 255.9999f);
+				gfloat v0 = dcp->curve_samples[(gint)lookup];
+				gfloat v1 = dcp->curve_samples[(gint)lookup + 1];
+				lookup -= floorf(lookup);
+				v = v0 * (1.0f - lookup) + v1 * lookup;
+			}
 
 			if (dcp->looktable)
 				huesat_map(dcp->looktable, &h, &s, &v);




More information about the Rawstudio-commit mailing list