Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 31 additions & 79 deletions alg.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@
#include "mmx.h"
#endif

#ifdef __SSE2__
#define HAVE_SSE2
#include <emmintrin.h>
#include "alg/sse2.h"
#endif

/*
 * MAX2 / MAX3: evaluate to the largest of two / three values.
 * Every argument appears more than once in the expansion, so pass only
 * expressions without side effects (no i++, no function calls with state).
 */
#define MAX2(x, y) ((x) > (y) ? (x) : (y))
#define MAX3(x, y, z) ((x) > (y) ? ((x) > (z) ? (x) : (z)) : ((y) > (z) ? (y) : (z)))

Expand Down Expand Up @@ -352,42 +358,23 @@ void alg_draw_red_location(struct coord *cent, struct images *imgs, int width, u
/* DIFF: ABS of (x - y). ABS is defined outside this view. */
#define DIFF(x, y) (ABS((x)-(y)))
/*
 * NDIFF: ABS(x) * NORM divided by (ABS(x) + 2 * DIFF(x, y)).
 * NORM is defined outside this view. x is named three times in the
 * expansion, so pass an expression without side effects.
 */
#define NDIFF(x, y) (ABS(x) * NORM / (ABS(x) + 2 * DIFF(x, y)))

#ifdef HAVE_SSE2
#include "alg/alg_noise_tune.sse2.c"
#else
#include "alg/alg_noise_tune.plain.c"
#endif

/**
 * alg_noise_tune
 *
 *   Re-estimates cnt->noise from the per-pixel absolute difference between
 *   the reference frame (cnt->imgs.ref) and the frame passed in as 'new'.
 *
 *   Parameters:
 *     cnt - context; reads imgs.ref, imgs.mask, imgs.smartmask_final and
 *           imgs.motionsize, and reads and writes cnt->noise.
 *     new - frame to compare with the reference; motionsize bytes are read.
 *
 *   NOTE(review): this listing looks like a diff rendered without +/- markers:
 *   the inline loop appears to be the removed implementation and the
 *   HAVE_SSE2 dispatch at the end the added one. Read literally, the noise
 *   level would be updated twice - confirm against the real file.
 */
void alg_noise_tune(struct context *cnt, unsigned char *new)
{
struct images *imgs = &cnt->imgs;
int i;
unsigned char *ref = imgs->ref;
int diff, sum = 0, count = 0;
unsigned char *mask = imgs->mask;
unsigned char *smartmask = imgs->smartmask_final;

i = imgs->motionsize;

for (; i > 0; i--) {
diff = ABS(*ref - *new);

/* Optional mask image: scale the difference by mask / 255. */
if (mask)
diff = ((diff * *mask++) / 255);

/* Only pixels with a nonzero smartmask value contribute (diff + 1 each). */
if (*smartmask) {
sum += diff + 1;
count++;
}

ref++;
new++;
smartmask++;
}

/* count > 3 guarantees that count / 3 is at least 1. */
if (count > 3) /* Avoid divide by zero. */
sum /= count / 3;

/* 5: safe, 4: regular, 3: more sensitive */
cnt->noise = 4 + (cnt->noise + sum) / 2;
/* Dispatch to the implementation selected at compile time. */
#ifdef HAVE_SSE2
alg_noise_tune_sse2(cnt, new);
#else
alg_noise_tune_plain(cnt, new);
#endif
}

/**
Expand Down Expand Up @@ -1301,6 +1288,15 @@ int alg_switchfilter(struct context *cnt, int diffs, unsigned char *newimg)
return 0;
}

/* Multiplied by cnt->lastrate to derive accept_timer in the reference frame update. */
#define ACCEPT_STATIC_OBJECT_TIME 10 /* Seconds */
/* Percentage of cnt->noise used as threshold_ref in the reference frame update. */
#define EXCLUDE_LEVEL_PERCENT 20

#ifdef HAVE_SSE2
#include "alg/alg_update_reference_frame.sse2.c"
#else
#include "alg/alg_update_reference_frame.plain.c"
#endif

/**
* alg_update_reference_frame
*
Expand All @@ -1314,55 +1310,11 @@ int alg_switchfilter(struct context *cnt, int diffs, unsigned char *newimg)
* action - UPDATE_REF_FRAME or RESET_REF_FRAME
*
*/
#define ACCEPT_STATIC_OBJECT_TIME 10 /* Seconds */
#define EXCLUDE_LEVEL_PERCENT 20
/*
 * NOTE(review): this listing looks like a diff rendered without +/- markers:
 * the inline body appears to be the removed implementation and the
 * HAVE_SSE2 dispatch at the end the added one. Read literally, the update
 * would run twice - confirm against the real file.
 */
void alg_update_reference_frame(struct context *cnt, int action)
{
/* Per-pixel ref_dyn value above which a pixel is copied into the reference frame. */
int accept_timer = cnt->lastrate * ACCEPT_STATIC_OBJECT_TIME;
int i, threshold_ref;
int *ref_dyn = cnt->imgs.ref_dyn;
unsigned char *image_virgin = cnt->imgs.image_virgin;
unsigned char *ref = cnt->imgs.ref;
unsigned char *smartmask = cnt->imgs.smartmask_final;
unsigned char *out = cnt->imgs.out;

/* lastrate > 5 guarantees that lastrate / 3 is at least 1. */
if (cnt->lastrate > 5) /* Match rate limit */
accept_timer /= (cnt->lastrate / 3);

if (action == UPDATE_REF_FRAME) { /* Black&white only for better performance. */
threshold_ref = cnt->noise * EXCLUDE_LEVEL_PERCENT / 100;

for (i = cnt->imgs.motionsize; i > 0; i--) {
/* Exclude pixels from ref frame well below noise level. */
if (((int)(abs(*ref - *image_virgin)) > threshold_ref) && (*smartmask)) {
if (*ref_dyn == 0) { /* Always give new pixels a chance. */
*ref_dyn = 1;
} else if (*ref_dyn > accept_timer) { /* Include static Object after some time. */
*ref_dyn = 0;
*ref = *image_virgin;
} else if (*out) {
(*ref_dyn)++; /* Motionpixel? Keep excluding from ref frame. */
} else {
*ref_dyn = 0; /* Nothing special - release pixel. */
*ref = (*ref + *image_virgin) / 2;
}

} else { /* No motion: copy to ref frame. */
*ref_dyn = 0; /* Reset pixel */
*ref = *image_virgin;
}

ref++;
image_virgin++;
smartmask++;
ref_dyn++;
out++;
} /* end for i */

} else { /* action == RESET_REF_FRAME - also used to initialize the frame at startup. */
/* Copy fresh image */
memcpy(cnt->imgs.ref, cnt->imgs.image_virgin, cnt->imgs.size);
/* Reset static objects */
/* NOTE(review): if ref_dyn is a pointer, sizeof(cnt->imgs.ref_dyn) is the
 * pointer size, not the element size; alg_update_reference_frame_plain
 * uses sizeof(*cnt->imgs.ref_dyn) here - confirm which is intended. */
memset(cnt->imgs.ref_dyn, 0, cnt->imgs.motionsize * sizeof(cnt->imgs.ref_dyn));
}
/* Dispatch to the implementation selected at compile time. */
#ifdef HAVE_SSE2
alg_update_reference_frame_sse2(cnt, action);
#else
alg_update_reference_frame_plain(cnt, action);
#endif
}
36 changes: 36 additions & 0 deletions alg/alg_noise_tune.plain.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/**
 * alg_noise_tune_plain
 *
 *   Portable (non-SIMD) noise level tuning. Walks the first
 *   imgs.motionsize bytes of the reference frame and of 'new', and for
 *   every pixel whose smartmask_final entry is nonzero accumulates the
 *   absolute difference (scaled by mask / 255 when a mask image is set)
 *   plus one. The accumulated value, divided by a third of the number of
 *   contributing pixels, is blended into cnt->noise.
 *
 *   Parameters:
 *     cnt - context; cnt->imgs is read, cnt->noise is read and updated.
 *     new - frame to compare against the reference frame.
 */
static void alg_noise_tune_plain(struct context *cnt, unsigned char *new)
{
    struct images *imgs = &cnt->imgs;
    const unsigned char *ref_px = imgs->ref;
    const unsigned char *mask_px = imgs->mask;
    const unsigned char *smart_px = imgs->smartmask_final;
    const int npixels = imgs->motionsize;
    unsigned int total = 0, active = 0;
    int px;

    for (px = 0; px < npixels; px++) {
        const unsigned char r = ref_px[px];
        const unsigned char n = new[px];
        unsigned char delta;

        if (r > n)
            delta = r - n;
        else
            delta = n - r;

        /* Optional mask image: weight the difference by mask / 255. */
        if (mask_px)
            delta = ((delta * mask_px[px]) / 255);

        /* Pixels switched off in the smartmask do not contribute. */
        if (!smart_px[px])
            continue;

        total += delta + 1;
        active++;
    }

    /* Only divide when active / 3 cannot be zero. */
    if (active > 3)
        total /= active / 3;

    /* 5: safe, 4: regular, 3: more sensitive */
    cnt->noise = 4 + (cnt->noise + total) / 2;
}
129 changes: 129 additions & 0 deletions alg/alg_noise_tune.sse2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/**
 * alg_noise_tune_sse2
 *
 *   SSE2 variant of the noise level tuning. Processes the frame 16 pixels
 *   at a time and hands the remaining (motionsize mod 16) pixels to the
 *   same bytewise loop the plain variant uses.
 *
 *   For every pixel whose smartmask_final entry is nonzero, the absolute
 *   difference between the reference frame and 'new' (scaled by mask / 255
 *   when a mask image is set) plus one is accumulated. The accumulated
 *   value, divided by a third of the number of contributing pixels, is
 *   blended into cnt->noise.
 *
 *   The "+ 1" per contributing pixel is NOT added in the 8-bit domain:
 *   a saturating byte add would turn 255 + 1 into 255, whereas the plain
 *   variant adds 256 for such a pixel. Instead the raw differences are
 *   summed and the pixel count is added once after the vector loop, which
 *   yields exactly the same sum as the bytewise code.
 *
 *   Parameters:
 *     cnt - context; cnt->imgs is read, cnt->noise is read and updated.
 *     new - frame to compare against the reference frame.
 *
 *   _mm_absdiff_epu8, _mm_scale_epu8, sse_u8_to_u16 and sse_u16_to_u32 are
 *   project helpers (presumably from alg/sse2.h - confirm).
 */
static void alg_noise_tune_sse2(struct context *cnt, unsigned char *new)
{
    struct images *imgs = &cnt->imgs;
    unsigned char *ref = imgs->ref;
    unsigned int sum = 0, count = 0;
    unsigned char *mask = imgs->mask;
    unsigned char *smartmask = imgs->smartmask_final;

    int j, i = imgs->motionsize;

    int sse_iters;
    __m128i maskrow, zeromask;
    __m128i alo, ahi;
    __m128i ones = _mm_set1_epi8(1);
    __m128i sum16lo = _mm_setzero_si128();
    __m128i sum16hi = _mm_setzero_si128();
    __m128i sum32 = _mm_setzero_si128();
    __m128i count8 = _mm_setzero_si128();
    uint32_t total[4];
    uint8_t counts[16] __attribute__((aligned(16)));

    /* SSE reads 16 bytes at a time; truncating division: */
    for (sse_iters = i >> 4; sse_iters > 0; sse_iters--) {
        /* Load 16 bytes from images. Addresses need not be 16-byte aligned: */
        __m128i refrow = _mm_loadu_si128((__m128i *)ref);
        __m128i newrow = _mm_loadu_si128((__m128i *)new);

        /* Calculate absolute difference per byte: abs(ref - new): */
        __m128i absdiff = _mm_absdiff_epu8(refrow, newrow);

        /* If there is a mask image, alpha blend the absdiff by its pixels: */
        if (mask) {
            /* Load mask image data: */
            maskrow = _mm_loadu_si128((__m128i *)mask);
            mask += 16;

            /* "Alpha blend" absdiff with mask, absdiff *= (mask / 255): */
            absdiff = _mm_scale_epu8(absdiff, maskrow);
        }

        /* Fetch the smartmask values: */
        maskrow = _mm_loadu_si128((__m128i *)smartmask);

        /* Set diff values to 0 where smartmask is 0: */
        zeromask = _mm_cmpeq_epi8(maskrow, _mm_setzero_si128());
        absdiff = _mm_andnot_si128(zeromask, absdiff);

        /* Increment count for every nonzero value of smartmask: */
        count8 = _mm_adds_epu8(count8, _mm_andnot_si128(zeromask, ones));

        /* Split 16 bytes of diffs into 16x16-bit values:
         *   0 . 1 . 2 . 3 . 4 . 5 . 6 . 7 .
         *   8 . 9 . A . B . C . D . E . F .
         */
        sse_u8_to_u16(absdiff, &alo, &ahi);
        sum16lo = _mm_adds_epu16(sum16lo, alo);
        sum16hi = _mm_adds_epu16(sum16hi, ahi);

        /* Offload the 16-bit sums and 8-bit counters into 32-bit counters at
         * least once every 128 rounds to prevent overflow: a lane gains at
         * most 255 (sum) or 1 (count) per round, so 128 rounds stay below
         * 65535 and 255 respectively.
         * Also do this in the last iteration to empty out the counters: */
        if (!(sse_iters & 0x7F) || sse_iters == 1) {
            /* Split these two into 4x32 bits and do 32-bit additions:
             *   0 . . . 1 . . . 2 . . . 3 . . . +
             *   4 . . . 5 . . . 6 . . . 7 . . . +
             *   8 . . . 9 . . . A . . . B . . . +
             *   C . . . D . . . E . . . F . . .
             * Add all of these to the running sum: */
            sse_u16_to_u32(sum16lo, &alo, &ahi);
            sum32 = _mm_add_epi32(sum32, _mm_add_epi32(alo, ahi));

            sse_u16_to_u32(sum16hi, &alo, &ahi);
            sum32 = _mm_add_epi32(sum32, _mm_add_epi32(alo, ahi));

            sum16lo = _mm_setzero_si128();
            sum16hi = _mm_setzero_si128();

            _mm_store_si128((__m128i *)counts, count8);
            for (j = 0; j < 16; j++) {
                count += counts[j];
            }
            count8 = _mm_setzero_si128();
        }

        ref += 16;
        new += 16;
        smartmask += 16;
    }

    /* Outside the hot loop, write out the running sum to memory
     * and add the four component uint32's to get the total of the diffs.
     * Every counted pixel contributes (diff + 1), so add the count of
     * vector-processed pixels here to account for the "+ 1" part: */
    _mm_storeu_si128((__m128i *)total, sum32);
    sum = total[0] + total[1] + total[2] + total[3] + count;

    /* We handled all 16-byte blocks. Truncate i to its value mod 16, so that
     * the regular bytewise code can handle the remainder: */
    i &= 0x0F;

    for (; i > 0; i--) {
        unsigned char absdiff = (*ref > *new) ? (*ref - *new) : (*new - *ref);

        if (mask)
            absdiff = ((absdiff * *mask++) / 255);

        if (*smartmask) {
            sum += absdiff + 1;
            count++;
        }

        ref++;
        new++;
        smartmask++;
    }

    if (count > 3) /* Avoid divide by zero. */
        sum /= count / 3;

    /* 5: safe, 4: regular, 3: more sensitive */
    cnt->noise = 4 + (cnt->noise + sum) / 2;
}
53 changes: 53 additions & 0 deletions alg/alg_update_reference_frame.plain.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/**
 * alg_update_reference_frame_plain
 *
 *   Portable (non-SIMD) reference frame maintenance.
 *
 *   Parameters:
 *     cnt    - context; reads lastrate, noise and the imgs buffers,
 *              writes imgs.ref and imgs.ref_dyn.
 *     action - UPDATE_REF_FRAME updates the reference frame pixel by pixel;
 *              any other value (RESET_REF_FRAME) copies image_virgin into
 *              the reference frame and zeroes ref_dyn.
 */
static void alg_update_reference_frame_plain(struct context *cnt, int action)
{
    int static_limit = cnt->lastrate * ACCEPT_STATIC_OBJECT_TIME;
    int noise_floor;
    int npixels, px;
    uint16_t *age = cnt->imgs.ref_dyn;
    unsigned char *fresh = cnt->imgs.image_virgin;
    unsigned char *refpix = cnt->imgs.ref;
    unsigned char *smart = cnt->imgs.smartmask_final;
    unsigned char *motion = cnt->imgs.out;

    /* Match rate limit */
    if (cnt->lastrate > 5)
        static_limit /= (cnt->lastrate / 3);

    /* RESET_REF_FRAME - also used to initialize the frame at startup. */
    if (action != UPDATE_REF_FRAME) {
        /* Take over the fresh image and forget all static-object timers. */
        memcpy(cnt->imgs.ref, cnt->imgs.image_virgin, cnt->imgs.size);
        memset(cnt->imgs.ref_dyn, 0, cnt->imgs.motionsize * sizeof(*cnt->imgs.ref_dyn));
        return;
    }

    /* UPDATE_REF_FRAME: black&white only for better performance. */
    noise_floor = cnt->noise * EXCLUDE_LEVEL_PERCENT / 100;
    npixels = cnt->imgs.motionsize;

    for (px = 0; px < npixels; px++) {
        const int above_floor = ((int)(abs(refpix[px] - fresh[px])) > noise_floor);

        /* Below the noise floor or switched off in the smartmask:
         * no motion, so copy the pixel into the reference frame. */
        if (!above_floor || smart[px] == 0) {
            age[px] = 0;
            refpix[px] = fresh[px];
            continue;
        }

        if (age[px] == 0) {
            /* Always give new pixels a chance. */
            age[px] = 1;
        } else if (age[px] > static_limit) {
            /* Static object: include it after some time. */
            age[px] = 0;
            refpix[px] = fresh[px];
        } else if (motion[px]) {
            /* Motion pixel: keep excluding it from the reference frame. */
            age[px]++;
        } else {
            /* Nothing special: release the pixel and blend it in. */
            age[px] = 0;
            refpix[px] = (refpix[px] + fresh[px]) / 2;
        }
    }
}
Loading