
Commit 708c107

Merge pull request #33 from microsoft/feat/exif-detections-no-bboxes
feat: add option to disable bounding boxes and store detections in EXIF
2 parents 93ff8e4 + d1473da commit 708c107

3 files changed: +138 -53 lines changed


sparrow.env

Lines changed: 2 additions & 1 deletion
@@ -9,4 +9,5 @@ SERVER_BASE_URL=https://server.sparrow-earth.com
 TZ=Etc/UTC
 ONLY_SAVE_ANIMALS=true
 FTP_USER=camera
-FTP_PASS=
+FTP_PASS=
+DRAW_BOXES=true

sparrow/inference.py

Lines changed: 135 additions & 52 deletions
@@ -2,8 +2,15 @@
 """
 This script uses Triton Inference Server to perform object detection using the MegaDetectorV6 model,
 and then for each "animal" detection, it crops the bounding box and sends it to a classification
-model (e.g., AI4GAmazonClassification) for species classification. The final annotated image is saved
-along with logging details in a CSV file.
+model (e.g., AI4GAmazonClassification) for species classification.
+
+Results are:
+  - Logged to CSV
+  - For JPEG outputs, all bounding boxes (with labels & scores) are stored as JSON in the EXIF
+    UserComment so the server side can turn overlays on/off later.
+
+By default, bounding boxes are drawn on the saved image. You can disable drawing with:
+  DRAW_BOXES=false
 """

 import os
@@ -13,6 +20,7 @@
 import logging
 import threading
 from datetime import datetime
+
 from PIL import Image, ImageFile, ImageDraw, ImageFont
 import numpy as np
 import tritonclient.http as httpclient
@@ -23,6 +31,7 @@
 from filelock import FileLock
 from utils.sparrow_id import get_hardware_id
 from utils.detection_utils import non_max_suppression, scale_boxes
+import piexif  # EXIF metadata

 # Setup Logging & Folders
 LOGS_DIR = "/app/logs"
@@ -40,6 +49,7 @@
 log = logging.getLogger("inference")

 ONLY_SAVE_ANIMALS = os.getenv("ONLY_SAVE_ANIMALS", "false").strip().lower() == "true"
+DRAW_BOXES = os.getenv("DRAW_BOXES", "true").strip().lower() == "true"

 # Model Config Sync
 CONFIG_DIR = "/app/config"
@@ -98,6 +108,7 @@

 os.makedirs(CONFIG_DIR, exist_ok=True)

+
 def load_model_config():
     """Load model_settings.json (create default if missing)."""
     if not os.path.isfile(MODEL_CONFIG_FILE):
@@ -112,6 +123,7 @@ def load_model_config():
         model_logger.error(f"Failed to load model_settings.json: {e}")
         return DEFAULT_MODEL_CONFIG.copy()

+
 def save_model_config(config):
     """Atomically save model_settings.json."""
     tmp_path = f"{MODEL_CONFIG_FILE}.tmp"
@@ -123,6 +135,7 @@ def save_model_config(config):
     except Exception as e:
         model_logger.error(f"Failed to save model_settings.json: {e}")

+
 def fetch_model_settings(unique_id, auth_key):
     """
     Fetch updated model settings from the server.
@@ -145,61 +158,50 @@ def fetch_model_settings(unique_id, auth_key):
     except Exception as e:
         model_logger.warning(f"Could not fetch model settings: {e}")

+
 def model_settings_fetch_loop(unique_id, auth_key):
-    """
-    Background thread that pings the server every 1 minute
-    to update model_settings.json if there's a change.
-    """
+    """Background thread to periodically fetch model settings."""
     model_logger.info("Started model settings background fetch thread.")
     while True:
         fetch_model_settings(unique_id, auth_key)
         time.sleep(60)

+
 def get_current_model_name():
-    """
-    Reads 'selected_model' from model_settings.json.
-    Returns the default 'AI4GAmazonClassification' if missing.
-    """
+    """Get current classification model name."""
     return load_model_config().get("selected_model", "AI4GAmazonClassification")

+
 def get_current_labels():
-    """
-    Reads 'lables' dict from model_settings.json.
-    Returns the default label set if missing.
-    """
+    """Get current label dictionary."""
     return load_model_config().get("lables", DEFAULT_MODEL_CONFIG["lables"])

+
 def is_classification_enabled():
-    """
-    Reads 'classification_enabled' from model_settings.json.
-    Defaults to True.
-    """
+    """Whether classification is enabled."""
     return load_model_config().get("classification_enabled", True)

+
 def is_keep_blanks_enabled():
-    """
-    Reads 'keep_blanks' from model_settings.json.
-    Defaults to False.
-    """
+    """Whether blank images should be kept."""
     return load_model_config().get("keep_blanks", False)

+
 def get_detection_threshold():
-    """
-    Reads 'detection_threshold' from model_settings.json.
-    Defaults to DEFAULT_MODEL_CONFIG['detection_threshold'].
-    """
+    """Get detection confidence threshold."""
     cfg = load_model_config()
     return cfg.get("detection_threshold", DEFAULT_MODEL_CONFIG["detection_threshold"])

+
 # Image & Preprocess Utils
 ImageFile.LOAD_TRUNCATED_IMAGES = True

+
 def load_font():
-    """
-    Return a Pillow built-in bitmap font.
-    """
+    """Return a Pillow built-in bitmap font."""
     return ImageFont.load_default()

+
 def letterbox(im, new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, stride=32):
     """Resize and pad image to meet stride-multiple constraints."""
     if isinstance(im, Image.Image):
@@ -232,10 +234,12 @@ def letterbox(im, new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, stride=32):
     im = F.pad(im * 255.0, padding, value=114) / 255.0
     return im

+
 # MegaDetector classes
 class_name_to_id = {0: "animal", 1: "person", 2: "vehicle"}
 colors = ["red", "blue", "purple"]

+
 def preprocess_classification(img):
     """
     Preprocess a PIL image for classification:
@@ -247,6 +251,7 @@ def preprocess_classification(img):
     img_np = np.expand_dims(img_np, axis=0).astype(np.float32)
     return img_np

+
 # Triton / IO Setup
 TRITON_URL = (os.getenv("TRITON_SERVER_URL") or os.getenv("TRITON_URL", "http://triton:8000")).strip().rstrip("/")
 if TRITON_URL.startswith(("http://", "https://")):
@@ -266,6 +271,7 @@ def preprocess_classification(img):
 csv_file = '/app/static/data/detections.csv'
 os.makedirs(os.path.dirname(csv_file), exist_ok=True)

+
 def write_to_csv(image_name, detection, confidence, date):
     """Append detection results to CSV."""
     file_exists = os.path.isfile(csv_file)
@@ -275,6 +281,41 @@ def write_to_csv(image_name, detection, confidence, date):
             writer.writerow(['Image Name', 'Detection', 'Confidence Score', 'Date'])
         writer.writerow([image_name, detection, confidence, date])

+
+def save_jpeg_with_boxes(img, boxes_meta, out_path):
+    """
+    Save a JPEG with bounding boxes stored as JSON in EXIF UserComment.
+
+    boxes_meta: list of dicts, each like:
+        {
+            "x1": float (normalized 0-1),
+            "y1": float,
+            "x2": float,
+            "y2": float,
+            "label": str,
+            "score": float,
+            "class_id": int,
+            "source": str,
+            "model": str or None
+        }
+    """
+    exif_bytes_in = img.info.get("exif", b"")
+    if exif_bytes_in:
+        try:
+            exif_dict = piexif.load(exif_bytes_in)
+        except Exception:
+            exif_dict = {"0th": {}, "Exif": {}, "GPS": {}, "1st": {}, "thumbnail": None}
+    else:
+        exif_dict = {"0th": {}, "Exif": {}, "GPS": {}, "1st": {}, "thumbnail": None}
+
+    payload = json.dumps(boxes_meta).encode("utf-8")
+    # EXIF UserComment should start with an encoding prefix
+    exif_dict["Exif"][piexif.ExifIFD.UserComment] = b"ASCII\0\0\0" + payload
+
+    exif_bytes_out = piexif.dump(exif_dict)
+    img.save(out_path, format="JPEG", exif=exif_bytes_out)
+
+
 # Background Settings Fetch
 try:
     with open(AUTH_KEY_PATH, "r") as f:
@@ -292,7 +333,7 @@ def write_to_csv(image_name, detection, confidence, date):

 if AUTH_KEY and UNIQUE_ID:
     model_thread = threading.Thread(
-        target=model_settings_fetch_loop,
+        target=model_settings_fetch_loop,
         args=(UNIQUE_ID, AUTH_KEY),
         daemon=True
     )
@@ -365,38 +406,43 @@ def write_to_csv(image_name, detection, confidence, date):
     md_confidence = pred[:, 4]
     md_class_id = pred[:, 5].astype(int)

-    annotated_img = image.copy()
-    draw = ImageDraw.Draw(annotated_img)
     font = load_font()

-    drew_any = False  # track whether we drew any boxes (after filtering)
-    skipped_count = 0  # track how many non-animal detections we skip
+    drew_any = False  # we had at least one kept detection
+    skipped_count = 0  # how many non-animal detections we skip
+
+    # Metadata for EXIF (one dict per detection)
+    boxes_meta = []
+    img_w, img_h = image.size
+
+    # Only create drawing context if we actually want boxes rendered
+    annotated_img = image.copy() if DRAW_BOXES else image
+    draw = ImageDraw.Draw(annotated_img) if DRAW_BOXES else None

     for i in range(len(pred)):
         cls_id = md_class_id[i]

         # Skip non-animals (person=1, vehicle=2) if ONLY_SAVE_ANIMALS is enabled
         if ONLY_SAVE_ANIMALS and cls_id in (1, 2):
-            # Log the skip with bbox + confidence
             try:
-                x1, y1, x2, y2 = [float(v) for v in xyxy[i]]
+                x1_s, y1_s, x2_s, y2_s = [float(v) for v in xyxy[i]]
             except Exception:
-                x1 = y1 = x2 = y2 = -1.0
+                x1_s = y1_s = x2_s = y2_s = -1.0
             label_skipped = "person" if cls_id == 1 else "vehicle"
-            conf = float(md_confidence[i])
+            conf_s = float(md_confidence[i])
             log.info(
-                f"Skipping {label_skipped} (conf={conf:.2f}) due to ONLY_SAVE_ANIMALS; "
-                f"image={image_name}, box=({x1:.1f},{y1:.1f},{x2:.1f},{y2:.1f})"
+                f"Skipping {label_skipped} (conf={conf_s:.2f}) due to ONLY_SAVE_ANIMALS; "
+                f"image={image_name}, box=({x1_s:.1f},{y1_s:.1f},{x2_s:.1f},{y2_s:.1f})"
             )
             skipped_count += 1
             continue

         md_label = class_name_to_id[cls_id]
         det_conf = md_confidence[i]
+        x1, y1, x2, y2 = xyxy[i]

         # Only run classification if it's an "animal" AND classification is enabled
         if cls_id == 0 and is_classification_enabled():
-            x1, y1, x2, y2 = xyxy[i]
             cropped = image.crop((x1, y1, x2, y2))
             cropped_np = preprocess_classification(cropped)

@@ -420,21 +466,50 @@ def write_to_csv(image_name, detection, confidence, date):

             write_to_csv(image_name, detected_class, clf_conf, date)
             label = f"{detected_class} {clf_conf:.2f}"
+
+            stored_label = detected_class
+            stored_conf = clf_conf
+            stored_model = current_model_name
         else:
             # For person/vehicle, or if classification disabled, use MD label only
-            # (This path is not reached for non-animals when ONLY_SAVE_ANIMALS skipped above)
             write_to_csv(image_name, md_label, det_conf, date)
             label = f"{md_label} {det_conf:.2f}"

-        # Draw bounding box and label
-        draw.rectangle(xyxy[i], outline=colors[cls_id], width=2)
-        text_bbox = draw.textbbox((xyxy[i][0], xyxy[i][1] - 20), label, font=font)
-        draw.rectangle(
-            [text_bbox[0], text_bbox[1] - 2, text_bbox[2] + 2, text_bbox[3] + 2],
-            fill=colors[cls_id]
+            stored_label = md_label
+            stored_conf = float(det_conf)
+            stored_model = None
+
+        # Optionally draw bounding box and label
+        if DRAW_BOXES and draw is not None:
+            draw.rectangle(xyxy[i], outline=colors[cls_id], width=2)
+            text_bbox = draw.textbbox((xyxy[i][0], xyxy[i][1] - 20), label, font=font)
+            draw.rectangle(
+                [text_bbox[0], text_bbox[1] - 2, text_bbox[2] + 2, text_bbox[3] + 2],
+                fill=colors[cls_id]
+            )
+            draw.text((xyxy[i][0] + 2, xyxy[i][1] - 20), label, font=font, fill='white')
+
+        drew_any = True  # we have at least one kept detection
+
+        # Store normalized coordinates + label in metadata list
+        norm_x1 = float(x1) / float(img_w)
+        norm_y1 = float(y1) / float(img_h)
+        norm_x2 = float(x2) / float(img_w)
+        norm_y2 = float(y2) / float(img_h)
+
+        boxes_meta.append(
+            {
+                "x1": norm_x1,
+                "y1": norm_y1,
+                "x2": norm_x2,
+                "y2": norm_y2,
+                "label": stored_label,
+                "score": float(stored_conf),
+                "class_id": int(cls_id),
+                "source": "megadetectorv6",
+                "model": stored_model,
+            }
         )
-        draw.text((xyxy[i][0] + 2, xyxy[i][1] - 20), label, font=font, fill='white')
-        drew_any = True

     # Per-image summary for skipped detections
     if ONLY_SAVE_ANIMALS and skipped_count:
@@ -451,8 +526,16 @@ def write_to_csv(image_name, detection, confidence, date):
         print(f"Removed source file {image_path} (all detections filtered)")
         continue

-    annotated_img.save(os.path.join(output_dir, image_name))
-    print(f"Saved {os.path.join(output_dir, image_name)}")
+    # Save CLEAN image, embedding boxes in EXIF if JPEG
+    out_path = os.path.join(output_dir, image_name)
+    img_to_save = image  # always save original pixels
+
+    if image_name.lower().endswith((".jpg", ".jpeg")):
+        save_jpeg_with_boxes(img_to_save, boxes_meta, out_path)
+    else:
+        img_to_save.save(out_path)
+
+    print(f"Saved {out_path}")

     # Remove original after processing
     os.remove(image_path)
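
For reference, here is a minimal sketch (not part of this diff) of how a downstream consumer could read the embedded detections back out of a saved JPEG. It assumes the file was written by save_jpeg_with_boxes() above; the read_boxes_from_exif helper and the example path are illustrative only.

import json
import piexif

def read_boxes_from_exif(jpeg_path):
    """Return the list of detection dicts stored in EXIF UserComment, or [] if none."""
    exif_dict = piexif.load(jpeg_path)
    raw = exif_dict.get("Exif", {}).get(piexif.ExifIFD.UserComment, b"")
    if not raw:
        return []
    # Strip the 8-byte "ASCII\0\0\0" encoding prefix written by save_jpeg_with_boxes()
    payload = raw[8:] if raw[:5] == b"ASCII" else raw
    try:
        return json.loads(payload.decode("utf-8"))
    except ValueError:
        return []

# Example usage (hypothetical path):
# for box in read_boxes_from_exif("some_saved_image.jpg"):
#     print(box["label"], box["score"], box["x1"], box["y1"], box["x2"], box["y2"])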

sparrow/requirements.txt

Lines changed: 1 addition & 0 deletions
@@ -25,3 +25,4 @@ librosa==0.10.2.post1
 numba==0.59.1
 llvmlite==0.42.0
 pyftpdlib==2.1.0
+piexif==1.1.3
