Overview
I recently needed to determine the coordinates of multiple cropped sub-images within a larger original image. This article is a memo summarizing the method I used.
I introduce a method using OpenCV’s SIFT (Scale-Invariant Feature Transform) to perform feature point matching between template images and the original image, estimate the affine transformation, and obtain the coordinates.
Implementation
Required Libraries
pip install opencv-python numpy tqdm
Python Code
The following code matches template images (PNG images in templates_dir) against a specified large image (image_path) using SIFT, and obtains the coordinates within the original image.
import cv2
import numpy as np
from glob import glob
from tqdm import tqdm
import os
# Load image in grayscale
def load_image_gray(path):
    """Read the file at *path* as a single-channel grayscale image.

    Returns the image array, or None (after printing a warning) when the
    file is missing or unreadable — cv2.imread does not raise on failure.
    """
    gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        print(f"Image not found: {path}")
    return gray
# Extract features
def extract_features(image, detector):
    """Run the detector over the whole image (no mask).

    Returns the (keypoints, descriptors) pair produced by
    detector.detectAndCompute; descriptors may be None for blank images.
    """
    keypoints, descriptors = detector.detectAndCompute(image, None)
    return keypoints, descriptors
# Matching process
def match_features(des1, des2, matcher, ratio_test=0.7, min_matches=4):
    """Match descriptor sets with kNN and filter by Lowe's ratio test.

    Parameters
    ----------
    des1, des2 : descriptor arrays for the query (image) and train (template).
    matcher    : an object exposing knnMatch(des1, des2, k=2).
    ratio_test : best match must be this fraction better than the runner-up.
    min_matches: minimum surviving matches required to report success.

    Returns the list of good matches, or None when fewer than *min_matches*
    survive the ratio test.
    """
    matches = matcher.knnMatch(des1, des2, k=2)
    # knnMatch may return tuples with fewer than 2 entries when a template
    # has very few descriptors; unpacking those with "for m, n in matches"
    # would raise ValueError, so only consider complete pairs.
    good_matches = [
        pair[0]
        for pair in matches
        if len(pair) == 2 and pair[0].distance < ratio_test * pair[1].distance
    ]
    return good_matches if len(good_matches) >= min_matches else None
# Affine transformation estimation
def estimate_affine_transform(kp1, kp2, good_matches):
    """Estimate a partial 2D affine transform (rotation/scale/translation)
    mapping template keypoints (kp2) onto image keypoints (kp1), using
    RANSAC to reject outlier matches. Returns the 2x3 matrix or None.
    """
    template_pts = np.float32(
        [kp2[m.trainIdx].pt for m in good_matches]
    ).reshape(-1, 1, 2)
    image_pts = np.float32(
        [kp1[m.queryIdx].pt for m in good_matches]
    ).reshape(-1, 1, 2)
    matrix, _inlier_mask = cv2.estimateAffinePartial2D(
        template_pts, image_pts, method=cv2.RANSAC, ransacReprojThreshold=5.0
    )
    return matrix
# Draw matched rectangle on image
def draw_matched_rectangle(image, M_affine, templ_shape):
    """Project the template's four corners into *image* via M_affine,
    draw the resulting (possibly rotated) quadrilateral in red, and
    return the projected corner coordinates.
    """
    h, w = templ_shape
    # Corners in template coordinates, clockwise from top-left.
    corners = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
    projected = cv2.transform(np.array([corners]), M_affine)[0]
    cv2.polylines(
        image,
        [np.int32(projected)],
        isClosed=True,
        color=(0, 0, 255),
        thickness=2,
    )
    return projected
# Main processing
def main(image_path, templates_dir, output_path):
    """Locate every template image inside the target image and save an
    annotated copy.

    Parameters
    ----------
    image_path    : path of the large target image.
    templates_dir : glob pattern selecting the template images.
    output_path   : where the annotated result image is written.
    """
    # Load image (grayscale copy for matching, color copy for drawing)
    img = load_image_gray(image_path)
    dst_img = cv2.imread(image_path)
    # Bail out early if the target image could not be read; otherwise
    # SIFT would fail with an opaque error on a None input.
    if img is None or dst_img is None:
        return
    templ_paths = glob(templates_dir)
    if not templ_paths:
        print(f"No template images matched: {templates_dir}")
        return
    # SIFT feature detector & BFMatcher setup
    sift = cv2.SIFT_create()
    bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=False)
    kp1, des1 = extract_features(img, sift)
    # If no features found
    if des1 is None:
        print("No features found in the target image.")
        return
    for templ_path in tqdm(templ_paths):
        templ = load_image_gray(templ_path)
        if templ is None:
            continue
        kp2, des2 = extract_features(templ, sift)
        if des2 is None:
            continue
        good_matches = match_features(des1, des2, bf)
        if good_matches is None:
            print(f"Insufficient feature matches: {templ_path}")
            continue
        # Affine transformation estimation
        M_affine = estimate_affine_transform(kp1, kp2, good_matches)
        if M_affine is None:
            print(f"Affine transformation estimation failed: {templ_path}")
            continue
        # Draw rectangle
        best_dst = draw_matched_rectangle(dst_img, M_affine, templ.shape)
        # Display filename near the rectangle
        x, y, _, _ = cv2.boundingRect(best_dst)
        base_name = os.path.splitext(os.path.basename(templ_path))[0]
        cv2.putText(dst_img, base_name, (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
    # Save result
    cv2.imwrite(output_path, dst_img)
    print(f"Result image saved: {output_path}")
Execution
# Execution entry point: edit the three paths below before running.
if __name__ == "__main__":
    # Parameter settings
    params = {
        "image_path": "/xxx/default.jpg",
        "templates_dir": "/xxx/*.png",
        "output_path": "/xxx/match_result.jpg",
    }
    main(**params)
Summary
This article introduced a method for estimating where sub-images are located in the original image using SIFT-based feature point matching, and identifying positions through affine transformation.
- SIFT is used for feature extraction (its patent expired in 2020, and it has been included in the main opencv-python package since OpenCV 4.4)
- BFMatcher is used for feature matching, and RANSAC is used for noise removal
- Affine transformation is used to estimate coordinates and draw rectangles on the original image
- Result images are saved for visualization of where each sub-image is located
This method can be applied to tasks such as locating partial images in historical maps, OCR region detection, and image comparison.
Future challenges:
- Correction for rotated images
- Consideration of faster algorithms than SIFT (ORB, AKAZE, etc.)
- Processing speed optimization (feature point filtering)
There may be some incomplete points, but I hope this serves as a useful reference.