Overview

I recently needed to determine, given multiple cropped sub-images, where each one is located (its coordinates) within the larger original image. This article is a memo summarizing the method for doing this.

I introduce a method using OpenCV’s SIFT (Scale-Invariant Feature Transform) to perform feature point matching between template images and the original image, estimate the affine transformation, and obtain the coordinates.

Implementation

Required Libraries

pip install opencv-python numpy tqdm

Python Code

The following code matches template images (PNG images in templates_dir) against a specified large image (image_path) using SIFT, and obtains the coordinates within the original image.

import cv2
import numpy as np
from glob import glob
from tqdm import tqdm
import os

# Read an image from disk as a single-channel grayscale array.
def load_image_gray(path):
    """Return the grayscale image at *path*, or None when it cannot be read.

    cv2.imread does not raise on failure; it returns None, so the caller
    must check the result before using it.
    """
    gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        print(f"Image not found: {path}")
    return gray

# Run the detector over the whole image (no mask) and return its output.
def extract_features(image, detector):
    """Detect keypoints and compute descriptors for *image*.

    Returns the (keypoints, descriptors) pair produced by
    detector.detectAndCompute; descriptors may be None when no
    features are found.
    """
    keypoints_and_descriptors = detector.detectAndCompute(image, None)
    return keypoints_and_descriptors

# Matching process
def match_features(des1, des2, matcher, ratio_test=0.7, min_matches=4):
    """Match descriptor sets and keep only confident matches.

    Applies Lowe's ratio test: a match survives when its best distance is
    clearly smaller than the second-best (distance < ratio_test * second).

    Returns the list of good matches, or None when fewer than
    *min_matches* survive (too weak to estimate a transform from).
    """
    matches = matcher.knnMatch(des1, des2, k=2)
    # knnMatch may return pairs with fewer than 2 neighbours when the
    # descriptor set is small; guard the unpack so those don't crash
    # the ratio test.
    good_matches = [
        pair[0]
        for pair in matches
        if len(pair) == 2 and pair[0].distance < ratio_test * pair[1].distance
    ]
    return good_matches if len(good_matches) >= min_matches else None

# Affine transformation estimation
def estimate_affine_transform(kp1, kp2, good_matches):
    """Estimate the partial affine transform mapping template points to image points.

    kp1 are keypoints of the target image (queryIdx side), kp2 of the
    template (trainIdx side). Uses RANSAC to reject outlier matches.
    Returns a 2x3 matrix, or None when estimation fails.
    """
    template_pts = np.float32(
        [kp2[match.trainIdx].pt for match in good_matches]
    ).reshape(-1, 1, 2)
    image_pts = np.float32(
        [kp1[match.queryIdx].pt for match in good_matches]
    ).reshape(-1, 1, 2)
    matrix, _inlier_mask = cv2.estimateAffinePartial2D(
        template_pts, image_pts, method=cv2.RANSAC, ransacReprojThreshold=5.0
    )
    return matrix

# Draw matched rectangle on image
def draw_matched_rectangle(image, M_affine, templ_shape):
    """Project the template's bounding rectangle into *image* and outline it.

    templ_shape is the (height, width) of the template. The four corners
    are mapped through the 2x3 affine matrix, drawn as a closed red
    polygon on *image* (mutated in place), and returned as float points.
    """
    height, width = templ_shape
    corners = np.float32([[0, 0], [width, 0], [width, height], [0, height]])
    projected = cv2.transform(np.array([corners]), M_affine)[0]
    cv2.polylines(
        image, [np.int32(projected)], isClosed=True, color=(0, 0, 255), thickness=2
    )
    return projected

# Main processing
def main(image_path, templates_dir, output_path):
    """Locate every template image inside the target image and save an annotated copy.

    image_path: path of the large target image.
    templates_dir: glob pattern for the template images (e.g. "/dir/*.png").
    output_path: where the annotated result image is written.
    """
    # Load image and template list
    img = load_image_gray(image_path)
    if img is None:
        # load_image_gray already printed the error; nothing to match against.
        return
    templ_paths = glob(templates_dir)
    if not templ_paths:
        print(f"No template images matched: {templates_dir}")
        return
    dst_img = cv2.imread(image_path)
    if dst_img is None:
        # Grayscale load succeeded but the color reload failed; bail out
        # rather than crash when drawing.
        print(f"Image not found: {image_path}")
        return

    # SIFT feature detector & BFMatcher setup
    # crossCheck must stay off for knnMatch + ratio test to work.
    sift = cv2.SIFT_create()
    bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=False)
    kp1, des1 = extract_features(img, sift)

    # If no features found
    if des1 is None:
        print("No features found in the target image.")
        return

    for templ_path in tqdm(templ_paths):
        templ = load_image_gray(templ_path)
        if templ is None:
            continue

        kp2, des2 = extract_features(templ, sift)
        if des2 is None:
            continue

        good_matches = match_features(des1, des2, bf)
        if good_matches is None:
            print(f"Insufficient feature matches: {templ_path}")
            continue

        # Affine transformation estimation
        M_affine = estimate_affine_transform(kp1, kp2, good_matches)
        if M_affine is None:
            print(f"Affine transformation estimation failed: {templ_path}")
            continue

        # Draw rectangle
        best_dst = draw_matched_rectangle(dst_img, M_affine, templ.shape)

        # Display filename near the rectangle
        x, y, _, _ = cv2.boundingRect(best_dst)
        base_name = os.path.splitext(os.path.basename(templ_path))[0]
        cv2.putText(dst_img, base_name, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    # Save result
    cv2.imwrite(output_path, dst_img)
    print(f"Result image saved: {output_path}")

Execution

# Execution
if __name__ == "__main__":
    # Parameter settings — replace the /xxx/ placeholders with real paths.
    IMAGE_PATH = "/xxx/default.jpg"      # large target image to search in
    TEMPLATES_DIR = "/xxx/*.png"         # glob pattern for template images
    OUTPUT_PATH = "/xxx/match_result.jpg"  # annotated result written here
    main(IMAGE_PATH, TEMPLATES_DIR, OUTPUT_PATH)

Summary

This article introduced a method for estimating where sub-images are located in the original image using SIFT-based feature point matching, and identifying positions through affine transformation.

  • SIFT is used for feature extraction (freely available since OpenCV 4.4)
  • BFMatcher is used for feature matching, and RANSAC is used for noise removal
  • Affine transformation is used to estimate coordinates and draw rectangles on the original image
  • Result images are saved for visualization of where each sub-image is located

This method can be applied to tasks such as locating partial images in historical maps, OCR region detection, and image comparison.

Future challenges:

  • Correction for rotated images
  • Consideration of faster algorithms than SIFT (ORB, AKAZE, etc.)
  • Processing speed optimization (feature point filtering)

There may be some incomplete points, but I hope this serves as a useful reference.