Back to AI/ML

Computer Vision Basics

Master Computer Vision with OpenCV and deep learning. Learn image processing, object detection, face recognition, and image segmentation.

Video Tutorial

Introduction to Computer Vision

Computer Vision enables computers to understand and interpret visual information from the world. It combines image processing, machine learning, and deep learning to extract meaningful insights from images and videos.

Examples:

# Common Computer Vision Tasks:
- Image Classification
- Object Detection
- Image Segmentation
- Facial Recognition
- Pose Estimation
- Optical Character Recognition (OCR)

Key applications of computer vision

Setting Up Computer Vision Environment

Install the essential libraries for computer vision, including OpenCV, Pillow (the maintained fork of PIL), and deep learning frameworks.

Examples:

# Install core computer vision libraries (OpenCV, Pillow, Matplotlib)
pip install opencv-python pillow matplotlib

# Install deep learning frameworks (TensorFlow, PyTorch and torchvision)
pip install tensorflow torch torchvision

# Install additional tools (scikit-image)
pip install scikit-image

Install essential CV libraries

import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

# Verify the OpenCV installation by printing the version of the imported cv2 module
print(f"OpenCV version: {cv2.__version__}")

Import and verify CV libraries

Image Loading and Basic Operations

Learn to load, display, and perform basic operations on images using OpenCV and PIL.

Examples:

import cv2
import matplotlib.pyplot as plt

# Load image (OpenCV returns the pixels as a NumPy array in BGR channel order)
image_path = 'image.jpg'
img = cv2.imread(image_path)

# cv2.imread does not raise on a missing or unreadable file; it returns None.
# Fail early with a clear message instead of a cryptic error inside cvtColor.
if img is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Convert BGR to RGB (OpenCV uses BGR by default, Matplotlib expects RGB)
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Display image
plt.figure(figsize=(10, 6))
plt.imshow(img_rgb)
plt.axis('off')
plt.title('Original Image')
plt.show()

# Get image properties
print(f"Image shape: {img.shape}")
print(f"Image dtype: {img.dtype}")

Load and display images with OpenCV

# Resize to a fixed 300x300 output
resized = cv2.resize(img, dsize=(300, 300))

# Crop with NumPy slicing: rows 100-399 first, then columns 200-499
cropped = img[100:400, 200:500]

# Rotate by 45 degrees about the image centre; the output keeps the input size
height, width = img.shape[:2]
center = (width // 2, height // 2)
rotation_matrix = cv2.getRotationMatrix2D(center=center, angle=45, scale=1.0)
rotated = cv2.warpAffine(img, rotation_matrix, dsize=(width, height))

# Mirror the image: flipCode=1 flips horizontally, flipCode=0 flips vertically
flipped = cv2.flip(img, flipCode=1)

Basic image transformations

Image Preprocessing

Preprocessing is crucial for improving model performance. Learn common preprocessing techniques.

Examples:

import cv2
import numpy as np

# Grayscale copy, used by the edge-detection and thresholding steps below
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Smooth the colour image with a 5x5 Gaussian kernel
blurred = cv2.GaussianBlur(img, ksize=(5, 5), sigmaX=0)

# Canny edge detection with lower/upper hysteresis thresholds of 100 and 200
edges = cv2.Canny(gray, threshold1=100, threshold2=200)

# Global thresholding: one fixed cut-off (127) for the whole image
_, binary = cv2.threshold(gray, thresh=127, maxval=255, type=cv2.THRESH_BINARY)

# Adaptive thresholding: the cut-off is computed per neighbourhood
adaptive = cv2.adaptiveThreshold(
    gray,
    maxValue=255,
    adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    thresholdType=cv2.THRESH_BINARY,
    blockSize=11,
    C=2,
)

Common preprocessing operations

# Histogram equalization on the grayscale image (improves contrast)
equalized = cv2.equalizeHist(gray)

# Min-max normalization: rescale pixel values to span the 0-255 range
normalized = cv2.normalize(
    img, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX
)

# Non-local-means denoising for colour images
denoised = cv2.fastNlMeansDenoisingColored(
    img, None, h=10, hColor=10, templateWindowSize=7, searchWindowSize=21
)

Advanced preprocessing techniques

Image Classification with CNN

Build a Convolutional Neural Network for image classification using TensorFlow/Keras.

Examples:

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import cifar10

# Load CIFAR-10 dataset (train and test splits)
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Normalize pixel values to the [0, 1] range
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

# Build CNN model
model = models.Sequential([
    # Declare the input shape with an explicit Input layer rather than
    # input_shape= on the first Conv2D (the form recommended by current Keras).
    layers.Input(shape=(32, 32, 3)),

    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(64, (3, 3), activation='relu'),

    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    # One output per class, as probabilities
    layers.Dense(10, activation='softmax')
])

# Labels are integer class ids, hence the "sparse" cross-entropy loss
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Train model, holding back 20% of the training data for validation
history = model.fit(
    X_train, y_train,
    epochs=10,
    batch_size=64,
    validation_split=0.2
)

# Evaluate on the test split, which is never seen during training
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test accuracy: {test_accuracy:.4f}")

Complete CNN for image classification

Object Detection with Pre-trained Models

Use pre-trained models for object detection. We'll run a pre-trained YOLOv3 network through OpenCV's DNN module.

Examples:

import cv2
import numpy as np

# Detection thresholds
CONF_THRESHOLD = 0.5  # minimum class score for a detection to be kept
NMS_THRESHOLD = 0.4   # overlap threshold used by non-max suppression

# Load YOLO model
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
# Ask the network for its output layer names directly. Indexing
# getLayerNames() with getUnconnectedOutLayers() breaks on OpenCV versions
# where the latter returns an Nx1 array instead of a flat one.
output_layers = net.getUnconnectedOutLayersNames()

# Load class names (one label per line)
with open('coco.names', 'r') as f:
    classes = [line.strip() for line in f]

# Load and preprocess image
image_path = 'image.jpg'
img = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file cannot be read
if img is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
height, width = img.shape[:2]
# Scale pixels by 0.00392 (about 1/255), resize to 416x416, swap the R and B channels
blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)

# Perform detection
net.setInput(blob)
outputs = net.forward(output_layers)

# Process detections
boxes = []
confidences = []
class_ids = []

for output in outputs:
    for detection in output:
        # Values 0-3 hold the box centre and size relative to the image;
        # the per-class scores start at index 5.
        scores = detection[5:]
        class_id = int(np.argmax(scores))
        confidence = scores[class_id]

        if confidence > CONF_THRESHOLD:
            # Get bounding box coordinates in pixels
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            # Convert centre-based box to top-left corner
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)

            boxes.append([x, y, w, h])
            confidences.append(float(confidence))
            class_ids.append(class_id)

# Apply non-max suppression
indices = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD)

# Draw bounding boxes
# Flatten first: depending on the OpenCV version NMSBoxes returns a flat
# array, an Nx1 array, or an empty tuple when nothing survives.
for i in np.array(indices).flatten():
    x, y, w, h = boxes[i]
    label = str(classes[class_ids[i]])
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(img, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

Object detection with YOLO

Face Detection and Recognition

Detect and recognize faces in images using OpenCV's Haar Cascades and deep learning models.

Examples:

import cv2

# Load Haar Cascade for face detection
cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_path)
# CascadeClassifier does not raise on a bad path; it just stays empty
if face_cascade.empty():
    raise RuntimeError(f"Could not load Haar cascade: {cascade_path}")

# Load image
image_path = 'people.jpg'
img = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file cannot be read
if img is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
# The detector is run on the grayscale version of the image
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Detect faces
faces = face_cascade.detectMultiScale(
    gray,
    scaleFactor=1.1,
    minNeighbors=5,
    minSize=(30, 30)
)

print(f"Found {len(faces)} faces")

# Draw rectangles around faces (each detection is x, y, width, height)
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)

# Display result and wait for a key press before closing the window
cv2.imshow('Faces', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

Face detection with Haar Cascades

Image Segmentation

Image segmentation divides an image into meaningful regions. Learn color-based and clustering-based segmentation techniques.

Examples:

import cv2
import numpy as np

# Read the input image
img = cv2.imread('image.jpg')

# Work in HSV so a colour can be selected by a range per channel
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# Lower and upper HSV bounds of the colour to keep (here: blue objects)
lower_blue = np.array([100, 50, 50])
upper_blue = np.array([130, 255, 255])

# Binary mask of the pixels that fall inside the bounds
mask = cv2.inRange(hsv, lower_blue, upper_blue)

# Keep only the masked pixels of the original image
result = cv2.bitwise_and(img, img, mask=mask)

# Show each stage in its own window, then wait for a key press
for window_title, frame in (('Original', img), ('Mask', mask), ('Result', result)):
    cv2.imshow(window_title, frame)
cv2.waitKey(0)
cv2.destroyAllWindows()

Color-based image segmentation

# K-means clustering for segmentation
img = cv2.imread('image.jpg')
# One row per pixel, three colour values per row, as float32 for cv2.kmeans
img_flat = img.reshape((-1, 3)).astype(np.float32)

# Stop after 100 iterations or once the epsilon of 0.2 is reached
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2)
k = 5  # number of colour clusters
_, labels, centers = cv2.kmeans(
    img_flat, k, None, criteria,
    attempts=10,
    flags=cv2.KMEANS_RANDOM_CENTERS,
)

# Replace every pixel with its cluster centre and restore the image shape
centers = centers.astype(np.uint8)
segmented = centers[labels.flatten()].reshape(img.shape)

K-means clustering for segmentation

Data Augmentation

Data augmentation increases dataset diversity by applying transformations to images, improving model generalization.

Examples:

import cv2
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

NUM_AUGMENTED = 5  # number of augmented images to display

# Create data augmentation generator
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    fill_mode='nearest'
)

# Load and prepare image
image_path = 'image.jpg'
img = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file cannot be read
if img is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# Add a leading batch axis: flow() expects a batch of images
img = np.expand_dims(img, axis=0)

# Generate augmented images
# flow() keeps yielding batches, so stop explicitly after NUM_AUGMENTED
for i, batch in enumerate(datagen.flow(img, batch_size=1), start=1):
    plt.figure(figsize=(4, 4))
    plt.imshow(batch[0].astype('uint8'))
    plt.axis('off')
    plt.show()
    if i >= NUM_AUGMENTED:
        break

Data augmentation with ImageDataGenerator

Quick Reference

Essential Libraries

  • opencv-python - Image processing
  • PIL/Pillow - Image manipulation
  • tensorflow - Deep learning
  • matplotlib - Visualization

Best Practices

  • ✓ Preprocess images consistently
  • ✓ Use data augmentation
  • ✓ Leverage pre-trained models
  • ✓ Normalize pixel values