Master Computer Vision with OpenCV and deep learning. Learn image processing, object detection, face recognition, and image segmentation.
Computer Vision enables computers to understand and interpret visual information from the world. It combines image processing, machine learning, and deep learning to extract meaningful insights from images and videos.
# Common Computer Vision Tasks:
- Image Classification
- Object Detection
- Image Segmentation
- Facial Recognition
- Pose Estimation
- Optical Character Recognition (OCR)
Key applications of computer vision
Install essential libraries for computer vision including OpenCV, PIL, and deep learning frameworks.
# Install computer vision libraries
pip install opencv-python pillow matplotlib
# Install deep learning libraries
pip install tensorflow torch torchvision
# Install additional tools
pip install scikit-image
Install essential CV libraries
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
# Verify OpenCV installation by printing the version string of the imported module
print(f"OpenCV version: {cv2.__version__}")
# Caption: Import and verify CV libraries
Learn to load, display, and perform basic operations on images using OpenCV and PIL.
import cv2
import matplotlib.pyplot as plt
# Load image. cv2.imread does not raise on a missing or unreadable file; it
# returns None, so fail here with a clear message instead of inside cvtColor.
image_path = 'image.jpg'
img = cv2.imread(image_path)
if img is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Convert BGR to RGB (OpenCV uses BGR by default, matplotlib expects RGB)
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Display image
plt.figure(figsize=(10, 6))
plt.imshow(img_rgb)
plt.axis('off')
plt.title('Original Image')
plt.show()

# Get image properties
print(f"Image shape: {img.shape}")
print(f"Image dtype: {img.dtype}")
# Caption: Load and display images with OpenCV
# Basic geometric transformations applied to the image loaded earlier as `img`.
height, width = img.shape[0], img.shape[1]

# Resize image to a fixed 300x300 output
resized = cv2.resize(img, (300, 300))

# Crop image with array slicing: rows 100-399, columns 200-499
cropped = img[100:400, 200:500]

# Rotate image by 45 degrees about its center, scale 1.0, same output size
center = (width // 2, height // 2)
rotation_matrix = cv2.getRotationMatrix2D(center, 45, 1.0)
rotated = cv2.warpAffine(img, rotation_matrix, (width, height))

# Flip image
flipped = cv2.flip(img, 1)  # 1 for horizontal, 0 for vertical
# Caption: Basic image transformations
Preprocessing is crucial for improving model performance. Learn common preprocessing techniques.
import cv2
import numpy as np
# Common preprocessing steps applied to the previously loaded BGR image `img`.

# Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Apply Gaussian blur with a 5x5 kernel (sigma argument is 0)
blurred = cv2.GaussianBlur(img, (5, 5), 0)

# Edge detection with Canny, using thresholds 100 and 200 on the grayscale image
edges = cv2.Canny(gray, 100, 200)

# Thresholding: pixels above 127 become 255; the first return value is unused
_, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

# Adaptive thresholding (Gaussian-weighted neighbourhood, block size 11, constant 2)
adaptive = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    cv2.THRESH_BINARY, 11, 2
)
# Caption: Common preprocessing operations
# Further preprocessing; relies on `img` (BGR) and `gray` defined in the
# preceding snippets.

# Histogram equalization (improve contrast) on the single-channel image
equalized = cv2.equalizeHist(gray)

# Normalize image: min-max rescale of pixel values to the 0-255 range
normalized = cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX)

# Denoise image (non-local means on the color image;
# arguments are h=10, hColor=10, templateWindowSize=7, searchWindowSize=21)
denoised = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)
# Caption: Advanced preprocessing techniques
Build a Convolutional Neural Network for image classification using TensorFlow/Keras.
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import cifar10
# Load CIFAR-10 dataset
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Normalize pixel values from 0-255 to the 0.0-1.0 range
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

# Build CNN model: three convolution stages, then a small dense classifier
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax')  # one output per class
])

# The sparse loss is used because the labels are passed as loaded,
# without one-hot encoding.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Train model, holding out 20% of the training data for validation
history = model.fit(
    X_train, y_train,
    epochs=10,
    batch_size=64,
    validation_split=0.2
)
# Caption: Complete CNN for image classification
Use pre-trained models for object detection. We'll use OpenCV's DNN module with pre-trained models.
import cv2
import numpy as np
# Load YOLO model
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
# Ask the network for its output layer names directly. Indexing getLayerNames()
# with getUnconnectedOutLayers() is fragile: depending on the OpenCV version that
# call returns either a flat array or an Nx1 array, and the latter cannot be
# used as a plain integer index.
output_layers = net.getUnconnectedOutLayersNames()

# Load class names (one label per line)
with open('coco.names', 'r') as f:
    classes = [line.strip() for line in f]

# Load and preprocess image. cv2.imread returns None instead of raising when
# the file cannot be read, so check before touching img.shape.
img = cv2.imread('image.jpg')
if img is None:
    raise FileNotFoundError("Could not read image: image.jpg")
height, width = img.shape[:2]
# 0.00392 ~= 1/255 scales pixels to [0, 1]; True swaps the R and B channels.
blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)

# Perform detection
net.setInput(blob)
outputs = net.forward(output_layers)

# Process detections
boxes = []
confidences = []
class_ids = []
for output in outputs:
    for detection in output:
        # Entries from index 5 onward are the per-class scores
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.5:
            # Get bounding box coordinates: the first four values are the box
            # center and size relative to the image, so scale them to pixels
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            # Convert the center point to the top-left corner
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)
            boxes.append([x, y, w, h])
            confidences.append(float(confidence))
            class_ids.append(int(class_id))

# Apply non-max suppression (score threshold 0.5, NMS threshold 0.4)
indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

# Draw bounding boxes. Flatten the indices so the loop works whether NMSBoxes
# returned a flat array, an Nx1 array, or an empty result.
for i in np.array(indices).flatten():
    x, y, w, h = boxes[i]
    label = str(classes[class_ids[i]])
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(img, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
# Caption: Object detection with YOLO
Detect and recognize faces in images using OpenCV's Haar Cascades and deep learning models.
import cv2
# Load Haar Cascade for face detection
face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
)
# The constructor does not raise when the XML file cannot be loaded; an empty
# classifier would only fail later inside detectMultiScale.
if face_cascade.empty():
    raise RuntimeError("Could not load haarcascade_frontalface_default.xml")

# Load image. cv2.imread returns None instead of raising on failure.
img = cv2.imread('people.jpg')
if img is None:
    raise FileNotFoundError("Could not read image: people.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Detect faces on the grayscale image
faces = face_cascade.detectMultiScale(
    gray,
    scaleFactor=1.1,
    minNeighbors=5,
    minSize=(30, 30)
)
print(f"Found {len(faces)} faces")

# Draw rectangles around faces; each entry is (x, y, width, height)
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)

# Display result and wait for a key press before closing the window
cv2.imshow('Faces', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Caption: Face detection with Haar Cascades
Image segmentation divides an image into meaningful regions. Learn color-based and clustering-based segmentation techniques.
import cv2
import numpy as np
# Load image. cv2.imread returns None instead of raising when the file cannot
# be read, so fail early with a clear message.
img = cv2.imread('image.jpg')
if img is None:
    raise FileNotFoundError("Could not read image: image.jpg")

# Convert to HSV, where a color can be selected by a hue range
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# Define color range for segmentation (e.g., blue objects)
lower_blue = np.array([100, 50, 50])
upper_blue = np.array([130, 255, 255])

# Create mask: 255 where the HSV pixel lies inside the range, 0 elsewhere
mask = cv2.inRange(hsv, lower_blue, upper_blue)

# Apply mask to original image (pixels outside the mask become black)
result = cv2.bitwise_and(img, img, mask=mask)

# Display results and wait for a key press before closing the windows
cv2.imshow('Original', img)
cv2.imshow('Mask', mask)
cv2.imshow('Result', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Caption: Color-based image segmentation
# K-means clustering for segmentation
img = cv2.imread('image.jpg')
# cv2.imread returns None instead of raising when the file cannot be read.
if img is None:
    raise FileNotFoundError("Could not read image: image.jpg")

# Flatten to one row per pixel with three channel values, as float32 for cv2.kmeans
img_flat = img.reshape((-1, 3))
img_flat = np.float32(img_flat)

# Define criteria and apply K-means: stop after 100 iterations or when the
# change falls below 0.2, whichever comes first
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2)
k = 5
# 10 attempts with random initial centers; the first return value is unused
_, labels, centers = cv2.kmeans(
    img_flat, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS
)

# Convert back to 8-bit values and replace every pixel by its cluster center
centers = np.uint8(centers)
segmented = centers[labels.flatten()]
segmented = segmented.reshape(img.shape)
# Caption: K-means clustering for segmentation
Data augmentation increases dataset diversity by applying transformations to images, improving model generalization.
# cv2 and matplotlib are used below, so import them here to keep the snippet
# self-contained.
import cv2
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Create data augmentation generator
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    fill_mode='nearest'
)

# Load and prepare image. cv2.imread returns None instead of raising when the
# file cannot be read.
img = cv2.imread('image.jpg')
if img is None:
    raise FileNotFoundError("Could not read image: image.jpg")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# Add a leading batch axis: flow() is given a batch of one image
img = np.expand_dims(img, axis=0)

# Generate 5 augmented images. The loop in the original stops the generator
# with an explicit break; zipping with range(5) bounds it the same way
# without a manual counter.
for _, batch in zip(range(5), datagen.flow(img, batch_size=1)):
    plt.figure(figsize=(4, 4))
    plt.imshow(batch[0].astype('uint8'))
    plt.axis('off')
    plt.show()
# Caption: Data augmentation with ImageDataGenerator
opencv-python - Image processing
PIL/Pillow - Image manipulation
tensorflow - Deep learning
matplotlib - Visualization