Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SAMPLES] YOLO Object Detection + Combo sample to enable Multiperson HRNet pose #27

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
71 changes: 60 additions & 11 deletions AIDevGallery/Samples/ModelsDefinitions/imagemodels.modelgroup.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@
"Name": "Faster RCNN 10",
"Url": "https://github.com/onnx/models/blob/main/validated/vision/object_detection_segmentation/faster-rcnn/model/FasterRCNN-10.onnx",
"Description": "FPN",
"HardwareAccelerator": [ "CPU", "DML" ],
"SupportedOnQualcomm": true,
"HardwareAccelerator": [
"CPU",
"DML"
],
"SupportedOnQualcomm": true,
"Size": 167330019,
"License": "mit"
},
Expand All @@ -26,7 +29,10 @@
"Name": "Faster RCNN 12",
"Url": "https://github.com/onnx/models/blob/main/validated/vision/object_detection_segmentation/faster-rcnn/model/FasterRCNN-12.onnx",
"Description": "FPN-fp32",
"HardwareAccelerator": [ "CPU", "DML" ],
"HardwareAccelerator": [
"CPU",
"DML"
],
"SupportedOnQualcomm": true,
"Size": 176713194,
"License": "mit"
Expand All @@ -45,7 +51,10 @@
"Name": "ResNet101 v1 7",
"Url": "https://github.com/onnx/models/blob/main/validated/vision/classification/resnet/model/resnet101-v1-7.onnx",
"Description": "ResNet101 v1 7",
"HardwareAccelerator": [ "CPU", "DML" ],
"HardwareAccelerator": [
"CPU",
"DML"
],
"SupportedOnQualcomm": true,
"Size": 178914043,
"License": "apache-2.0"
Expand All @@ -55,7 +64,10 @@
"Name": "ResNet50 v1 7",
"Url": "https://github.com/onnx/models/blob/main/validated/vision/classification/resnet/model/resnet50-v1-7.onnx",
"Description": "ResNet50 v1 7",
"HardwareAccelerator": [ "CPU", "DML" ],
"HardwareAccelerator": [
"CPU",
"DML"
],
"SupportedOnQualcomm": true,
"Size": 102583340,
"License": "apache-2.0"
Expand All @@ -74,8 +86,11 @@
"Name": "MobileNet v2 1.0",
"Url": "https://github.com/onnx/models/blob/main/validated/vision/classification/mobilenet/model/mobilenetv2-10.onnx",
"Description": "MobileNet v2 1.0",
"HardwareAccelerator": [ "CPU", "DML" ],
"SupportedOnQualcomm": true,
"HardwareAccelerator": [
"CPU",
"DML"
],
"SupportedOnQualcomm": true,
"Size": 13963115,
"License": "apache-2.0"
}
Expand All @@ -93,7 +108,10 @@
"Name": "SqueezeNet 1.1",
"Url": "https://github.com/onnx/models/blob/main/validated/vision/classification/squeezenet/model/squeezenet1.1-7.onnx",
"Description": "SqueezeNet 1.1",
"HardwareAccelerator": [ "CPU", "DML" ],
"HardwareAccelerator": [
"CPU",
"DML"
],
"SupportedOnQualcomm": true,
"Size": 4956208,
"License": "apache-2.0"
Expand Down Expand Up @@ -134,7 +152,10 @@
"Name": "ESRGAN",
"Url": "https://huggingface.co/microsoft/dml-ai-hub-models/blob/main/esrgan/esrgan.onnx",
"Description": "ESRGAN",
"HardwareAccelerator": [ "CPU", "DML" ],
"HardwareAccelerator": [
"CPU",
"DML"
],
"SupportedOnQualcomm": true,
"Size": 67162397,
"License": "apache-2.0"
Expand All @@ -153,7 +174,10 @@
"Name": "FFNet 78s",
"Url": "https://huggingface.co/microsoft/dml-ai-hub-models/blob/main/ffnet_78s/ffnet_78s.onnx",
"Description": "FFNet 78s",
"HardwareAccelerator": [ "CPU", "DML" ],
"HardwareAccelerator": [
"CPU",
"DML"
],
"SupportedOnQualcomm": true,
"Size": 109974166,
"License": "bsd-3-clause"
Expand All @@ -163,13 +187,38 @@
"Name": "FFNet 54s",
"Url": "https://huggingface.co/microsoft/dml-ai-hub-models/blob/main/ffnet_54s/ffnet_54s.onnx",
"Description": "FFNet 54s",
"HardwareAccelerator": [ "CPU", "DML" ],
"HardwareAccelerator": [
"CPU",
"DML"
],
"SupportedOnQualcomm": true,
"Size": 72188137,
"License": "bsd-3-clause"
}
},
"ReadmeUrl": "https://huggingface.co/qualcomm/FFNet-78S/blob/main/README.md"
},
"YOLO": {
"Id": "yolo",
"Name": "YOLO",
"Description": "YOLO (You Only Look Once) is a state-of-the-art, real-time object detection algorithm.",
"DocsUrl": "https://github.com/onnx/models/tree/main/validated/vision/object_detection_segmentation/yolov4",
"Models": {
"YOLOv4": {
"Id": "b7cae3a2-478f-1177-9559-709df5dfe703",
"Name": "YOLOv4",
"Url": "https://github.com/onnx/models/blob/main/validated/vision/object_detection_segmentation/yolov4/model/yolov4.onnx",
"Description": "YOLOv4 optimizes the speed and accuracy of object detection.",
"HardwareAccelerator": [
"CPU",
"DML"
],
"SupportedOnQualcomm": true,
"Size": 257470589,
"License": "mit"
}
},
"ReadmeUrl": "https://github.com/onnx/models/blob/main/validated/vision/object_detection_segmentation/yolov4/README.md"
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ private async Task EnhanceImage(string filePath)
return finalOutputBitmap;
});

BitmapImage outputImage = BitmapFunctions.ConvertBitmapToBitmapImageAsync(bitmapOutput);
BitmapImage outputImage = BitmapFunctions.ConvertBitmapToBitmapImage(bitmapOutput);
NarratorHelper.AnnounceImageChanged(DefaultImage, "Image enhancement complete."); // <exclude-line>

bitmapOutput.Dispose();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ private async Task Segment(string filePath)
});

// Convert the final overlay to BitmapImage for display
BitmapImage outputImage = BitmapFunctions.ConvertBitmapToBitmapImageAsync(processedImage);
BitmapImage outputImage = BitmapFunctions.ConvertBitmapToBitmapImage(processedImage);

NarratorHelper.AnnounceImageChanged(DefaultImage, "Image changed: all regions segmented."); // <exclude-line>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using Windows.Storage.Pickers;
Expand All @@ -31,7 +30,7 @@ namespace AIDevGallery.Samples.OpenSourceModels.ObjectDetection.FasterRCNN
"Microsoft.ML.OnnxRuntime.DirectML",
"Microsoft.ML.OnnxRuntime.Extensions"
],
Name = "Object Detection",
Name = "Faster RCNN Object Detection",
Id = "9b74ccc0-f5f7-430f-bed0-758ffc063508",
Icon = "\uE8B3")]
internal sealed partial class ObjectDetection : BaseSamplePage
Expand Down Expand Up @@ -164,30 +163,18 @@ private async Task DetectObjects(string filePath)
return predictions;
});

RenderPredictions(image, predictions);
image.Dispose();

Loader.IsActive = false;
Loader.Visibility = Visibility.Collapsed;
UploadButton.Visibility = Visibility.Visible;
}

private void RenderPredictions(Bitmap image, List<Prediction> predictions)
{
BitmapFunctions.DrawPredictions(image, predictions);
BitmapImage outputImage = BitmapFunctions.RenderPredictions(image, predictions);

BitmapImage bitmapImage = new();
using (MemoryStream memoryStream = new())
DispatcherQueue.TryEnqueue(() =>
{
image.Save(memoryStream, System.Drawing.Imaging.ImageFormat.Png);

memoryStream.Position = 0;

bitmapImage.SetSource(memoryStream.AsRandomAccessStream());
}
DefaultImage.Source = outputImage;
Loader.IsActive = false;
Loader.Visibility = Visibility.Collapsed;
UploadButton.Visibility = Visibility.Visible;
});

DefaultImage.Source = bitmapImage;
NarratorHelper.AnnounceImageChanged(DefaultImage, "Image changed: objects detected."); // <exclude-line>
image.Dispose();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ namespace AIDevGallery.Samples.OpenSourceModels.HRNetPose
SharedCode = [
SharedCodeEnum.Prediction,
SharedCodeEnum.BitmapFunctions,
SharedCodeEnum.DeviceUtils
SharedCodeEnum.DeviceUtils,
SharedCodeEnum.PoseHelper
],
NugetPackageReferences = [
"System.Drawing.Common",
Expand Down Expand Up @@ -145,13 +146,20 @@ private async Task DetectPose(string filePath)
// Run inference
using IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = _inferenceSession!.Run(inputs);
var heatmaps = results[0].AsTensor<float>();
List<(float X, float Y)> keypointCoordinates = PostProcessResults(heatmaps, originalImageWidth, originalImageHeight);

var outputName = _inferenceSession!.OutputNames[0];
var outputDimensions = _inferenceSession!.OutputMetadata[outputName].Dimensions;

float outputWidth = outputDimensions[2];
float outputHeight = outputDimensions[3];

List<(float X, float Y)> keypointCoordinates = PoseHelper.PostProcessResults(heatmaps, originalImageWidth, originalImageHeight, outputWidth, outputHeight);
return keypointCoordinates;
});

// Render predictions and create output bitmap
using Bitmap output = RenderPredictions(image, predictions);
BitmapImage outputImage = BitmapFunctions.ConvertBitmapToBitmapImageAsync(output);
using Bitmap output = PoseHelper.RenderPredictions(image, predictions, .02f);
BitmapImage outputImage = BitmapFunctions.ConvertBitmapToBitmapImage(output);
NarratorHelper.AnnounceImageChanged(DefaultImage, "Image changed: key points rendered."); // <exclude-line>

DispatcherQueue.TryEnqueue(() =>
Expand All @@ -162,88 +170,5 @@ private async Task DetectPose(string filePath)
UploadButton.Visibility = Visibility.Visible;
});
}

/// <summary>
/// Converts the model's keypoint heatmaps into one (X, Y) coordinate per keypoint,
/// scaled to the original image size.
/// </summary>
/// <param name="heatmaps">Heatmap tensor; dimension 1 is read as the keypoint count and dimensions 2 and 3 as the scan extents.</param>
/// <param name="originalWidth">Width of the original image.</param>
/// <param name="originalHeight">Height of the original image.</param>
/// <returns>The highest-scoring location of each keypoint, in original-image coordinates.</returns>
private List<(float X, float Y)> PostProcessResults(Tensor<float> heatmaps, float originalWidth, float originalHeight)
{
    // The heatmap grid is taken to be 64 x 48, so the divisors are fixed.
    float xScale = originalWidth / 64f;
    float yScale = originalHeight / 48f;

    int keypointCount = heatmaps.Dimensions[1];
    int xExtent = heatmaps.Dimensions[2];
    int yExtent = heatmaps.Dimensions[3];

    List<(float X, float Y)> results = [];

    for (int k = 0; k < keypointCount; k++)
    {
        (int X, int Y) peak = (0, 0);
        float peakScore = float.MinValue;

        // x is the outer loop and the comparison is strict, so ties keep the first cell visited.
        // NOTE(review): x is bounded by Dimensions[2] but used as the last index (and y the
        // reverse) - confirm this matches the model's output layout.
        for (int x = 0; x < xExtent; x++)
        {
            for (int y = 0; y < yExtent; y++)
            {
                float score = heatmaps[0, k, y, x];
                if (score > peakScore)
                {
                    peakScore = score;
                    peak = (x, y);
                }
            }
        }

        results.Add((peak.X * xScale, peak.Y * yScale));
    }

    return results;
}

/// <summary>
/// Draws the pose skeleton and keypoint markers onto a copy of the supplied image.
/// </summary>
/// <param name="originalImage">Image to copy; the drawing is done on the copy.</param>
/// <param name="keypoints">Keypoint coordinates; indices 5 through 16 are read for the skeleton lines.</param>
/// <returns>A new bitmap with blue connection lines and red markers; it is not disposed here.</returns>
private Bitmap RenderPredictions(Bitmap originalImage, List<(float X, float Y)> keypoints)
{
    Bitmap canvas = new(originalImage);

    using Graphics graphics = Graphics.FromImage(canvas);
    using Pen bonePen = new(Color.Blue, 5);

    // Marker diameter is 2% of the average of the image's width and height.
    int markerSize = (int)((originalImage.Width + originalImage.Height) * 0.02 / 2);
    int markerOffset = markerSize / 2;

    // Pairs of keypoint indices that are joined by a line.
    (int First, int Second)[] skeleton =
    [
        (5, 6),   // shoulders
        (5, 7),   // left shoulder -> left elbow
        (7, 9),   // left elbow -> left wrist
        (6, 8),   // right shoulder -> right elbow
        (8, 10),  // right elbow -> right wrist
        (11, 12), // hips
        (5, 11),  // left shoulder -> left hip
        (6, 12),  // right shoulder -> right hip
        (11, 13), // left hip -> left knee
        (13, 15), // left knee -> left ankle
        (12, 14), // right hip -> right knee
        (14, 16), // right knee -> right ankle
    ];

    // Lines go down first so the markers are painted on top of them.
    foreach ((int jointA, int jointB) in skeleton)
    {
        (float X, float Y) start = keypoints[jointA];
        (float X, float Y) end = keypoints[jointB];

        graphics.DrawLine(bonePen, start.X, start.Y, end.X, end.Y);
    }

    for (int i = 0; i < keypoints.Count; i++)
    {
        (float X, float Y) point = keypoints[i];
        graphics.FillEllipse(Brushes.Red, point.X - markerOffset, point.Y - markerOffset, markerSize, markerSize);
    }

    return canvas;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
<?xml version="1.0" encoding="utf-8"?>
<samples:BaseSamplePage
xmlns:samples="using:AIDevGallery.Samples"
x:Class="AIDevGallery.Samples.OpenSourceModels.MultiHRNetPose.Multipose"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
xmlns:local="using:AIDevGallery.Samples.OpenSourceModels.MultiHRNetPose"
xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
mc:Ignorable="d">

<!-- Page layout: a two-row grid. The image has no Grid.Row and so lands in the first row; the progress ring and the upload button both sit in the second row. -->
<ScrollViewer>
<Grid RowSpacing="16">
<Grid.RowDefinitions>
<RowDefinition Height="Auto" />
<RowDefinition Height="Auto" />
</Grid.RowDefinitions>
<!-- Image surface capped at 800 x 500; no Source is set in markup. -->
<Image
x:Name="DefaultImage"
MaxWidth="800"
MaxHeight="500" />
<!-- Starts inactive and collapsed; presumably toggled from the code-behind while work is running (verify). -->
<ProgressRing
x:Name="Loader"
Grid.Row="1"
IsActive="false"
Visibility="Collapsed" />
<!-- Raises UploadButton_Click, which has to be defined in the page's code-behind. -->
<Button
x:Name="UploadButton"
Grid.Row="1"
HorizontalAlignment="Center"
Click="UploadButton_Click"
Content="Select image"
Style="{StaticResource AccentButtonStyle}" />
</Grid>
</ScrollViewer>
</samples:BaseSamplePage>
Loading