
Commit c5f49f0

Merge branch 'debug-map'
2 parents: 4caf4a7 + d520bb8

File tree: 12 files changed, +590 −216 lines

.gitignore

Lines changed: 3 additions & 0 deletions

```diff
@@ -98,3 +98,6 @@ ENV/
 
 # mypy
 .mypy_cache/
+
+*.zip
+*.pt
```

README.md

Lines changed: 108 additions & 11 deletions

The "WIP" placeholder and the old `# dependencies` list of sudo commands are removed:

```diff
-WIP
-# dependencies
-sudo pip install cffi
-sudo pip install pandas
-sudo pip install pycocotools
-sudo pip install cython
-sudo pip install pycocotools
-sudo apt-get install tk-dev
-sudo apt-get install python-tk
-sudo pip install opencv-python
-sudo pip install requests
```

In their place, the new README reads:

# pytorch-retinanet

Pytorch implementation of RetinaNet object detection as described in [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002) by Tsung-Yi Lin, Priya Goyal, Ross Girshick, Kaiming He and Piotr Dollár.

## Results

Currently, this repo achieves 33.7% mAP at 600px resolution with a ResNet-50 backbone. The published result is 34.0% mAP; the difference is likely due to the use of the Adam optimizer instead of SGD with weight decay.
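Since the optimizer choice is named as the likely cause of the gap, here is a rough sketch of the two setups side by side (illustrative only; the stand-in model and the hyper-parameter values are placeholders, not this repo's actual training configuration):

```python
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(4, 2)  # stand-in for the RetinaNet model

# This repo's choice: Adam, which adapts per-parameter step sizes.
optimizer = optim.Adam(model.parameters(), lr=1e-5)

# The paper's recipe: SGD with momentum and explicit weight decay.
# optimizer = optim.SGD(model.parameters(), lr=1e-2,
#                       momentum=0.9, weight_decay=1e-4)
```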
## Installation

1) Clone this repo.

2) Install the required system packages:

```
apt-get install tk-dev python-tk
```

3) Install the required Python packages (Cython must be installed before pycocotools):

```
pip install cffi
pip install pandas
pip install cython
pip install pycocotools
pip install opencv-python
pip install requests
```

4) Build the NMS extension.
## Training

The network can be trained using the `train.py` script. Currently, two dataloaders are available: COCO and CSV. For training on COCO, use

```
python train.py coco <path/to/coco>
```

For training on a custom dataset with annotations in CSV format (see below), use

```
python train.py csv <path/to/annotations.csv> <path/to/classes.csv>
```

## Visualization

To visualize the network's detections, use `test.py`.
## CSV datasets

The `CSVGenerator` provides an easy way to define your own datasets.
It uses two CSV files: one file containing annotations and one file containing a class name to ID mapping.

### Annotations format

The CSV file with annotations should contain one annotation per line.
Images with multiple bounding boxes should use one row per bounding box.
Note that indexing for pixel values starts at 0.
The expected format of each line is:

```
path/to/image.jpg,x1,y1,x2,y2,class_name
```

Some images may not contain any labeled objects.
To add these images to the dataset as negative examples,
add an annotation where `x1`, `y1`, `x2`, `y2` and `class_name` are all empty:

```
path/to/image.jpg,,,,,
```

A full example:

```
/data/imgs/img_001.jpg,837,346,981,456,cow
/data/imgs/img_002.jpg,215,312,279,391,cat
/data/imgs/img_002.jpg,22,5,89,84,bird
/data/imgs/img_003.jpg,,,,,
```

This defines a dataset with 3 images.
`img_001.jpg` contains a cow.
`img_002.jpg` contains a cat and a bird.
`img_003.jpg` contains no interesting objects/animals.

### Class mapping format

The class name to ID mapping file should contain one mapping per line.
Each line should use the following format:

```
class_name,id
```

Indexing for classes starts at 0.
Do not include a background class as it is implicit.

For example:

```
cow,0
cat,1
bird,2
```
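Both files are plain CSV and straightforward to parse. A minimal sketch of a reader for the two formats described above (an illustration of the spec, not the repo's actual dataset code):

```python
import csv

def load_classes(path):
    """Parse the class-mapping CSV into a {class_name: id} dict."""
    with open(path, newline='') as f:
        return {name: int(idx) for name, idx in csv.reader(f)}

def load_annotations(path, classes):
    """Parse the annotations CSV into {image_path: [(x1, y1, x2, y2, class_id), ...]}."""
    images = {}
    with open(path, newline='') as f:
        for img, x1, y1, x2, y2, name in csv.reader(f):
            boxes = images.setdefault(img, [])  # negative examples keep an empty list
            if name:  # skip the all-empty fields of a negative example
                boxes.append((int(x1), int(y1), int(x2), int(y2), classes[name]))
    return images
```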
## Acknowledgements

- Significant amounts of code are borrowed from the [keras retinanet implementation](https://github.com/fizyr/keras-retinanet)
- The NMS module used is from the [pytorch faster-rcnn implementation](https://github.com/ruotianluo/pytorch-faster-rcnn)

anchors.py

Lines changed: 0 additions & 17 deletions

```diff
@@ -1,19 +1,3 @@
-"""
-Copyright 2017-2018 Fizyr (https://fizyr.com)
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
 import numpy as np
 import torch
 import torch.nn as nn
@@ -51,7 +35,6 @@ def forward(self, image):
         all_anchors = np.expand_dims(all_anchors, axis=0)
 
         return torch.from_numpy(all_anchors.astype(np.float32)).cuda()
-        #return torch.autograd.Variable(torch.cuda.FloatTensor(all_anchors), requires_grad=False)
 
 def generate_anchors(base_size=16, ratios=None, scales=None):
     """
```

coco_eval.py

Lines changed: 60 additions & 76 deletions

```diff
@@ -7,96 +7,80 @@
 import json
 import os
 
+import torch
 
 def evaluate_coco(dataset, model, threshold=0.05):
 
     model.eval()
+
+    with torch.no_grad():
 
-    # start collecting results
-    results = []
-    image_ids = []
+        # start collecting results
+        results = []
+        image_ids = []
 
-    for index in range(len(dataset)):
-        data = dataset[index]
-        scale = data['scale']
+        for index in range(len(dataset)):
+            data = dataset[index]
+            scale = data['scale']
 
-        # run network
-        scores, labels, boxes = model(data['img'].permute(2, 0, 1).cuda().float().unsqueeze(dim=0))
-        scores = scores.cpu()
-        labels = labels.cpu()
-        boxes = boxes.cpu()
+            # run network
+            scores, labels, boxes = model(data['img'].permute(2, 0, 1).cuda().float().unsqueeze(dim=0))
+            scores = scores.cpu()
+            labels = labels.cpu()
+            boxes = boxes.cpu()
 
-        # correct boxes for image scale
-        boxes /= scale
+            # correct boxes for image scale
+            boxes /= scale
 
-        # change to (x, y, w, h) (MS COCO standard)
-        boxes[:, 2] -= boxes[:, 0]
-        boxes[:, 3] -= boxes[:, 1]
+            # change to (x, y, w, h) (MS COCO standard)
+            boxes[:, 2] -= boxes[:, 0]
+            boxes[:, 3] -= boxes[:, 1]
 
-        # compute predicted labels and scores
-        #for box, score, label in zip(boxes[0], scores[0], labels[0]):
-        for box_id in range(boxes.shape[0]):
-            score = float(scores[box_id])
-            label = int(labels[box_id])
-            box = boxes[box_id, :]
+            # compute predicted labels and scores
+            #for box, score, label in zip(boxes[0], scores[0], labels[0]):
+            for box_id in range(boxes.shape[0]):
+                score = float(scores[box_id])
+                label = int(labels[box_id])
+                box = boxes[box_id, :]
 
-            # scores are sorted, so we can break
-            if score < threshold:
-                break
+                # scores are sorted, so we can break
+                if score < threshold:
+                    break
 
-            # append detection for each positively labeled class
-            image_result = {
-                'image_id' : dataset.image_ids[index],
-                'category_id' : dataset.label_to_coco_label(label),
-                'score' : float(score),
-                'bbox' : box.tolist(),
-            }
+                # append detection for each positively labeled class
+                image_result = {
+                    'image_id' : dataset.image_ids[index],
+                    'category_id' : dataset.label_to_coco_label(label),
+                    'score' : float(score),
+                    'bbox' : box.tolist(),
+                }
 
-            # append detection to results
-            results.append(image_result)
+                # append detection to results
+                results.append(image_result)
 
-        # append image to list of processed images
-        image_ids.append(dataset.image_ids[index])
+            # append image to list of processed images
+            image_ids.append(dataset.image_ids[index])
 
-        # print progress
-        print('{}/{}'.format(index, len(dataset)), end='\r')
+            # print progress
+            print('{}/{}'.format(index, len(dataset)), end='\r')
 
-    if not len(results):
-        return
+        if not len(results):
+            return
+
+        # write output
+        json.dump(results, open('{}_bbox_results.json'.format(dataset.set_name), 'w'), indent=4)
+
+        # load results in COCO evaluation tool
+        coco_true = dataset.coco
+        coco_pred = coco_true.loadRes('{}_bbox_results.json'.format(dataset.set_name))
 
-    # write output
-    json.dump(results, open('{}_bbox_results.json'.format(dataset.set_name), 'w'), indent=4)
-    json.dump(image_ids, open('{}_processed_image_ids.json'.format(dataset.set_name), 'w'), indent=4)
-
-    # load results in COCO evaluation tool
-    coco_true = dataset.coco
-    coco_pred = coco_true.loadRes('{}_bbox_results.json'.format(dataset.set_name))
-
-    # run COCO evaluation
-    coco_eval = COCOeval(coco_true, coco_pred, 'bbox')
-    coco_eval.params.imgIds = image_ids
-    coco_eval.evaluate()
-    coco_eval.accumulate()
-    coco_eval.summarize()
-
-    coco_tag = ['AP @[ IoU=0.50:0.95 | area= all | maxDets=100 ]',
-                'AP @[ IoU=0.50 | area= all | maxDets=100 ]',
-                'AP @[ IoU=0.75 | area= all | maxDets=100 ]',
-                'AP @[ IoU=0.50:0.95 | area= small | maxDets=100 ]',
-                'AP @[ IoU=0.50:0.95 | area=medium | maxDets=100 ]',
-                'AP @[ IoU=0.50:0.95 | area= large | maxDets=100 ]',
-                'AR @[ IoU=0.50:0.95 | area= all | maxDets= 1 ]',
-                'AR @[ IoU=0.50:0.95 | area= all | maxDets= 10 ]',
-                'AR @[ IoU=0.50:0.95 | area= all | maxDets=100 ]',
-                'AR @[ IoU=0.50:0.95 | area= small | maxDets=100 ]',
-                'AR @[ IoU=0.50:0.95 | area=medium | maxDets=100 ]',
-                'AR @[ IoU=0.50:0.95 | area= large | maxDets=100 ]']
-
-    coco_eval_stats = coco_eval.stats
-
-    for index, result in enumerate(coco_eval_stats):
-        print('{}. {}: {}'.format(index + 1, coco_tag[index], coco_eval_stats[index]))
-
-    model.train()
-
-    return
+        # run COCO evaluation
+        coco_eval = COCOeval(coco_true, coco_pred, 'bbox')
+        coco_eval.params.imgIds = image_ids
+        coco_eval.evaluate()
+        coco_eval.accumulate()
+        coco_eval.summarize()
+
+        model.train()
+
+        return
```
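The substance of this change: evaluation now runs under `torch.no_grad()` (the bulk of the diff is the resulting re-indentation), the `_processed_image_ids.json` dump is dropped, and the hand-rolled `coco_tag` pretty-printer is removed in favour of `coco_eval.summarize()` alone. A minimal standalone sketch of the inference pattern being adopted (`model` and `batch` here are placeholders, not the repo's objects):

```python
import torch
import torch.nn as nn

model = nn.Linear(8, 4)    # stand-in for the detector
batch = torch.randn(2, 8)  # stand-in for a preprocessed image batch

model.eval()               # eval-mode behaviour (running BN stats, no dropout)
with torch.no_grad():      # skip autograd bookkeeping: less memory, faster
    outputs = model(batch)
model.train()              # hand control back in training mode
```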
