rapid
A ROS robotics library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Pages
image_recognition.h
Go to the documentation of this file.
1 #ifndef _RAPID_PERCEPTION_IMAGE_RECOGNITION_H_
2 #define _RAPID_PERCEPTION_IMAGE_RECOGNITION_H_
3 
4 #include <string>
5 #include <utility>
6 #include <vector>
7 
8 #include "Eigen/Core"
9 #include "boost/shared_ptr.hpp"
10 #include "caffe/blob.hpp"
11 #include "caffe/common.hpp"
12 #include "caffe/net.hpp"
13 #include "caffe/proto/caffe.pb.h"
14 #include "opencv2/opencv.hpp"
15 
16 namespace rapid {
17 namespace perception {
18 // Runs a Caffe CNN model on an image.
19 //
20 // Usage:
21 // string error(""); // Will be filled out on error.
22 // ImageRecognizer recognizer;
23 // ImageRecognizer::AlexNet("/home/user/alexnet", &recognizer, &error);
24 // cv::Mat image; // Input is an OpenCV image.
25 // recognizer.set_image(image);
26 // cv::Mat conv3 = recognizer.layer("conv3", &error);
27 // cv::Mat fc6 = recognizer.layer("fc6", &error);
28 // vector<std::pair<string, float>> predictions = recognizer.predictions(5,
29 // &error);
30 class ImageRecognizer {
31  public:
32  ImageRecognizer(); // Do not use, call ImageRecognizer::AlexNet() instead.
// Constructs a recognizer from a Caffe net, a mean image, the input geometry,
// the number of channels, and a list of labels.
// NOTE(review): presumably these are stored in net_, mean_, input_geometry_,
// num_channels_ and labels_ respectively -- confirm against the implementation.
33  ImageRecognizer(boost::shared_ptr<caffe::Net<float> > feature_extraction_net,
34  cv::Mat mean, cv::Size input_geometry, int num_channels,
35  const std::vector<std::string>& labels);
// Sets the OpenCV image that subsequent layer()/predictions() calls operate on.
// NOTE(review): presumably also sets needs_update_ so the network is re-run
// for the new image -- confirm against the implementation.
36  void set_image(const cv::Mat& image);
37  // Get the layer named layer_name as a single vector (returned as a cv::Mat).
// NOTE(review): *error is presumably filled out on failure, as described in
// the usage example for this class -- confirm what is returned in that case.
38  cv::Mat layer(const std::string& layer_name, std::string* error);
// Returns a vector of (string, float) pairs for the current image;
// num_predictions is the requested number of entries.
// NOTE(review): presumably these are the top-scoring (label, score) pairs in
// descending score order, and *error is filled out on failure -- confirm.
39  std::vector<std::pair<std::string, float> > predictions(int num_predictions,
40  std::string* error);
// Factory to use instead of the default constructor: fills in *recognizer
// using the model directory model_dir.
// NOTE(review): presumably returns true on success and, on failure, returns
// false with a message in *error; the set of files expected inside model_dir
// is not visible here -- confirm against the implementation.
41  static bool AlexNet(std::string model_dir, ImageRecognizer* recognizer,
42  std::string* error);
43 
44  private:
// NOTE(review): presumably runs the network on image_ when needs_update_ is
// set, reporting problems through *error -- confirm against the implementation.
45  void ForwardPass(std::string* error);
// Takes an image and returns a vector of floats.
// NOTE(review): presumably the network's output scores, one per entry in
// labels_ -- confirm against the implementation.
46  std::vector<float> Predict(const cv::Mat& image, std::string* error);
// Takes an input image and a pointer to a vector of per-channel cv::Mat.
// NOTE(review): presumably converts img to the network's input format
// (channel count, input_geometry_, mean_ subtraction) and writes the result
// into *input_channels -- confirm against the implementation.
47  void Preprocess(const cv::Mat& img, std::vector<cv::Mat>* input_channels,
48  std::string* error);
// NOTE(review): presumably fills *input_channels with cv::Mat headers that
// alias the network's input blob, so writing to them writes the net input
// directly -- confirm against the implementation.
49  void WrapInputLayer(std::vector<cv::Mat>* input_channels);
50 
// Fills in *recognizer from four file paths: the model prototxt, the
// pretrained caffemodel, the mean binaryproto, and the labels file.
// NOTE(review): presumably returns true on success and fills *error on
// failure, and is the helper AlexNet() delegates to -- confirm.
51  static bool BuildFromFiles(const std::string& model_prototxt,
52  const std::string& pretrained_caffemodel,
53  const std::string& mean_binaryproto,
54  const std::string& labels_file,
55  ImageRecognizer* recognizer, std::string* error);
56 
57  boost::shared_ptr<caffe::Net<float> > net_; // The Caffe network, held by shared pointer.
58  cv::Mat image_; // NOTE(review): presumably the image last given to set_image() -- confirm.
59  cv::Mat mean_; // Mean of the training dataset
60  cv::Size input_geometry_; // NOTE(review): presumably the image size the network input expects -- confirm.
61  int num_channels_; // NOTE(review): presumably the number of input image channels -- confirm.
62  std::vector<std::string> labels_; // NOTE(review): presumably one label per network output class -- confirm.
63 
64  bool needs_update_; // True if new image.
65 };
66 
// Reads the mean file at mean_file and returns it as a cv::Mat, given the
// input geometry and number of channels. The error argument is optional and
// defaults to NULL.
// NOTE(review): presumably mean_file is a Caffe binaryproto and *error (when
// non-NULL) is filled out on failure -- confirm against the implementation.
67 cv::Mat ReadMeanFile(const std::string& mean_file,
68  const cv::Size& input_geometry, int num_channels,
69  std::string* error = NULL);
70 
// Comparison predicate over (float, int) pairs.
// NOTE(review): presumably orders by the float component so that Argmax can
// sort (value, index) pairs -- confirm the ordering direction.
// NOTE(review): this is a `static` (internal-linkage) free function declared
// in a header, so every including translation unit gets its own declaration;
// consider moving it into the .cpp if it is only used there.
71 static bool PairCompare(const std::pair<float, int>& lhs,
72  const std::pair<float, int>& rhs);
73 
74 /* Return the indices of the top N values of vector v. */
// NOTE(review): behaviour when N exceeds v.size() is not visible here -- confirm.
// NOTE(review): declared `static` in a header (internal linkage per including
// translation unit); consider moving it into the .cpp if it is only used there.
75 static std::vector<int> Argmax(const std::vector<float>& v, int N);
76 } // namespace perception
77 } // namespace rapid
78 
79 #endif // _RAPID_PERCEPTION_IMAGE_RECOGNITION_H_
static bool PairCompare(const std::pair< float, int > &lhs, const std::pair< float, int > &rhs)
std::vector< std::pair< std::string, float > > predictions(int num_predictions, std::string *error)
static std::vector< int > Argmax(const std::vector< float > &v, int N)
void set_image(const cv::Mat &image)
cv::Mat ReadMeanFile(const std::string &mean_file, const cv::Size &input_geometry, int num_channels, std::string *error=NULL)
cv::Mat layer(const std::string &layer_name, std::string *error)
static bool AlexNet(std::string model_dir, ImageRecognizer *recognizer, std::string *error)