opencv/modules/dnn/src/layers/proposal_layer.cpp at master · pythonwebcoder/opencv

History

340 lines (285 loc) · 12.8 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

// This file is part of OpenCV project.

// It is subject to the license terms in the LICENSE file found in the top-level directory

// of this distribution and at http://opencv.org/license.html.

// Third party copyrights are property of their respective owners.

#include "../precomp.hpp"

#include "layers_common.hpp"

namespace cv { namespace dnn {

class ProposalLayerImpl : public ProposalLayer

{

public:

ProposalLayerImpl(const LayerParams& params)

{

setParamsFrom(params);

uint32_t featStride = params.get<uint32_t>("feat_stride", 16);

uint32_t baseSize = params.get<uint32_t>("base_size", 16);

// uint32_t minSize = params.get<uint32_t>("min_size", 16);

uint32_t keepTopBeforeNMS = params.get<uint32_t>("pre_nms_topn", 6000);

keepTopAfterNMS = params.get<uint32_t>("post_nms_topn", 300);

float nmsThreshold = params.get<float>("nms_thresh", 0.7);

DictValue ratios = params.get("ratio");

DictValue scales = params.get("scale");

{

LayerParams lp;

lp.set("step", featStride);

lp.set("flip", false);

lp.set("clip", false);

lp.set("normalized_bbox", false);

// Unused values.

float variance[] = {0.1f, 0.1f, 0.2f, 0.2f};

lp.set("variance", DictValue::arrayReal<float*>(&variance[0], 4));

// Compute widths and heights explicitly.

std::vector<float> widths, heights;

widths.reserve(ratios.size() * scales.size());

heights.reserve(ratios.size() * scales.size());

for (int i = 0; i < ratios.size(); ++i)

{

float ratio = ratios.get<float>(i);

for (int j = 0; j < scales.size(); ++j)

{

float scale = scales.get<float>(j);

float width = std::floor(baseSize / sqrt(ratio) + 0.5f);

float height = std::floor(width * ratio + 0.5f);

widths.push_back(scale * width);

heights.push_back(scale * height);

}

lp.set("width", DictValue::arrayReal<float*>(&widths[0], widths.size()));

lp.set("height", DictValue::arrayReal<float*>(&heights[0], heights.size()));

priorBoxLayer = PriorBoxLayer::create(lp);

}

{

int order[] = {0, 2, 3, 1};

LayerParams lp;

lp.set("order", DictValue::arrayInt<int*>(&order[0], 4));

deltasPermute = PermuteLayer::create(lp);

scoresPermute = PermuteLayer::create(lp);

}

{

LayerParams lp;

lp.set("code_type", "CENTER_SIZE");

lp.set("num_classes", 1);

lp.set("share_location", true);

lp.set("background_label_id", 1); // We won't pass background scores so set it out of range [0, num_classes)

lp.set("variance_encoded_in_target", true);

lp.set("keep_top_k", keepTopAfterNMS);

lp.set("top_k", keepTopBeforeNMS);

lp.set("nms_threshold", nmsThreshold);

lp.set("normalized_bbox", false);

lp.set("clip", true);

detectionOutputLayer = DetectionOutputLayer::create(lp);

}

bool getMemoryShapes(const std::vector<MatShape> &inputs,

const int requiredOutputs,

std::vector<MatShape> &outputs,

std::vector<MatShape> &internals) const

{

// We need to allocate the following blobs:

// - output priors from PriorBoxLayer

// - permuted priors

// - permuted scores

CV_Assert(inputs.size() == 3);

const MatShape& scores = inputs[0];

const MatShape& bboxDeltas = inputs[1];

std::vector<MatShape> layerInputs, layerOutputs, layerInternals;

// Prior boxes layer.

layerInputs.assign(1, scores);

priorBoxLayer->getMemoryShapes(layerInputs, 1, layerOutputs, layerInternals);

CV_Assert(layerOutputs.size() == 1);

CV_Assert(layerInternals.empty());

internals.push_back(layerOutputs[0]);

// Scores permute layer.

CV_Assert(scores.size() == 4);

MatShape objectScores = scores;

CV_Assert((scores[1] & 1) == 0); // Number of channels is even.

objectScores[1] /= 2;

layerInputs.assign(1, objectScores);

scoresPermute->getMemoryShapes(layerInputs, 1, layerOutputs, layerInternals);

CV_Assert(layerOutputs.size() == 1);

CV_Assert(layerInternals.empty());

internals.push_back(layerOutputs[0]);

// BBox predictions permute layer.

layerInputs.assign(1, bboxDeltas);

deltasPermute->getMemoryShapes(layerInputs, 1, layerOutputs, layerInternals);

CV_Assert(layerOutputs.size() == 1);

CV_Assert(layerInternals.empty());

internals.push_back(layerOutputs[0]);

outputs.resize(1, shape(keepTopAfterNMS, 5));

return false;

}

/**
 * Finalizes both permute helper layers for the actual input shapes.
 *
 * Each permute layer is given its real input (object scores or bbox deltas)
 * and a freshly allocated CV_32FC1 blob whose dimensions are the input
 * dimensions reordered as (0, 2, 3, 1). The @p outputs argument is not used.
 */
void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
{
    // Scores permute layer: works on the object half of the score channels.
    Mat scores = getObjectScores(*inputs[0]);
    const MatShape permutedScoresShape = shape(scores.size[0], scores.size[2],
                                               scores.size[3], scores.size[1]);
    std::vector<Mat*> scoresIn(1, &scores);
    std::vector<Mat> scoresOut(1, Mat(permutedScoresShape, CV_32FC1));
    scoresPermute->finalize(scoresIn, scoresOut);

    // BBox predictions permute layer.
    Mat* bboxDeltas = inputs[1];
    CV_Assert(bboxDeltas->dims == 4);
    const MatShape permutedDeltasShape = shape(bboxDeltas->size[0], bboxDeltas->size[2],
                                               bboxDeltas->size[3], bboxDeltas->size[1]);
    std::vector<Mat*> deltasIn(1, bboxDeltas);
    std::vector<Mat> deltasOut(1, Mat(permutedDeltasShape, CV_32FC1));
    deltasPermute->finalize(deltasIn, deltasOut);
}

#ifdef HAVE_OPENCL

/**
 * OpenCL (UMat) implementation of the forward pass.
 *
 * Inputs:    [0] scores, [1] bbox deltas, [2] image info (height, width, ...).
 * Internals: [0] prior boxes, [1] permuted scores, [2] permuted deltas.
 * Output:    [0] keepTopAfterNMS x 5 blob; column 0 holds batch ids (zeros),
 *            columns 1..4 hold the box coordinates. Rows past the number of
 *            detections are zero-filled.
 *
 * @return always true.
 */
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
    std::vector<UMat> inputs;
    std::vector<UMat> outputs;
    std::vector<UMat> internals;

    inputs_.getUMatVector(inputs);
    outputs_.getUMatVector(outputs);
    internals_.getUMatVector(internals);

    CV_Assert(inputs.size() == 3);
    CV_Assert(internals.size() == 3);
    CV_Assert(!outputs.empty());
    const UMat& scores = inputs[0];
    const UMat& bboxDeltas = inputs[1];
    const UMat& imInfo = inputs[2];
    UMat& priorBoxes = internals[0];
    UMat& permutedScores = internals[1];
    UMat& permutedDeltas = internals[2];

    CV_Assert(imInfo.total() >= 2);
    // We've chosen the smallest data type because we need just a shape from it.
    Mat szMat;
    imInfo.copyTo(szMat);
    const int rows = static_cast<int>(szMat.at<float>(0));
    const int cols = static_cast<int>(szMat.at<float>(1));
    umat_fakeImageBlob.create(shape(1, 1, rows, cols), CV_8UC1);
    umat_fakeImageBlob.setTo(0);

    // Generate prior boxes.
    std::vector<UMat> layerInputs(2), layerOutputs(1, priorBoxes);
    layerInputs[0] = scores;
    layerInputs[1] = umat_fakeImageBlob;
    priorBoxLayer->forward(layerInputs, layerOutputs, internals);

    // Permute scores.
    layerInputs.assign(1, getObjectScores(scores));
    layerOutputs.assign(1, permutedScores);
    scoresPermute->forward(layerInputs, layerOutputs, internals);

    // Permute deltas.
    layerInputs.assign(1, bboxDeltas);
    layerOutputs.assign(1, permutedDeltas);
    deltasPermute->forward(layerInputs, layerOutputs, internals);

    // Sort predictions by scores and apply NMS. DetectionOutputLayer allocates
    // output internally because of different number of objects after NMS.
    layerInputs.resize(4);
    layerInputs[0] = permutedDeltas;
    layerInputs[1] = permutedScores;
    layerInputs[2] = priorBoxes;
    layerInputs[3] = umat_fakeImageBlob;

    layerOutputs[0] = UMat();
    detectionOutputLayer->forward(layerInputs, layerOutputs, internals);

    // DetectionOutputLayer produces 1x1xNx7 output where N might be less or
    // equal to keepTopAfterNMS. We fill the rest by zeros.
    // Both counts are kept as int so the comparisons below are not mixed
    // signed/unsigned.
    const int numDets = static_cast<int>(layerOutputs[0].total() / 7);
    const int maxDets = static_cast<int>(keepTopAfterNMS);
    CV_Assert(numDets <= maxDets);

    // An empty detection blob cannot be reshaped or sliced; in that case the
    // whole output is zero-filled by the branch below.
    if (numDets > 0)
    {
        MatShape s = shape(numDets, 7);
        UMat src = layerOutputs[0].reshape(1, static_cast<int>(s.size()), &s[0]).colRange(3, 7);
        UMat dst = outputs[0].rowRange(0, numDets);
        src.copyTo(dst.colRange(1, 5));
        dst.col(0).setTo(0);  // First column are batch ids. Keep it zeros too.
    }

    if (numDets < maxDets)
        outputs[0].rowRange(numDets, maxDets).setTo(0);

    return true;
}

#endif

// Generic forward entry point taking input/output/internal arrays.
// First offers the work to forward_ocl() through CV_OCL_RUN, under the
// condition that the preferable target is DNN_TARGET_OPENCL and the
// OCL_PERFORMANCE_CHECK on "default device is Intel" holds; then hands the
// same arrays to Layer::forward_fallback().
// NOTE(review): CV_OCL_RUN is defined elsewhere; presumably it returns early
// when forward_ocl() succeeds, so the fallback runs only otherwise - confirm.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)

{

CV_TRACE_FUNCTION();

CV_TRACE_ARG_VALUE(name, "name", name.c_str());

// The macro invocation intentionally has no trailing semicolon.
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&

OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),

forward_ocl(inputs_arr, outputs_arr, internals_arr))

Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);

}

/**
 * CPU implementation of the forward pass.
 *
 * Inputs:    [0] scores, [1] bbox deltas, [2] image info (height, width, ...).
 * Internals: [0] prior boxes, [1] permuted scores, [2] permuted deltas.
 * Output:    [0] keepTopAfterNMS x 5 blob; column 0 holds batch ids (zeros),
 *            columns 1..4 hold the box coordinates. Rows past the number of
 *            detections are zero-filled.
 */
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    CV_Assert(inputs.size() == 3);
    CV_Assert(internals.size() == 3);
    CV_Assert(!outputs.empty());
    const Mat& scores = *inputs[0];
    const Mat& bboxDeltas = *inputs[1];
    const Mat& imInfo = *inputs[2];
    Mat& priorBoxes = internals[0];
    Mat& permutedScores = internals[1];
    Mat& permutedDeltas = internals[2];

    CV_Assert(imInfo.total() >= 2);
    // We've chosen the smallest data type because we need just a shape from it.
    // The image size is stored as floats; convert explicitly, as the OpenCL
    // path does, instead of relying on implicit narrowing.
    const int rows = static_cast<int>(imInfo.at<float>(0));
    const int cols = static_cast<int>(imInfo.at<float>(1));
    fakeImageBlob.create(shape(1, 1, rows, cols), CV_8UC1);

    // Generate prior boxes.
    std::vector<Mat> layerInputs(2), layerOutputs(1, priorBoxes);
    layerInputs[0] = scores;
    layerInputs[1] = fakeImageBlob;
    priorBoxLayer->forward(layerInputs, layerOutputs, internals);

    // Permute scores.
    layerInputs.assign(1, getObjectScores(scores));
    layerOutputs.assign(1, permutedScores);
    scoresPermute->forward(layerInputs, layerOutputs, internals);

    // Permute deltas.
    layerInputs.assign(1, bboxDeltas);
    layerOutputs.assign(1, permutedDeltas);
    deltasPermute->forward(layerInputs, layerOutputs, internals);

    // Sort predictions by scores and apply NMS. DetectionOutputLayer allocates
    // output internally because of different number of objects after NMS.
    layerInputs.resize(4);
    layerInputs[0] = permutedDeltas;
    layerInputs[1] = permutedScores;
    layerInputs[2] = priorBoxes;
    layerInputs[3] = fakeImageBlob;

    layerOutputs[0] = Mat();
    detectionOutputLayer->forward(layerInputs, layerOutputs, internals);

    // DetectionOutputLayer produces 1x1xNx7 output where N might be less or
    // equal to keepTopAfterNMS. We fill the rest by zeros.
    // Both counts are kept as int so the comparisons below are not mixed
    // signed/unsigned.
    const int numDets = static_cast<int>(layerOutputs[0].total() / 7);
    const int maxDets = static_cast<int>(keepTopAfterNMS);
    CV_Assert(numDets <= maxDets);

    // An empty detection blob cannot be reshaped or sliced; in that case the
    // whole output is zero-filled by the branch below.
    if (numDets > 0)
    {
        Mat src = layerOutputs[0].reshape(1, numDets).colRange(3, 7);
        Mat dst = outputs[0].rowRange(0, numDets);
        src.copyTo(dst.colRange(1, 5));
        dst.col(0).setTo(0);  // First column are batch ids. Keep it zeros too.
    }

    if (numDets < maxDets)
        outputs[0].rowRange(numDets, maxDets).setTo(0);
}

private:

// Returns the object (foreground) part of a 4-dimensional scores blob.
// The first half of the channels holds background scores, so only channels
// [channels / 2, channels) are kept. The blob must have a batch size of one
// and an even number of channels.
static Mat getObjectScores(const Mat& m)
{
    CV_Assert(m.dims == 4);
    CV_Assert(m.size[0] == 1);
    int channels = m.size[1];
    CV_Assert((channels & 1) == 0);

    const Range objectChannels(channels / 2, channels);
    return slice(m, Range::all(), objectChannels);
}

#ifdef HAVE_OPENCL

// UMat counterpart of getObjectScores(const Mat&): keeps the second half of
// the channels of a 4-dimensional blob with batch size one and an even number
// of channels; all other dimensions are taken in full.
static UMat getObjectScores(const UMat& m)
{
    CV_Assert(m.dims == 4);
    CV_Assert(m.size[0] == 1);
    int channels = m.size[1];
    CV_Assert((channels & 1) == 0);

    // Start from "everything" in each of the four dimensions, then narrow
    // the channel dimension only.
    Range ranges[4];
    for (int d = 0; d < 4; ++d)
        ranges[d] = Range::all();
    ranges[1] = Range(channels / 2, channels);
    return m(&ranges[0]);
}

#endif

// Helper layer that generates prior boxes; configured in the constructor
// with explicit per-anchor widths and heights.
Ptr<PriorBoxLayer> priorBoxLayer;

// Helper layer that receives permuted deltas, permuted scores, prior boxes
// and the fake image blob in forward(); it sorts predictions and applies NMS.
Ptr<DetectionOutputLayer> detectionOutputLayer;

// Permutation (order 0, 2, 3, 1) applied to the bbox deltas.
Ptr<PermuteLayer> deltasPermute;

// Permutation (order 0, 2, 3, 1) applied to the object scores.
Ptr<PermuteLayer> scoresPermute;

// Value of "post_nms_topn" (default 300); number of rows of the output blob.
uint32_t keepTopAfterNMS;

// CV_8UC1 blob of shape 1x1xHxW built from the image-info input; passed to
// the helper layers in place of an image.
Mat fakeImageBlob;

#ifdef HAVE_OPENCL

// Same as fakeImageBlob, used by forward_ocl().
UMat umat_fakeImageBlob;

#endif

};

// Factory: constructs the concrete ProposalLayerImpl from the given layer
// parameters and returns it through the public ProposalLayer interface.
Ptr<ProposalLayer> ProposalLayer::create(const LayerParams& params)
{
    Ptr<ProposalLayer> layer(new ProposalLayerImpl(params));
    return layer;
}

} // namespace dnn

} // namespace cv

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

proposal_layer.cpp

Latest commit

History

proposal_layer.cpp

File metadata and controls