From 10956cc2d5bcb2136a14514a086d48e3f2a7b4f8 Mon Sep 17 00:00:00 2001 From: licheng-iwish <522974878@qq.com> Date: Thu, 8 Feb 2024 10:33:21 +0800 Subject: [PATCH 1/2] [Other]add ploygon ability to dbdetector --- .../fastdeploy_capi/vision/ocr/ppocr/model.cc | 2 +- fastdeploy/vision/common/processors/manager.h | 2 +- fastdeploy/vision/common/result.cc | 16 +- fastdeploy/vision/common/result.h | 4 +- fastdeploy/vision/ocr/ppocr/dbdetector.cc | 8 +- fastdeploy/vision/ocr/ppocr/dbdetector.h | 4 +- .../vision/ocr/ppocr/det_postprocessor.cc | 21 ++- .../vision/ocr/ppocr/det_postprocessor.h | 9 +- .../vision/ocr/ppocr/ocrmodel_pybind.cc | 14 +- fastdeploy/vision/ocr/ppocr/ppocr_v2.cc | 10 +- .../vision/ocr/ppocr/ppstructurev2_table.cc | 4 +- .../vision/ocr/ppocr/structurev2_table.cc | 8 +- .../vision/ocr/ppocr/structurev2_table.h | 4 +- .../ppocr/structurev2_table_postprocessor.cc | 24 +-- .../ppocr/structurev2_table_postprocessor.h | 4 +- .../ocr/ppocr/utils/get_rotate_crop_image.cc | 10 +- fastdeploy/vision/ocr/ppocr/utils/matcher.cc | 6 +- .../ocr/ppocr/utils/ocr_postprocess_op.cc | 141 ++++++++++++------ .../ocr/ppocr/utils/ocr_postprocess_op.h | 14 +- fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h | 6 +- .../vision/ocr/ppocr/utils/sorted_boxes.cc | 18 +-- fastdeploy/vision/visualize/ocr.cc | 27 ++-- .../fastdeploy/vision/ocr/ppocr/__init__.py | 27 ++++ 23 files changed, 242 insertions(+), 141 deletions(-) diff --git a/c_api/fastdeploy_capi/vision/ocr/ppocr/model.cc b/c_api/fastdeploy_capi/vision/ocr/ppocr/model.cc index ffffa5ee9a..6f205b2b71 100644 --- a/c_api/fastdeploy_capi/vision/ocr/ppocr/model.cc +++ b/c_api/fastdeploy_capi/vision/ocr/ppocr/model.cc @@ -288,7 +288,7 @@ FD_C_Bool FD_C_DBDetectorWrapperBatchPredict( FD_C_DBDetectorWrapper* fd_c_dbdetector_wrapper, FD_C_OneDimMat imgs, FD_C_ThreeDimArrayInt32* det_results) { std::vector imgs_vec; - std::vector>> det_results_out; + std::vector>>> det_results_out; for (int i = 0; i < imgs.size; i++) { imgs_vec.push_back(*(reinterpret_cast(imgs.data[i]))); } diff --git a/fastdeploy/vision/common/processors/manager.h b/fastdeploy/vision/common/processors/manager.h index 3412b16276..6e05ee38c1 100644 --- a/fastdeploy/vision/common/processors/manager.h +++ b/fastdeploy/vision/common/processors/manager.h @@ -26,7 +26,7 @@ namespace vision { */ class FASTDEPLOY_DECL ProcessorManager { public: - ~ProcessorManager(); + virtual ~ProcessorManager(); /** \brief Use CUDA to boost the performance of processors * diff --git a/fastdeploy/vision/common/result.cc b/fastdeploy/vision/common/result.cc index af3dccb92c..ba0b39dd89 100644 --- a/fastdeploy/vision/common/result.cc +++ b/fastdeploy/vision/common/result.cc @@ -695,11 +695,11 @@ std::string OCRResult::Str() { std::string out; for (int n = 0; n < boxes.size(); n++) { out = out + "det boxes: ["; - for (int i = 0; i < 4; i++) { - out = out + "[" + std::to_string(boxes[n][i * 2]) + "," + - std::to_string(boxes[n][i * 2 + 1]) + "]"; + for (int i = 0; i < boxes[n].size(); i++) { + out = out + "[" + std::to_string(boxes[n][i][0]) + "," + + std::to_string(boxes[n][i][1]) + "]"; - if (i != 3) { + if (i != boxes[n].size() - 1) { out = out + ","; } } @@ -720,8 +720,8 @@ std::string OCRResult::Str() { for (int n = 0; n < boxes.size(); n++) { out = out + "table boxes: ["; for (int i = 0; i < 4; i++) { - out = out + "[" + std::to_string(table_boxes[n][i * 2]) + "," + - std::to_string(table_boxes[n][i * 2 + 1]) + "]"; + out = out + "[" + std::to_string(table_boxes[n][i][0]) + "," + + std::to_string(table_boxes[n][i][1]) + "]"; if (i != 3) { out = out + ","; @@ -778,8 +778,8 @@ std::string OCRResult::Str() { for (int n = 0; n < table_boxes.size(); n++) { out = out + "table boxes: ["; for (int i = 0; i < 4; i++) { - out = out + "[" + std::to_string(table_boxes[n][i * 2]) + "," + - std::to_string(table_boxes[n][i * 2 + 1]) + "]"; + out = out + "[" + std::to_string(table_boxes[n][i][0]) + "," + + std::to_string(table_boxes[n][i][1]) + "]"; if (i != 3) { out = out + ","; diff --git a/fastdeploy/vision/common/result.h b/fastdeploy/vision/common/result.h index d57da323c3..cd3dd746e1 100755 --- a/fastdeploy/vision/common/result.h +++ b/fastdeploy/vision/common/result.h @@ -223,7 +223,7 @@ struct FASTDEPLOY_DECL KeyPointDetectionResult : public BaseResult { }; struct FASTDEPLOY_DECL OCRResult : public BaseResult { - std::vector> boxes; + std::vector>> boxes; std::vector text; std::vector rec_scores; @@ -231,7 +231,7 @@ struct FASTDEPLOY_DECL OCRResult : public BaseResult { std::vector cls_scores; std::vector cls_labels; - std::vector> table_boxes; + std::vector>> table_boxes; std::vector table_structure; std::string table_html; diff --git a/fastdeploy/vision/ocr/ppocr/dbdetector.cc b/fastdeploy/vision/ocr/ppocr/dbdetector.cc index 14957e3791..d48fd89aa8 100644 --- a/fastdeploy/vision/ocr/ppocr/dbdetector.cc +++ b/fastdeploy/vision/ocr/ppocr/dbdetector.cc @@ -63,8 +63,8 @@ std::unique_ptr DBDetector::Clone() const { } bool DBDetector::Predict(const cv::Mat& img, - std::vector>* boxes_result) { - std::vector>> det_results; + std::vector>>* boxes_result) { + std::vector>>> det_results; if (!BatchPredict({img}, &det_results)) { return false; } @@ -81,7 +81,7 @@ bool DBDetector::Predict(const cv::Mat& img, vision::OCRResult* ocr_result) { bool DBDetector::BatchPredict(const std::vector& images, std::vector* ocr_results) { - std::vector>> det_results; + std::vector>>> det_results; if (!BatchPredict(images, &det_results)) { return false; } @@ -94,7 +94,7 @@ bool DBDetector::BatchPredict(const std::vector& images, bool DBDetector::BatchPredict( const std::vector& images, - std::vector>>* det_results) { + std::vector>>>* det_results) { std::vector fd_images = WrapMat(images); if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { FDERROR << "Failed to preprocess input image." << std::endl; diff --git a/fastdeploy/vision/ocr/ppocr/dbdetector.h b/fastdeploy/vision/ocr/ppocr/dbdetector.h index 60c47016f3..78b6005aae 100755 --- a/fastdeploy/vision/ocr/ppocr/dbdetector.h +++ b/fastdeploy/vision/ocr/ppocr/dbdetector.h @@ -60,7 +60,7 @@ class FASTDEPLOY_DECL DBDetector : public FastDeployModel { * \return true if the prediction is successed, otherwise false. */ virtual bool Predict(const cv::Mat& img, - std::vector>* boxes_result); + std::vector>>* boxes_result); /** \brief Predict the input image and get OCR detection model result. * @@ -77,7 +77,7 @@ class FASTDEPLOY_DECL DBDetector : public FastDeployModel { * \return true if the prediction is successed, otherwise false. */ virtual bool BatchPredict(const std::vector& images, - std::vector>>* det_results); + std::vector>>>* det_results); /** \brief BatchPredict the input image and get OCR detection model result. * diff --git a/fastdeploy/vision/ocr/ppocr/det_postprocessor.cc b/fastdeploy/vision/ocr/ppocr/det_postprocessor.cc index 428142fd2e..212d431a71 100644 --- a/fastdeploy/vision/ocr/ppocr/det_postprocessor.cc +++ b/fastdeploy/vision/ocr/ppocr/det_postprocessor.cc @@ -23,7 +23,7 @@ namespace ocr { bool DBDetectorPostprocessor::SingleBatchPostprocessor( const float* out_data, int n2, int n3, const std::array& det_img_info, - std::vector>* boxes_result) { + std::vector>>* boxes_result) { int n = n2 * n3; // prepare bitmap @@ -47,22 +47,27 @@ bool DBDetectorPostprocessor::SingleBatchPostprocessor( cv::dilate(bit_map, bit_map, dila_ele); } - std::vector>> boxes; + std::vector>> boxes; - boxes = util_post_processor_.BoxesFromBitmap( + if (det_db_use_ploy_) + { + boxes = util_post_processor_.PloygonsFromBitmap( pred_map, bit_map, det_db_box_thresh_, det_db_unclip_ratio_, det_db_score_mode_); + } else { + boxes = util_post_processor_.BoxesFromBitmap( + pred_map, bit_map, det_db_box_thresh_, det_db_unclip_ratio_, + det_db_score_mode_); + } boxes = util_post_processor_.FilterTagDetRes(boxes, det_img_info); // boxes to boxes_result for (int i = 0; i < boxes.size(); i++) { - std::array new_box; + std::vector> new_box; int k = 0; for (auto& vec : boxes[i]) { - for (auto& e : vec) { - new_box[k++] = e; - } + new_box.emplace_back(vec); } boxes_result->emplace_back(new_box); } @@ -72,7 +77,7 @@ bool DBDetectorPostprocessor::SingleBatchPostprocessor( bool DBDetectorPostprocessor::Run( const std::vector& tensors, - std::vector>>* results, + std::vector>>>* results, const std::vector>& batch_det_img_info) { // DBDetector have only 1 output tensor. const FDTensor& tensor = tensors[0]; diff --git a/fastdeploy/vision/ocr/ppocr/det_postprocessor.h b/fastdeploy/vision/ocr/ppocr/det_postprocessor.h index fc0d8c84d2..9ce408049f 100644 --- a/fastdeploy/vision/ocr/ppocr/det_postprocessor.h +++ b/fastdeploy/vision/ocr/ppocr/det_postprocessor.h @@ -33,7 +33,7 @@ class FASTDEPLOY_DECL DBDetectorPostprocessor { * \return true if the postprocess successed, otherwise false */ bool Run(const std::vector& tensors, - std::vector>>* results, + std::vector>>>* results, const std::vector>& batch_det_img_info); /// Set det_db_thresh for the detection postprocess, default is 0.3 @@ -67,6 +67,10 @@ class FASTDEPLOY_DECL DBDetectorPostprocessor { /// Get use_dilation of the detection postprocess int GetUseDilation() const { return use_dilation_; } + /// Set det_db_use_ploy for the detection postprocess, default is fasle + void SetDetDBUsePloy(int det_db_use_ploy) { det_db_use_ploy_ = det_db_use_ploy; } + /// Get det_db_use_ploy of the detection postprocess + int GetDetDBUsePloy() const { return det_db_use_ploy_; } private: double det_db_thresh_ = 0.3; @@ -74,10 +78,11 @@ class FASTDEPLOY_DECL DBDetectorPostprocessor { double det_db_unclip_ratio_ = 1.5; std::string det_db_score_mode_ = "slow"; bool use_dilation_ = false; + bool det_db_use_ploy_ = false; PostProcessor util_post_processor_; bool SingleBatchPostprocessor(const float* out_data, int n2, int n3, const std::array& det_img_info, - std::vector>* boxes_result); + std::vector>>* boxes_result); }; } // namespace ocr diff --git a/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc b/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc index b468a20d2d..7b76647317 100644 --- a/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc +++ b/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc @@ -17,7 +17,7 @@ namespace fastdeploy { void BindPPOCRModel(pybind11::module& m) { - m.def("sort_boxes", [](std::vector>& boxes) { + m.def("sort_boxes", [](std::vector>>& boxes) { vision::ocr::SortBoxes(&boxes); return boxes; }); @@ -77,12 +77,14 @@ void BindPPOCRModel(pybind11::module& m) { .def_property("use_dilation", &vision::ocr::DBDetectorPostprocessor::GetUseDilation, &vision::ocr::DBDetectorPostprocessor::SetUseDilation) - + .def_property("det_db_use_ploy", + &vision::ocr::DBDetectorPostprocessor::GetDetDBUsePloy, + &vision::ocr::DBDetectorPostprocessor::SetDetDBUsePloy) .def("run", [](vision::ocr::DBDetectorPostprocessor& self, std::vector& inputs, const std::vector>& batch_det_img_info) { - std::vector>> results; + std::vector>>> results; if (!self.Run(inputs, &results, batch_det_img_info)) { throw std::runtime_error( @@ -95,7 +97,7 @@ void BindPPOCRModel(pybind11::module& m) { [](vision::ocr::DBDetectorPostprocessor& self, std::vector& input_array, const std::vector>& batch_det_img_info) { - std::vector>> results; + std::vector>>> results; std::vector inputs; PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); if (!self.Run(inputs, &results, batch_det_img_info)) { @@ -355,7 +357,7 @@ void BindPPOCRModel(pybind11::module& m) { [](vision::ocr::StructureV2TablePostprocessor& self, std::vector& inputs, const std::vector>& batch_det_img_info) { - std::vector>> boxes; + std::vector>>> boxes; std::vector> structure_list; if (!self.Run(inputs, &boxes, &structure_list, @@ -372,7 +374,7 @@ void BindPPOCRModel(pybind11::module& m) { const std::vector>& batch_det_img_info) { std::vector inputs; PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); - std::vector>> boxes; + std::vector>>> boxes; std::vector> structure_list; if (!self.Run(inputs, &boxes, &structure_list, diff --git a/fastdeploy/vision/ocr/ppocr/ppocr_v2.cc b/fastdeploy/vision/ocr/ppocr/ppocr_v2.cc index b4ae7e25f6..348cd24587 100755 --- a/fastdeploy/vision/ocr/ppocr/ppocr_v2.cc +++ b/fastdeploy/vision/ocr/ppocr/ppocr_v2.cc @@ -64,7 +64,7 @@ int PPOCRv2::GetRecBatchSize() { } bool PPOCRv2::Initialized() const { - + if (detector_ != nullptr && !detector_->Initialized()) { return false; } @@ -76,7 +76,7 @@ bool PPOCRv2::Initialized() const { if (recognizer_ != nullptr && !recognizer_->Initialized()) { return false; } - return true; + return true; } std::unique_ptr PPOCRv2::Clone() const { @@ -109,7 +109,7 @@ bool PPOCRv2::BatchPredict(const std::vector& images, std::vector* batch_result) { batch_result->clear(); batch_result->resize(images.size()); - std::vector>> batch_boxes(images.size()); + std::vector>>> batch_boxes(images.size()); if (!detector_->BatchPredict(images, &batch_boxes)) { FDERROR << "There's error while detecting image in PPOCR." << std::endl; @@ -120,11 +120,11 @@ bool PPOCRv2::BatchPredict(const std::vector& images, vision::ocr::SortBoxes(&(batch_boxes[i_batch])); (*batch_result)[i_batch].boxes = batch_boxes[i_batch]; } - + for(int i_batch = 0; i_batch < images.size(); ++i_batch) { fastdeploy::vision::OCRResult& ocr_result = (*batch_result)[i_batch]; // Get croped images by detection result - const std::vector>& boxes = ocr_result.boxes; + const std::vector>>& boxes = ocr_result.boxes; const cv::Mat& img = images[i_batch]; std::vector image_list; if (boxes.size() == 0) { diff --git a/fastdeploy/vision/ocr/ppocr/ppstructurev2_table.cc b/fastdeploy/vision/ocr/ppocr/ppstructurev2_table.cc index d0b2fbb00a..0385d2ded3 100644 --- a/fastdeploy/vision/ocr/ppocr/ppstructurev2_table.cc +++ b/fastdeploy/vision/ocr/ppocr/ppstructurev2_table.cc @@ -83,7 +83,7 @@ bool PPStructureV2Table::BatchPredict( std::vector* batch_result) { batch_result->clear(); batch_result->resize(images.size()); - std::vector>> batch_boxes(images.size()); + std::vector>>> batch_boxes(images.size()); if (!detector_->BatchPredict(images, &batch_boxes)) { FDERROR << "There's error while detecting image in PPOCR." << std::endl; @@ -98,7 +98,7 @@ bool PPStructureV2Table::BatchPredict( for (int i_batch = 0; i_batch < images.size(); ++i_batch) { fastdeploy::vision::OCRResult& ocr_result = (*batch_result)[i_batch]; // Get croped images by detection result - const std::vector>& boxes = ocr_result.boxes; + const std::vector>>& boxes = ocr_result.boxes; const cv::Mat& img = images[i_batch]; std::vector image_list; if (boxes.size() == 0) { diff --git a/fastdeploy/vision/ocr/ppocr/structurev2_table.cc b/fastdeploy/vision/ocr/ppocr/structurev2_table.cc index 2dc9d543d1..0c2e717bd3 100644 --- a/fastdeploy/vision/ocr/ppocr/structurev2_table.cc +++ b/fastdeploy/vision/ocr/ppocr/structurev2_table.cc @@ -65,9 +65,9 @@ std::unique_ptr StructureV2Table::Clone() const { } bool StructureV2Table::Predict(const cv::Mat& img, - std::vector>* boxes_result, + std::vector>>* boxes_result, std::vector* structure_result) { - std::vector>> det_results; + std::vector>>> det_results; std::vector> structure_results; if (!BatchPredict({img}, &det_results, &structure_results)) { return false; @@ -89,7 +89,7 @@ bool StructureV2Table::Predict(const cv::Mat& img, bool StructureV2Table::BatchPredict( const std::vector& images, std::vector* ocr_results) { - std::vector>> det_results; + std::vector>>> det_results; std::vector> structure_results; if (!BatchPredict(images, &det_results, &structure_results)) { return false; @@ -104,7 +104,7 @@ bool StructureV2Table::BatchPredict( bool StructureV2Table::BatchPredict( const std::vector& images, - std::vector>>* det_results, + std::vector>>>* det_results, std::vector>* structure_results) { std::vector fd_images = WrapMat(images); if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { diff --git a/fastdeploy/vision/ocr/ppocr/structurev2_table.h b/fastdeploy/vision/ocr/ppocr/structurev2_table.h index 2d8db1c5fe..eb40c3e697 100755 --- a/fastdeploy/vision/ocr/ppocr/structurev2_table.h +++ b/fastdeploy/vision/ocr/ppocr/structurev2_table.h @@ -62,7 +62,7 @@ class FASTDEPLOY_DECL StructureV2Table : public FastDeployModel { * \return true if the prediction is successed, otherwise false. */ virtual bool Predict(const cv::Mat& img, - std::vector>* boxes_result, + std::vector>>* boxes_result, std::vector* structure_result); /** \brief Predict the input image and get OCR detection model result. @@ -80,7 +80,7 @@ class FASTDEPLOY_DECL StructureV2Table : public FastDeployModel { * \return true if the prediction is successed, otherwise false. */ virtual bool BatchPredict(const std::vector& images, - std::vector>>* det_results, + std::vector>>>* det_results, std::vector>* structure_results); /** \brief BatchPredict the input image and get OCR detection model result. diff --git a/fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.cc b/fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.cc index 238da28b32..d70707b8b1 100644 --- a/fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.cc +++ b/fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.cc @@ -68,7 +68,7 @@ StructureV2TablePostprocessor::StructureV2TablePostprocessor( bool StructureV2TablePostprocessor::SingleBatchPostprocessor( const float* structure_probs, const float* bbox_preds, size_t slice_dim, size_t prob_dim, size_t box_dim, int img_width, int img_height, - std::vector>* boxes_result, + std::vector>>* boxes_result, std::vector* structure_list_result) { structure_list_result->push_back(""); structure_list_result->push_back(""); @@ -93,24 +93,24 @@ bool StructureV2TablePostprocessor::SingleBatchPostprocessor( std::string text = dict_character[structure_idx]; if (std::find(td_tokens.begin(), td_tokens.end(), text) != td_tokens.end()) { - std::array bbox; + std::vector> bbox; // box dim: en->4, ch->8 if (box_dim == 4) { - bbox[0] = bbox_preds[i * box_dim] * img_width; - bbox[1] = bbox_preds[i * box_dim + 1] * img_height; + bbox[0][0] = bbox_preds[i * box_dim] * img_width; + bbox[0][1] = bbox_preds[i * box_dim + 1] * img_height; - bbox[2] = bbox_preds[i * box_dim + 2] * img_width; - bbox[3] = bbox_preds[i * box_dim + 1] * img_height; + bbox[1][0] = bbox_preds[i * box_dim + 2] * img_width; + bbox[1][1] = bbox_preds[i * box_dim + 1] * img_height; - bbox[4] = bbox_preds[i * box_dim + 2] * img_width; - bbox[5] = bbox_preds[i * box_dim + 3] * img_height; + bbox[2][0] = bbox_preds[i * box_dim + 2] * img_width; + bbox[2][1] = bbox_preds[i * box_dim + 3] * img_height; - bbox[6] = bbox_preds[i * box_dim] * img_width; - bbox[7] = bbox_preds[i * box_dim + 3] * img_height; + bbox[3][0] = bbox_preds[i * box_dim] * img_width; + bbox[3][1] = bbox_preds[i * box_dim + 3] * img_height; } else { for (int k = 0; k < 8; k++) { float bbox_pred = bbox_preds[i * box_dim + k]; - bbox[k] = + bbox[k / 2][k % 2] = int(k % 2 == 0 ? bbox_pred * img_width : bbox_pred * img_height); } } @@ -128,7 +128,7 @@ bool StructureV2TablePostprocessor::SingleBatchPostprocessor( bool StructureV2TablePostprocessor::Run( const std::vector& tensors, - std::vector>>* bbox_batch_list, + std::vector>>>* bbox_batch_list, std::vector>* structure_batch_list, const std::vector>& batch_det_img_info) { // Table have 2 output tensors. diff --git a/fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.h b/fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.h index a617e068c9..e8d0805beb 100644 --- a/fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.h +++ b/fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.h @@ -40,7 +40,7 @@ class FASTDEPLOY_DECL StructureV2TablePostprocessor { * \return true if the postprocess successed, otherwise false */ bool Run(const std::vector& tensors, - std::vector>>* bbox_batch_list, + std::vector>>>* bbox_batch_list, std::vector>* structure_batch_list, const std::vector>& batch_det_img_info); @@ -53,7 +53,7 @@ class FASTDEPLOY_DECL StructureV2TablePostprocessor { size_t box_dim, int img_width, int img_height, - std::vector>* boxes_result, + std::vector>>* boxes_result, std::vector* structure_list_result); bool merge_no_span_structure{true}; diff --git a/fastdeploy/vision/ocr/ppocr/utils/get_rotate_crop_image.cc b/fastdeploy/vision/ocr/ppocr/utils/get_rotate_crop_image.cc index 1c1735dc41..5472120516 100644 --- a/fastdeploy/vision/ocr/ppocr/utils/get_rotate_crop_image.cc +++ b/fastdeploy/vision/ocr/ppocr/utils/get_rotate_crop_image.cc @@ -19,7 +19,7 @@ namespace vision { namespace ocr { cv::Mat GetRotateCropImage(const cv::Mat& srcimage, - const std::array& box) { + const std::vector>& box) { cv::Mat image; srcimage.copyTo(image); @@ -27,12 +27,12 @@ cv::Mat GetRotateCropImage(const cv::Mat& srcimage, for (int i = 0; i < 4; ++i) { std::vector tmp; - tmp.push_back(box[2 * i]); - tmp.push_back(box[2 * i + 1]); + tmp.push_back(box[i][0]); + tmp.push_back(box[i][1]); points.push_back(tmp); } - int x_collect[4] = {box[0], box[2], box[4], box[6]}; - int y_collect[4] = {box[1], box[3], box[5], box[7]}; + int x_collect[4] = {box[0][1], box[0][2], box[0][4], box[0][6]}; + int y_collect[4] = {box[1][1], box[1][3], box[1][5], box[1][7]}; int left = int(*std::min_element(x_collect, x_collect + 4)); int right = int(*std::max_element(x_collect, x_collect + 4)); int top = int(*std::min_element(y_collect, y_collect + 4)); diff --git a/fastdeploy/vision/ocr/ppocr/utils/matcher.cc b/fastdeploy/vision/ocr/ppocr/utils/matcher.cc index 7fa397bedf..8c53740de3 100644 --- a/fastdeploy/vision/ocr/ppocr/utils/matcher.cc +++ b/fastdeploy/vision/ocr/ppocr/utils/matcher.cc @@ -18,9 +18,9 @@ namespace fastdeploy { namespace vision { namespace ocr { -std::vector Xyxyxyxy2Xyxy(std::array &box) { - int x_collect[4] = {box[0], box[2], box[4], box[6]}; - int y_collect[4] = {box[1], box[3], box[5], box[7]}; +std::vector Xyxyxyxy2Xyxy(std::vector> &box) { + int x_collect[4] = {box[0][0], box[1][0], box[2][0], box[3][0]}; + int y_collect[4] = {box[0][1], box[1][1], box[2][1], box[3][1]}; int left = int(*std::min_element(x_collect, x_collect + 4)); int right = int(*std::max_element(x_collect, x_collect + 4)); int top = int(*std::min_element(y_collect, y_collect + 4)); diff --git a/fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.cc b/fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.cc index 7a8f387e23..74bb3ce991 100755 --- a/fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.cc +++ b/fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.cc @@ -22,7 +22,7 @@ namespace ocr { void PostProcessor::GetContourArea(const std::vector> &box, float unclip_ratio, float &distance) { - int pts_num = 4; + int pts_num = box.size(); float area = 0.0f; float dist = 0.0f; for (int i = 0; i < pts_num; i++) { @@ -38,7 +38,7 @@ void PostProcessor::GetContourArea(const std::vector> &box, distance = area * unclip_ratio / dist; } -cv::RotatedRect PostProcessor::UnClip(std::vector> box, +std::vector> PostProcessor::UnClip(std::vector> box, const float &unclip_ratio) { float distance = 1.0; @@ -46,28 +46,25 @@ cv::RotatedRect PostProcessor::UnClip(std::vector> box, ClipperLib::ClipperOffset offset; ClipperLib::Path p; - p << ClipperLib::IntPoint(int(box[0][0]), int(box[0][1])) - << ClipperLib::IntPoint(int(box[1][0]), int(box[1][1])) - << ClipperLib::IntPoint(int(box[2][0]), int(box[2][1])) - << ClipperLib::IntPoint(int(box[3][0]), int(box[3][1])); + + for (int i = 0; i < box.size(); i++) { + p << ClipperLib::IntPoint(int(box[i][0]), int(box[i][1])); + } offset.AddPath(p, ClipperLib::jtRound, ClipperLib::etClosedPolygon); ClipperLib::Paths soln; offset.Execute(soln, distance); - std::vector points; + std::vector> paths; for (int j = 0; j < soln.size(); j++) { - for (int i = 0; i < soln[soln.size() - 1].size(); i++) { - points.emplace_back(soln[j][i].X, soln[j][i].Y); + std::vector path; + for (int i = 0; i < soln[j].size(); i++) { + path.emplace_back(soln[j][i].X, soln[j][i].Y); } + paths.push_back(path); } - cv::RotatedRect res; - if (points.size() <= 0) { - res = cv::RotatedRect(cv::Point2f(0, 0), cv::Size2f(1, 1), 0); - } else { - res = cv::minAreaRect(points); - } - return res; + + return paths; } float **PostProcessor::Mat2Vec(cv::Mat mat) { @@ -123,8 +120,9 @@ bool PostProcessor::XsortInt(std::vector a, std::vector b) { return false; } -std::vector> PostProcessor::GetMiniBoxes(cv::RotatedRect box, +std::vector> PostProcessor::GetMiniBoxes(std::vector contour, float &ssid) { + cv::RotatedRect box = cv::minAreaRect(contour); ssid = std::max(box.size.width, box.size.height); cv::Mat points; @@ -242,7 +240,76 @@ float PostProcessor::BoxScoreFast(std::vector> box_array, return score; } -std::vector>> PostProcessor::BoxesFromBitmap( +std::vector>> PostProcessor::PloygonsFromBitmap( + const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh, + const float &det_db_unclip_ratio, const std::string &det_db_score_mode) { + const int min_size = 3; + const int max_candidates = 1000; + + int width = bitmap.cols; + int height = bitmap.rows; + + std::vector> contours; + std::vector hierarchy; + + cv::findContours(bitmap, contours, hierarchy, cv::RETR_LIST, + cv::CHAIN_APPROX_SIMPLE); + + int num_contours = + contours.size() >= max_candidates ? max_candidates : contours.size(); + + std::vector>> boxes; + + for (int _i = 0; _i < num_contours; _i++) { + std::vector contour = contours[_i]; + double epsilon = 0.002 * cv::arcLength(contour, true); + std::vector approx; + cv::approxPolyDP(contour, approx, epsilon, true); + if (approx.size() < 4) continue; + + float score = PolygonScoreAcc(contours[_i], pred); + if (score < box_thresh) continue; + + std::vector> box; + for(int i=0; i < approx.size(); i++) { + box.push_back({float(approx[i].x), float(approx[i].y)}); + } + + // start for unclip + std::vector> paths = UnClip(box, det_db_unclip_ratio); + if (paths.size() > 1) continue; + + std::vector path = paths[0]; + + // end for unclip + + float ssid; + GetMiniBoxes(path, ssid); + + if (ssid < min_size + 2) continue; + + int dest_width = pred.cols; + int dest_height = pred.rows; + std::vector> intcliparray; + + for (int num_pt = 0; num_pt < path.size(); num_pt++) { + std::array a{ + int(clampf( + roundf(path[num_pt].x / float(width) * float(dest_width)), + 0, float(dest_width))), + int(clampf( + roundf(path[num_pt].y / float(height) * float(dest_height)), + 0, float(dest_height)))}; + intcliparray.push_back(a); + } + + boxes.push_back(intcliparray); + + } // end for + return boxes; +} + +std::vector>> PostProcessor::BoxesFromBitmap( const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh, const float &det_db_unclip_ratio, const std::string &det_db_score_mode) { const int min_size = 3; @@ -260,15 +327,14 @@ std::vector>> PostProcessor::BoxesFromBitmap( int num_contours = contours.size() >= max_candidates ? max_candidates : contours.size(); - std::vector>> boxes; + std::vector>> boxes; for (int _i = 0; _i < num_contours; _i++) { if (contours[_i].size() <= 2) { continue; } float ssid; - cv::RotatedRect box = cv::minAreaRect(contours[_i]); - auto array = GetMiniBoxes(box, ssid); + auto array = GetMiniBoxes(contours[_i], ssid); auto box_for_unclip = array; // end get_mini_box @@ -286,23 +352,19 @@ std::vector>> PostProcessor::BoxesFromBitmap( if (score < box_thresh) continue; // start for unclip - cv::RotatedRect points = UnClip(box_for_unclip, det_db_unclip_ratio); - if (points.size.height < 1.001 && points.size.width < 1.001) { - continue; - } + std::vector> paths = UnClip(box_for_unclip, det_db_unclip_ratio); // end for unclip - cv::RotatedRect clipbox = points; - auto cliparray = GetMiniBoxes(clipbox, ssid); + auto cliparray = GetMiniBoxes(paths[0], ssid); if (ssid < min_size + 2) continue; int dest_width = pred.cols; int dest_height = pred.rows; - std::vector> intcliparray; + std::vector> intcliparray; for (int num_pt = 0; num_pt < 4; num_pt++) { - std::vector a{ + std::array a{ int(clampf( roundf(cliparray[num_pt][0] / float(width) * float(dest_width)), 0, float(dest_width))), @@ -317,18 +379,18 @@ std::vector>> PostProcessor::BoxesFromBitmap( return boxes; } -std::vector>> PostProcessor::FilterTagDetRes( - std::vector>> boxes, +std::vector>> PostProcessor::FilterTagDetRes( + std::vector>> boxes, const std::array& det_img_info) { int oriimg_w = det_img_info[0]; int oriimg_h = det_img_info[1]; float ratio_w = float(det_img_info[2])/float(oriimg_w); float ratio_h = float(det_img_info[3])/float(oriimg_h); - std::vector>> root_points; for (int n = 0; n < boxes.size(); n++) { - boxes[n] = OrderPointsClockwise(boxes[n]); - for (int m = 0; m < boxes[0].size(); m++) { + // boxes[n] = OrderPointsClockwise(boxes[n]); + + for (int m = 0; m < boxes[n].size(); m++) { boxes[n][m][0] /= ratio_w; boxes[n][m][1] /= ratio_h; @@ -337,16 +399,7 @@ std::vector>> PostProcessor::FilterTagDetRes( } } - for (int n = 0; n < boxes.size(); n++) { - int rect_width, rect_height; - rect_width = int(sqrt(pow(boxes[n][0][0] - boxes[n][1][0], 2) + - pow(boxes[n][0][1] - boxes[n][1][1], 2))); - rect_height = int(sqrt(pow(boxes[n][0][0] - boxes[n][3][0], 2) + - pow(boxes[n][0][1] - boxes[n][3][1], 2))); - if (rect_width <= 4 || rect_height <= 4) continue; - root_points.push_back(boxes[n]); - } - return root_points; + return boxes; } } // namespace ocr diff --git a/fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.h b/fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.h index 778f618edb..bc7fc0c8c2 100644 --- a/fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.h +++ b/fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.h @@ -37,7 +37,7 @@ class PostProcessor { void GetContourArea(const std::vector> &box, float unclip_ratio, float &distance); - cv::RotatedRect UnClip(std::vector> box, + std::vector> UnClip(std::vector> box, const float &unclip_ratio); float **Mat2Vec(cv::Mat mat); @@ -45,18 +45,22 @@ class PostProcessor { std::vector> OrderPointsClockwise( std::vector> pts); - std::vector> GetMiniBoxes(cv::RotatedRect box, + std::vector> GetMiniBoxes(std::vector box, float &ssid); float BoxScoreFast(std::vector> box_array, cv::Mat pred); float PolygonScoreAcc(std::vector contour, cv::Mat pred); - std::vector>> BoxesFromBitmap( + std::vector>> BoxesFromBitmap( const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh, const float &det_db_unclip_ratio, const std::string &det_db_score_mode); - std::vector>> FilterTagDetRes( - std::vector>> boxes, + std::vector>> PloygonsFromBitmap( + const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh, + const float &det_db_unclip_ratio, const std::string &det_db_score_mode); + + std::vector>> FilterTagDetRes( + std::vector>> boxes, const std::array& det_img_info); private: diff --git a/fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h b/fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h index fd6b277d53..ad5349fd53 100755 --- a/fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h +++ b/fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h @@ -28,15 +28,15 @@ namespace vision { namespace ocr { FASTDEPLOY_DECL cv::Mat GetRotateCropImage(const cv::Mat& srcimage, - const std::array& box); + const std::vector>& box); -FASTDEPLOY_DECL void SortBoxes(std::vector>* boxes); +FASTDEPLOY_DECL void SortBoxes(std::vector>>* boxes); FASTDEPLOY_DECL std::vector ArgSort(const std::vector &array); FASTDEPLOY_DECL std::vector Softmax(std::vector &src); -FASTDEPLOY_DECL std::vector Xyxyxyxy2Xyxy(std::array &box); +FASTDEPLOY_DECL std::vector Xyxyxyxy2Xyxy(std::vector> &box); FASTDEPLOY_DECL float Dis(std::vector &box1, std::vector &box2); diff --git a/fastdeploy/vision/ocr/ppocr/utils/sorted_boxes.cc b/fastdeploy/vision/ocr/ppocr/utils/sorted_boxes.cc index 053215e4a6..0c98870e7d 100755 --- a/fastdeploy/vision/ocr/ppocr/utils/sorted_boxes.cc +++ b/fastdeploy/vision/ocr/ppocr/utils/sorted_boxes.cc @@ -18,28 +18,28 @@ namespace fastdeploy { namespace vision { namespace ocr { -bool CompareBox(const std::array& result1, - const std::array& result2) { - if (result1[1] < result2[1]) { +bool CompareBox(const std::vector>& result1, + const std::vector>& result2) { + if (result1[0][1] < result2[0][1]) { return true; - } else if (result1[1] == result2[1]) { - return result1[0] < result2[0]; + } else if (result1[0][1] == result2[0][1]) { + return result1[0][0] < result2[0][0]; } else { return false; } } -void SortBoxes(std::vector>* boxes) { +void SortBoxes(std::vector>>* boxes) { std::sort(boxes->begin(), boxes->end(), CompareBox); if (boxes->size() == 0) { return; } - + for (int i = 0; i < boxes->size() - 1; i++) { for (int j = i; j >=0 ; j--){ - if (std::abs((*boxes)[j + 1][1] - (*boxes)[j][1]) < 10 && - ((*boxes)[j + 1][0] < (*boxes)[j][0])) { + if (std::abs((*boxes)[j + 1][0][1] - (*boxes)[j][0][1]) < 10 && + ((*boxes)[j + 1][0][0] < (*boxes)[j][0][0])) { std::swap((*boxes)[i], (*boxes)[i + 1]); } } diff --git a/fastdeploy/vision/visualize/ocr.cc b/fastdeploy/vision/visualize/ocr.cc index e35be91351..6b185d606f 100644 --- a/fastdeploy/vision/visualize/ocr.cc +++ b/fastdeploy/vision/visualize/ocr.cc @@ -20,7 +20,7 @@ namespace vision { cv::Mat VisOcr(const cv::Mat& im, const OCRResult& ocr_result, const float score_threshold) { auto vis_im = im.clone(); - bool have_score = + bool have_score = (ocr_result.boxes.size() == ocr_result.rec_scores.size()); for (int n = 0; n < ocr_result.boxes.size(); n++) { @@ -29,15 +29,18 @@ cv::Mat VisOcr(const cv::Mat& im, const OCRResult& ocr_result, continue; } } - cv::Point rook_points[4]; - for (int m = 0; m < 4; m++) { - rook_points[m] = cv::Point(int(ocr_result.boxes[n][m * 2]), - int(ocr_result.boxes[n][m * 2 + 1])); + int point_num = ocr_result.boxes[n].size(); + + cv::Point rook_points[point_num]; + + for (int m = 0; m < point_num; m++) { + rook_points[m] = cv::Point(int(ocr_result.boxes[n][m][0]), + int(ocr_result.boxes[n][m][1])); } const cv::Point* ppt[1] = {rook_points}; - int npt[] = {4}; + int npt[] = {point_num}; cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0); } @@ -52,15 +55,17 @@ cv::Mat Visualize::VisOcr(const cv::Mat& im, const OCRResult& ocr_result) { auto vis_im = im.clone(); for (int n = 0; n < ocr_result.boxes.size(); n++) { - cv::Point rook_points[4]; - for (int m = 0; m < 4; m++) { - rook_points[m] = cv::Point(int(ocr_result.boxes[n][m * 2]), - int(ocr_result.boxes[n][m * 2 + 1])); + int point_num = ocr_result.boxes[n].size(); + cv::Point rook_points[point_num]; + + for (int m = 0; m < point_num; m++) { + rook_points[m] = cv::Point(int(ocr_result.boxes[n][m][0]), + int(ocr_result.boxes[n][m][1])); } const cv::Point* ppt[1] = {rook_points}; - int npt[] = {4}; + int npt[] = {point_num}; cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0); } diff --git a/python/fastdeploy/vision/ocr/ppocr/__init__.py b/python/fastdeploy/vision/ocr/ppocr/__init__.py index 7cec600399..50f2263756 100755 --- a/python/fastdeploy/vision/ocr/ppocr/__init__.py +++ b/python/fastdeploy/vision/ocr/ppocr/__init__.py @@ -191,6 +191,23 @@ def use_dilation(self, value): bool), "The value to set `use_dilation` must be type of bool." self._postprocessor.use_dilation = value + @property + def det_db_use_ploy(self): + """ + Return the det_db_use_ploy of DBDetectorPostprocessor + """ + return self._postprocessor.det_db_use_ploy + + @det_db_use_ploy.setter + def det_db_use_ploy(self, value): + """Set the det_db_use_ploy for DBDetectorPostprocessor + + :param: value : the det_db_use_ploy value + """ + assert isinstance( + value, + bool), "The value to set `det_db_use_ploy` must be type of bool." + self._postprocessor.det_db_use_ploy = value class DBDetector(FastDeployModel): def __init__(self, @@ -322,6 +339,16 @@ def use_dilation(self, value): bool), "The value to set `use_dilation` must be type of bool." self._model.postprocessor.use_dilation = value + @property + def det_db_use_ploy(self): + return self._model.postprocessor.det_db_use_ploy + + @det_db_use_ploy.setter + def det_db_use_ploy(self, value): + assert isinstance( + value, + bool), "The value to set `det_db_use_ploy` must be type of bool." + self._model.postprocessor.det_db_use_ploy = value class ClassifierPreprocessor(ProcessorManager): def __init__(self): From 26c8ac0dcd6959d8084e63d120b1479800cb5df5 Mon Sep 17 00:00:00 2001 From: licheng-iwish <522974878@qq.com> Date: Thu, 8 Feb 2024 10:38:11 +0800 Subject: [PATCH 2/2] [Bug fix]fix rec_preprocessor property set error --- fastdeploy/vision/ocr/ppocr/rec_preprocessor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h index ca630bcd28..8e56da2c33 100644 --- a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h +++ b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h @@ -75,9 +75,9 @@ class FASTDEPLOY_DECL RecognizerPreprocessor : public ProcessorManager { std::vector GetRecImageShape() { return rec_image_shape_; } /// This function will disable normalize in preprocessing step. - void DisableNormalize() { disable_permute_ = true; } + void DisableNormalize() { disable_normalize_ = true; } /// This function will disable hwc2chw in preprocessing step. - void DisablePermute() { disable_normalize_ = true; } + void DisablePermute() { disable_permute_ = true; } private: void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio,