Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@
#include "CommonFramework/ImageTypes/ImageRGB32.h"
#include "CommonFramework/ImageTools/ImageBoxes.h"
#include "CommonFramework/Tools/GlobalThreadPools.h"
#include "CommonFramework/GlobalSettingsPanel.h"
#include "CommonTools/Images/ImageManip.h"
#include "CommonTools/Images/ImageFilter.h"
#include "CommonTools/Images/BinaryImage_FilterRgb32.h"
#include "OCR_RawOCR.h"
#include "OCR_RawPaddleOCR.h"
#include "OCR_NumberReader.h"

#include <iostream>
Expand Down Expand Up @@ -81,7 +83,14 @@ std::string run_number_normalization(const std::string& input){


int read_number(Logger& logger, const ImageViewRGB32& image, Language language){
std::string ocr_text = OCR::ocr_read(language, image, OCR::PageSegMode::SINGLE_LINE);
bool use_paddle_ocr = false; // GlobalSettings::instance().USE_PADDLE_OCR;
std::string ocr_text;
if (use_paddle_ocr){
ocr_text = OCR::paddle_ocr_read(language, image);
}else{
ocr_text = OCR::ocr_read(language, image, OCR::PageSegMode::SINGLE_LINE);
}

std::string normalized = run_number_normalization(ocr_text);

std::string str;
Expand Down Expand Up @@ -167,8 +176,13 @@ std::string read_number_waterfill_no_normalization(
}

ImageRGB32 padded = pad_image(cropped, 1 * cropped.width(), 0xffffffff);
std::string ocr = OCR::ocr_read(Language::English, padded, OCR::PageSegMode::SINGLE_CHAR);

bool use_paddle_ocr = false; // GlobalSettings::instance().USE_PADDLE_OCR;
std::string ocr;
if (use_paddle_ocr){
ocr = OCR::paddle_ocr_read(Language::English, padded);
}else{
ocr = OCR::ocr_read(Language::English, padded, OCR::PageSegMode::SINGLE_CHAR);
}
// padded.save("zztest-cropped" + std::to_string(c) + "-" + std::to_string(i++) + ".png");
// std::cout << ocr[0] << std::endl;
if (!ocr.empty()){
Expand Down
137 changes: 137 additions & 0 deletions SerialPrograms/Source/CommonTools/OCR/OCR_RawPaddleOCR.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/* Threadpools for PaddleOCR
*
* From: https://github.com/PokemonAutomation/
*
*/

#include <memory>
#include <deque>
#include <QFile>
#include <QDir>
#include "ML/Inference/ML_PaddleOCRPipeline.h"
#include "Common/Cpp/Exceptions.h"
#include "Common/Cpp/Concurrency/SpinLock.h"
#include "CommonFramework/Globals.h"
#include "CommonFramework/Logging/Logger.h"
#include "CommonFramework/ImageTypes/ImageViewRGB32.h"
#include "OCR_RawOCR.h"

#include <iostream>
using std::cout;
using std::endl;

namespace PokemonAutomation{
namespace OCR{



// Groups of languages that share one PaddleOCR instance.
// Used as the key of the instance cache (PaddleOcrGlobals::ocr_pool);
// language_to_languagegroup() maps each Language to one of these.
enum class LanguageGroup {
// Placeholder value; language_to_languagegroup() never returns it.
None,
// Language::English
English,
// Language::Japanese, Language::ChineseSimplified, Language::ChineseTraditional
ChineseJapanese,
// Language::Spanish, Language::French, Language::German, Language::Italian
Latin,
// Language::Korean
Korean,
};

// Maps a Language to the LanguageGroup whose shared PaddleOCR instance serves it.
//
// Throws InternalProgramError when `language` is Language::None or when it
// has no group assigned in the switch below.
LanguageGroup language_to_languagegroup(Language language){
    switch (language){
    case Language::English:
        return LanguageGroup::English;

    // Western European languages all map to the Latin group.
    case Language::Spanish:
    case Language::French:
    case Language::German:
    case Language::Italian:
        return LanguageGroup::Latin;

    // Japanese and both Chinese variants map to one combined group.
    case Language::Japanese:
    case Language::ChineseSimplified:
    case Language::ChineseTraditional:
        return LanguageGroup::ChineseJapanese;

    case Language::Korean:
        return LanguageGroup::Korean;

    case Language::None:
        throw InternalProgramError(nullptr, PA_CURRENT_FUNCTION, "Attempted to call OCR without a language.");

    default:
        throw InternalProgramError(nullptr, PA_CURRENT_FUNCTION, "Attempted to call OCR on an unknown language.");
    }
}


// Process-wide holder of the shared PaddleOCR pipelines.
// The map is keyed by LanguageGroup, so languages in the same group share
// one pipeline. The functions in this file take `ocr_pool_lock` before
// touching `ocr_pool`.
struct PaddleOcrGlobals{
    // Guards lookups, insertions and clearing of `ocr_pool`.
    SpinLock ocr_pool_lock;

    // At most one pipeline per language group.
    std::map<LanguageGroup, ML::PaddleOCRPipeline> ocr_pool;

    // Accessor for the single shared object (a function-local static).
    static PaddleOcrGlobals& instance(){
        static PaddleOcrGlobals singleton;
        return singleton;
    }
};

// Returns the shared PaddleOCR pipeline serving `language`, creating it on
// first use. Pipelines are cached per LanguageGroup.
//
// Throws InternalProgramError if `language` is Language::None (or, via
// language_to_languagegroup(), if the language has no group).
//
// The returned reference points into the global cache; it is destroyed by
// clear_paddle_ocr_cache().
//
// NOTE(review): a pipeline is constructed from the first `language` seen for
// its group and is then reused for every other language of that group.
// Confirm that PaddleOCRPipeline only depends on the group.
ML::PaddleOCRPipeline& ensure_paddle_ocr_instance(Language language){
    if (language == Language::None){
        throw InternalProgramError(nullptr, PA_CURRENT_FUNCTION, "Attempted to call OCR without a language.");
    }

    const LanguageGroup group = language_to_languagegroup(language);
    PaddleOcrGlobals& state = PaddleOcrGlobals::instance();

    // The lock is held for the whole get-or-create step, including the
    // construction of a new pipeline. That isn't ideal if OCR must start on
    // different language groups at the same time, but in practice this
    // doesn't really happen in our code base.
    WriteSpinLock guard(state.ocr_pool_lock, "ensure_paddle_ocr_instances()");

    // try_emplace() builds a pipeline only when the group has no entry yet;
    // otherwise it leaves the existing one untouched and returns it.
    return state.ocr_pool.try_emplace(group, language).first->second;
}


// Runs PaddleOCR on `image` for the given `language` and returns the
// recognized text.
//
// The pipeline is obtained through ensure_paddle_ocr_instance(), so it is
// created lazily on first use and shared by later calls; that function's
// exceptions (e.g. for Language::None) propagate to the caller.
std::string paddle_ocr_read(Language language, const ImageViewRGB32& image){
    // Debugging aid: dump every input image.
//    static size_t c = 0;
//    image.save("ocr-" + std::to_string(c++) + ".png");

    // Per the original author's note, PaddleOCR with Onnx is threadsafe, so
    // the single shared instance can be called from multiple threads
    // without extra locking here.
    return ensure_paddle_ocr_instance(language).recognize(image);
}




void clear_paddle_ocr_cache(){
PaddleOcrGlobals& globals = PaddleOcrGlobals::instance();
std::map<LanguageGroup, ML::PaddleOCRPipeline>& ocr_pool = globals.ocr_pool;
WriteSpinLock lg(globals.ocr_pool_lock, "clear_paddle_ocr_cache()");
// std::lock_guard<std::mutex> lg(globals.ocr_pool_lock);
ocr_pool.clear(); // Destroys all pools and their instances.
}




}
}




49 changes: 49 additions & 0 deletions SerialPrograms/Source/CommonTools/OCR/OCR_RawPaddleOCR.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/* Threadpools for PaddleOCR
*
* From: https://github.com/PokemonAutomation/
*
*/

#ifndef PokemonAutomation_CommonTools_OCR_RawPaddleOCR_H
#define PokemonAutomation_CommonTools_OCR_RawPaddleOCR_H

#include <string>
#include "CommonFramework/Language.h"

namespace PokemonAutomation{
class ImageViewRGB32;
namespace ML {
class PaddleOCRPipeline;
}
namespace OCR{


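// Pre-warm the PaddleOCR instance for a language: ensures that one instance exists,
// creating it if necessary. Avoids lazy initialization delays during runtime. Thread-safe.
// Returns a reference to the PaddleOCR instance for the given language.
// The reference becomes invalid once `clear_paddle_ocr_cache()` is called.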
ML::PaddleOCRPipeline& ensure_paddle_ocr_instance(Language language);

// OCR the image in the specified language.
// Main OCR entry point. Performs OCR on the image using the specified language.
// Thread-safe: internally uses one shared PaddleOCR instance per language, which is
// able to accept multiple concurrent calls without delay or queueing.
// The instance is created on first use. You can call
// `ensure_paddle_ocr_instance()` beforehand to pre-warm the instance for a language.
//
std::string paddle_ocr_read(
Language language,
const ImageViewRGB32& image
);



// Clear all PaddleOCR instances for all languages. Used for cleanup or
// forcing re-initialization.
// This is not safe to call while any OCR is still running!
void clear_paddle_ocr_cache();



}
}
#endif
12 changes: 3 additions & 9 deletions SerialPrograms/Source/CommonTools/OCR/OCR_Routines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include "CommonFramework/Tools/GlobalThreadPools.h"
#include "CommonFramework/GlobalSettingsPanel.h"
#include "CommonTools/Images/ImageFilter.h"
#include "ML/Inference/ML_PaddleOCRPipeline.h"
#include "OCR_RawPaddleOCR.h"
#include "OCR_RawOCR.h"
#include "OCR_DictionaryMatcher.h"
#include "OCR_Routines.h"
Expand Down Expand Up @@ -45,11 +45,6 @@ StringMatchResult multifiltered_OCR(
double pixels_inv = 1. / (image.width() * image.height());

bool use_paddle_ocr = GlobalSettings::instance().USE_PADDLE_OCR;
std::unique_ptr<ML::PaddleOCRPipeline> paddle_ocr;
if (use_paddle_ocr) {
// Initialize only if the setting is enabled
paddle_ocr = std::make_unique<ML::PaddleOCRPipeline>(language);
}

// Run all the filters.
SpinLock lock;
Expand All @@ -60,7 +55,7 @@ StringMatchResult multifiltered_OCR(

std::string text;
if (use_paddle_ocr) {
text = paddle_ocr->recognize(filtered.first);
text = paddle_ocr_read(language, filtered.first);
}else{
text = ocr_read(language, filtered.first, psm);
}
Expand Down Expand Up @@ -117,8 +112,7 @@ StringMatchResult dictionary_OCR(
// Run all the filters.
std::string text;
if (GlobalSettings::instance().USE_PADDLE_OCR){
ML::PaddleOCRPipeline paddle_ocr(language);
text = paddle_ocr.recognize(image);
text = paddle_ocr_read(language, image);
}else{
text = ocr_read(language, image, psm);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@
#include "Common/PABotBase2/PABotbase2_ReliableStreamConnection.h"
#include "Common/Cpp/StreamConnections/MockDevice.h"
#include "ML/Inference/ML_PaddleOCRPipeline.h"
#include "CommonTools/OCR/OCR_RawPaddleOCR.h"



Expand Down Expand Up @@ -771,12 +772,13 @@ void TestProgram::program(MultiSwitchProgramEnvironment& env, CancellableScope&
// ImageRGB32 image1(IMAGE_PATH);
auto image1 = feed.snapshot();
ImageViewRGB32 cropped = extract_box_reference(image1, ImageFloatBox{BOX.x(), BOX.y(), BOX.width(), BOX.height()});
ML::PaddleOCRPipeline paddle_ocr(LANGUAGE);

// auto snapshot = feed.snapshot();
std::string text = paddle_ocr.recognize(cropped);
std::string text = OCR::paddle_ocr_read(LANGUAGE, cropped);
cout << text << endl;



#endif

#if 0
Expand Down
2 changes: 2 additions & 0 deletions SerialPrograms/cmake/SourceFiles.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,8 @@ file(GLOB LIBRARY_SOURCES
Source/CommonTools/OCR/OCR_LargeDictionaryMatcher.h
Source/CommonTools/OCR/OCR_NumberReader.cpp
Source/CommonTools/OCR/OCR_NumberReader.h
Source/CommonTools/OCR/OCR_RawPaddleOCR.cpp
Source/CommonTools/OCR/OCR_RawPaddleOCR.h
Source/CommonTools/OCR/OCR_RawOCR.cpp
Source/CommonTools/OCR/OCR_RawOCR.h
Source/CommonTools/OCR/OCR_Routines.cpp
Expand Down