diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..ed8cf657a4be92500f0c817d8e7356f60f6db5bf --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) [2020] [Max Drexler] + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..ece011cb1ad9c06d5f73842b514e45261703548e 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,62 @@ +# Azure OCR Client + +A Python client to make OCR requests using Microsoft Azure. + +## Requirements + +A Microsoft Azure account with a [computer vision](https://azure.microsoft.com/en-us/products/cognitive-services/vision-services) resource. + +A .env file with two keys: VISION_KEY and VISION_ENDPOINT. + +python >= 3.7 + +## Installation + +Install using pip. 
+ +``` +pip install -U git+https://gitlab.ssec.wisc.edu/mdrexler/azureocr +``` + +## Usage + +Create a client: + +``` +from azureocr import AzureClientOCR + +client = AzureClientOCR('path/to/env/file') +``` + +Scan images (file paths, web URLs, or images in memory): + +``` +client.scan('/path/to/image.png') +client.scan('https://example.com/image.png') + +import cv2 as cv +img = cv.imread('/path/to/image.png') +client.scan(img) + +``` + +Specify output: + +``` +# just the text (default) + +client.scan('/path/to/image.png', parse_response=True) + +# Microsoft's return type +# https://learn.microsoft.com/en-us/python/api/azure-ai-vision/azure.ai.vision.image_analysis_data.detectedtext?view=azure-python-preview +client.scan('/path/to/image.png', parse_response=False) + +``` + +## Author + +Created by [Max Drexler](mailto:mndrexler@wisc.edu) + +### License + +MIT License. See LICENSE for more information. \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..64c577afec08b9462216e23c7c459c64996095b5 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +requests==2.29.0 +validators==0.20.0 +python-dotenv==1.0.0 +azure-ai-vision==0.11.1b1 \ No newline at end of file diff --git a/src/azureocr/__init__.py b/src/azureocr/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..04e025ac6c40ab64c33535173e570b1015092f64 --- /dev/null +++ b/src/azureocr/__init__.py @@ -0,0 +1,113 @@ +""" +azureocr.__init__ +~~~~~~~~~~~~~~~~~ + +A Python client for Microsoft Azure OCR +""" + +__author__ = 'Max Drexler' +__email__ = 'mndrexler@wisc.edu' + + +import atexit +import os +from tempfile import NamedTemporaryFile +from typing import Any, Optional + +import azure.ai.vision as sdk +import requests +import validators +from azure.ai.vision.image_analysis_client import DetectedTextLine +from dotenv import load_dotenv + + +class OCRError(Exception): + """Errors that occur while OCR-ing""" 
class AzureClientOCR:
    """Client that runs Azure image analysis text extraction on images.

    The service endpoint and key are read from the ``VISION_ENDPOINT`` and
    ``VISION_KEY`` environment variables after ``load_dotenv`` has run.
    """

    def __init__(self, env_path: Optional[str] = None) -> None:
        """Load the environment and prepare service and analysis options.

        Args:
            env_path: Path handed to ``load_dotenv`` as ``dotenv_path``;
                ``None`` is passed through unchanged.

        Raises:
            AttributeError: If ``VISION_ENDPOINT`` or ``VISION_KEY`` is
                missing or empty.
        """
        load_dotenv(dotenv_path=env_path)
        self._service_opts = get_service_options()
        self._analyze_opts = sdk.ImageAnalysisOptions()
        self._analyze_opts.features = sdk.ImageAnalysisFeature.TEXT
        self._analyze_opts.language = "en"

    @property
    def language(self) -> str:
        """Language code stored on the analysis options ("en" initially)."""
        return self._analyze_opts.language

    @language.setter
    def language(self, new_lang: str) -> None:
        self._analyze_opts.language = new_lang

    # The return annotation is a string on purpose: ``list[...] | None`` is
    # evaluated when the method is defined and raises TypeError before
    # Python 3.10.
    def scan(
        self, img, parse_response=True
    ) -> "str | list[DetectedTextLine] | None":
        """Run text extraction on ``img``.

        Args:
            img: Anything accepted by ``get_source``: an ``sdk.VisionSource``,
                a URL or file path string, or an in-memory image.
            parse_response: When true (default) return the text of all
                detected lines joined by newlines; when false return the
                detected line objects themselves.

        Returns:
            A string, the list ``result.text.lines``, or ``None`` when the
            analysis result carries no text.

        Raises:
            OCRError: If the analysis result reports an error.
        """
        analyzer = sdk.ImageAnalyzer(
            self._service_opts, get_source(img), self._analyze_opts
        )
        result = analyzer.analyze()
        if result.reason == sdk.ImageAnalysisResultReason.ERROR:
            details = sdk.ImageAnalysisErrorDetails.from_result(result)
            raise OCRError(f"[{details.reason}]: {details.message}")
        if result.text is None:
            return None
        if not parse_response:
            return result.text.lines
        return "\n".join(line.content for line in result.text.lines)


def get_service_options() -> "sdk.VisionServiceOptions":
    """Build the service options from the environment.

    Raises:
        AttributeError: If ``VISION_ENDPOINT`` or ``VISION_KEY`` is missing
            or empty.
    """
    endpoint = os.getenv("VISION_ENDPOINT")
    key = os.getenv("VISION_KEY")
    # An empty value is as unusable as a missing one, so reject both here
    # rather than handing them to the SDK.
    if not endpoint or not key:
        raise AttributeError(
            "No 'VISION_ENDPOINT' and/or 'VISION_KEY' variables in env file!"
        )
    return sdk.VisionServiceOptions(endpoint, key)


def source_from_url(img_url: str) -> "sdk.VisionSource":
    """Return a vision source for ``img_url`` after checking it responds.

    Raises:
        ConnectionError: If the HEAD request does not end in status 200.
    """
    # requests.head() does not follow redirects unless asked to, which would
    # reject every redirecting URL with a 3xx status.
    status = requests.head(
        img_url, timeout=5, allow_redirects=True
    ).status_code
    if status != 200:
        raise ConnectionError(
            f"Could not get url '{img_url}' error code {status}"
        )
    return sdk.VisionSource(url=img_url)


def source_from_file(img_path: str) -> "sdk.VisionSource":
    """Return a vision source for the existing file ``img_path``.

    Raises:
        FileNotFoundError: If ``img_path`` is not an existing file.
    """
    if not os.path.isfile(img_path):
        raise FileNotFoundError(f"File '{img_path}' doesn't exist!")
    return sdk.VisionSource(filename=img_path)


def source_from_image(img: Any) -> "sdk.VisionSource":
    """Write an in-memory image to a temporary PNG and wrap it as a source.

    The temporary file is removed at interpreter exit.

    Args:
        img: Image object that ``cv2.imwrite`` can encode.

    Raises:
        OCRError: If OpenCV reports that the image was not written.
    """
    import cv2 as cv

    # Only the reserved path is needed. Closing the handle immediately avoids
    # leaking it and lets OpenCV reopen the path by name, which is not
    # possible on Windows while the temporary file is still open.
    with NamedTemporaryFile(delete=False, suffix=".png") as tmpfile:
        tmp_path = tmpfile.name
    # Register cleanup before writing so a failed write does not leave the
    # file behind.
    atexit.register(_delete_temp_file, tmp_path)
    if not cv.imwrite(tmp_path, img):
        raise OCRError(f"Could not write image to temporary file '{tmp_path}'")
    return source_from_file(tmp_path)


def _delete_temp_file(path) -> None:
    """Remove ``path``, ignoring OS errors (best-effort exit-time cleanup)."""
    try:
        os.remove(path)
    except OSError:
        # Deliberately ignored: the file may already be gone.
        pass


def get_source(img) -> "sdk.VisionSource":
    """Turn any supported input into an ``sdk.VisionSource``.

    An existing ``sdk.VisionSource`` is returned as is. A string is treated
    as a URL when ``validators.url`` accepts it and as a file path otherwise.
    Every other object is handled as an in-memory image.
    """
    if isinstance(img, sdk.VisionSource):
        return img
    if not isinstance(img, str):
        return source_from_image(img)
    if validators.url(img):
        return source_from_url(img)
    return source_from_file(img)