project setup

db65b372 · Max Drexler · ee91fe4a · db65b372 · db65b372 · db65b372
Commit db65b372 authored 2 years ago by Max Drexler
--- a/LICENSE
+++ b/LICENSE
+MIT License
+Copyright (c) 2020 Max Drexler
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/README.md
+++ b/README.md
+# Azure OCR Client
+A Python client to make OCR requests using Microsoft Azure.
+## Requirements
+A Microsoft Azure account with a [computer vision](https://azure.microsoft.com/en-us/products/cognitive-services/vision-services) resource.
+A .env file with two keys: VISION_KEY and VISION_ENDPOINT.
+python >= 3.7
+## Installation
+Install using pip.
+```
+pip install -U git+https://gitlab.ssec.wisc.edu/mdrexler/azureocr
+```
+## Usage
+Create a client:
+```
+from azureocr import AzureClientOCR
+client = AzureClientOCR('path/to/env/file')
+```
+Scan images (file paths, web URLs, or images in memory):
+```
+client.scan('/path/to/image.png')
+client.scan('https://example.com/image.png')
+import cv2 as cv
+img = cv.imread('/path/to/image.png')
+client.scan(img)
+```
+Specify output:
+```
+# just the text (default)
+client.scan('/path/to/image.png', parse_response=True)
+# Microsoft's return type
+# https://learn.microsoft.com/en-us/python/api/azure-ai-vision/azure.ai.vision.image_analysis_data.detectedtext?view=azure-python-preview
+client.scan('/path/to/image.png', parse_response=False)
+```
+## Author
+Created by [Max Drexler](mailto:mndrexler@wisc.edu)
+## License
+MIT License. See LICENSE for more information.
\ No newline at end of file
--- a/requirements.txt
+++ b/requirements.txt
+requests==2.29.0
+validators==0.20.0
+python-dotenv==1.0.0
+azure-ai-vision==0.11.1b1
\ No newline at end of file
--- a/src/azureocr/__init__.py
+++ b/src/azureocr/__init__.py
+"""
+azureocr.__init__
+~~~~~~~~~~~~~~~~~
+A Python client for Microsoft Azure OCR
+"""
+__author__ = 'Max Drexler'
+__email__ = 'mndrexler@wisc.edu'
+import atexit
+import os
+from tempfile import NamedTemporaryFile
+from typing import Any, Optional
+import azure.ai.vision as sdk
+import requests
+import validators
+from azure.ai.vision.image_analysis_client import DetectedTextLine
+from dotenv import load_dotenv
+class OCRError(Exception):
+    """Exception type for failures encountered while performing OCR."""
+class AzureClientOCR:
+    """Client that runs Azure Image Analysis text extraction on images.
+    The service endpoint and key are read from the ``VISION_ENDPOINT`` and
+    ``VISION_KEY`` environment variables, optionally loaded from a .env file.
+    """
+    def __init__(self, env_path: Optional[str] = None) -> None:
+        """Load the environment and prepare service and analysis options.
+        :param env_path: path handed to ``load_dotenv`` as ``dotenv_path``;
+            ``None`` leaves the lookup to ``load_dotenv``.
+        :raises AttributeError: from ``get_service_options`` when a required
+            environment variable is missing.
+        """
+        load_dotenv(dotenv_path=env_path)
+        self._service_opts = get_service_options()
+        self._analyze_opts = sdk.ImageAnalysisOptions()
+        self._analyze_opts.features = sdk.ImageAnalysisFeature.TEXT
+        self._analyze_opts.language = "en"
+    @property
+    def language(self) -> str:
+        """Language set on the analysis options (``"en"`` initially)."""
+        return self._analyze_opts.language
+    @language.setter
+    def language(self, new_lang: str) -> None:
+        self._analyze_opts.language = new_lang
+    # The return annotation is quoted on purpose: an unquoted
+    # ``list[...] | None`` is evaluated when the method is defined and raises
+    # TypeError on Python < 3.10. It also now lists ``str``, which is what the
+    # default ``parse_response=True`` path returns.
+    def scan(
+        self, img: Any, parse_response: bool = True
+    ) -> "str | list[DetectedTextLine] | None":
+        """Run text extraction on ``img`` and return the detected text.
+        :param img: anything ``get_source`` accepts: an ``sdk.VisionSource``,
+            a URL or file-path string, or an in-memory image.
+        :param parse_response: when true (the default) return the content of
+            every detected line joined with newlines; otherwise return the
+            SDK's detected-line objects unchanged.
+        :returns: the text or lines, or ``None`` when the result has no text.
+        :raises OCRError: when the analysis result reports an error.
+        """
+        analyzer = sdk.ImageAnalyzer(
+            self._service_opts, get_source(img), self._analyze_opts
+        )
+        result = analyzer.analyze()
+        if result.reason == sdk.ImageAnalysisResultReason.ERROR:
+            error_details = sdk.ImageAnalysisErrorDetails.from_result(result)
+            raise OCRError(
+                f"[{error_details.reason}]: {error_details.message}"
+            )
+        if result.text is None:
+            return None
+        if not parse_response:
+            return result.text.lines
+        return '\n'.join(line.content for line in result.text.lines)
+def get_service_options() -> sdk.VisionServiceOptions:
+    """Build the Azure service options from the process environment.
+    :returns: options created from ``VISION_ENDPOINT`` and ``VISION_KEY``.
+    :raises AttributeError: when either variable is not set.
+    """
+    endpoint = os.getenv("VISION_ENDPOINT")
+    key = os.getenv("VISION_KEY")
+    if endpoint is None or key is None:
+        raise AttributeError(
+            "No 'VISION_ENDPOINT' and/or 'VISION_KEY' variables in env file!"
+        )
+    return sdk.VisionServiceOptions(endpoint, key)
+def source_from_url(img_url: str) -> sdk.VisionSource:
+    """Check that ``img_url`` is reachable and wrap it as a vision source.
+    :param img_url: web address of the image.
+    :returns: an ``sdk.VisionSource`` built from the URL.
+    :raises ConnectionError: when the HEAD request does not end in HTTP 200.
+    """
+    # requests.head() does not follow redirects unless asked to, so without
+    # allow_redirects a valid image behind a 301/302 would be rejected here.
+    status_code = requests.head(
+        img_url, timeout=5, allow_redirects=True
+    ).status_code
+    if status_code != 200:
+        raise ConnectionError(
+            f"Could not get url '{img_url}' error code {status_code}"
+        )
+    return sdk.VisionSource(url=img_url)
+def source_from_file(img_path: str) -> sdk.VisionSource:
+    """Wrap an existing image file as a vision source.
+    :param img_path: path to the image on disk.
+    :raises FileNotFoundError: when ``img_path`` is not an existing file.
+    """
+    if os.path.isfile(img_path):
+        return sdk.VisionSource(filename=img_path)
+    raise FileNotFoundError(f"File '{img_path}' doesn't exist!")
+def source_from_image(img: Any) -> sdk.VisionSource:
+    """Write an in-memory image to a temporary PNG and wrap it as a source.
+    The temporary file is removed at interpreter exit.
+    :param img: image data accepted by ``cv2.imwrite``.
+    :returns: an ``sdk.VisionSource`` pointing at the temporary file.
+    :raises OCRError: when OpenCV reports that the image was not written.
+    """
+    import cv2 as cv
+    # Only the path is needed. Closing the handle immediately avoids leaking
+    # it and lets OpenCV reopen the file on platforms that lock open files.
+    with NamedTemporaryFile(delete=False, suffix=".png") as tmpfile:
+        tmp_path = tmpfile.name
+    # Register cleanup before writing so the file is removed even if the
+    # write below raises.
+    atexit.register(_delete_temp_file, tmp_path)
+    # imwrite signals failure through its boolean return value.
+    if not cv.imwrite(tmp_path, img):
+        raise OCRError(
+            f"Could not write image to temporary file '{tmp_path}'"
+        )
+    return source_from_file(tmp_path)
+def _delete_temp_file(path) -> None:
+    """Best-effort removal of ``path``; filesystem errors are ignored."""
+    try:
+        os.unlink(path)
+    except OSError:
+        # Cleanup only: a missing or undeletable file is not worth failing on.
+        return
+def get_source(img) -> sdk.VisionSource:
+    """Turn any supported image input into an ``sdk.VisionSource``.
+    An existing ``sdk.VisionSource`` is returned unchanged. A string is
+    treated as a URL when ``validators.url`` accepts it and as a file path
+    otherwise. Anything else is handled as an in-memory image.
+    """
+    if isinstance(img, sdk.VisionSource):
+        return img
+    if not isinstance(img, str):
+        return source_from_image(img)
+    make_source = source_from_url if validators.url(img) else source_from_file
+    return make_source(img)