git.mxchange.org Git - friendica-addons.git/commitdiff
New addon "tesseract" for OCR
author Michael <heluecht@pirati.ca>
Sun, 14 Jan 2024 19:21:08 +0000 (19:21 +0000)
committer Hypolite Petovan <hypolite@mrpetovan.com>
Mon, 15 Jan 2024 22:58:41 +0000 (23:58 +0100)
28 files changed:
tesseract/composer.json [new file with mode: 0644]
tesseract/composer.lock [new file with mode: 0644]
tesseract/tesseract.php [new file with mode: 0644]
tesseract/vendor/autoload.php [new file with mode: 0644]
tesseract/vendor/composer/ClassLoader.php [new file with mode: 0644]
tesseract/vendor/composer/LICENSE [new file with mode: 0644]
tesseract/vendor/composer/autoload_classmap.php [new file with mode: 0644]
tesseract/vendor/composer/autoload_namespaces.php [new file with mode: 0644]
tesseract/vendor/composer/autoload_psr4.php [new file with mode: 0644]
tesseract/vendor/composer/autoload_real.php [new file with mode: 0644]
tesseract/vendor/composer/autoload_static.php [new file with mode: 0644]
tesseract/vendor/composer/installed.json [new file with mode: 0644]
tesseract/vendor/thiagoalessio/tesseract_ocr/.appveyor.yml [new file with mode: 0644]
tesseract/vendor/thiagoalessio/tesseract_ocr/MIT-LICENSE [new file with mode: 0644]
tesseract/vendor/thiagoalessio/tesseract_ocr/README.md [new file with mode: 0644]
tesseract/vendor/thiagoalessio/tesseract_ocr/codecov.yml [new file with mode: 0644]
tesseract/vendor/thiagoalessio/tesseract_ocr/composer.json [new file with mode: 0644]
tesseract/vendor/thiagoalessio/tesseract_ocr/src/Command.php [new file with mode: 0644]
tesseract/vendor/thiagoalessio/tesseract_ocr/src/FeatureNotAvailableException.php [new file with mode: 0644]
tesseract/vendor/thiagoalessio/tesseract_ocr/src/FriendlyErrors.php [new file with mode: 0644]
tesseract/vendor/thiagoalessio/tesseract_ocr/src/ImageNotFoundException.php [new file with mode: 0644]
tesseract/vendor/thiagoalessio/tesseract_ocr/src/NoWritePermissionsForOutputFile.php [new file with mode: 0644]
tesseract/vendor/thiagoalessio/tesseract_ocr/src/Option.php [new file with mode: 0644]
tesseract/vendor/thiagoalessio/tesseract_ocr/src/Process.php [new file with mode: 0644]
tesseract/vendor/thiagoalessio/tesseract_ocr/src/TesseractNotFoundException.php [new file with mode: 0644]
tesseract/vendor/thiagoalessio/tesseract_ocr/src/TesseractOCR.php [new file with mode: 0644]
tesseract/vendor/thiagoalessio/tesseract_ocr/src/TesseractOcrException.php [new file with mode: 0644]
tesseract/vendor/thiagoalessio/tesseract_ocr/src/UnsuccessfulCommandException.php [new file with mode: 0644]

diff --git a/tesseract/composer.json b/tesseract/composer.json
new file mode 100644 (file)
index 0000000..2a0937c
--- /dev/null
@@ -0,0 +1,5 @@
+{
+    "require": {
+        "thiagoalessio/tesseract_ocr": "^2.13"
+    }
+}
diff --git a/tesseract/composer.lock b/tesseract/composer.lock
new file mode 100644 (file)
index 0000000..036868e
--- /dev/null
@@ -0,0 +1,66 @@
+{
+    "_readme": [
+        "This file locks the dependencies of your project to a known state",
+        "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
+        "This file is @generated automatically"
+    ],
+    "content-hash": "778b5479cb5d2b31b57f40473a87f8eb",
+    "packages": [
+        {
+            "name": "thiagoalessio/tesseract_ocr",
+            "version": "2.13.0",
+            "source": {
+                "type": "git",
+                "url": "https://github.com/thiagoalessio/tesseract-ocr-for-php.git",
+                "reference": "232a8cb9d571992f9bd1e263f2f6909cf6c173a1"
+            },
+            "dist": {
+                "type": "zip",
+                "url": "https://api.github.com/repos/thiagoalessio/tesseract-ocr-for-php/zipball/232a8cb9d571992f9bd1e263f2f6909cf6c173a1",
+                "reference": "232a8cb9d571992f9bd1e263f2f6909cf6c173a1",
+                "shasum": ""
+            },
+            "require": {
+                "php": "^5.3 || ^7.0 || ^8.0"
+            },
+            "require-dev": {
+                "phpunit/php-code-coverage": "^2.2.4 || ^9.0.0"
+            },
+            "type": "library",
+            "autoload": {
+                "psr-4": {
+                    "thiagoalessio\\TesseractOCR\\": "src/"
+                }
+            },
+            "notification-url": "https://packagist.org/downloads/",
+            "license": [
+                "MIT"
+            ],
+            "authors": [
+                {
+                    "name": "thiagoalessio",
+                    "email": "thiagoalessio@me.com"
+                }
+            ],
+            "description": "A wrapper to work with Tesseract OCR inside PHP.",
+            "keywords": [
+                "OCR",
+                "Tesseract",
+                "text recognition"
+            ],
+            "time": "2023-10-05T21:14:48+00:00"
+        }
+    ],
+    "packages-dev": [],
+    "aliases": [],
+    "minimum-stability": "stable",
+    "stability-flags": [],
+    "prefer-stable": false,
+    "prefer-lowest": false,
+    "platform": [],
+    "platform-dev": [],
+    "platform-overrides": {
+        "php": "7.2"
+    },
+    "plugin-api-version": "1.1.0"
+}
diff --git a/tesseract/tesseract.php b/tesseract/tesseract.php
new file mode 100644 (file)
index 0000000..3c61273
--- /dev/null
@@ -0,0 +1,33 @@
+<?php
+/**
+ * Name: Tesseract OCR
+ * Description: Use OCR to get text from images
+ * Version: 0.1
+ * Author: Michael Vogel <http://pirati.ca/profile/heluecht>
+ */
+
+use Friendica\Core\Hook;
+use Friendica\Core\Logger;
+use Friendica\Core\System;
+use thiagoalessio\TesseractOCR\TesseractOCR;
+
+require_once __DIR__ . DIRECTORY_SEPARATOR . 'vendor' . DIRECTORY_SEPARATOR . 'autoload.php';
+
+/**
+ * Registers this addon's OCR hook handler.
+ *
+ * Binds the 'ocr-detection' hook to tesseract_ocr_detection() in this file
+ * and writes a notice to the log.
+ * NOTE(review): presumably invoked by Friendica when the addon is
+ * installed - confirm against the addon loader.
+ */
+function tesseract_install()
+{
+       Hook::register('ocr-detection', __FILE__, 'tesseract_ocr_detection');
+
+       Logger::notice('installed tesseract');
+}
+
+/**
+ * Handler for the 'ocr-detection' hook: extracts text from an image via Tesseract.
+ *
+ * Reads the image data from $media['img_str'] and, when recognition succeeds,
+ * stores the recognised text in $media['description']. This is best effort:
+ * any error raised while calling Tesseract is logged and swallowed, leaving
+ * $media unchanged.
+ *
+ * @param array $media Hook payload, passed by reference. Reads 'img_str',
+ *                     writes 'description' on success.
+ *
+ * @return void
+ */
+function tesseract_ocr_detection(&$media)
+{
+       // Without image data there is nothing to recognise. Bail out early so a
+       // missing key does not raise an "undefined index" notice and no external
+       // tesseract process is started for empty input.
+       if (!isset($media['img_str']) || $media['img_str'] === '') {
+               return;
+       }
+
+       try {
+               $ocr = new TesseractOCR();
+               $ocr->tempDir(System::getTempPath());
+               // The image is handed over as data, together with its length in bytes.
+               $ocr->imageData($media['img_str'], strlen($media['img_str']));
+               $media['description'] = $ocr->run();
+       } catch (\Throwable $th) {
+               // Deliberately non-fatal: OCR is an optional enrichment of the media record.
+               Logger::info('Error calling TesseractOCR', ['message' => $th->getMessage()]);
+       }
+}
diff --git a/tesseract/vendor/autoload.php b/tesseract/vendor/autoload.php
new file mode 100644 (file)
index 0000000..1238ece
--- /dev/null
@@ -0,0 +1,7 @@
+<?php
+
+// autoload.php @generated by Composer
+
+require_once __DIR__ . '/composer/autoload_real.php';
+
+return ComposerAutoloaderInit695d781792f754383aa61632167d066e::getLoader();
diff --git a/tesseract/vendor/composer/ClassLoader.php b/tesseract/vendor/composer/ClassLoader.php
new file mode 100644 (file)
index 0000000..03b9bb9
--- /dev/null
@@ -0,0 +1,445 @@
+<?php
+
+/*
+ * This file is part of Composer.
+ *
+ * (c) Nils Adermann <naderman@naderman.de>
+ *     Jordi Boggiano <j.boggiano@seld.be>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Composer\Autoload;
+
+/**
+ * ClassLoader implements a PSR-0, PSR-4 and classmap class loader.
+ *
+ *     $loader = new \Composer\Autoload\ClassLoader();
+ *
+ *     // register classes with namespaces
+ *     $loader->add('Symfony\Component', __DIR__.'/component');
+ *     $loader->add('Symfony',           __DIR__.'/framework');
+ *
+ *     // activate the autoloader
+ *     $loader->register();
+ *
+ *     // to enable searching the include path (eg. for PEAR packages)
+ *     $loader->setUseIncludePath(true);
+ *
+ * In this example, if you try to use a class in the Symfony\Component
+ * namespace or one of its children (Symfony\Component\Console for instance),
+ * the autoloader will first look for the class under the component/
+ * directory, and it will then fallback to the framework/ directory if not
+ * found before giving up.
+ *
+ * This class is loosely based on the Symfony UniversalClassLoader.
+ *
+ * @author Fabien Potencier <fabien@symfony.com>
+ * @author Jordi Boggiano <j.boggiano@seld.be>
+ * @see    http://www.php-fig.org/psr/psr-0/
+ * @see    http://www.php-fig.org/psr/psr-4/
+ */
+class ClassLoader
+{
+    // PSR-4
+    // $prefixLengthsPsr4 is keyed by the first character of the prefix, then by
+    // the prefix itself; $prefixDirsPsr4 maps each prefix to its directories.
+    private $prefixLengthsPsr4 = array();
+    private $prefixDirsPsr4 = array();
+    private $fallbackDirsPsr4 = array();
+
+    // PSR-0
+    // $prefixesPsr0 is keyed by the first character of the prefix, then by the
+    // prefix itself.
+    private $prefixesPsr0 = array();
+    private $fallbackDirsPsr0 = array();
+
+    private $useIncludePath = false;
+    private $classMap = array();
+    private $classMapAuthoritative = false;
+    // Classes for which findFile() already failed during this request.
+    private $missingClasses = array();
+    private $apcuPrefix;
+
+    /**
+     * Returns the registered PSR-0 prefixes as one flat prefix => directories map.
+     *
+     * @return array
+     */
+    public function getPrefixes()
+    {
+        if (!empty($this->prefixesPsr0)) {
+            // Flatten the per-first-character buckets into a single array.
+            return call_user_func_array('array_merge', array_values($this->prefixesPsr0));
+        }
+
+        return array();
+    }
+
+    /**
+     * Returns the registered PSR-4 prefix => directories map.
+     *
+     * @return array
+     */
+    public function getPrefixesPsr4()
+    {
+        return $this->prefixDirsPsr4;
+    }
+
+    /**
+     * Returns the PSR-0 fallback directories.
+     *
+     * @return array
+     */
+    public function getFallbackDirs()
+    {
+        return $this->fallbackDirsPsr0;
+    }
+
+    /**
+     * Returns the PSR-4 fallback directories.
+     *
+     * @return array
+     */
+    public function getFallbackDirsPsr4()
+    {
+        return $this->fallbackDirsPsr4;
+    }
+
+    /**
+     * Returns the class name => file name map.
+     *
+     * @return array
+     */
+    public function getClassMap()
+    {
+        return $this->classMap;
+    }
+
+    /**
+     * @param array $classMap Class to filename map
+     */
+    public function addClassMap(array $classMap)
+    {
+        if ($this->classMap) {
+            $this->classMap = array_merge($this->classMap, $classMap);
+        } else {
+            $this->classMap = $classMap;
+        }
+    }
+
+    /**
+     * Registers a set of PSR-0 directories for a given prefix, either
+     * appending or prepending to the ones previously set for this prefix.
+     *
+     * @param string       $prefix  The prefix
+     * @param array|string $paths   The PSR-0 root directories
+     * @param bool         $prepend Whether to prepend the directories
+     */
+    public function add($prefix, $paths, $prepend = false)
+    {
+        if (!$prefix) {
+            // An empty prefix addresses the PSR-0 fallback directories.
+            if ($prepend) {
+                $this->fallbackDirsPsr0 = array_merge(
+                    (array) $paths,
+                    $this->fallbackDirsPsr0
+                );
+            } else {
+                $this->fallbackDirsPsr0 = array_merge(
+                    $this->fallbackDirsPsr0,
+                    (array) $paths
+                );
+            }
+
+            return;
+        }
+
+        $first = $prefix[0];
+        if (!isset($this->prefixesPsr0[$first][$prefix])) {
+            $this->prefixesPsr0[$first][$prefix] = (array) $paths;
+
+            return;
+        }
+        if ($prepend) {
+            $this->prefixesPsr0[$first][$prefix] = array_merge(
+                (array) $paths,
+                $this->prefixesPsr0[$first][$prefix]
+            );
+        } else {
+            $this->prefixesPsr0[$first][$prefix] = array_merge(
+                $this->prefixesPsr0[$first][$prefix],
+                (array) $paths
+            );
+        }
+    }
+
+    /**
+     * Registers a set of PSR-4 directories for a given namespace, either
+     * appending or prepending to the ones previously set for this namespace.
+     *
+     * @param string       $prefix  The prefix/namespace, with trailing '\\'
+     * @param array|string $paths   The PSR-4 base directories
+     * @param bool         $prepend Whether to prepend the directories
+     *
+     * @throws \InvalidArgumentException
+     */
+    public function addPsr4($prefix, $paths, $prepend = false)
+    {
+        if (!$prefix) {
+            // Register directories for the root namespace.
+            if ($prepend) {
+                $this->fallbackDirsPsr4 = array_merge(
+                    (array) $paths,
+                    $this->fallbackDirsPsr4
+                );
+            } else {
+                $this->fallbackDirsPsr4 = array_merge(
+                    $this->fallbackDirsPsr4,
+                    (array) $paths
+                );
+            }
+        } elseif (!isset($this->prefixDirsPsr4[$prefix])) {
+            // Register directories for a new namespace.
+            $length = strlen($prefix);
+            if ('\\' !== $prefix[$length - 1]) {
+                throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator.");
+            }
+            $this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length;
+            $this->prefixDirsPsr4[$prefix] = (array) $paths;
+        } elseif ($prepend) {
+            // Prepend directories for an already registered namespace.
+            $this->prefixDirsPsr4[$prefix] = array_merge(
+                (array) $paths,
+                $this->prefixDirsPsr4[$prefix]
+            );
+        } else {
+            // Append directories for an already registered namespace.
+            $this->prefixDirsPsr4[$prefix] = array_merge(
+                $this->prefixDirsPsr4[$prefix],
+                (array) $paths
+            );
+        }
+    }
+
+    /**
+     * Registers a set of PSR-0 directories for a given prefix,
+     * replacing any others previously set for this prefix.
+     *
+     * @param string       $prefix The prefix
+     * @param array|string $paths  The PSR-0 base directories
+     */
+    public function set($prefix, $paths)
+    {
+        if (!$prefix) {
+            $this->fallbackDirsPsr0 = (array) $paths;
+        } else {
+            $this->prefixesPsr0[$prefix[0]][$prefix] = (array) $paths;
+        }
+    }
+
+    /**
+     * Registers a set of PSR-4 directories for a given namespace,
+     * replacing any others previously set for this namespace.
+     *
+     * @param string       $prefix The prefix/namespace, with trailing '\\'
+     * @param array|string $paths  The PSR-4 base directories
+     *
+     * @throws \InvalidArgumentException
+     */
+    public function setPsr4($prefix, $paths)
+    {
+        if (!$prefix) {
+            $this->fallbackDirsPsr4 = (array) $paths;
+        } else {
+            $length = strlen($prefix);
+            if ('\\' !== $prefix[$length - 1]) {
+                throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator.");
+            }
+            $this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length;
+            $this->prefixDirsPsr4[$prefix] = (array) $paths;
+        }
+    }
+
+    /**
+     * Turns on searching the include path for class files.
+     *
+     * @param bool $useIncludePath
+     */
+    public function setUseIncludePath($useIncludePath)
+    {
+        $this->useIncludePath = $useIncludePath;
+    }
+
+    /**
+     * Can be used to check if the autoloader uses the include path to check
+     * for classes.
+     *
+     * @return bool
+     */
+    public function getUseIncludePath()
+    {
+        return $this->useIncludePath;
+    }
+
+    /**
+     * Turns off searching the prefix and fallback directories for classes
+     * that have not been registered with the class map.
+     *
+     * @param bool $classMapAuthoritative
+     */
+    public function setClassMapAuthoritative($classMapAuthoritative)
+    {
+        $this->classMapAuthoritative = $classMapAuthoritative;
+    }
+
+    /**
+     * Should class lookup fail if not found in the current class map?
+     *
+     * @return bool
+     */
+    public function isClassMapAuthoritative()
+    {
+        return $this->classMapAuthoritative;
+    }
+
+    /**
+     * APCu prefix to use to cache found/not-found classes, if the extension is enabled.
+     *
+     * @param string|null $apcuPrefix
+     */
+    public function setApcuPrefix($apcuPrefix)
+    {
+        // The prefix is only kept when apcu_fetch() exists and apc.enabled is
+        // truthy; otherwise it is stored as null, which disables the cache path.
+        $this->apcuPrefix = function_exists('apcu_fetch') && filter_var(ini_get('apc.enabled'), FILTER_VALIDATE_BOOLEAN) ? $apcuPrefix : null;
+    }
+
+    /**
+     * The APCu prefix in use, or null if APCu caching is not enabled.
+     *
+     * @return string|null
+     */
+    public function getApcuPrefix()
+    {
+        return $this->apcuPrefix;
+    }
+
+    /**
+     * Registers this instance as an autoloader.
+     *
+     * @param bool $prepend Whether to prepend the autoloader or not
+     */
+    public function register($prepend = false)
+    {
+        spl_autoload_register(array($this, 'loadClass'), true, $prepend);
+    }
+
+    /**
+     * Unregisters this instance as an autoloader.
+     */
+    public function unregister()
+    {
+        spl_autoload_unregister(array($this, 'loadClass'));
+    }
+
+    /**
+     * Loads the given class or interface.
+     *
+     * @param  string    $class The name of the class
+     * @return bool|null True if loaded, null otherwise
+     */
+    public function loadClass($class)
+    {
+        if ($file = $this->findFile($class)) {
+            // Included through a plain function so the file cannot see $this/self.
+            includeFile($file);
+
+            return true;
+        }
+    }
+
+    /**
+     * Finds the path to the file where the class is defined.
+     *
+     * @param string $class The name of the class
+     *
+     * @return string|false The path if found, false otherwise
+     */
+    public function findFile($class)
+    {
+        // class map lookup
+        if (isset($this->classMap[$class])) {
+            return $this->classMap[$class];
+        }
+        if ($this->classMapAuthoritative || isset($this->missingClasses[$class])) {
+            return false;
+        }
+        if (null !== $this->apcuPrefix) {
+            // $hit is set by apcu_fetch() and tells whether a cached entry
+            // (including a cached "false") exists.
+            $file = apcu_fetch($this->apcuPrefix.$class, $hit);
+            if ($hit) {
+                return $file;
+            }
+        }
+
+        $file = $this->findFileWithExtension($class, '.php');
+
+        // Search for Hack files if we are running on HHVM
+        if (false === $file && defined('HHVM_VERSION')) {
+            $file = $this->findFileWithExtension($class, '.hh');
+        }
+
+        if (null !== $this->apcuPrefix) {
+            // The result is cached whether or not a file was found.
+            apcu_add($this->apcuPrefix.$class, $file);
+        }
+
+        if (false === $file) {
+            // Remember that this class does not exist.
+            $this->missingClasses[$class] = true;
+        }
+
+        return $file;
+    }
+
+    /**
+     * Resolves a class name to an existing file with the given extension.
+     *
+     * Lookup order: PSR-4 prefixes (longest matching namespace first), PSR-4
+     * fallback directories, PSR-0 prefixes, PSR-0 fallback directories and
+     * finally the include path when that option is enabled.
+     *
+     * @param string $class The name of the class
+     * @param string $ext   File extension including the leading dot
+     *
+     * @return string|false The path if found, false otherwise
+     */
+    private function findFileWithExtension($class, $ext)
+    {
+        // PSR-4 lookup
+        $logicalPathPsr4 = strtr($class, '\\', DIRECTORY_SEPARATOR) . $ext;
+
+        $first = $class[0];
+        if (isset($this->prefixLengthsPsr4[$first])) {
+            $subPath = $class;
+            // Strip one trailing namespace segment per iteration, so longer
+            // prefixes are tried before shorter ones.
+            while (false !== $lastPos = strrpos($subPath, '\\')) {
+                $subPath = substr($subPath, 0, $lastPos);
+                $search = $subPath . '\\';
+                if (isset($this->prefixDirsPsr4[$search])) {
+                    $pathEnd = DIRECTORY_SEPARATOR . substr($logicalPathPsr4, $lastPos + 1);
+                    foreach ($this->prefixDirsPsr4[$search] as $dir) {
+                        if (file_exists($file = $dir . $pathEnd)) {
+                            return $file;
+                        }
+                    }
+                }
+            }
+        }
+
+        // PSR-4 fallback dirs
+        foreach ($this->fallbackDirsPsr4 as $dir) {
+            if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr4)) {
+                return $file;
+            }
+        }
+
+        // PSR-0 lookup
+        if (false !== $pos = strrpos($class, '\\')) {
+            // namespaced class name
+            $logicalPathPsr0 = substr($logicalPathPsr4, 0, $pos + 1)
+                . strtr(substr($logicalPathPsr4, $pos + 1), '_', DIRECTORY_SEPARATOR);
+        } else {
+            // PEAR-like class name
+            $logicalPathPsr0 = strtr($class, '_', DIRECTORY_SEPARATOR) . $ext;
+        }
+
+        if (isset($this->prefixesPsr0[$first])) {
+            foreach ($this->prefixesPsr0[$first] as $prefix => $dirs) {
+                if (0 === strpos($class, $prefix)) {
+                    foreach ($dirs as $dir) {
+                        if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr0)) {
+                            return $file;
+                        }
+                    }
+                }
+            }
+        }
+
+        // PSR-0 fallback dirs
+        foreach ($this->fallbackDirsPsr0 as $dir) {
+            if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr0)) {
+                return $file;
+            }
+        }
+
+        // PSR-0 include paths.
+        if ($this->useIncludePath && $file = stream_resolve_include_path($logicalPathPsr0)) {
+            return $file;
+        }
+
+        return false;
+    }
+}
+
+/**
+ * Scope isolated include.
+ *
+ * Prevents access to $this/self from included files.
+ *
+ * @param string $file Path of the file to include
+ */
+function includeFile($file)
+{
+    include $file;
+}
diff --git a/tesseract/vendor/composer/LICENSE b/tesseract/vendor/composer/LICENSE
new file mode 100644 (file)
index 0000000..f27399a
--- /dev/null
@@ -0,0 +1,21 @@
+
+Copyright (c) Nils Adermann, Jordi Boggiano
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is furnished
+to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
diff --git a/tesseract/vendor/composer/autoload_classmap.php b/tesseract/vendor/composer/autoload_classmap.php
new file mode 100644 (file)
index 0000000..7a91153
--- /dev/null
@@ -0,0 +1,9 @@
+<?php
+
+// autoload_classmap.php @generated by Composer
+
+$vendorDir = dirname(dirname(__FILE__));
+$baseDir = dirname($vendorDir);
+
+return array(
+);
diff --git a/tesseract/vendor/composer/autoload_namespaces.php b/tesseract/vendor/composer/autoload_namespaces.php
new file mode 100644 (file)
index 0000000..b7fc012
--- /dev/null
@@ -0,0 +1,9 @@
+<?php
+
+// autoload_namespaces.php @generated by Composer
+
+$vendorDir = dirname(dirname(__FILE__));
+$baseDir = dirname($vendorDir);
+
+return array(
+);
diff --git a/tesseract/vendor/composer/autoload_psr4.php b/tesseract/vendor/composer/autoload_psr4.php
new file mode 100644 (file)
index 0000000..80779c7
--- /dev/null
@@ -0,0 +1,10 @@
+<?php
+
+// autoload_psr4.php @generated by Composer
+
+$vendorDir = dirname(dirname(__FILE__));
+$baseDir = dirname($vendorDir);
+
+return array(
+    'thiagoalessio\\TesseractOCR\\' => array($vendorDir . '/thiagoalessio/tesseract_ocr/src'),
+);
diff --git a/tesseract/vendor/composer/autoload_real.php b/tesseract/vendor/composer/autoload_real.php
new file mode 100644 (file)
index 0000000..10af9c5
--- /dev/null
@@ -0,0 +1,55 @@
+<?php
+
+// autoload_real.php @generated by Composer
+
+class ComposerAutoloaderInit695d781792f754383aa61632167d066e
+{
+    // Loader instance created by the first getLoader() call and reused afterwards.
+    private static $loader;
+
+    /**
+     * Temporary autoload callback that only knows how to load Composer's ClassLoader.
+     *
+     * @param string $class The name of the class being autoloaded
+     */
+    public static function loadClassLoader($class)
+    {
+        if ('Composer\Autoload\ClassLoader' === $class) {
+            require __DIR__ . '/ClassLoader.php';
+        }
+    }
+
+    /**
+     * Builds, configures and registers the class loader on first use.
+     *
+     * @return \Composer\Autoload\ClassLoader
+     */
+    public static function getLoader()
+    {
+        if (null !== self::$loader) {
+            return self::$loader;
+        }
+
+        // loadClassLoader() is registered only for the duration of the
+        // instantiation below and removed again right after it.
+        spl_autoload_register(array('ComposerAutoloaderInit695d781792f754383aa61632167d066e', 'loadClassLoader'), true, true);
+        self::$loader = $loader = new \Composer\Autoload\ClassLoader();
+        spl_autoload_unregister(array('ComposerAutoloaderInit695d781792f754383aa61632167d066e', 'loadClassLoader'));
+
+        // The static initializer is used on PHP >= 5.6 when not running on HHVM
+        // and when this file is not Zend-encoded; otherwise the array-returning
+        // autoload_*.php files are applied one by one.
+        $useStaticLoader = PHP_VERSION_ID >= 50600 && !defined('HHVM_VERSION') && (!function_exists('zend_loader_file_encoded') || !zend_loader_file_encoded());
+        if ($useStaticLoader) {
+            require_once __DIR__ . '/autoload_static.php';
+
+            call_user_func(\Composer\Autoload\ComposerStaticInit695d781792f754383aa61632167d066e::getInitializer($loader));
+        } else {
+            $map = require __DIR__ . '/autoload_namespaces.php';
+            foreach ($map as $namespace => $path) {
+                $loader->set($namespace, $path);
+            }
+
+            $map = require __DIR__ . '/autoload_psr4.php';
+            foreach ($map as $namespace => $path) {
+                $loader->setPsr4($namespace, $path);
+            }
+
+            $classMap = require __DIR__ . '/autoload_classmap.php';
+            if ($classMap) {
+                $loader->addClassMap($classMap);
+            }
+        }
+
+        // true = prepend this loader to the autoload queue.
+        $loader->register(true);
+
+        return $loader;
+    }
+}
diff --git a/tesseract/vendor/composer/autoload_static.php b/tesseract/vendor/composer/autoload_static.php
new file mode 100644 (file)
index 0000000..59b6605
--- /dev/null
@@ -0,0 +1,31 @@
+<?php
+
+// autoload_static.php @generated by Composer
+
+namespace Composer\Autoload;
+
+class ComposerStaticInit695d781792f754383aa61632167d066e
+{
+    // PSR-4 prefix lengths, keyed by the first character of the prefix.
+    public static $prefixLengthsPsr4 = array (
+        't' => 
+        array (
+            'thiagoalessio\\TesseractOCR\\' => 27,
+        ),
+    );
+
+    // PSR-4 prefix => base directories.
+    public static $prefixDirsPsr4 = array (
+        'thiagoalessio\\TesseractOCR\\' => 
+        array (
+            0 => __DIR__ . '/..' . '/thiagoalessio/tesseract_ocr/src',
+        ),
+    );
+
+    /**
+     * Returns a closure that copies the static PSR-4 maps into the given loader.
+     *
+     * The closure is bound to the ClassLoader class scope, which lets it
+     * assign the loader's properties directly.
+     *
+     * @param ClassLoader $loader The loader to initialise
+     *
+     * @return \Closure
+     */
+    public static function getInitializer(ClassLoader $loader)
+    {
+        return \Closure::bind(function () use ($loader) {
+            $loader->prefixLengthsPsr4 = ComposerStaticInit695d781792f754383aa61632167d066e::$prefixLengthsPsr4;
+            $loader->prefixDirsPsr4 = ComposerStaticInit695d781792f754383aa61632167d066e::$prefixDirsPsr4;
+
+        }, null, ClassLoader::class);
+    }
+}
diff --git a/tesseract/vendor/composer/installed.json b/tesseract/vendor/composer/installed.json
new file mode 100644 (file)
index 0000000..70bcc01
--- /dev/null
@@ -0,0 +1,48 @@
+[
+    {
+        "name": "thiagoalessio/tesseract_ocr",
+        "version": "2.13.0",
+        "version_normalized": "2.13.0.0",
+        "source": {
+            "type": "git",
+            "url": "https://github.com/thiagoalessio/tesseract-ocr-for-php.git",
+            "reference": "232a8cb9d571992f9bd1e263f2f6909cf6c173a1"
+        },
+        "dist": {
+            "type": "zip",
+            "url": "https://api.github.com/repos/thiagoalessio/tesseract-ocr-for-php/zipball/232a8cb9d571992f9bd1e263f2f6909cf6c173a1",
+            "reference": "232a8cb9d571992f9bd1e263f2f6909cf6c173a1",
+            "shasum": ""
+        },
+        "require": {
+            "php": "^5.3 || ^7.0 || ^8.0"
+        },
+        "require-dev": {
+            "phpunit/php-code-coverage": "^2.2.4 || ^9.0.0"
+        },
+        "time": "2023-10-05T21:14:48+00:00",
+        "type": "library",
+        "installation-source": "dist",
+        "autoload": {
+            "psr-4": {
+                "thiagoalessio\\TesseractOCR\\": "src/"
+            }
+        },
+        "notification-url": "https://packagist.org/downloads/",
+        "license": [
+            "MIT"
+        ],
+        "authors": [
+            {
+                "name": "thiagoalessio",
+                "email": "thiagoalessio@me.com"
+            }
+        ],
+        "description": "A wrapper to work with Tesseract OCR inside PHP.",
+        "keywords": [
+            "OCR",
+            "Tesseract",
+            "text recognition"
+        ]
+    }
+]
diff --git a/tesseract/vendor/thiagoalessio/tesseract_ocr/.appveyor.yml b/tesseract/vendor/thiagoalessio/tesseract_ocr/.appveyor.yml
new file mode 100644 (file)
index 0000000..1debc1a
--- /dev/null
@@ -0,0 +1,14 @@
+---
+build: false
+
+install:
+  - ps: Set-Service wuauserv -StartupType Manual
+  - choco install php
+  - choco install capture2text --version 3.9
+  - choco install composer
+  - refreshenv
+  - cd %APPVEYOR_BUILD_FOLDER%
+  - composer install
+
+test_script:
+  - php tests\run.php unit e2e
diff --git a/tesseract/vendor/thiagoalessio/tesseract_ocr/MIT-LICENSE b/tesseract/vendor/thiagoalessio/tesseract_ocr/MIT-LICENSE
new file mode 100644 (file)
index 0000000..448104d
--- /dev/null
@@ -0,0 +1,19 @@
+Copyright (c) 2012-2021 Thiago Alessio Pereira
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/tesseract/vendor/thiagoalessio/tesseract_ocr/README.md b/tesseract/vendor/thiagoalessio/tesseract_ocr/README.md
new file mode 100644 (file)
index 0000000..b828344
--- /dev/null
@@ -0,0 +1,508 @@
+<img src="https://thiagoalessio.github.io/tesseract-ocr-for-php/images/logo.png" alt="Tesseract OCR for PHP" align="right" width="320px"/>
+
+# Tesseract OCR for PHP
+
+A wrapper to work with Tesseract OCR inside PHP.
+
+[![CI][ci_badge]][ci]
+[![AppVeyor][appveyor_badge]][appveyor]
+[![Codacy][codacy_badge]][codacy]
+[![Test Coverage][test_coverage_badge]][test_coverage]
+<br/>
+[![Latest Stable Version][stable_version_badge]][packagist]
+[![Total Downloads][total_downloads_badge]][packagist]
+[![Monthly Downloads][monthly_downloads_badge]][packagist]
+
+## Installation
+
+Via [Composer][]:
+
+    $ composer require thiagoalessio/tesseract_ocr
+
+:bangbang: **This library depends on [Tesseract OCR][], version _3.02_ or later.**
+
+<br/>
+
+### ![][windows_icon] Note for Windows users
+
+There are [many ways][tesseract_installation_on_windows] to install
+[Tesseract OCR][] on your system, but if you just want something quick to
+get up and running, I recommend installing the [Capture2Text][] package with
+[Chocolatey][].
+
+    choco install capture2text --version 3.9
+
+:warning: Recent versions of [Capture2Text][] stopped shipping the `tesseract` binary.
+
+<br/>
+
+### ![][macos_icon] Note for macOS users
+
+With [MacPorts][] you can install support for individual languages, like so:
+
+    $ sudo port install tesseract-<langcode>
+
+But that is not possible with [Homebrew][]. It comes only with **English** support
+by default, so if you intend to use it for other languages, the quickest solution
+is to install them all:
+
+    $ brew install tesseract tesseract-lang
+
+<br/>
+
+## Usage
+
+### Basic usage
+
+<img align="right" width="50%" title="The quick brown fox jumps over the lazy dog." src="./tests/EndToEnd/images/text.png"/>
+
+```php
+use thiagoalessio\TesseractOCR\TesseractOCR;
+echo (new TesseractOCR('text.png'))
+    ->run();
+```
+
+```
+The quick brown fox
+jumps over
+the lazy dog.
+```
+
+<br/>
+
+### Other languages
+
+<img align="right" width="50%" title="Bülowstraße" src="./tests/EndToEnd/images/german.png"/>
+
+```php
+use thiagoalessio\TesseractOCR\TesseractOCR;
+echo (new TesseractOCR('german.png'))
+    ->lang('deu')
+    ->run();
+```
+
+```
+Bülowstraße
+```
+
+<br/>
+
+### Multiple languages
+
+<img align="right" width="50%" title="I eat すし y Pollo" src="./tests/EndToEnd/images/mixed-languages.png"/>
+
+```php
+use thiagoalessio\TesseractOCR\TesseractOCR;
+echo (new TesseractOCR('mixed-languages.png'))
+    ->lang('eng', 'jpn', 'spa')
+    ->run();
+```
+
+```
+I eat すし y Pollo
+```
+
+<br/>
+
+### Inducing recognition
+
+<img align="right" width="50%" title="8055" src="./tests/EndToEnd/images/8055.png"/>
+
+```php
+use thiagoalessio\TesseractOCR\TesseractOCR;
+echo (new TesseractOCR('8055.png'))
+    ->allowlist(range('A', 'Z'))
+    ->run();
+```
+
+```
+BOSS
+```
+
+<br/>
+
+### Breaking CAPTCHAs
+
+Yes, I know some of you might want to use this library for the *noble* purpose
+of breaking CAPTCHAs, so please take a look at this comment:
+
+<https://github.com/thiagoalessio/tesseract-ocr-for-php/issues/91#issuecomment-342290510>
+
+## API
+
+### run
+
+Executes a `tesseract` command, optionally receiving an integer as `timeout`,
+in case you experience stalled tesseract processes.
+
+```php
+$ocr = new TesseractOCR();
+$ocr->run();
+```
+```php
+$ocr = new TesseractOCR();
+$timeout = 500;
+$ocr->run($timeout);
+```
+
+### image
+
+Define the path of an image to be recognized by `tesseract`.
+
+```php
+$ocr = new TesseractOCR();
+$ocr->image('/path/to/image.png');
+$ocr->run();
+```
+
+### imageData
+
+Set the image to be recognized by `tesseract` from a string, with its size.
+This can be useful when dealing with files that are already loaded in memory.
+You can easily retrieve the image data and size of an image object:
+```php
+//Using Imagick
+$data = $img->getImageBlob();
+$size = $img->getImageLength();
+//Using GD
+ob_start();
+// Note that you can use any format supported by tesseract
+imagepng($img, null, 0);
+$size = ob_get_length();
+$data = ob_get_clean();
+
+$ocr = new TesseractOCR();
+$ocr->imageData($data, $size);
+$ocr->run();
+```
+
+### executable
+
+Define a custom location of the `tesseract` executable,
+if for any reason it is not present in the `$PATH`.
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->executable('/path/to/tesseract')
+    ->run();
+```
+
+### version
+
+Returns the current version of `tesseract`.
+
+```php
+echo (new TesseractOCR())->version();
+```
+
+### availableLanguages
+
+Returns a list of available languages/scripts.
+
+```php
+foreach((new TesseractOCR())->availableLanguages() as $lang) echo $lang;
+```
+
+__More info:__ <https://github.com/tesseract-ocr/tesseract/blob/master/doc/tesseract.1.asc#languages-and-scripts>
+
+### tessdataDir
+
+Specify a custom location for the tessdata directory.
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->tessdataDir('/path')
+    ->run();
+```
+
+### userWords
+
+Specify the location of user words file.
+
+This is a plain text file containing a list of words that you want to be
+considered as normal dictionary words by `tesseract`.
+
+Useful when dealing with contents that contain technical terminology, jargon,
+etc.
+
+```
+$ cat /path/to/user-words.txt
+foo
+bar
+```
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->userWords('/path/to/user-words.txt')
+    ->run();
+```
+
+### userPatterns
+
+Specify the location of user patterns file.
+
+If the contents you are dealing with have known patterns, this option can
+greatly improve tesseract's recognition accuracy.
+
+```
+$ cat /path/to/user-patterns.txt
+1-\d\d\d-GOOG-441
+www.\n\\\*.com
+```
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->userPatterns('/path/to/user-patterns.txt')
+    ->run();
+```
+
+### lang
+
+Define one or more languages to be used during the recognition.
+A complete list of available languages can be found at:
+<https://github.com/tesseract-ocr/tesseract/blob/master/doc/tesseract.1.asc#languages>
+
+__Tip from [@daijiale][]:__ Use the combination `->lang('chi_sim', 'chi_tra')`
+for proper recognition of Chinese.
+
+```php
+ echo (new TesseractOCR('img.png'))
+     ->lang('lang1', 'lang2', 'lang3')
+     ->run();
+```
+
+### psm
+
+Specify the Page Segmentation Method, which instructs `tesseract` how to
+interpret the given image.
+
+__More info:__ <https://github.com/tesseract-ocr/tesseract/wiki/ImproveQuality#page-segmentation-method>
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->psm(6)
+    ->run();
+```
+
+### oem
+
+Specify the OCR Engine Mode. (see `tesseract --help-oem`)
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->oem(2)
+    ->run();
+```
+
+### dpi
+
+Specify the image DPI. It is useful if your image does not contain this information in its metadata.
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->dpi(300)
+    ->run();
+```
+
+### allowlist
+
+This is a shortcut for `->config('tessedit_char_whitelist', 'abcdef....')`.
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->allowlist(range('a', 'z'), range(0, 9), '-_@')
+    ->run();
+```
+
+### configFile
+
+Specify a config file to be used. It can either be the path to your own
+config file or the name of one of the predefined config files:
+<https://github.com/tesseract-ocr/tesseract/tree/master/tessdata/configs>
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->configFile('hocr')
+    ->run();
+```
+
+### setOutputFile
+
+Specify an output file to be used. Be aware: if you set an output file then
+the option `withoutTempFiles` is ignored.
+Tempfiles are written (and deleted) even if `withoutTempFiles = true`.
+
+In combination with `configFile` you are able to get the `hocr`, `tsv` or
+`pdf` files.
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->configFile('pdf')
+    ->setOutputFile('/PATH_TO_MY_OUTPUTFILE/searchable.pdf')
+    ->run();
+```
+
+### digits
+
+Shortcut for `->configFile('digits')`.
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->digits()
+    ->run();
+```
+
+### hocr
+
+Shortcut for `->configFile('hocr')`.
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->hocr()
+    ->run();
+```
+
+### pdf
+
+Shortcut for `->configFile('pdf')`.
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->pdf()
+    ->run();
+```
+
+### quiet
+
+Shortcut for `->configFile('quiet')`.
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->quiet()
+    ->run();
+```
+
+### tsv
+
+Shortcut for `->configFile('tsv')`.
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->tsv()
+    ->run();
+```
+
+### txt
+
+Shortcut for `->configFile('txt')`.
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->txt()
+    ->run();
+```
+
+### tempDir
+
+Define a custom directory to store temporary files generated by tesseract.
+Make sure the directory actually exists and the user running `php` is allowed
+to write in there.
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->tempDir('./my/custom/temp/dir')
+    ->run();
+```
+
+### withoutTempFiles
+
+Specify that `tesseract` should output the recognized text without writing to temporary files.
+The data is gathered from the standard output of `tesseract` instead.
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->withoutTempFiles()
+    ->run();
+```
+
+### Other options
+
+Any configuration option offered by Tesseract can be used like that:
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->config('config_var', 'value')
+    ->config('other_config_var', 'other value')
+    ->run();
+```
+
+Or like that:
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->configVar('value')
+    ->otherConfigVar('other value')
+    ->run();
+```
+
+__More info:__ <https://github.com/tesseract-ocr/tesseract/wiki/ControlParams>
+
+### Thread-limit
+
+Sometimes, it may be useful to limit the number of threads that tesseract is
+allowed to use (e.g. in [this case](https://github.com/tesseract-ocr/tesseract/issues/898)).
+Set the maximum number of threads with the `threadLimit` method before calling `run`:
+
+```php
+echo (new TesseractOCR('img.png'))
+    ->threadLimit(1)
+    ->run();
+```
+
+## How to contribute
+
+You can contribute to this project by:
+
+* Opening an [Issue][] if you found a bug or wish to propose a new feature;
+* Placing a [Pull Request][] with code that fixes a bug, missing/wrong documentation
+  or implements a new feature;
+
+Just make sure you take a look at our [Code of Conduct][] and [Contributing][]
+instructions.
+
+## License
+
+tesseract-ocr-for-php is released under the [MIT License][].
+
+
+<h2></h2><p align="center"><sub>Made with <sub><a href="#"><img src="https://thiagoalessio.github.io/tesseract-ocr-for-php/images/heart.svg" alt="love" width="14px"/></a></sub> in Berlin</sub></p>
+
+[ci_badge]: https://github.com/thiagoalessio/tesseract-ocr-for-php/workflows/CI/badge.svg?event=push&branch=main
+[ci]: https://github.com/thiagoalessio/tesseract-ocr-for-php/actions?query=workflow%3ACI
+[appveyor_badge]: https://ci.appveyor.com/api/projects/status/xwy5ls0798iwcim3/branch/main?svg=true
+[appveyor]: https://ci.appveyor.com/project/thiagoalessio/tesseract-ocr-for-php/branch/main
+[codacy_badge]: https://app.codacy.com/project/badge/Grade/a81aa10012874f23a57df5b492d835f2
+[codacy]: https://www.codacy.com/gh/thiagoalessio/tesseract-ocr-for-php/dashboard
+[test_coverage_badge]: https://codecov.io/gh/thiagoalessio/tesseract-ocr-for-php/branch/main/graph/badge.svg?token=Y0VnrqiSIf
+[test_coverage]: https://codecov.io/gh/thiagoalessio/tesseract-ocr-for-php
+[stable_version_badge]: https://img.shields.io/packagist/v/thiagoalessio/tesseract_ocr.svg
+[packagist]: https://packagist.org/packages/thiagoalessio/tesseract_ocr
+[total_downloads_badge]: https://img.shields.io/packagist/dt/thiagoalessio/tesseract_ocr.svg
+[monthly_downloads_badge]: https://img.shields.io/packagist/dm/thiagoalessio/tesseract_ocr.svg
+[Tesseract OCR]: https://github.com/tesseract-ocr/tesseract
+[Composer]: http://getcomposer.org/
+[windows_icon]: https://thiagoalessio.github.io/tesseract-ocr-for-php/images/windows-18.svg
+[macos_icon]: https://thiagoalessio.github.io/tesseract-ocr-for-php/images/apple-18.svg
+[tesseract_installation_on_windows]: https://github.com/tesseract-ocr/tesseract/wiki#windows
+[Capture2Text]: https://chocolatey.org/packages/capture2text
+[Chocolatey]: https://chocolatey.org
+[MacPorts]: https://www.macports.org
+[Homebrew]: https://brew.sh
+[@daijiale]: https://github.com/daijiale
+[HOCR]: https://github.com/tesseract-ocr/tesseract/wiki/Command-Line-Usage#hocr-output
+[TSV]: https://github.com/tesseract-ocr/tesseract/wiki/Command-Line-Usage#tsv-output-currently-available-in-305-dev-in-master-branch-on-github
+[Issue]: https://github.com/thiagoalessio/tesseract-ocr-for-php/issues
+[Pull Request]: https://github.com/thiagoalessio/tesseract-ocr-for-php/pulls
+[Code of Conduct]: https://github.com/thiagoalessio/tesseract-ocr-for-php/blob/main/.github/CODE_OF_CONDUCT.md
+[Contributing]: https://github.com/thiagoalessio/tesseract-ocr-for-php/blob/main/.github/CONTRIBUTING.md
+[MIT License]: https://github.com/thiagoalessio/tesseract-ocr-for-php/blob/main/MIT-LICENSE
diff --git a/tesseract/vendor/thiagoalessio/tesseract_ocr/codecov.yml b/tesseract/vendor/thiagoalessio/tesseract_ocr/codecov.yml
new file mode 100644 (file)
index 0000000..8fd4c92
--- /dev/null
@@ -0,0 +1,4 @@
+fixes:
+- "/home/runner/work/tesseract-ocr-for-php/tesseract-ocr-for-php/::"
+- "/Users/runner/work/tesseract-ocr-for-php/tesseract-ocr-for-php/::"
+- "C:\\projects\\tesseract-ocr-for-php\\::"
diff --git a/tesseract/vendor/thiagoalessio/tesseract_ocr/composer.json b/tesseract/vendor/thiagoalessio/tesseract_ocr/composer.json
new file mode 100644 (file)
index 0000000..9a07e6d
--- /dev/null
@@ -0,0 +1,35 @@
+{
+       "name": "thiagoalessio/tesseract_ocr",
+       "description": "A wrapper to work with Tesseract OCR inside PHP.",
+       "version": "2.13.0",
+       "type": "library",
+       "keywords": ["Tesseract", "OCR", "text recognition"],
+       "license": "MIT",
+       "authors": [
+               {
+                       "name": "thiagoalessio",
+                       "email": "thiagoalessio@me.com"
+               }
+       ],
+       "support": {
+               "issues": "https://github.com/thiagoalessio/tesseract-ocr-for-php/issues",
+               "irc": "irc://irc.freenode.net/tesseract-ocr-for-php",
+               "source": "https://github.com/thiagoalessio/tesseract-ocr-for-php"
+       },
+       "require": {
+               "php": "^5.3 || ^7.0 || ^8.0"
+       },
+       "require-dev": {
+               "phpunit/php-code-coverage": "^2.2.4 || ^9.0.0"
+       },
+       "autoload": {
+               "psr-4": {
+                       "thiagoalessio\\TesseractOCR\\": "src/"
+               }
+       },
+       "autoload-dev": {
+               "psr-4": {
+                       "thiagoalessio\\TesseractOCR\\Tests\\": "tests/"
+               }
+       }
+}
diff --git a/tesseract/vendor/thiagoalessio/tesseract_ocr/src/Command.php b/tesseract/vendor/thiagoalessio/tesseract_ocr/src/Command.php
new file mode 100644 (file)
index 0000000..ad123e8
--- /dev/null
@@ -0,0 +1,80 @@
+<?php namespace thiagoalessio\TesseractOCR;
+
+/**
+ * Holds the settings of one tesseract invocation and renders them into the
+ * command line string (see __toString()).
+ */
+class Command
+{
+       /** Name or path of the tesseract executable. */
+       public $executable = 'tesseract';
+       /** When false, "-" is emitted in place of the image path. */
+       public $useFileAsInput = true;
+       /** When false, "-" is emitted in place of the output file path. */
+       public $useFileAsOutput = true;
+       /** Option fragments: strings, or callables receiving the tesseract version. */
+       public $options = array();
+       /** Config file name/path; appended last and used to pick the output extension. */
+       public $configFile;
+       /** Directory for generated output files; defaults to the system temp dir. */
+       public $tempDir;
+       /** When set, the command is prefixed with OMP_THREAD_LIMIT=<value>. */
+       public $threadLimit;
+       public $image;
+       public $imageSize;
+       /** Output path without extension; generated lazily by getOutputFile(). */
+       private $outputFile;
+
+       /**
+        * @param mixed $image      value stored in $image (escaped as a path when used as file input)
+        * @param mixed $outputFile output path without extension, or null to generate one
+        */
+       public function __construct($image=null, $outputFile=null)
+       {
+               $this->image = $image;
+               $this->outputFile = $outputFile;
+       }
+
+       /** Returns the command line; identical to casting the object to string. */
+       public function build() { return "$this"; }
+
+       /**
+        * Renders the command line in this order: optional OMP_THREAD_LIMIT
+        * assignment, executable, input, output, options, config file.
+        *
+        * Note: this queries the tesseract version (spawning a process) on
+        * every call, because callable options need it.
+        */
+       public function __toString()
+       {
+               $cmd = array();
+               if ($this->threadLimit) $cmd[] = "OMP_THREAD_LIMIT={$this->threadLimit}";
+               $cmd[] = self::escape($this->executable);
+               $cmd[] = $this->useFileAsInput ? self::escape($this->image) : "-";
+               $cmd[] = $this->useFileAsOutput ? self::escape($this->getOutputFile(false)) : "-";
+
+               $version = $this->getTesseractVersion();
+
+               foreach ($this->options as $option) {
+                       $cmd[] = is_callable($option) ? $option($version) : "$option";
+               }
+               if ($this->configFile) $cmd[] = $this->configFile;
+
+               return join(' ', $cmd);
+       }
+
+       /**
+        * Returns the output file path, generating one inside the temp dir
+        * (via tempnam()) the first time if none was provided.
+        *
+        * @param bool $withExt when true, appends ".hocr", ".tsv" or ".pdf" if
+        *                      $configFile is one of those names, ".txt" otherwise
+        * @return string
+        */
+       public function getOutputFile($withExt=true)
+       {
+               if (!$this->outputFile)
+                       $this->outputFile = $this->getTempDir()
+                               .DIRECTORY_SEPARATOR
+                               .basename(tempnam($this->getTempDir(), 'ocr'));
+               if (!$withExt) return $this->outputFile;
+
+               $hasCustomExt = array('hocr', 'tsv', 'pdf');
+               $ext = in_array($this->configFile, $hasCustomExt) ? $this->configFile : 'txt';
+               return "{$this->outputFile}.{$ext}";
+       }
+
+       /** Returns $tempDir when set, otherwise sys_get_temp_dir(). */
+       public function getTempDir()
+       {
+               return $this->tempDir ?: sys_get_temp_dir();
+       }
+
+       /**
+        * Runs "<executable> --version" and returns the second space-separated
+        * token of the first output line.
+        *
+        * @return string|null null when the command printed nothing or the
+        *                     first line has no second token
+        */
+       public function getTesseractVersion()
+       {
+               exec(self::escape($this->executable).' --version 2>&1', $output);
+               // Guard the lookups: an executable that prints nothing used to
+               // trigger undefined-offset warnings before returning null anyway.
+               if (!isset($output[0])) return null;
+               $outputParts = explode(' ', $output[0]);
+               return isset($outputParts[1]) ? $outputParts[1] : null;
+       }
+
+       /**
+        * Runs "<executable> --list-langs", drops the first output line and
+        * returns the remaining lines sorted.
+        *
+        * @return array
+        */
+       public function getAvailableLanguages()
+       {
+               exec(self::escape($this->executable) . ' --list-langs 2>&1', $output);
+               array_shift($output);
+               sort($output);
+               return $output;
+       }
+
+       /**
+        * Wraps $str in double quotes, backslash-escaping `$`, `"` and backtick,
+        * plus the backslash itself unless PHP_OS starts with "WIN".
+        */
+       public static function escape($str)
+       {
+               $charlist = strtoupper(substr(PHP_OS, 0, 3)) == 'WIN' ? '$"`' : '$"\\`';
+               return '"'.addcslashes($str, $charlist).'"';
+       }
+}
diff --git a/tesseract/vendor/thiagoalessio/tesseract_ocr/src/FeatureNotAvailableException.php b/tesseract/vendor/thiagoalessio/tesseract_ocr/src/FeatureNotAvailableException.php
new file mode 100644 (file)
index 0000000..12264f5
--- /dev/null
@@ -0,0 +1,7 @@
+<?php
+
+namespace thiagoalessio\TesseractOCR;
+
+/**
+ * Thrown by FriendlyErrors::checkTesseractVersion() when the installed
+ * tesseract version is older than the version an action requires.
+ */
+class FeatureNotAvailableException extends TesseractOcrException
+{
+}
diff --git a/tesseract/vendor/thiagoalessio/tesseract_ocr/src/FriendlyErrors.php b/tesseract/vendor/thiagoalessio/tesseract_ocr/src/FriendlyErrors.php
new file mode 100644 (file)
index 0000000..93c7caa
--- /dev/null
@@ -0,0 +1,120 @@
+<?php namespace thiagoalessio\TesseractOCR;
+
+/**
+ * Static precondition/postcondition checks that turn common failures into
+ * exceptions carrying a multi-line, human-readable message.
+ */
+class FriendlyErrors
+{
+       /**
+        * Throws ImageNotFoundException unless $image exists on disk.
+        */
+       public static function checkImagePath($image)
+       {
+               if (file_exists($image)) return;
+
+               $currentDir = __DIR__;
+               $msg = array();
+               $msg[] = "Error! The image \"$image\" was not found.";
+               $msg[] = '';
+               $msg[] = "The current __DIR__ is $currentDir";
+               $msg = join(PHP_EOL, $msg);
+
+               throw new ImageNotFoundException($msg);
+       }
+
+       /**
+        * Throws TesseractNotFoundException unless $executable is an existing
+        * file or can be resolved by the platform lookup command ("where.exe"
+        * when PHP_OS starts with "win", "type" otherwise).
+        */
+       public static function checkTesseractPresence($executable)
+       {
+               if (file_exists($executable)) return;
+
+               $cmd = stripos(PHP_OS, 'win') === 0
+                       ? 'where.exe '.Command::escape($executable).' > NUL 2>&1'
+                       : 'type '.Command::escape($executable).' > /dev/null 2>&1';
+               system($cmd, $exitCode);
+
+               if ($exitCode == 0) return;
+
+               $currentPath = getenv('PATH');
+               $msg = array();
+               $msg[] = "Error! The command \"$executable\" was not found.";
+               $msg[] = '';
+               $msg[] = 'Make sure you have Tesseract OCR installed on your system:';
+               $msg[] = 'https://github.com/tesseract-ocr/tesseract';
+               $msg[] = '';
+               $msg[] = "The current \$PATH is $currentPath";
+               $msg = join(PHP_EOL, $msg);
+
+               throw new TesseractNotFoundException($msg);
+       }
+
+       /**
+        * Throws UnsuccessfulCommandException when the command produced no
+        * result: a missing/empty output file in file-output mode, or an empty
+        * $stdout otherwise. The message includes the command and $stderr.
+        *
+        * @param Command $command
+        * @param string  $stdout
+        * @param string  $stderr
+        */
+       public static function checkCommandExecution($command, $stdout, $stderr)
+       {
+               if ($command->useFileAsOutput) {
+                       $file = $command->getOutputFile();
+                       if (file_exists($file) && filesize($file) > 0)  return;
+               }
+
+               if (!$command->useFileAsOutput && $stdout) {
+                       return;
+               }
+
+               $msg = array();
+               $msg[] = 'Error! The command did not produce any output.';
+               $msg[] = '';
+               $msg[] = 'Generated command:';
+               $msg[] = "$command";
+               $msg[] = '';
+               $msg[] = 'Returned message:';
+               $arrayStderr = explode(PHP_EOL, $stderr);
+               // Drop only the empty element left by a trailing line break;
+               // popping unconditionally discarded the last (or only) error
+               // line whenever stderr did not end with PHP_EOL.
+               if (end($arrayStderr) === '') array_pop($arrayStderr);
+               $msg = array_merge($msg, $arrayStderr);
+               $msg = join(PHP_EOL, $msg);
+
+               throw new UnsuccessfulCommandException($msg);
+       }
+
+       /**
+        * Throws UnsuccessfulCommandException when $processHandle is FALSE,
+        * i.e. the process could not be started.
+        */
+       public static function checkProcessCreation($processHandle, $command)
+       {
+               if ($processHandle !== FALSE) return;
+
+               $msg = array();
+               $msg[] = 'Error! The command could not be launched.';
+               $msg[] = '';
+               $msg[] = 'Generated command:';
+               $msg[] = "$command";
+               $msg = join(PHP_EOL, $msg);
+
+               throw new UnsuccessfulCommandException($msg);
+       }
+
+       /**
+        * Throws FeatureNotAvailableException when the tesseract version
+        * reported by $command is lower than $expected. A leading "v" in the
+        * reported version is ignored.
+        *
+        * @param string  $expected minimum required version
+        * @param string  $action   feature name used in the error message
+        * @param Command $command
+        */
+       public static function checkTesseractVersion($expected, $action, $command)
+       {
+               $actual = $command->getTesseractVersion();
+
+               if ($actual[0] === 'v')
+                       $actual = substr($actual, 1);
+
+               if (version_compare($actual, $expected, ">=")) return;
+
+               $msg = array();
+               $msg[] = "Error! $action is not available this tesseract version";
+               $msg[] = "Required version is $expected, actual version is $actual";
+               $msg[] = '';
+               $msg[] = 'Generated command:';
+               $msg[] = "$command";
+               $msg = join(PHP_EOL, $msg);
+
+               throw new FeatureNotAvailableException($msg);
+       }
+
+       /**
+        * Creates the parent directory of $path when it is missing, then throws
+        * NoWritePermissionsForOutputFile unless that directory (and $path
+        * itself, if it already exists) is writable.
+        */
+       public static function checkWritePermissions($path)
+       {
+               if (!is_dir(dirname($path))) mkdir(dirname($path));
+               $writableDirectory = is_writable(dirname($path));
+               $writableFile = true;
+               if (file_exists($path)) $writableFile = is_writable($path);
+               if ($writableFile && $writableDirectory) return;
+
+               $msg = array();
+               $msg[] = "Error! No permission to write to $path";
+               $msg[] = "Make sure you have the right outputFile and permissions "
+                       ."to write to the folder";
+               $msg[] = '';
+               $msg = join(PHP_EOL, $msg);
+
+               throw new NoWritePermissionsForOutputFile($msg);
+       }
+}
diff --git a/tesseract/vendor/thiagoalessio/tesseract_ocr/src/ImageNotFoundException.php b/tesseract/vendor/thiagoalessio/tesseract_ocr/src/ImageNotFoundException.php
new file mode 100644 (file)
index 0000000..2ba7df6
--- /dev/null
@@ -0,0 +1,7 @@
+<?php
+
+namespace thiagoalessio\TesseractOCR;
+
+/**
+ * Thrown by FriendlyErrors::checkImagePath() when the image file does not
+ * exist.
+ */
+class ImageNotFoundException extends TesseractOcrException
+{
+}
diff --git a/tesseract/vendor/thiagoalessio/tesseract_ocr/src/NoWritePermissionsForOutputFile.php b/tesseract/vendor/thiagoalessio/tesseract_ocr/src/NoWritePermissionsForOutputFile.php
new file mode 100644 (file)
index 0000000..792a44e
--- /dev/null
@@ -0,0 +1,7 @@
+<?php
+
+namespace thiagoalessio\TesseractOCR;
+
+/**
+ * Thrown by FriendlyErrors::checkWritePermissions() when the output path or
+ * its parent directory is not writable.
+ */
+class NoWritePermissionsForOutputFile extends TesseractOcrException
+{
+}
diff --git a/tesseract/vendor/thiagoalessio/tesseract_ocr/src/Option.php b/tesseract/vendor/thiagoalessio/tesseract_ocr/src/Option.php
new file mode 100644 (file)
index 0000000..f39782e
--- /dev/null
@@ -0,0 +1,79 @@
+<?php namespace thiagoalessio\TesseractOCR;
+
+/**
+ * Factories for command line option fragments.
+ *
+ * Each factory returns a closure producing the fragment; Command calls it
+ * with the detected tesseract version when rendering the command line.
+ */
+class Option
+{
+       /** Page segmentation mode: "--psm N" for version 4 and later, "-psm N" before. */
+       public static function psm($psm)
+       {
+               return function($version) use ($psm) {
+                       $bare = preg_replace('/^v/', '', $version);
+                       $flag = version_compare($bare, 4, '>=') ? '--psm' : '-psm';
+                       return $flag.' '.$psm;
+               };
+       }
+
+       /** OCR engine mode; the closure throws for versions below 3.05. */
+       public static function oem($oem)
+       {
+               return function($version) use ($oem) {
+                       Option::checkMinVersion('3.05', $version, 'oem');
+                       return '--oem '.$oem;
+               };
+       }
+
+       /** Image resolution, rendered as "--dpi N". */
+       public static function dpi($dpi)
+       {
+               return function() use ($dpi) {
+                       return '--dpi '.$dpi;
+               };
+       }
+
+       /** Quoted path of a user words file; the closure throws for versions below 3.04. */
+       public static function userWords($path)
+       {
+               return function($version) use ($path) {
+                       Option::checkMinVersion('3.04', $version, 'user-words');
+                       return sprintf('--user-words "%s"', addcslashes($path, '\\"'));
+               };
+       }
+
+       /** Quoted path of a user patterns file; the closure throws for versions below 3.04. */
+       public static function userPatterns($path)
+       {
+               return function($version) use ($path) {
+                       Option::checkMinVersion('3.04', $version, 'user-patterns');
+                       return sprintf('--user-patterns "%s"', addcslashes($path, '\\"'));
+               };
+       }
+
+       /** Quoted path of the tessdata directory. */
+       public static function tessdataDir($path)
+       {
+               return function() use ($path) {
+                       return sprintf('--tessdata-dir "%s"', addcslashes($path, '\\"'));
+               };
+       }
+
+       /** Accepts any number of language codes and renders them as "-l a+b+c". */
+       public static function lang()
+       {
+               $codes = func_get_args();
+               return function() use ($codes) {
+                       return '-l '.implode('+', $codes);
+               };
+       }
+
+       /**
+        * Renders a `-c "name=value"` pair; $var is lower-cased with an
+        * underscore inserted before each run of capitals.
+        */
+       public static function config($var, $value)
+       {
+               return function() use($var, $value) {
+                       $name = strtolower(preg_replace('/([A-Z])+/', '_$1', $var));
+                       $assignment = $name.'='.$value;
+                       return '-c "'.addcslashes($assignment, '\\"').'"';
+               };
+       }
+
+       /**
+        * Throws \Exception when $currVersion is lower than $minVersion.
+        * A leading "v" is stripped from both versions before comparing.
+        */
+       public static function checkMinVersion($minVersion, $currVersion, $option)
+       {
+               $required = preg_replace('/^v/', '', $minVersion);
+               $current = preg_replace('/^v/', '', $currVersion);
+               if (version_compare($current, $required, '<')) {
+                       throw new \Exception(
+                               "$option option is only available on Tesseract $required or later."
+                               .PHP_EOL."Your version of Tesseract is $current"
+                       );
+               }
+       }
+}
diff --git a/tesseract/vendor/thiagoalessio/tesseract_ocr/src/Process.php b/tesseract/vendor/thiagoalessio/tesseract_ocr/src/Process.php
new file mode 100644 (file)
index 0000000..38460eb
--- /dev/null
@@ -0,0 +1,83 @@
+<?php namespace thiagoalessio\TesseractOCR;
+
+/**
+ * Thin wrapper around proc_open(): starts the command with piped
+ * stdin/stdout/stderr, feeds data to stdin and collects the output.
+ */
+class Process {
+
+    private $stdin;
+    private $stdout;
+    private $stderr;
+    private $handle;
+    private $startTime;
+
+    /**
+     * Starts $command and switches its stdout/stderr pipes to non-blocking.
+     *
+     * @param string $command command line to run
+     * @throws UnsuccessfulCommandException when the process cannot be started
+     */
+    public function __construct($command)
+    {
+        $this->startTime = microtime(true);
+        $streamDescriptors = [
+            array("pipe", "r"),
+            array("pipe", "w"),
+            array("pipe", "w")
+        ];
+        $this->handle = proc_open($command, $streamDescriptors, $pipes, NULL, NULL, ["bypass_shell" => true]);
+
+        // Validate the handle first: $pipes may not be populated when
+        // proc_open() fails, and destructuring it would only add warnings.
+        FriendlyErrors::checkProcessCreation($this->handle, $command);
+
+        list($this->stdin, $this->stdout, $this->stderr) = $pipes;
+
+        // This can avoid a deadlock in some cases (when the stderr buffer fills up before stdout is written, and vice versa)
+        stream_set_blocking($this->stdout, 0);
+        stream_set_blocking($this->stderr, 0);
+    }
+
+    /**
+     * Writes $data to the process' stdin, retrying after partial writes.
+     *
+     * @param string $data bytes to send
+     * @param int    $len  number of bytes expected to be written
+     * @return bool true when exactly $len bytes were written
+     */
+    public function write($data, $len)
+    {
+        $total = 0;
+        do
+        {
+            $res = fwrite($this->stdin, substr($data, $total));
+            // Accumulate separately: "$total += $res < $len" parses as
+            // "$total += ($res < $len)" and counted booleans, not bytes.
+            if ($res) $total += $res;
+        } while($res && $total < $len);
+        return $total === $len;
+    }
+
+
+    /**
+     * Polls the process until it exits or $timeout is exceeded, collecting
+     * everything it writes to stdout and stderr.
+     *
+     * @param int|float $timeout seconds since construction; 0 disables the timeout
+     * @return array ["out" => string, "err" => string]
+     */
+    public function wait($timeout = 0)
+    {
+        $running = true;
+        $data = ["out" => "", "err" => ""];
+        while (($running === true) && !$this->hasTimedOut($timeout))
+        {
+            $data["out"] .= fread($this->stdout, 8192);
+            $data["err"] .= fread($this->stderr, 8192);
+            $procInfo = proc_get_status($this->handle);
+            $running = $procInfo["running"];
+            if ($running) {
+                usleep(1000); // Sleep 1ms to yield CPU time
+            }
+        }
+        if (!$running) {
+            // The process may exit with more data buffered than the last
+            // 8192-byte read consumed; drain the pipes so output is not truncated.
+            $data["out"] .= stream_get_contents($this->stdout);
+            $data["err"] .= stream_get_contents($this->stderr);
+        }
+        return $data;
+    }
+
+    /**
+     * Closes all pipes and the process handle.
+     *
+     * @return int the value returned by proc_close()
+     */
+    public function close()
+    {
+        $this->closeStream($this->stdin);
+        $this->closeStream($this->stdout);
+        $this->closeStream($this->stderr);
+        return proc_close($this->handle);
+    }
+
+    /** Closes only the stdin pipe, signalling end of input to the process. */
+    public function closeStdin()
+    {
+        $this->closeStream($this->stdin);
+    }
+
+    /** True when $timeout is positive and that many seconds passed since construction. */
+    private function hasTimedOut($timeout)
+    {
+        return (($timeout > 0) &&  ($this->startTime + $timeout < microtime(true)));
+    }
+
+    /** Closes $stream if still open and resets the reference to NULL. */
+    private function closeStream(&$stream)
+    {
+        if ($stream !== NULL)
+        {
+            fclose($stream);
+            $stream = NULL;
+        }
+    }
+}
diff --git a/tesseract/vendor/thiagoalessio/tesseract_ocr/src/TesseractNotFoundException.php b/tesseract/vendor/thiagoalessio/tesseract_ocr/src/TesseractNotFoundException.php
new file mode 100644 (file)
index 0000000..7b7f0c1
--- /dev/null
@@ -0,0 +1,7 @@
+<?php
+
+namespace thiagoalessio\TesseractOCR;
+
+/**
+ * Thrown by FriendlyErrors::checkTesseractPresence() when the tesseract
+ * executable cannot be found.
+ */
+class TesseractNotFoundException extends TesseractOcrException
+{
+}
diff --git a/tesseract/vendor/thiagoalessio/tesseract_ocr/src/TesseractOCR.php b/tesseract/vendor/thiagoalessio/tesseract_ocr/src/TesseractOCR.php
new file mode 100644 (file)
index 0000000..343e8ae
--- /dev/null
@@ -0,0 +1,181 @@
+<?php namespace thiagoalessio\TesseractOCR;
+
+use thiagoalessio\TesseractOCR\Command;
+use thiagoalessio\TesseractOCR\Option;
+use thiagoalessio\TesseractOCR\FriendlyErrors;
+
+/**
+ * Fluent wrapper that configures a Command object and runs tesseract with it.
+ *
+ * Most methods store a value on the command and return $this so that calls
+ * can be chained; run() executes the command and returns the resulting text.
+ */
+class TesseractOCR
+{
+       public $command;
+       private $outputFile = null;
+
+       /**
+        * @param mixed $image   stored on the command after a cast to string
+        * @param mixed $command command object to use; a new Command is
+        *                       created when the value is falsy
+        */
+       public function __construct($image=null, $command=null)
+       {
+               if (!$command) {
+                       $command = new Command;
+               }
+               $this->command = $command;
+               $this->image("$image");
+       }
+
+       /**
+        * Runs the configured command and returns the trimmed text.
+        *
+        * The text comes from the command's output file when the command uses
+        * a file as output, otherwise from the captured stdout. If an output
+        * file was requested through setOutputFile(), the command's output
+        * file is renamed to that path after being read.
+        *
+        * @param int|float $timeout passed on to Process::wait()
+        * @return string recognised text with surrounding whitespace removed
+        * @throws TesseractOcrException rethrown after temp files are removed
+        */
+       public function run($timeout = 0)
+       {
+               $trimChars = " \t\n\r\0\x0A\x0B\x0C";
+
+               try {
+                       $wantsOutputFile = $this->outputFile !== null;
+                       if ($wantsOutputFile) {
+                               FriendlyErrors::checkWritePermissions($this->outputFile);
+                               $this->command->useFileAsOutput = true;
+                       }
+
+                       FriendlyErrors::checkTesseractPresence($this->command->executable);
+                       if ($this->command->useFileAsInput) {
+                               FriendlyErrors::checkImagePath($this->command->image);
+                       }
+
+                       $process = new Process((string) $this->command);
+
+                       // Image data given via imageData() is fed through stdin
+                       if (!$this->command->useFileAsInput) {
+                               $process->write($this->command->image, $this->command->imageSize);
+                               $process->closeStdin();
+                       }
+                       $output = $process->wait($timeout);
+
+                       FriendlyErrors::checkCommandExecution($this->command, $output["out"], $output["err"]);
+               }
+               catch (TesseractOcrException $e) {
+                       if ($this->command->useFileAsOutput) {
+                               $this->cleanTempFiles();
+                       }
+                       throw $e;
+               }
+
+               if (!$this->command->useFileAsOutput) {
+                       return trim($output["out"], $trimChars);
+               }
+
+               $text = file_get_contents($this->command->getOutputFile());
+               if ($this->outputFile !== null) {
+                       rename($this->command->getOutputFile(), $this->outputFile);
+               }
+               $this->cleanTempFiles();
+
+               return trim($text, $trimChars);
+       }
+
+       /**
+        * Uses in-memory image data instead of an image file as input.
+        *
+        * @param string $image image data, written to the process stdin by run()
+        * @param int    $size  length passed along with the data when writing
+        * @return $this
+        */
+       public function imageData($image, $size)
+       {
+               FriendlyErrors::checkTesseractVersion("3.03-rc1", "Reading image data from stdin", $this->command);
+               $cmd = $this->command;
+               $cmd->useFileAsInput = false;
+               $cmd->image = $image;
+               $cmd->imageSize = $size;
+               return $this;
+       }
+
+       /**
+        * Makes run() take the text from stdout instead of an output file.
+        *
+        * @return $this
+        */
+       public function withoutTempFiles()
+       {
+               FriendlyErrors::checkTesseractVersion("3.03-rc1", "Writing to stdout (without using temp files)", $this->command);
+               $this->command->useFileAsOutput = false;
+               return $this;
+       }
+
+       /**
+        * Sets the image on the command.
+        *
+        * @return $this
+        */
+       public function image($image)
+       {
+               $this->command->image = $image;
+               return $this;
+       }
+
+       /**
+        * Sets the tesseract executable after checking that it is present.
+        *
+        * @return $this
+        */
+       public function executable($executable)
+       {
+               FriendlyErrors::checkTesseractPresence($executable);
+               $this->command->executable = $executable;
+               return $this;
+       }
+
+       /**
+        * Sets the config file on the command.
+        *
+        * @return $this
+        */
+       public function configFile($configFile)
+       {
+               $this->command->configFile = $configFile;
+               return $this;
+       }
+
+       /**
+        * Sets the temp directory on the command.
+        *
+        * @return $this
+        */
+       public function tempDir($tempDir)
+       {
+               $this->command->tempDir = $tempDir;
+               return $this;
+       }
+
+       /**
+        * Sets the thread limit on the command.
+        *
+        * @return $this
+        */
+       public function threadLimit($limit)
+       {
+               $this->command->threadLimit = $limit;
+               return $this;
+       }
+
+       /**
+        * Alias of configFile().
+        *
+        * @deprecated
+        * @return $this
+        */
+       public function format($fmt)
+       {
+               return $this->configFile($fmt);
+       }
+
+       /**
+        * Remembers the path run() should move the output file to.
+        *
+        * @return $this
+        */
+       public function setOutputFile($path)
+       {
+               $this->outputFile = $path;
+               return $this;
+       }
+
+       /**
+        * Adds a 'tessedit_char_whitelist' config option built from all
+        * arguments; array arguments are flattened by joining their items.
+        *
+        * @return $this
+        */
+       public function allowlist()
+       {
+               $chars = '';
+               foreach (func_get_args() as $piece) {
+                       $chars .= is_array($piece) ? join('', $piece) : $piece;
+               }
+               $this->command->options[] = Option::config('tessedit_char_whitelist', $chars);
+               return $this;
+       }
+
+       /**
+        * Deprecated spelling of allowlist(); emits an E_USER_NOTICE and
+        * forwards the joined arguments to allowlist().
+        *
+        * @return $this
+        */
+       public function whitelist()
+       {
+               trigger_error('Notice: whitelist is deprecated, use allowlist instead.', E_USER_NOTICE);
+
+               $chars = '';
+               foreach (func_get_args() as $piece) {
+                       $chars .= is_array($piece) ? join('', $piece) : $piece;
+               }
+               return $this->allowlist($chars);
+       }
+
+       /**
+        * Returns the tesseract version as reported by the command.
+        */
+       public function version()
+       {
+               return $this->command->getTesseractVersion();
+       }
+
+       /**
+        * Returns the available languages as reported by the command.
+        */
+       public function availableLanguages()
+       {
+               return $this->command->getAvailableLanguages();
+       }
+
+       /**
+        * Handles calls to methods that are not declared on this class.
+        *
+        * A known config file name selects that config file. A name matching
+        * a method of the Option class appends that method's result to the
+        * command options. Any other name is appended as a plain config
+        * option whose value is the first argument (or null without one).
+        *
+        * @return $this
+        */
+       public function __call($method, $args)
+       {
+               if ($this->isConfigFile($method)) {
+                       return $this->configFile($method);
+               }
+
+               if (!$this->isOption($method)) {
+                       $value = null;
+                       if (!empty($args)) {
+                               $value = $args[0];
+                       }
+                       $this->command->options[] = Option::config($method, $value);
+                       return $this;
+               }
+
+               $callable = $this->getOptionClassName().'::'.$method;
+               $this->command->options[] = call_user_func_array($callable, $args);
+               return $this;
+       }
+
+       /**
+        * Whether $name is one of the config file names handled by __call().
+        */
+       private function isConfigFile($name)
+       {
+               $known = ['digits', 'hocr', 'pdf', 'quiet', 'tsv', 'txt'];
+               return in_array($name, $known);
+       }
+
+       /**
+        * Whether the Option class has a method called $name.
+        */
+       private function isOption($name)
+       {
+               $methods = get_class_methods($this->getOptionClassName());
+               return in_array($name, $methods);
+       }
+
+       /**
+        * Name of the Option class inside the current namespace.
+        */
+       private function getOptionClassName()
+       {
+               return __NAMESPACE__.'\\Option';
+       }
+
+       /**
+        * Deletes the command's output files, asking the command for the
+        * path once with false and once with true, if they exist.
+        */
+       private function cleanTempFiles()
+       {
+               foreach ([false, true] as $flag) {
+                       if (!file_exists($this->command->getOutputFile($flag))) {
+                               continue;
+                       }
+                       unlink($this->command->getOutputFile($flag));
+               }
+       }
+}
diff --git a/tesseract/vendor/thiagoalessio/tesseract_ocr/src/TesseractOcrException.php b/tesseract/vendor/thiagoalessio/tesseract_ocr/src/TesseractOcrException.php
new file mode 100644 (file)
index 0000000..8c07861
--- /dev/null
@@ -0,0 +1,7 @@
+<?php
+
+namespace thiagoalessio\TesseractOCR;
+
+/**
+ * Abstract base type for the exceptions of this library.
+ *
+ * It adds nothing to \Exception; it exists so that callers can catch every
+ * library exception through one type.
+ */
+abstract class TesseractOcrException extends \Exception
+{
+}
diff --git a/tesseract/vendor/thiagoalessio/tesseract_ocr/src/UnsuccessfulCommandException.php b/tesseract/vendor/thiagoalessio/tesseract_ocr/src/UnsuccessfulCommandException.php
new file mode 100644 (file)
index 0000000..201b352
--- /dev/null
@@ -0,0 +1,7 @@
+<?php
+
+namespace thiagoalessio\TesseractOCR;
+
+/**
+ * Library exception with no behaviour of its own; it only provides a
+ * distinct type below TesseractOcrException.
+ *
+ * NOTE(review): presumably raised when the tesseract command runs but does
+ * not succeed; confirm against the code that throws it.
+ */
+class UnsuccessfulCommandException extends TesseractOcrException
+{
+}