git.mxchange.org Git - friendica-addons.git/commitdiff
Changed hook parameter / more languages added
author Michael <heluecht@pirati.ca>
Fri, 6 Oct 2023 03:54:45 +0000 (03:54 +0000)
committer Hypolite Petovan <hypolite@mrpetovan.com>
Sat, 7 Oct 2023 05:07:00 +0000 (07:07 +0200)
cld/cld.php

index 54e7a73e4178c30d5c4ad7f919f83bcdfbea16b9..fc8fa814570d7f9594f5d95d429a992b1e590f88 100644 (file)
@@ -12,7 +12,7 @@ use Friendica\DI;
 
 function cld_install()
 {
-       Hook::register('detect_languages', 'addon/cld/cld.php', 'cld_detect_languages');
+       Hook::register('detect_languages', __FILE__, 'cld_detect_languages');
 }
 
 function cld_detect_languages(array &$data)
@@ -25,6 +25,7 @@ function cld_detect_languages(array &$data)
        $cld2 = new \CLD2Detector();
 
        $cld2->setEncodingHint(CLD2Encoding::UTF8); // optional, hints about text encoding
+       $cld2->setPlainText(true);
 
        $result = $cld2->detect($data['text']);
        
@@ -37,18 +38,37 @@ function cld_detect_languages(array &$data)
        $detected = $result['language_code'];
        if ($detected == 'pt') {
                $detected = 'pt-PT';
+       } elseif ($detected == 'az') {
+               $detected = 'az-Latn';
+       } elseif ($detected == 'bs') {
+               $detected = 'bs-Latn';
        } elseif ($detected == 'el') {
                $detected = 'el-monoton';
+       } elseif ($detected == 'ht') {
+               $detected = 'fr';
+       } elseif ($detected == 'iw') {
+               $detected = 'he';
+       } elseif ($detected == 'jw') {
+               $detected = 'jv';
+       } elseif ($detected == 'ms') {
+               $detected = 'ms-Latn';
        } elseif ($detected == 'no') {
                $detected = 'nb';
+       } elseif ($detected == 'sr') {
+               $detected = 'sr-Cyrl';
        } elseif ($detected == 'zh') {
                $detected = 'zh-Hans';
        } elseif ($detected == 'zh-Hant') {
                $detected = 'zh-hant';
        }
 
+       // languages that aren't supported via the base language detection
+       if (in_array($detected, ['ceb', 'hmn', 'ht', 'kk', 'ky', 'mg', 'mk', 'ml', 'ny', 'or', 'pa', 'rw', 'su', 'st', 'tg', 'ts', 'xx-Qaai'])) {
+               return;
+       }
+
        if (!$result['is_reliable']) {
-               Logger::debug('Unreliable detection', ['original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
+               Logger::debug('Unreliable detection', ['uri-id' => $data['uri-id'], 'original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
                return;
        }
 
@@ -59,10 +79,10 @@ function cld_detect_languages(array &$data)
        $available = array_keys(DI::l10n()->convertForLanguageDetection(DI::l10n()->getAvailableLanguages(true)));
        
        if (!in_array($detected, $available)) {
-               Logger::debug('Unsupported language', ['original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
+               Logger::debug('Unsupported language', ['uri-id' => $data['uri-id'], 'original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
                return;
        }
 
-       Logger::debug('Detected different language', ['original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
+       Logger::debug('Detected different language', ['uri-id' => $data['uri-id'], 'original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
        $data['detected'] = [$detected => $result['language_probability'] / 100];
 }