3 * Name: Compact Language Detector
4 * Description: Improved language detection
6 * Author: Michael Vogel <heluecht@pirati.ca>
9 use Friendica\Core\Hook;
10 use Friendica\Core\Logger;
/**
 * Addon install routine: registers cld_get_language() (located in
 * addon/cld/cld.php) as a handler for the 'get_language' hook.
 */
13 function cld_install()
15 Hook::register('get_language', 'addon/cld/cld.php', 'cld_get_language');
/**
 * Handler for the 'get_language' hook: runs the CLD2 detector on
 * $data['text'] and, in its last visible statement, overwrites
 * $data['detected'] with the detected language.
 *
 * NOTE(review): only some of this function's lines are visible in this
 * listing (braces, several branch bodies and any early returns are not shown).
 * The comments below describe the visible statements only; control flow
 * between them should be confirmed against the full file.
 *
 * @param array $data Hook data, passed by reference. The visible code reads
 *                    the 'text' and 'detected' entries and finally sets
 *                    'detected' to a single-entry array of the form
 *                    [language code => language_probability / 100].
 */
18 function cld_get_language(array &$data)
// Guard: the detector class is only used when the 'cld2' PHP extension is
// loaded; otherwise a warning is logged.
20 if (!in_array('cld2', get_loaded_extensions())) {
21 Logger::warning('CLD2 is not installed.');
25 $cld2 = new \CLD2Detector();
27 $cld2->setEncodingHint(CLD2Encoding::UTF8); // optional, hints about text encoding
// The result is used below as an array with the keys 'language_code',
// 'language_name', 'language_probability' and 'is_reliable'.
29 $result = $cld2->detect($data['text']);
// Remember the first key of any previous detection result so it can be
// compared with the CLD2 result and included in the log output.
// Presumably that key is a language code - TODO confirm against the caller.
31 if ($data['detected']) {
32 $original = array_key_first($data['detected']);
// Translate selected CLD2 language codes into other identifiers
// (presumably the ones used by the language list queried below - confirm).
// NOTE(review): the replacement values for 'pt' and 'no' are not visible here.
37 $detected = $result['language_code'];
38 if ($detected == 'pt') {
40 } elseif ($detected == 'el') {
41 $detected = 'el-monoton';
42 } elseif ($detected == 'no') {
44 } elseif ($detected == 'zh') {
45 $detected = 'zh-Hans';
46 } elseif ($detected == 'zh-Hant') {
47 $detected = 'zh-hant';
// Results that CLD2 flags as unreliable are logged at debug level.
50 if (!$result['is_reliable']) {
51 Logger::debug('Unreliable detection', ['original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
// Case: CLD2 agrees with the previous detection (branch body not visible here).
55 if ($original == $detected) {
59 // Only for testing purposes
60 if (in_array($detected, ['xx-Qaai', 'ht', 'ga'])) {
// Keys of the available-language list after conversion for language
// detection; used to check whether the detected code is supported.
64 $available = array_keys(DI::l10n()->convertForLanguageDetection(DI::l10n()->getAvailableLanguages(true)));
66 if (!in_array($detected, $available)) {
67 Logger::debug('Unsupported language', ['original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
71 Logger::debug('Detected', ['original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
73 // Logger::debug('Detected different language', ['original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
// Replace the previous detection with the CLD2 result. The probability is
// divided by 100, presumably converting a percentage into a 0..1 value - confirm.
74 $data['detected'] = [$detected => $result['language_probability'] / 100];