From: Michael <heluecht@pirati.ca>
Date: Sat, 3 Oct 2020 15:42:21 +0000 (+0000)
Subject: New language detection
X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=2cd54320e4521fbad276fc08a8b4ed744da7a121;p=friendica.git

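New language detection

Replace pear/text_languagedetect with patrickschur/language-detection and
only detect the language of items whose gravity is GRAVITY_PARENT or
GRAVITY_COMMENT.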
---

diff --git a/composer.json b/composer.json
index 71b7499dbd..f103ce2fe0 100644
--- a/composer.json
+++ b/composer.json
@@ -40,7 +40,6 @@
 		"nikic/fast-route": "^1.3",
 		"paragonie/hidden-string": "^1.0",
 		"pear/console_table": "^1.3",
-		"pear/text_languagedetect": "1.*",
 		"pragmarx/google2fa": "^5.0",
 		"pragmarx/recovery": "^0.1.0",
 		"psr/container": "^1.0",
@@ -64,7 +63,8 @@
 		"npm-asset/perfect-scrollbar": "0.6.16",
 		"npm-asset/textcomplete": "^0.18.2",
 		"npm-asset/typeahead.js": "^0.11.1",
-		"phpseclib/phpseclib": "^2.0"
+		"phpseclib/phpseclib": "^2.0",
+		"patrickschur/language-detection": "^3.4"
 	},
 	"repositories": [
 		{
diff --git a/composer.lock b/composer.lock
index 26412f83a2..de1a1bdc0b 100644
--- a/composer.lock
+++ b/composer.lock
@@ -4,7 +4,7 @@
         "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
         "This file is @generated automatically"
     ],
-    "content-hash": "ed9aa898eaf8a1f8a807f3be9eecc3d7",
+    "content-hash": "2cd2011a7e93f6f64524d1663651c016",
     "packages": [
         {
             "name": "asika/simple-console",
@@ -460,7 +460,6 @@
                     "jsonld.php"
                 ]
             },
-            "notification-url": "https://packagist.org/downloads/",
             "license": [
                 "BSD-3-Clause"
             ],
@@ -478,11 +477,11 @@
             "description": "A JSON-LD Processor and API implementation in PHP.",
             "homepage": "https://git.friendi.ca/friendica/php-json-ld",
             "keywords": [
+                "JSON",
                 "JSON-LD",
                 "Linked Data",
                 "RDF",
                 "Semantic Web",
-                "json",
                 "jsonld"
             ],
             "time": "2018-10-08T20:41:00+00:00"
@@ -2379,6 +2378,52 @@
             ],
             "time": "2020-03-20T21:48:09+00:00"
         },
+        {
+            "name": "patrickschur/language-detection",
+            "version": "v3.4.0",
+            "source": {
+                "type": "git",
+                "url": "https://github.com/patrickschur/language-detection.git",
+                "reference": "95b55109177d5c4bd6b1bec6e8835cd0df36ef5f"
+            },
+            "dist": {
+                "type": "zip",
+                "url": "https://api.github.com/repos/patrickschur/language-detection/zipball/95b55109177d5c4bd6b1bec6e8835cd0df36ef5f",
+                "reference": "95b55109177d5c4bd6b1bec6e8835cd0df36ef5f",
+                "shasum": ""
+            },
+            "require": {
+                "ext-mbstring": "*",
+                "php": "^7"
+            },
+            "require-dev": {
+                "phpunit/phpunit": "^6"
+            },
+            "type": "library",
+            "autoload": {
+                "psr-4": {
+                    "LanguageDetection\\": "src/LanguageDetection"
+                }
+            },
+            "notification-url": "https://packagist.org/downloads/",
+            "license": [
+                "MIT"
+            ],
+            "authors": [
+                {
+                    "name": "Patrick Schur",
+                    "email": "patrick_schur@outlook.de"
+                }
+            ],
+            "description": "A language detection library for PHP. Detects the language from a given text string.",
+            "homepage": "https://github.com/patrickschur/language-detection",
+            "keywords": [
+                "detect",
+                "detection",
+                "language"
+            ],
+            "time": "2018-09-19T21:45:51+00:00"
+        },
         {
             "name": "pear/console_table",
             "version": "v1.3.1",
@@ -2434,50 +2479,6 @@
             ],
             "time": "2018-01-25T20:47:17+00:00"
         },
-        {
-            "name": "pear/text_languagedetect",
-            "version": "v1.0.1",
-            "source": {
-                "type": "git",
-                "url": "https://github.com/pear/Text_LanguageDetect.git",
-                "reference": "9e253f26cef9a9066f53f200cc3e0684018cb5b5"
-            },
-            "dist": {
-                "type": "zip",
-                "url": "https://api.github.com/repos/pear/Text_LanguageDetect/zipball/9e253f26cef9a9066f53f200cc3e0684018cb5b5",
-                "reference": "9e253f26cef9a9066f53f200cc3e0684018cb5b5",
-                "shasum": ""
-            },
-            "require-dev": {
-                "phpunit/phpunit": "8.*|9.*"
-            },
-            "suggest": {
-                "ext-mbstring": "May require the mbstring PHP extension"
-            },
-            "type": "library",
-            "autoload": {
-                "psr-0": {
-                    "Text": "./"
-                }
-            },
-            "notification-url": "https://packagist.org/downloads/",
-            "include-path": [
-                "./"
-            ],
-            "license": [
-                "BSD-2-Clause"
-            ],
-            "authors": [
-                {
-                    "name": "Nicholas Pisarro",
-                    "email": "taak@php.net",
-                    "role": "Lead"
-                }
-            ],
-            "description": "Identify human languages from text samples",
-            "homepage": "http://pear.php.net/package/Text_LanguageDetect",
-            "time": "2020-05-17T12:19:40+00:00"
-        },
         {
             "name": "phpseclib/phpseclib",
             "version": "2.0.29",
@@ -4450,7 +4451,7 @@
                 }
             ],
             "description": "Provides the functionality to compare PHP values for equality",
-            "homepage": "https://github.com/sebastianbergmann/comparator",
+            "homepage": "http://www.github.com/sebastianbergmann/comparator",
             "keywords": [
                 "comparator",
                 "compare",
@@ -4552,7 +4553,7 @@
                 }
             ],
             "description": "Provides functionality to handle HHVM/PHP environments",
-            "homepage": "https://github.com/sebastianbergmann/environment",
+            "homepage": "http://www.github.com/sebastianbergmann/environment",
             "keywords": [
                 "Xdebug",
                 "environment",
@@ -4620,7 +4621,7 @@
                 }
             ],
             "description": "Provides the functionality to export PHP variables for visualization",
-            "homepage": "https://github.com/sebastianbergmann/exporter",
+            "homepage": "http://www.github.com/sebastianbergmann/exporter",
             "keywords": [
                 "export",
                 "exporter"
@@ -4672,7 +4673,7 @@
                 }
             ],
             "description": "Snapshotting of global state",
-            "homepage": "https://github.com/sebastianbergmann/global-state",
+            "homepage": "http://www.github.com/sebastianbergmann/global-state",
             "keywords": [
                 "global state"
             ],
@@ -4774,7 +4775,7 @@
                 }
             ],
             "description": "Provides functionality to recursively process PHP variables",
-            "homepage": "https://github.com/sebastianbergmann/recursion-context",
+            "homepage": "http://www.github.com/sebastianbergmann/recursion-context",
             "time": "2016-11-19T07:33:16+00:00"
         },
         {
diff --git a/src/Model/Item.php b/src/Model/Item.php
index c384eb4f94..e8ebe5671c 100644
--- a/src/Model/Item.php
+++ b/src/Model/Item.php
@@ -43,8 +43,8 @@ use Friendica\Util\Map;
 use Friendica\Util\Network;
 use Friendica\Util\Strings;
 use Friendica\Worker\Delivery;
-use Text_LanguageDetect;
 use Friendica\Repository\PermissionSet as RepPermissionSet;
+use LanguageDetection\Language;
 
 class Item
 {
@@ -1699,10 +1699,10 @@ class Item
 
 		$item['plink'] = ($item['plink'] ?? '') ?: DI::baseUrl() . '/display/' . urlencode($item['guid']);
 
-		$item['language'] = self::getLanguage($item);
-
 		$item['gravity'] = self::getGravity($item);
 
+		$item['language'] = self::getLanguage($item);
+
 		$default = ['url' => $item['author-link'], 'name' => $item['author-name'],
 			'photo' => $item['author-avatar'], 'network' => $item['network']];
 		$item['author-id'] = ($item['author-id'] ?? 0) ?: Contact::getIdForURL($item['author-link'], 0, null, $default);
@@ -2472,11 +2472,14 @@ class Item
 	 */
 	private static function getLanguage(array $item)
 	{
+		if (!in_array($item['gravity'], [GRAVITY_PARENT, GRAVITY_COMMENT])) {
+			return '';
+		}
+
 		$naked_body = BBCode::toPlaintext($item['body'], false);
 
-		$ld = new Text_LanguageDetect();
-		$ld->setNameMode(2);
-		$languages = $ld->detect($naked_body, 3);
+		$ld = new Language;
+		$languages = $ld->detect($naked_body)->limit(0, 3)->close();
 		if (is_array($languages)) {
 			return json_encode($languages);
 		}