4 * @package Text_LanguageDetect
5 * @version CVS: $Id: Text_LanguageDetectTest.php 322353 2012-01-16 08:41:43Z cweiske $
8 __DIR__ . '/../' . PATH_SEPARATOR . get_include_path()
10 error_reporting(E_ALL|E_STRICT);
12 require_once 'Text/LanguageDetect.php';
13 require_once 'PHPUnit/Framework/TestCase.php';
15 class Text_LanguageDetectTest extends PHPUnit_Framework_TestCase {
19 ini_set('magic_quotes_runtime', 0);
20 $this->x = new Text_LanguageDetect();
// Exercises _get_data_loc() with the absolute path '/path/to/file'.
// NOTE(review): the assertion that wraps this call (and its expected value) is
// on lines not visible in this excerpt - confirm against the full file.
28 function test_get_data_locAbsolute()
32 $this->x->_get_data_loc('/path/to/file')
// Sets _data_dir to '/path/to/pear/data' and resolves the relative name 'file'
// through _get_data_loc().
// NOTE(review): the string literal below is presumably the expected value of an
// assertion whose opening line is not visible in this excerpt.
36 function test_get_data_locPearPath()
38 $this->x->_data_dir = '/path/to/pear/data';
40 '/path/to/pear/data/Text_LanguageDetect/file',
41 $this->x->_get_data_loc('file')
46 * @expectedException Text_LanguageDetect_Exception
47 * @expectedExceptionMessage Language database does not exist:
// Calls _readdb() with the name 'thisfiledoesnotexist'. The annotations that
// precede this method expect a Text_LanguageDetect_Exception whose message
// contains "Language database does not exist:".
49 function test_readdbNonexistingFile()
51 $this->x->_readdb('thisfiledoesnotexist');
55 * @expectedException Text_LanguageDetect_Exception
56 * @expectedExceptionMessage Language database is not readable:
// Creates a temporary file with tempnam() and hands its name to _readdb(). The
// annotations that precede this method expect a Text_LanguageDetect_Exception
// whose message contains "Language database is not readable:".
// NOTE(review): the step that makes the file unreadable, and any removal of the
// temporary file afterwards, are not visible in this excerpt - confirm both.
58 function test_readdbUnreadableFile()
60 $name = tempnam(sys_get_temp_dir(), 'unittest-Text_LanguageDetect-');
62 $this->x->_readdb($name);
66 * @expectedException Text_LanguageDetect_Exception
67 * @expectedExceptionMessage Language database has no elements.
// Passes an empty array to _checkTrigram(). The annotations that precede this
// method expect a Text_LanguageDetect_Exception with the message
// "Language database has no elements."
69 function test_checkTrigramEmpty()
71 $this->x->_checkTrigram(array());
75 * @expectedException Text_LanguageDetect_Exception
76 * @expectedExceptionMessage Language database is not an array
// Passes the string 'foo' (not an array) to _checkTrigram(). The annotations
// that precede this method expect a Text_LanguageDetect_Exception with the
// message "Language database is not an array".
78 function test_checkTrigramNoArray()
80 $this->x->_checkTrigram('foo');
84 * @expectedException Text_LanguageDetect_Exception
85 * @expectedExceptionMessage Error loading database. Try turning magic_quotes_runtime off
// Passes the non-array value 'foo' to _checkTrigram() after switching
// magic_quotes_runtime on. The annotations that precede this method expect a
// Text_LanguageDetect_Exception with the message
// "Error loading database. Try turning magic_quotes_runtime off".
87 function test_checkTrigramNoArrayMagicQuotes()
// From PHP 5.4.0-dev onwards the test is skipped; the skip message gives the
// reason (no magic quotes in those versions).
89 if (version_compare(PHP_VERSION, '5.4.0-dev') >= 0) {
90 $this->markTestSkipped('5.4.0 has no magic quotes anymore');
// NOTE(review): the setting is not switched back off within the visible lines
// of this method - confirm that the per-test setup resets it.
92 ini_set('magic_quotes_runtime', 1);
93 $this->x->_checkTrigram('foo');
// Checks the trigram => count map returned by _trigram() for three inputs.
// NOTE(review): the assignments of the first and third input strings are on
// lines not visible in this excerpt.
96 function test_splitter ()
100 $result = $this->x->_trigram($str);
// first input: the complete map is compared, every trigram counted once
102 $this->assertEquals(array(' he' => 1, 'hel' => 1, 'ell' => 1, 'llo' => 1, 'lo ' => 1), $result);
// second input: ' aa' and 'aa ' each occur twice, 'a a' once
104 $str = 'aa aa whatever';
106 $result = $this->x->_trigram($str);
107 $this->assertEquals(2, $result[' aa']);
108 $this->assertEquals(2, $result['aa ']);
109 $this->assertEquals(1, $result['a a']);
// third input: the keys ' a' and 'a ' must not appear in the result
112 $result = $this->x->_trigram($str);
113 $this->assertArrayNotHasKey(' a', $result, ' a');
114 $this->assertArrayNotHasKey('a ', $result, 'a ');
// Checks _trigram() on input containing the accented character 'é': the keys
// 'mé ' and 'umé' must be present and the key 'é ' must be absent. The same
// three checks are then repeated for a second input.
// NOTE(review): both input assignments are on lines not visible in this excerpt.
117 function test_splitter2 ()
121 $result = $this->x->_trigram($str);
123 $this->assertTrue(isset($result['mé ']), 'mé ');
124 $this->assertTrue(isset($result['umé']), 'umé');
// the failure message here is 'é' while the key being tested is 'é '
125 $this->assertTrue(!isset($result['é ']), 'é');
127 // tests lower-casing accented characters
130 $result = $this->x->_trigram($str);
132 $this->assertTrue(isset($result['mé ']),'mé ');
133 $this->assertTrue(isset($result['umé']),'umé');
134 $this->assertTrue(!isset($result['é ']),'é');
// Verifies that _bub_sort() reorders an associative array in place: entries
// with the higher value come first, and 'b' stays ahead of 'c' among the two
// entries that share the value 2.
function test_sort ()
{
    $arr = array('a' => 1, 'b' => 2, 'c' => 2);
    $this->x->_bub_sort($arr);

    $final_arr = array('b' => 2, 'c' => 2, 'a' => 1);

    // Confirms that no key/value pair was lost or altered by the sort.
    $this->assertEquals($final_arr, $arr);

    // assertEquals() matches associative arrays key by key and ignores the
    // order of the elements, so the check above would also pass for an array
    // that was never sorted. Comparing the key sequences pins the order.
    $this->assertEquals(array_keys($final_arr), array_keys($arr));
}
// Feeds detectSimple() inputs that cannot be classified: an empty string, a
// lone newline, one- and two-character strings, and a run of 'x' characters.
// The first four results must be falsy; the last must compare equal to null.
147 function test_error ()
149 // this test passes the object a series of bad strings to see how it handles them
151 $result = $this->x->detectSimple("");
153 $this->assertTrue(!$result);
155 $result = $this->x->detectSimple("\n");
157 $this->assertTrue(!$result);
159 // should fail on extremely short strings
160 $result = $this->x->detectSimple("a");
162 $this->assertTrue(!$result);
164 $result = $this->x->detectSimple("aa");
166 $this->assertTrue(!$result);
// a longer string made of a single repeated character
168 $result = $this->x->detectSimple('xxxxxxxxxxxxxxxxxxx');
170 $this->assertEquals(null, $result);
// Covers omitLanguages() in two scenarios, each on a fresh detector instance:
// removing a single named language, and - with the second argument set to
// true - keeping only the named languages.
173 function testOmitLanguages()
175 $str = 'This function may return Boolean FALSE, but may also return a non-Boolean value which evaluates to FALSE, such as 0 or "". Please read the section on Booleans for more information. Use the === operator for testing the return value of this function.';
177 $myobj = new Text_LanguageDetect;
179 $myobj->_use_unicode_narrowing = false;
// scenario 1: omit 'english'; the call must return 1 and the language count
// must drop by exactly one
181 $count = $myobj->getLanguageCount();
182 $returnval = $myobj->omitLanguages('english');
183 $newcount = $myobj->getLanguageCount();
185 $this->assertEquals(1, $returnval);
186 $this->assertEquals(1, $count - $newcount);
// the sample text must no longer be detected as english
188 $result = strtolower($myobj->detectSimple($str));
190 $this->assertTrue($result != 'english', $result);
// scenario 2: second argument true; the return value must equal the number of
// languages that disappeared from the count
192 $myobj = new Text_LanguageDetect;
194 $count = $myobj->getLanguageCount();
195 $returnval = $myobj->omitLanguages(array('danish', 'italian'), true);
196 $newcount = $myobj->getLanguageCount();
198 $this->assertEquals($count - $newcount, $returnval);
199 $this->assertEquals($count - $returnval, $newcount);
// only the two listed languages may be reported from here on
201 $result = strtolower($myobj->detectSimple($str));
203 $this->assertTrue($result == 'danish' || $result == 'italian', $result);
205 $result = $myobj->detect($str);
207 $this->assertEquals(2, count($result));
208 $this->assertTrue(isset($result['danish']));
209 $this->assertTrue(isset($result['italian']));
// After setNameMode(2), omitLanguages() must accept the name 'en' and report
// that one language was removed.
214 function testOmitLanguagesNameMode2()
216 $this->x->setNameMode(2);
217 $this->assertEquals(1, $this->x->omitLanguages('en'));
// Calls omitLanguages() with a plain string and the second argument set to
// true: the call must report more than one removed language, and 'english'
// must be the only entry left in getLanguages().
220 function testOmitLanguagesIncludeString()
222 $this->assertGreaterThan(1, $this->x->omitLanguages('english', true));
223 $langs = $this->x->getLanguages();
224 $this->assertEquals(1, count($langs));
225 $this->assertContains('english', $langs);
// Tracks the _clusters property across calls: it must be null after
// omitLanguages(), non-null once clusterLanguages() has run, and null again
// after a further omitLanguages() call.
228 function testOmitLanguagesClearsClusterCache()
230 $this->x->omitLanguages(array('english', 'german'), true);
231 $this->assertNull($this->x->_clusters);
// populate the cluster data
232 $this->x->clusterLanguages();
233 $this->assertNotNull($this->x->_clusters);
// changing the language set again must reset it
234 $this->x->omitLanguages('german');
235 $this->assertNull($this->x->_clusters, 'cluster cache be empty now');
// With setPerlCompatible(true) on a fresh instance, the result of _trigram()
// must not contain the key ' he'.
// NOTE(review): the assignment of $testtext is on a line not visible in this
// excerpt.
238 function test_perl_compatibility()
240 // if this test fails, then many of the others will
242 $myobj = new Text_LanguageDetect;
243 $myobj->setPerlCompatible(true);
247 $result = $myobj->_trigram($testtext);
249 $this->assertTrue(!isset($result[' he']));
// Compares the 'french' entry of _lang_db with a reference table of 300
// trigram => rank pairs (ranks 0 through 299): every reference trigram must be
// present and carry the same rank.
// NOTE(review): the line that assigns the table (presumably to $safe_model,
// which the loop below reads) is not visible in this excerpt.
252 function test_french_db ()
// reference table: trigram => rank
256 "es " => 0, " de" => 1, "de " => 2, " le" => 3, "ent" => 4,
257 "le " => 5, "nt " => 6, "la " => 7, "s d" => 8, " la" => 9,
258 "ion" => 10, "on " => 11, "re " => 12, " pa" => 13, "e l" => 14,
259 "e d" => 15, " l'" => 16, "e p" => 17, " co" => 18, " pr" => 19,
260 "tio" => 20, "ns " => 21, " en" => 22, "ne " => 23, "que" => 24,
261 "r l" => 25, "les" => 26, "ur " => 27, "en " => 28, "ati" => 29,
262 "ue " => 30, " po" => 31, " d'" => 32, "par" => 33, " a " => 34,
263 "et " => 35, "it " => 36, " qu" => 37, "men" => 38, "ons" => 39,
264 "te " => 40, " et" => 41, "t d" => 42, " re" => 43, "des" => 44,
265 " un" => 45, "ie " => 46, "s l" => 47, " su" => 48, "pou" => 49,
266 " au" => 50, " à " => 51, "con" => 52, "er " => 53, " no" => 54,
267 "ait" => 55, "e c" => 56, "se " => 57, "té " => 58, "du " => 59,
268 " du" => 60, " dé" => 61, "ce " => 62, "e e" => 63, "is " => 64,
269 "n d" => 65, "s a" => 66, " so" => 67, "e r" => 68, "e s" => 69,
270 "our" => 70, "res" => 71, "ssi" => 72, "eur" => 73, " se" => 74,
271 "eme" => 75, "est" => 76, "us " => 77, "sur" => 78, "ant" => 79,
272 "iqu" => 80, "s p" => 81, "une" => 82, "uss" => 83, "l'a" => 84,
273 "pro" => 85, "ter" => 86, "tre" => 87, "end" => 88, "rs " => 89,
274 " ce" => 90, "e a" => 91, "t p" => 92, "un " => 93, " ma" => 94,
275 " ru" => 95, " ré" => 96, "ous" => 97, "ris" => 98, "rus" => 99,
276 "sse" => 100, "ans" => 101, "ar " => 102, "com" => 103, "e m" => 104,
277 "ire" => 105, "nce" => 106, "nte" => 107, "t l" => 108, " av" => 109,
278 " mo" => 110, " te" => 111, "il " => 112, "me " => 113, "ont" => 114,
279 "ten" => 115, "a p" => 116, "dan" => 117, "pas" => 118, "qui" => 119,
280 "s e" => 120, "s s" => 121, " in" => 122, "ist" => 123, "lle" => 124,
281 "nou" => 125, "pré" => 126, "'un" => 127, "air" => 128, "d'a" => 129,
282 "ir " => 130, "n e" => 131, "rop" => 132, "ts " => 133, " da" => 134,
283 "a s" => 135, "as " => 136, "au " => 137, "den" => 138, "mai" => 139,
284 "mis" => 140, "ori" => 141, "out" => 142, "rme" => 143, "sio" => 144,
285 "tte" => 145, "ux " => 146, "a d" => 147, "ien" => 148, "n a" => 149,
286 "ntr" => 150, "omm" => 151, "ort" => 152, "ouv" => 153, "s c" => 154,
287 "son" => 155, "tes" => 156, "ver" => 157, "ère" => 158, " il" => 159,
288 " m " => 160, " sa" => 161, " ve" => 162, "a r" => 163, "ais" => 164,
289 "ava" => 165, "di " => 166, "n p" => 167, "sti" => 168, "ven" => 169,
290 " mi" => 170, "ain" => 171, "enc" => 172, "for" => 173, "ité" => 174,
291 "lar" => 175, "oir" => 176, "rem" => 177, "ren" => 178, "rro" => 179,
292 "rés" => 180, "sie" => 181, "t a" => 182, "tur" => 183, " pe" => 184,
293 " to" => 185, "d'u" => 186, "ell" => 187, "err" => 188, "ers" => 189,
294 "ide" => 190, "ine" => 191, "iss" => 192, "mes" => 193, "por" => 194,
295 "ran" => 195, "sit" => 196, "st " => 197, "t r" => 198, "uti" => 199,
296 "vai" => 200, "é l" => 201, "ési" => 202, " di" => 203, " n'" => 204,
297 " ét" => 205, "a c" => 206, "ass" => 207, "e t" => 208, "in " => 209,
298 "nde" => 210, "pre" => 211, "rat" => 212, "s m" => 213, "ste" => 214,
299 "tai" => 215, "tch" => 216, "ui " => 217, "uro" => 218, "ès " => 219,
300 " es" => 220, " fo" => 221, " tr" => 222, "'ad" => 223, "app" => 224,
301 "aux" => 225, "e à" => 226, "ett" => 227, "iti" => 228, "lit" => 229,
302 "nal" => 230, "opé" => 231, "r d" => 232, "ra " => 233, "rai" => 234,
303 "ror" => 235, "s r" => 236, "tat" => 237, "uté" => 238, "à l" => 239,
304 " af" => 240, "anc" => 241, "ara" => 242, "art" => 243, "bre" => 244,
305 "ché" => 245, "dre" => 246, "e f" => 247, "ens" => 248, "lem" => 249,
306 "n r" => 250, "n t" => 251, "ndr" => 252, "nne" => 253, "onn" => 254,
307 "pos" => 255, "s t" => 256, "tiq" => 257, "ure" => 258, " tu" => 259,
308 "ale" => 260, "and" => 261, "ave" => 262, "cla" => 263, "cou" => 264,
309 "e n" => 265, "emb" => 266, "ins" => 267, "jou" => 268, "mme" => 269,
310 "rie" => 270, "rès" => 271, "sem" => 272, "str" => 273, "t i" => 274,
311 "ues" => 275, "uni" => 276, "uve" => 277, "é d" => 278, "ée " => 279,
312 " ch" => 280, " do" => 281, " eu" => 282, " fa" => 283, " lo" => 284,
313 " ne" => 285, " ra" => 286, "arl" => 287, "att" => 288, "ec " => 289,
314 "ica" => 290, "l a" => 291, "l'o" => 292, "l'é" => 293, "mmi" => 294,
315 "nta" => 295, "orm" => 296, "ou " => 297, "r u" => 298, "rle" => 299
// the model held by the detector under the key 'french'
319 $my_arr = $this->x->_lang_db['french'];
// every reference trigram must exist in the model with an identical rank
321 foreach ($safe_model as $key => $value) {
322 $this->assertTrue(isset($my_arr[$key]),$key);
323 if (isset($my_arr[$key])) {
324 $this->assertEquals($value, $my_arr[$key], $key);
// Compares the 'english' entry of _lang_db with a reference table of 300
// trigram => rank pairs (ranks 0 through 299), in both directions: every
// reference trigram must be in the model with the same rank, and every model
// trigram must be in the reference table with the same rank.
// NOTE(review): the line that assigns the table (presumably to $realdb, which
// the loops below read) is not visible in this excerpt.
329 function test_english_db ()
// reference table: trigram => rank
333 " th" => 0, "the" => 1, "he " => 2, "ed " => 3, " to" => 4,
334 " in" => 5, "er " => 6, "ing" => 7, "ng " => 8, " an" => 9,
335 "nd " => 10, " of" => 11, "and" => 12, "to " => 13, "of " => 14,
336 " co" => 15, "at " => 16, "on " => 17, "in " => 18, " a " => 19,
337 "d t" => 20, " he" => 21, "e t" => 22, "ion" => 23, "es " => 24,
338 " re" => 25, "re " => 26, "hat" => 27, " sa" => 28, " st" => 29,
339 " ha" => 30, "her" => 31, "tha" => 32, "tio" => 33, "or " => 34,
340 " ''" => 35, "en " => 36, " wh" => 37, "e s" => 38, "ent" => 39,
341 "n t" => 40, "s a" => 41, "as " => 42, "for" => 43, "is " => 44,
342 "t t" => 45, " be" => 46, "ld " => 47, "e a" => 48, "rs " => 49,
343 " wa" => 50, "ut " => 51, "ve " => 52, "ll " => 53, "al " => 54,
344 " ma" => 55, "e i" => 56, " fo" => 57, "'s " => 58, "an " => 59,
345 "est" => 60, " hi" => 61, " mo" => 62, " se" => 63, " pr" => 64,
346 "s t" => 65, "ate" => 66, "st " => 67, "ter" => 68, "ere" => 69,
347 "ted" => 70, "nt " => 71, "ver" => 72, "d a" => 73, " wi" => 74,
348 "se " => 75, "e c" => 76, "ect" => 77, "ns " => 78, " on" => 79,
349 "ly " => 80, "tol" => 81, "ey " => 82, "r t" => 83, " ca" => 84,
350 "ati" => 85, "ts " => 86, "all" => 87, " no" => 88, "his" => 89,
351 "s o" => 90, "ers" => 91, "con" => 92, "e o" => 93, "ear" => 94,
352 "f t" => 95, "e w" => 96, "was" => 97, "ons" => 98, "sta" => 99,
353 "'' " => 100, "sti" => 101, "n a" => 102, "sto" => 103, "t h" => 104,
354 " we" => 105, "id " => 106, "th " => 107, " it" => 108, "ce " => 109,
355 " di" => 110, "ave" => 111, "d h" => 112, "cou" => 113, "pro" => 114,
356 "ad " => 115, "oll" => 116, "ry " => 117, "d s" => 118, "e m" => 119,
357 " so" => 120, "ill" => 121, "cti" => 122, "te " => 123, "tor" => 124,
358 "eve" => 125, "g t" => 126, "it " => 127, " ch" => 128, " de" => 129,
359 "hav" => 130, "oul" => 131, "ty " => 132, "uld" => 133, "use" => 134,
360 " al" => 135, "are" => 136, "ch " => 137, "me " => 138, "out" => 139,
361 "ove" => 140, "wit" => 141, "ys " => 142, "chi" => 143, "t a" => 144,
362 "ith" => 145, "oth" => 146, " ab" => 147, " te" => 148, " wo" => 149,
363 "s s" => 150, "res" => 151, "t w" => 152, "tin" => 153, "e b" => 154,
364 "e h" => 155, "nce" => 156, "t s" => 157, "y t" => 158, "e p" => 159,
365 "ele" => 160, "hin" => 161, "s i" => 162, "nte" => 163, " li" => 164,
366 "le " => 165, " do" => 166, "aid" => 167, "hey" => 168, "ne " => 169,
367 "s w" => 170, " as" => 171, " fr" => 172, " tr" => 173, "end" => 174,
368 "sai" => 175, " el" => 176, " ne" => 177, " su" => 178, "'t " => 179,
369 "ay " => 180, "hou" => 181, "ive" => 182, "lec" => 183, "n't" => 184,
370 " ye" => 185, "but" => 186, "d o" => 187, "o t" => 188, "y o" => 189,
371 " ho" => 190, " me" => 191, "be " => 192, "cal" => 193, "e e" => 194,
372 "had" => 195, "ple" => 196, " at" => 197, " bu" => 198, " la" => 199,
373 "d b" => 200, "s h" => 201, "say" => 202, "t i" => 203, " ar" => 204,
374 "e f" => 205, "ght" => 206, "hil" => 207, "igh" => 208, "int" => 209,
375 "not" => 210, "ren" => 211, " is" => 212, " pa" => 213, " sh" => 214,
376 "ays" => 215, "com" => 216, "n s" => 217, "r a" => 218, "rin" => 219,
377 "y a" => 220, " un" => 221, "n c" => 222, "om " => 223, "thi" => 224,
378 " mi" => 225, "by " => 226, "d i" => 227, "e d" => 228, "e n" => 229,
379 "t o" => 230, " by" => 231, "e r" => 232, "eri" => 233, "old" => 234,
380 "ome" => 235, "whe" => 236, "yea" => 237, " gr" => 238, "ar " => 239,
381 "ity" => 240, "mpl" => 241, "oun" => 242, "one" => 243, "ow " => 244,
382 "r s" => 245, "s f" => 246, "tat" => 247, " ba" => 248, " vo" => 249,
383 "bou" => 250, "sam" => 251, "tim" => 252, "vot" => 253, "abo" => 254,
384 "ant" => 255, "ds " => 256, "ial" => 257, "ine" => 258, "man" => 259,
385 "men" => 260, " or" => 261, " po" => 262, "amp" => 263, "can" => 264,
386 "der" => 265, "e l" => 266, "les" => 267, "ny " => 268, "ot " => 269,
387 "rec" => 270, "tes" => 271, "tho" => 272, "ica" => 273, "ild" => 274,
388 "ir " => 275, "nde" => 276, "ose" => 277, "ous" => 278, "pre" => 279,
389 "ste" => 280, "era" => 281, "per" => 282, "r o" => 283, "red" => 284,
390 "rie" => 285, " bo" => 286, " le" => 287, "ali" => 288, "ars" => 289,
391 "ore" => 290, "ric" => 291, "s m" => 292, "str" => 293, " fa" => 294,
392 "ess" => 295, "ie " => 296, "ist" => 297, "lat" => 298, "uri" => 299,
// the model held by the detector under the key 'english'
395 $mod = $this->x->_lang_db['english'];
// forward check: reference table against the model
397 foreach ($realdb as $key => $value) {
398 $this->assertTrue(isset($mod[$key]), $key);
399 if (isset($mod[$key])) {
400 $this->assertEquals($value, $mod[$key], $key);
// reverse check: the model must not contain anything the reference table lacks
404 foreach ($mod as $key => $value) {
405 $this->assertTrue(isset($realdb[$key]));
406 if (isset($realdb[$key])) {
407 $this->assertEquals($value, $realdb[$key], $key);
// Runs detectConfidence() on an English sample and checks the shape and ranges
// of the result: exactly three entries keyed 'language', 'similarity' and
// 'confidence'; language equal to 'english'; similarity between 0 and 300;
// confidence between 0 and 1.
412 function test_confidence ()
414 $str = 'The next thing to notice is the Content-length header. The Content-length header notifies the server of the size of the data that you intend to send. This prevents unexpected end-of-data errors from the server when dealing with binary data, because the server will read the specified number of bytes from the data stream regardless of any spurious end-of-data characters.';
416 $result = $this->x->detectConfidence($str);
// shape of the result
418 $this->assertEquals(3, count($result));
419 $this->assertTrue(isset($result['language']), 'language');
420 $this->assertTrue(isset($result['similarity']), 'similarity');
421 $this->assertTrue(isset($result['confidence']), 'confidence');
// detected language and numeric bounds
422 $this->assertEquals('english', $result['language']);
423 $this->assertTrue($result['similarity'] <= 300 && $result['similarity'] >= 0, $result['similarity']);
424 $this->assertTrue($result['confidence'] <= 1 && $result['confidence'] >= 0, $result['confidence']);
426 // todo: tests for Danish and Norwegian should have lower confidence
429 function test_long_example ()
431 // an example that is more than 300 trigrams long
432 $str = 'The Italian Renaissance began the opening phase of the Renaissance, a period of great cultural change and achievement from the 14th to the 16th century. The word renaissance means "rebirth," and the era is best known for the renewed interest in the culture of classical antiquity. The Italian Renaissance began in northern Italy, centering in Florence. It then spread south, having an especially significant impact on Rome, which was largely rebuilt by the Renaissance popes. The Italian Renaissance is best known for its cultural achievements. This includes works of literature by such figures as Petrarch, Castiglione, and Machiavelli; artists such as Michaelangelo and Leonardo da Vinci, and great works of architecture such as The Duomo in Florence and St. Peter\'s Basilica in Rome. At the same time, present-day historians also see the era as one of economic regression and of little progress in science. Furthermore, some historians argue that the lot of the peasants and urban poor, the majority of the population, worsened during this period.';
434 $this->x->setPerlCompatible();
435 $tri = $this->x->_trigram($str);
951 $differences = array_diff(array_keys($tri), $exp_tri);
952 $this->assertEquals(0, count($differences));
953 $this->assertEquals(0, count(array_diff($exp_tri, array_keys($tri))));
954 $this->assertEquals(count($exp_tri), count($tri));
955 //print_r(array_diff($exp_tri, array_keys($tri)));
956 //print_r(array_diff(array_keys($tri), $exp_tri));
958 // tests the bubble sort mechanism
959 $this->x->_bub_sort($tri);
960 $this->assertEquals($exp_tri, array_keys($tri));
962 $true_differences = array(
963 "cas" => array('change' => 300, 'baserank' => 265, 'refrank' => null), "s i" => array('change' => 21, 'baserank' => 183, 'refrank' => 162),
964 "e b" => array('change' => 88, 'baserank' => 66, 'refrank' => 154), "ent" => array('change' => 12, 'baserank' => 27, 'refrank' => 39),
965 "ome" => array('change' => 152, 'baserank' => 83, 'refrank' => 235), "ral" => array('change' => 300, 'baserank' => 176, 'refrank' => null),
966 "ita" => array('change' => 300, 'baserank' => 44, 'refrank' => null), "bas" => array('change' => 300, 'baserank' => 258, 'refrank' => null),
967 " ar" => array('change' => 148, 'baserank' => 56, 'refrank' => 204), " in" => array('change' => 5, 'baserank' => 10, 'refrank' => 5),
968 " ti" => array('change' => 300, 'baserank' => 227, 'refrank' => null), "ty " => array('change' => 61, 'baserank' => 193, 'refrank' => 132),
969 "tur" => array('change' => 300, 'baserank' => 23, 'refrank' => null), "iss" => array('change' => 300, 'baserank' => 20, 'refrank' => null),
970 "ria" => array('change' => 300, 'baserank' => 179, 'refrank' => null), " me" => array('change' => 25, 'baserank' => 216, 'refrank' => 191),
971 "t k" => array('change' => 300, 'baserank' => 189, 'refrank' => null), " es" => array('change' => 300, 'baserank' => 207, 'refrank' => null),
972 "ren" => array('change' => 202, 'baserank' => 9, 'refrank' => 211), "in " => array('change' => 1, 'baserank' => 19, 'refrank' => 18),
973 "ly " => array('change' => 0, 'baserank' => 80, 'refrank' => 80), "st " => array('change' => 18, 'baserank' => 49, 'refrank' => 67),
974 "ne " => array('change' => 8, 'baserank' => 161, 'refrank' => 169), "all" => array('change' => 154, 'baserank' => 241, 'refrank' => 87),
975 "vin" => array('change' => 300, 'baserank' => 196, 'refrank' => null), " op" => array('change' => 300, 'baserank' => 219, 'refrank' => null),
976 "chi" => array('change' => 107, 'baserank' => 36, 'refrank' => 143), "e w" => array('change' => 197, 'baserank' => 293, 'refrank' => 96),
977 " ro" => array('change' => 300, 'baserank' => 113, 'refrank' => null), "act" => array('change' => 300, 'baserank' => 237, 'refrank' => null),
978 "d r" => array('change' => 300, 'baserank' => 280, 'refrank' => null), "nt " => array('change' => 11, 'baserank' => 82, 'refrank' => 71),
979 "can" => array('change' => 0, 'baserank' => 264, 'refrank' => 264), "rea" => array('change' => 300, 'baserank' => 88, 'refrank' => null),
980 "ssa" => array('change' => 300, 'baserank' => 22, 'refrank' => null), " fo" => array('change' => 47, 'baserank' => 104, 'refrank' => 57),
981 "eas" => array('change' => 300, 'baserank' => 296, 'refrank' => null), "mic" => array('change' => 300, 'baserank' => 157, 'refrank' => null),
982 "cul" => array('change' => 300, 'baserank' => 65, 'refrank' => null), " an" => array('change' => 6, 'baserank' => 3, 'refrank' => 9),
983 "n t" => array('change' => 120, 'baserank' => 160, 'refrank' => 40), "arg" => array('change' => 300, 'baserank' => 118, 'refrank' => null),
984 " it" => array('change' => 93, 'baserank' => 15, 'refrank' => 108), "ebi" => array('change' => 300, 'baserank' => 297, 'refrank' => null),
985 " re" => array('change' => 21, 'baserank' => 4, 'refrank' => 25), "res" => array('change' => 120, 'baserank' => 31, 'refrank' => 151),
986 " be" => array('change' => 13, 'baserank' => 33, 'refrank' => 46), "rom" => array('change' => 300, 'baserank' => 89, 'refrank' => null),
987 "'s " => array('change' => 175, 'baserank' => 233, 'refrank' => 58), "arc" => array('change' => 300, 'baserank' => 117, 'refrank' => null),
988 " su" => array('change' => 119, 'baserank' => 59, 'refrank' => 178), "s p" => array('change' => 300, 'baserank' => 184, 'refrank' => null),
989 "ich" => array('change' => 300, 'baserank' => 145, 'refrank' => null), "d d" => array('change' => 300, 'baserank' => 275, 'refrank' => null),
990 "cal" => array('change' => 70, 'baserank' => 263, 'refrank' => 193), "ci " => array('change' => 300, 'baserank' => 266, 'refrank' => null),
991 "ssi" => array('change' => 300, 'baserank' => 186, 'refrank' => null), "bes" => array('change' => 300, 'baserank' => 120, 'refrank' => null),
992 "des" => array('change' => 300, 'baserank' => 285, 'refrank' => null), "e s" => array('change' => 91, 'baserank' => 129, 'refrank' => 38),
993 "ch " => array('change' => 111, 'baserank' => 26, 'refrank' => 137), "san" => array('change' => 300, 'baserank' => 14, 'refrank' => null),
994 "asi" => array('change' => 300, 'baserank' => 249, 'refrank' => null), "ajo" => array('change' => 300, 'baserank' => 240, 'refrank' => null),
995 "ase" => array('change' => 300, 'baserank' => 248, 'refrank' => null), " wa" => array('change' => 181, 'baserank' => 231, 'refrank' => 50),
996 "vem" => array('change' => 300, 'baserank' => 195, 'refrank' => null), "ed " => array('change' => 128, 'baserank' => 131, 'refrank' => 3),
997 "ant" => array('change' => 191, 'baserank' => 64, 'refrank' => 255), "a p" => array('change' => 300, 'baserank' => 235, 'refrank' => null),
998 "lor" => array('change' => 300, 'baserank' => 155, 'refrank' => null), "kno" => array('change' => 300, 'baserank' => 151, 'refrank' => null),
999 "ais" => array('change' => 300, 'baserank' => 16, 'refrank' => null), " pe" => array('change' => 300, 'baserank' => 24, 'refrank' => null),
1000 "or " => array('change' => 51, 'baserank' => 85, 'refrank' => 34), "e i" => array('change' => 19, 'baserank' => 37, 'refrank' => 56),
1001 " sp" => array('change' => 300, 'baserank' => 225, 'refrank' => null), "ad " => array('change' => 123, 'baserank' => 238, 'refrank' => 115),
1002 " kn" => array('change' => 300, 'baserank' => 108, 'refrank' => null), "ega" => array('change' => 300, 'baserank' => 132, 'refrank' => null),
1003 " ba" => array('change' => 46, 'baserank' => 202, 'refrank' => 248), "d t" => array('change' => 261, 'baserank' => 281, 'refrank' => 20),
1004 "ork" => array('change' => 300, 'baserank' => 169, 'refrank' => null), "lia" => array('change' => 300, 'baserank' => 78, 'refrank' => null),
1005 "ard" => array('change' => 300, 'baserank' => 245, 'refrank' => null), "iev" => array('change' => 300, 'baserank' => 146, 'refrank' => null),
1006 "of " => array('change' => 6, 'baserank' => 8, 'refrank' => 14), " cu" => array('change' => 300, 'baserank' => 57, 'refrank' => null),
1007 "day" => array('change' => 300, 'baserank' => 284, 'refrank' => null), "cen" => array('change' => 300, 'baserank' => 122, 'refrank' => null),
1008 "re " => array('change' => 21, 'baserank' => 47, 'refrank' => 26), "ist" => array('change' => 220, 'baserank' => 77, 'refrank' => 297),
1009 " fl" => array('change' => 300, 'baserank' => 103, 'refrank' => null), "anc" => array('change' => 300, 'baserank' => 17, 'refrank' => null),
1010 "at " => array('change' => 19, 'baserank' => 35, 'refrank' => 16), "rch" => array('change' => 300, 'baserank' => 177, 'refrank' => null),
1011 "ang" => array('change' => 300, 'baserank' => 116, 'refrank' => null), " mi" => array('change' => 8, 'baserank' => 217, 'refrank' => 225),
1012 "y s" => array('change' => 300, 'baserank' => 198, 'refrank' => null), "ca " => array('change' => 300, 'baserank' => 262, 'refrank' => null),
1013 " ma" => array('change' => 55, 'baserank' => 110, 'refrank' => 55), " lo" => array('change' => 300, 'baserank' => 215, 'refrank' => null),
1014 "rin" => array('change' => 39, 'baserank' => 180, 'refrank' => 219), " im" => array('change' => 300, 'baserank' => 212, 'refrank' => null),
1015 " er" => array('change' => 300, 'baserank' => 102, 'refrank' => null), "ce " => array('change' => 103, 'baserank' => 6, 'refrank' => 109),
1016 "bui" => array('change' => 300, 'baserank' => 260, 'refrank' => null), "lit" => array('change' => 300, 'baserank' => 154, 'refrank' => null),
1017 "iod" => array('change' => 300, 'baserank' => 148, 'refrank' => null), "ame" => array('change' => 300, 'baserank' => 244, 'refrank' => null),
1018 "ter" => array('change' => 17, 'baserank' => 51, 'refrank' => 68), "e a" => array('change' => 78, 'baserank' => 126, 'refrank' => 48),
1019 "f l" => array('change' => 300, 'baserank' => 137, 'refrank' => null), "eri" => array('change' => 162, 'baserank' => 71, 'refrank' => 233),
1020 "ra " => array('change' => 300, 'baserank' => 175, 'refrank' => null), "ng " => array('change' => 38, 'baserank' => 46, 'refrank' => 8),
1021 "d i" => array('change' => 50, 'baserank' => 277, 'refrank' => 227), "asa" => array('change' => 300, 'baserank' => 247, 'refrank' => null),
1022 "wn " => array('change' => 300, 'baserank' => 197, 'refrank' => null), " at" => array('change' => 4, 'baserank' => 201, 'refrank' => 197),
1023 "now" => array('change' => 300, 'baserank' => 163, 'refrank' => null), " by" => array('change' => 133, 'baserank' => 98, 'refrank' => 231),
1024 "n s" => array('change' => 58, 'baserank' => 159, 'refrank' => 217), " li" => array('change' => 55, 'baserank' => 109, 'refrank' => 164),
1025 "l a" => array('change' => 300, 'baserank' => 153, 'refrank' => null), "da " => array('change' => 300, 'baserank' => 283, 'refrank' => null),
1026 "ean" => array('change' => 300, 'baserank' => 295, 'refrank' => null), "tal" => array('change' => 300, 'baserank' => 50, 'refrank' => null),
1027 "d a" => array('change' => 201, 'baserank' => 274, 'refrank' => 73), "ct " => array('change' => 300, 'baserank' => 272, 'refrank' => null),
1028 "ali" => array('change' => 226, 'baserank' => 62, 'refrank' => 288), "ian" => array('change' => 300, 'baserank' => 28, 'refrank' => null),
1029 " sa" => array('change' => 193, 'baserank' => 221, 'refrank' => 28), "do " => array('change' => 300, 'baserank' => 286, 'refrank' => null),
1030 "t o" => array('change' => 40, 'baserank' => 190, 'refrank' => 230), "ure" => array('change' => 300, 'baserank' => 54, 'refrank' => null),
1031 "e c" => array('change' => 213, 'baserank' => 289, 'refrank' => 76), "ing" => array('change' => 35, 'baserank' => 42, 'refrank' => 7),
1032 "d o" => array('change' => 63, 'baserank' => 124, 'refrank' => 187), " ha" => array('change' => 181, 'baserank' => 211, 'refrank' => 30),
1033 "ts " => array('change' => 33, 'baserank' => 53, 'refrank' => 86), "rth" => array('change' => 300, 'baserank' => 90, 'refrank' => null),
1034 "cla" => array('change' => 300, 'baserank' => 269, 'refrank' => null), " ac" => array('change' => 300, 'baserank' => 97, 'refrank' => null),
1035 "th " => array('change' => 55, 'baserank' => 52, 'refrank' => 107), "rio" => array('change' => 300, 'baserank' => 181, 'refrank' => null),
1036 "al " => array('change' => 7, 'baserank' => 61, 'refrank' => 54), "sto" => array('change' => 84, 'baserank' => 187, 'refrank' => 103),
1037 "e o" => array('change' => 55, 'baserank' => 38, 'refrank' => 93), "bir" => array('change' => 300, 'baserank' => 259, 'refrank' => null),
1038 " pr" => array('change' => 48, 'baserank' => 112, 'refrank' => 64), " le" => array('change' => 73, 'baserank' => 214, 'refrank' => 287),
1039 "nai" => array('change' => 300, 'baserank' => 21, 'refrank' => null), "t i" => array('change' => 15, 'baserank' => 188, 'refrank' => 203),
1040 " po" => array('change' => 204, 'baserank' => 58, 'refrank' => 262), "f t" => array('change' => 21, 'baserank' => 74, 'refrank' => 95),
1041 "ban" => array('change' => 300, 'baserank' => 257, 'refrank' => null), "an " => array('change' => 46, 'baserank' => 13, 'refrank' => 59),
1042 "wor" => array('change' => 300, 'baserank' => 55, 'refrank' => null), "pet" => array('change' => 300, 'baserank' => 172, 'refrank' => null),
1043 "ael" => array('change' => 300, 'baserank' => 239, 'refrank' => null), "ura" => array('change' => 300, 'baserank' => 194, 'refrank' => null),
1044 "eve" => array('change' => 11, 'baserank' => 136, 'refrank' => 125), "ion" => array('change' => 53, 'baserank' => 76, 'refrank' => 23),
1045 "nge" => array('change' => 300, 'baserank' => 162, 'refrank' => null), "cha" => array('change' => 300, 'baserank' => 123, 'refrank' => null),
1046 "ity" => array('change' => 90, 'baserank' => 150, 'refrank' => 240), " se" => array('change' => 160, 'baserank' => 223, 'refrank' => 63),
1047 " on" => array('change' => 32, 'baserank' => 111, 'refrank' => 79), "s b" => array('change' => 300, 'baserank' => 91, 'refrank' => null),
1048 "ans" => array('change' => 300, 'baserank' => 63, 'refrank' => null), "own" => array('change' => 300, 'baserank' => 170, 'refrank' => null),
1049 " si" => array('change' => 300, 'baserank' => 224, 'refrank' => null), "e r" => array('change' => 165, 'baserank' => 67, 'refrank' => 232),
1050 "est" => array('change' => 13, 'baserank' => 73, 'refrank' => 60), "hie" => array('change' => 300, 'baserank' => 144, 'refrank' => null),
1051 "aly" => array('change' => 300, 'baserank' => 243, 'refrank' => null), "and" => array('change' => 1, 'baserank' => 11, 'refrank' => 12),
1052 "beg" => array('change' => 300, 'baserank' => 119, 'refrank' => null), "dur" => array('change' => 300, 'baserank' => 288, 'refrank' => null),
1053 "reb" => array('change' => 300, 'baserank' => 178, 'refrank' => null), "e e" => array('change' => 67, 'baserank' => 127, 'refrank' => 194),
1054 "men" => array('change' => 104, 'baserank' => 156, 'refrank' => 260), " la" => array('change' => 14, 'baserank' => 213, 'refrank' => 199),
1055 "con" => array('change' => 179, 'baserank' => 271, 'refrank' => 92), " fu" => array('change' => 300, 'baserank' => 210, 'refrank' => null),
1056 "e l" => array('change' => 26, 'baserank' => 292, 'refrank' => 266), "s a" => array('change' => 7, 'baserank' => 48, 'refrank' => 41),
1057 "art" => array('change' => 300, 'baserank' => 246, 'refrank' => null), "ltu" => array('change' => 300, 'baserank' => 79, 'refrank' => null),
1058 "a i" => array('change' => 300, 'baserank' => 115, 'refrank' => null), "ctu" => array('change' => 300, 'baserank' => 273, 'refrank' => null),
1059 "tor" => array('change' => 68, 'baserank' => 192, 'refrank' => 124), "ach" => array('change' => 300, 'baserank' => 60, 'refrank' => null),
1060 "d g" => array('change' => 300, 'baserank' => 276, 'refrank' => null), "od " => array('change' => 300, 'baserank' => 166, 'refrank' => null),
1061 "nte" => array('change' => 1, 'baserank' => 164, 'refrank' => 163), "ena" => array('change' => 300, 'baserank' => 18, 'refrank' => null),
1062 "d l" => array('change' => 300, 'baserank' => 278, 'refrank' => null), "ene" => array('change' => 300, 'baserank' => 134, 'refrank' => null),
1063 "e h" => array('change' => 136, 'baserank' => 291, 'refrank' => 155), "era" => array('change' => 211, 'baserank' => 70, 'refrank' => 281),
1064 "on " => array('change' => 67, 'baserank' => 84, 'refrank' => 17), " ce" => array('change' => 300, 'baserank' => 99, 'refrank' => null),
1065 "ay " => array('change' => 76, 'baserank' => 256, 'refrank' => 180), " da" => array('change' => 300, 'baserank' => 100, 'refrank' => null),
1066 "ori" => array('change' => 300, 'baserank' => 87, 'refrank' => null), "atu" => array('change' => 300, 'baserank' => 253, 'refrank' => null),
1067 "ave" => array('change' => 143, 'baserank' => 254, 'refrank' => 111), "rks" => array('change' => 300, 'baserank' => 182, 'refrank' => null),
1068 "e d" => array('change' => 62, 'baserank' => 290, 'refrank' => 228), "ns " => array('change' => 3, 'baserank' => 81, 'refrank' => 78),
1069 " ca" => array('change' => 119, 'baserank' => 203, 'refrank' => 84), "d s" => array('change' => 7, 'baserank' => 125, 'refrank' => 118),
1070 "uch" => array('change' => 300, 'baserank' => 95, 'refrank' => null), "a v" => array('change' => 300, 'baserank' => 236, 'refrank' => null),
1071 "nce" => array('change' => 149, 'baserank' => 7, 'refrank' => 156), "his" => array('change' => 48, 'baserank' => 41, 'refrank' => 89),
1072 "flo" => array('change' => 300, 'baserank' => 138, 'refrank' => null), "ead" => array('change' => 300, 'baserank' => 294, 'refrank' => null),
1073 " vi" => array('change' => 300, 'baserank' => 230, 'refrank' => null), "me " => array('change' => 109, 'baserank' => 29, 'refrank' => 138),
1074 "suc" => array('change' => 300, 'baserank' => 93, 'refrank' => null), "e p" => array('change' => 120, 'baserank' => 39, 'refrank' => 159),
1075 "eci" => array('change' => 300, 'baserank' => 299, 'refrank' => null), "eme" => array('change' => 300, 'baserank' => 133, 'refrank' => null),
1076 "sen" => array('change' => 300, 'baserank' => 185, 'refrank' => null), "ks " => array('change' => 300, 'baserank' => 152, 'refrank' => null),
1077 " to" => array('change' => 224, 'baserank' => 228, 'refrank' => 4), " gr" => array('change' => 133, 'baserank' => 105, 'refrank' => 238),
1078 " ch" => array('change' => 76, 'baserank' => 204, 'refrank' => 128), "ati" => array('change' => 167, 'baserank' => 252, 'refrank' => 85),
1079 " th" => array('change' => 0, 'baserank' => 0, 'refrank' => 0), " ec" => array('change' => 300, 'baserank' => 206, 'refrank' => null),
1080 " wo" => array('change' => 115, 'baserank' => 34, 'refrank' => 149), "ope" => array('change' => 300, 'baserank' => 168, 'refrank' => null),
1081 " a " => array('change' => 180, 'baserank' => 199, 'refrank' => 19), "one" => array('change' => 76, 'baserank' => 167, 'refrank' => 243),
1082 "n f" => array('change' => 300, 'baserank' => 45, 'refrank' => null), "eat" => array('change' => 300, 'baserank' => 130, 'refrank' => null),
1083 "ica" => array('change' => 198, 'baserank' => 75, 'refrank' => 273), "inc" => array('change' => 300, 'baserank' => 147, 'refrank' => null),
1084 "enc" => array('change' => 300, 'baserank' => 69, 'refrank' => null), "ore" => array('change' => 204, 'baserank' => 86, 'refrank' => 290),
1085 "is " => array('change' => 1, 'baserank' => 43, 'refrank' => 44), " as" => array('change' => 139, 'baserank' => 32, 'refrank' => 171),
1086 "nts" => array('change' => 300, 'baserank' => 165, 'refrank' => null), "d m" => array('change' => 300, 'baserank' => 279, 'refrank' => null),
1087 "her" => array('change' => 112, 'baserank' => 143, 'refrank' => 31), " al" => array('change' => 65, 'baserank' => 200, 'refrank' => 135),
1088 " is" => array('change' => 105, 'baserank' => 107, 'refrank' => 212), "e t" => array('change' => 46, 'baserank' => 68, 'refrank' => 22),
1089 "c r" => array('change' => 300, 'baserank' => 261, 'refrank' => null), " hi" => array('change' => 45, 'baserank' => 106, 'refrank' => 61),
1090 "cia" => array('change' => 300, 'baserank' => 267, 'refrank' => null), " fr" => array('change' => 37, 'baserank' => 209, 'refrank' => 172),
1091 "ult" => array('change' => 300, 'baserank' => 96, 'refrank' => null), "e m" => array('change' => 9, 'baserank' => 128, 'refrank' => 119),
1092 "ass" => array('change' => 300, 'baserank' => 250, 'refrank' => null), "s o" => array('change' => 2, 'baserank' => 92, 'refrank' => 90),
1093 "pop" => array('change' => 300, 'baserank' => 173, 'refrank' => null), "nd " => array('change' => 2, 'baserank' => 12, 'refrank' => 10),
1094 "the" => array('change' => 0, 'baserank' => 1, 'refrank' => 1), " st" => array('change' => 197, 'baserank' => 226, 'refrank' => 29),
1095 " no" => array('change' => 130, 'baserank' => 218, 'refrank' => 88), "ast" => array('change' => 300, 'baserank' => 251, 'refrank' => null),
1096 " fi" => array('change' => 300, 'baserank' => 208, 'refrank' => null), "ess" => array('change' => 160, 'baserank' => 135, 'refrank' => 295),
1097 "gre" => array('change' => 300, 'baserank' => 40, 'refrank' => null), "h a" => array('change' => 300, 'baserank' => 142, 'refrank' => null),
1098 "duo" => array('change' => 300, 'baserank' => 287, 'refrank' => null), " so" => array('change' => 6, 'baserank' => 114, 'refrank' => 120),
1099 "es " => array('change' => 48, 'baserank' => 72, 'refrank' => 24), "for" => array('change' => 96, 'baserank' => 139, 'refrank' => 43),
1100 "gan" => array('change' => 300, 'baserank' => 140, 'refrank' => null), "per" => array('change' => 111, 'baserank' => 171, 'refrank' => 282),
1101 "thi" => array('change' => 33, 'baserank' => 191, 'refrank' => 224), " of" => array('change' => 6, 'baserank' => 5, 'refrank' => 11),
1102 " cl" => array('change' => 300, 'baserank' => 205, 'refrank' => null), " sc" => array('change' => 300, 'baserank' => 222, 'refrank' => null),
1103 "t t" => array('change' => 49, 'baserank' => 94, 'refrank' => 45), "als" => array('change' => 300, 'baserank' => 242, 'refrank' => null),
1104 "avi" => array('change' => 300, 'baserank' => 255, 'refrank' => null), "cie" => array('change' => 300, 'baserank' => 268, 'refrank' => null),
1105 " du" => array('change' => 300, 'baserank' => 101, 'refrank' => null), "pre" => array('change' => 105, 'baserank' => 174, 'refrank' => 279),
1106 "as " => array('change' => 17, 'baserank' => 25, 'refrank' => 42), "a a" => array('change' => 300, 'baserank' => 234, 'refrank' => null),
1107 "gel" => array('change' => 300, 'baserank' => 141, 'refrank' => null), "ite" => array('change' => 300, 'baserank' => 149, 'refrank' => null),
1108 "n r" => array('change' => 300, 'baserank' => 30, 'refrank' => null), "by " => array('change' => 105, 'baserank' => 121, 'refrank' => 226),
1109 "d u" => array('change' => 300, 'baserank' => 282, 'refrank' => null), "clu" => array('change' => 300, 'baserank' => 270, 'refrank' => null),
1110 " ur" => array('change' => 300, 'baserank' => 229, 'refrank' => null), "ebu" => array('change' => 300, 'baserank' => 298, 'refrank' => null),
1111 "n i" => array('change' => 300, 'baserank' => 158, 'refrank' => null), "he " => array('change' => 0, 'baserank' => 2, 'refrank' => 2),
1112 " wh" => array('change' => 195, 'baserank' => 232, 'refrank' => 37), " ph" => array('change' => 300, 'baserank' => 220, 'refrank' => null),
1115 $ranked = $this->x->_arr_rank($this->x->_trigram($str));
1116 $results = $this->x->detect($str);
1118 $count = count($ranked);
1121 //foreach ($this->x->_lang_db['english'] as $key => $value) {
1122 foreach ($ranked as $key => $value) {
1123 if (isset($ranked[$key]) && isset($this->x->_lang_db['english'][$key])) {
1124 $difference = abs($this->x->_lang_db['english'][$key] - $ranked[$key]);
1129 $this->assertTrue(isset($true_differences[$key]), "'$key'");
1130 if (isset($true_differences[$key])) {
1131 $this->assertEquals($true_differences[$key]['change'], $difference, "'$key'");
1133 $sum += $difference;
1136 $this->assertEquals(300, $count);
1137 $this->assertEquals(59490, $sum);
1139 $this->assertEquals('english', key($results));
1140 $this->assertEquals(198, floor(current($results)));
1142 $this->assertEquals('italian', key($results));
1143 $this->assertEquals(228, floor(current($results)));
/**
 * Walks a French sample through trigram extraction, ranking and distance
 * calculation in Perl-compatible mode and finally checks the scores that
 * detect() reports for French and Spanish.
 */
1146 function test_french ()
1148 $this->x->setPerlCompatible();
1149 $str = "Verifions que le détecteur de langues marche";
1151 $trigrams = $this->x->_trigram($str);
1152 $this->assertEquals(42, count($trigrams));
1153 // verified in Language::Guess
1155 $ranked = $this->x->_arr_rank($trigrams);
1156 $this->assertEquals(0, $ranked['e l']);
1158 $correct_ranks = array(
1204 $this->assertEquals(count($correct_ranks), count($ranked), "different number of trigrams found");
// Per-trigram expectations, read below as $distances[$key]:
//   'baserank' is compared with the rank of the trigram in the sample,
//   'refrank'  is compared with its rank in the French model; null means
//              the trigram must not be a key of the model at all,
//   'change'   is compared with the computed rank difference.
1207 ' de' => array('change' => 0, 'baserank' => 1, 'refrank' => 1),
1208 'éte' => array('change' => 300, 'baserank' => 41, 'refrank' => null),
1209 'dét' => array('change' => 300, 'baserank' => 12, 'refrank' => null),
1210 'fio' => array('change' => 300, 'baserank' => 18, 'refrank' => null),
1211 'de ' => array('change' => 9, 'baserank' => 11, 'refrank' => 2),
1212 'ons' => array('change' => 11, 'baserank' => 28, 'refrank' => 39),
1213 'ect' => array('change' => 300, 'baserank' => 14, 'refrank' => null),
1214 'le ' => array('change' => 19, 'baserank' => 24, 'refrank' => 5),
1215 'arc' => array('change' => 300, 'baserank' => 8, 'refrank' => null),
1216 'lan' => array('change' => 300, 'baserank' => 23, 'refrank' => null),
1217 'es ' => array('change' => 16, 'baserank' => 16, 'refrank' => 0),
1218 'mar' => array('change' => 300, 'baserank' => 25, 'refrank' => null),
1219 ' dé' => array('change' => 59, 'baserank' => 2, 'refrank' => 61),
1220 'ifi' => array('change' => 300, 'baserank' => 21, 'refrank' => null),
1221 'gue' => array('change' => 300, 'baserank' => 19, 'refrank' => null),
1222 'ur ' => array('change' => 12, 'baserank' => 39, 'refrank' => 27),
1223 'rch' => array('change' => 300, 'baserank' => 31, 'refrank' => null),
1224 'ang' => array('change' => 300, 'baserank' => 7, 'refrank' => null),
1225 'que' => array('change' => 5, 'baserank' => 29, 'refrank' => 24),
1226 'ngu' => array('change' => 300, 'baserank' => 26, 'refrank' => null),
1227 'e d' => array('change' => 2, 'baserank' => 13, 'refrank' => 15),
1228 'rif' => array('change' => 300, 'baserank' => 32, 'refrank' => null),
1229 ' ma' => array('change' => 89, 'baserank' => 5, 'refrank' => 94),
1230 'tec' => array('change' => 300, 'baserank' => 35, 'refrank' => null),
1231 'ns ' => array('change' => 6, 'baserank' => 27, 'refrank' => 21),
1232 ' la' => array('change' => 6, 'baserank' => 3, 'refrank' => 9),
1233 ' le' => array('change' => 1, 'baserank' => 4, 'refrank' => 3),
1234 'r d' => array('change' => 202, 'baserank' => 30, 'refrank' => 232),
1235 'e l' => array('change' => 14, 'baserank' => 0, 'refrank' => 14),
1236 'che' => array('change' => 300, 'baserank' => 9, 'refrank' => null),
1237 's m' => array('change' => 180, 'baserank' => 33, 'refrank' => 213),
1238 'ue ' => array('change' => 7, 'baserank' => 37, 'refrank' => 30),
1239 'ver' => array('change' => 117, 'baserank' => 40, 'refrank' => 157),
1240 'teu' => array('change' => 300, 'baserank' => 36, 'refrank' => null),
1241 'eri' => array('change' => 300, 'baserank' => 15, 'refrank' => null),
1242 'cte' => array('change' => 300, 'baserank' => 10, 'refrank' => null),
1243 'ues' => array('change' => 237, 'baserank' => 38, 'refrank' => 275),
1244 's q' => array('change' => 300, 'baserank' => 34, 'refrank' => null),
1245 'eur' => array('change' => 56, 'baserank' => 17, 'refrank' => 73),
1246 ' qu' => array('change' => 31, 'baserank' => 6, 'refrank' => 37),
1247 'he ' => array('change' => 300, 'baserank' => 20, 'refrank' => null),
1248 'ion' => array('change' => 12, 'baserank' => 22, 'refrank' => 10),
1253 $french_ranks = $this->x->_lang_db['french'];
// Recompute the distance by hand, trigram by trigram, so that it can be
// compared with what _distance() returns.
1256 foreach ($ranked as $key => $value) {
1257 if (isset($french_ranks[$key])) {
1258 $difference = abs($french_ranks[$key] - $ranked[$key]);
1262 $this->assertTrue(isset($distances[$key]), $key);
1263 if (isset($distances[$key])) {
1264 $this->assertEquals($distances[$key]['baserank'], $ranked[$key], "baserank for $key");
1265 if ($distances[$key]['refrank'] === null) {
1266 $this->assertArrayNotHasKey($key, $french_ranks);
1268 $this->assertEquals($distances[$key]['refrank'], $french_ranks[$key], "refrank for $key");
1270 $this->assertEquals($distances[$key]['change'], $difference, "difference for $key");
1273 $sumchange += $difference;
1276 $actual_result = $this->x->_distance($french_ranks, $ranked);
1277 $this->assertEquals($sumchange, $actual_result);
1278 $this->assertEquals(7091, $actual_result);
// the summed distance divided by the trigram count, rounded down, is the
// same value that is expected from detect() for French below
1279 $this->assertEquals(168, floor($sumchange/count($trigrams)));
1281 $final_result = $this->x->detect($str);
1282 $this->assertEquals(168, floor($final_result['french']));
1283 $this->assertEquals(211, $final_result['spanish']);
/**
 * Walks a Russian sample through trigram extraction, ranking and distance
 * calculation in Perl-compatible mode and checks the score detect() reports
 * for Russian.
 *
 * Every entry of $correct_ranks describes one trigram of the sample:
 *   'baserank' expected rank of the trigram within the sample,
 *   'refrank'  expected rank in the Russian language model, or null when
 *              the trigram must not be part of the model,
 *   'change'   expected contribution of the trigram to the distance.
 */
function test_russian()
{
    $str = 'авай проверить узнает ли наш угадатель русски язык';

    $this->x->setPerlCompatible();
    $trigrams = $this->x->_trigram($str);
    $ranked = $this->x->_arr_rank($trigrams);

    $correct_ranks = array(
        ' ру' => array('change' => 300, 'baserank' => 3, 'refrank' => null),
        'ай ' => array('change' => 300, 'baserank' => 10, 'refrank' => null),
        'ада' => array('change' => 300, 'baserank' => 8, 'refrank' => null),
        ' пр' => array('change' => 1, 'baserank' => 2, 'refrank' => 1),
        ' яз' => array('change' => 300, 'baserank' => 6, 'refrank' => null),
        'ить' => array('change' => 300, 'baserank' => 24, 'refrank' => null),
        ' на' => array('change' => 1, 'baserank' => 1, 'refrank' => 0),
        'зна' => array('change' => 153, 'baserank' => 20, 'refrank' => 173),
        'вай' => array('change' => 300, 'baserank' => 13, 'refrank' => null),
        'ш у' => array('change' => 300, 'baserank' => 44, 'refrank' => null),
        'ль ' => array('change' => 300, 'baserank' => 28, 'refrank' => null),
        ' ли' => array('change' => 300, 'baserank' => 0, 'refrank' => null),
        'сск' => array('change' => 300, 'baserank' => 37, 'refrank' => null),
        'ть ' => array('change' => 31, 'baserank' => 40, 'refrank' => 9),
        'ава' => array('change' => 300, 'baserank' => 7, 'refrank' => null),
        'про' => array('change' => 18, 'baserank' => 32, 'refrank' => 14),
        'гад' => array('change' => 300, 'baserank' => 15, 'refrank' => null),
        'усс' => array('change' => 300, 'baserank' => 43, 'refrank' => null),
        'ык ' => array('change' => 300, 'baserank' => 45, 'refrank' => null),
        'ель' => array('change' => 64, 'baserank' => 17, 'refrank' => 81),
        'язы' => array('change' => 300, 'baserank' => 47, 'refrank' => null),
        ' уг' => array('change' => 300, 'baserank' => 4, 'refrank' => null),
        'ате' => array('change' => 152, 'baserank' => 11, 'refrank' => 163),
        'и н' => array('change' => 63, 'baserank' => 22, 'refrank' => 85),
        'и я' => array('change' => 300, 'baserank' => 23, 'refrank' => null),
        'ает' => array('change' => 152, 'baserank' => 9, 'refrank' => 161),
        'узн' => array('change' => 300, 'baserank' => 42, 'refrank' => null),
        'ери' => array('change' => 300, 'baserank' => 18, 'refrank' => null),
        'ли ' => array('change' => 23, 'baserank' => 27, 'refrank' => 4),
        'т л' => array('change' => 300, 'baserank' => 38, 'refrank' => null),
        ' уз' => array('change' => 300, 'baserank' => 5, 'refrank' => null),
        'дат' => array('change' => 203, 'baserank' => 16, 'refrank' => 219),
        'зык' => array('change' => 300, 'baserank' => 21, 'refrank' => null),
        'ров' => array('change' => 59, 'baserank' => 34, 'refrank' => 93),
        'рит' => array('change' => 300, 'baserank' => 33, 'refrank' => null),
        'ь р' => array('change' => 300, 'baserank' => 46, 'refrank' => null),
        'ет ' => array('change' => 19, 'baserank' => 19, 'refrank' => 38),
        'ки ' => array('change' => 116, 'baserank' => 26, 'refrank' => 142),
        'рус' => array('change' => 300, 'baserank' => 35, 'refrank' => null),
        'тел' => array('change' => 16, 'baserank' => 39, 'refrank' => 23),
        'нае' => array('change' => 300, 'baserank' => 29, 'refrank' => null),
        'й п' => array('change' => 300, 'baserank' => 25, 'refrank' => null),
        'наш' => array('change' => 300, 'baserank' => 30, 'refrank' => null),
        'уга' => array('change' => 300, 'baserank' => 41, 'refrank' => null),
        'ове' => array('change' => 214, 'baserank' => 31, 'refrank' => 245),
        'ски' => array('change' => 112, 'baserank' => 36, 'refrank' => 148),
        'вер' => array('change' => 31, 'baserank' => 14, 'refrank' => 45),
        'аш ' => array('change' => 300, 'baserank' => 12, 'refrank' => null),
    );

    $this->assertEquals(48, count($ranked));

    $russian = $this->x->_lang_db['russian'];

    // Recompute the distance by hand so it can be compared with _distance().
    $sumchange = 0;
    foreach ($ranked as $key => $value) {
        if (isset($russian[$key])) {
            $difference = abs($russian[$key] - $ranked[$key]);
        } else {
            // a trigram missing from the model counts with the fixed
            // distance of 300 that the expected 'change' values use
            $difference = 300;
        }

        // $key is the failure message of assertTrue(); it must not be
        // handed to isset() as a second variable to test.
        $this->assertTrue(isset($correct_ranks[$key]), $key);
        if (isset($correct_ranks[$key])) {
            $this->assertEquals($correct_ranks[$key]['baserank'], $ranked[$key], "baserank for $key");
            if ($correct_ranks[$key]['refrank'] === null) {
                $this->assertArrayNotHasKey($key, $russian);
            } else {
                $this->assertEquals($correct_ranks[$key]['refrank'], $russian[$key], "refrank for $key");
            }
            $this->assertEquals($correct_ranks[$key]['change'], $difference, "difference for $key");
        }

        $sumchange += $difference;
    }

    $actual_result = $this->x->_distance($russian, $ranked);
    $this->assertEquals($sumchange, $actual_result);
    $this->assertEquals(10428, $actual_result);
    $this->assertEquals(217, floor($sumchange/count($trigrams)));

    $final_result = $this->x->detect($str);
    $this->assertEquals(217,floor($final_result['russian']));
}
/**
 * Checks that ranking the trigrams of the sample string gives the trigram
 * 's i' rank 0.
 *
 * NOTE(review): the statement that assigns $str is not visible here —
 * confirm the sample text before relying on this expectation.
 */
1380 function test_ranker ()
1384 $result = $this->x->_arr_rank($this->x->_trigram($str));
1386 $this->assertEquals(0, $result['s i']);
/**
 * getLanguageCount() must equal the number of entries returned by
 * getLanguages(), and languageExists() must accept each of those entries.
 */
function test_count()
{
    $languages = $this->x->getLanguages();
    $reportedCount = $this->x->getLanguageCount();

    $this->assertEquals(count($languages), $reportedCount);

    foreach ($languages as $language) {
        $known = $this->x->languageExists($language);
        $this->assertTrue($known, $language);
    }
}
/**
 * With name mode 2 the code 'en' must be reported as existing while the
 * full name 'english' must not.
 */
function testLanguageExistsNameMode2()
{
    $this->x->setNameMode(2);

    $codeKnown = $this->x->languageExists('en');
    $this->assertTrue($codeKnown);

    $nameKnown = $this->x->languageExists('english');
    $this->assertFalse($nameKnown);
}
/**
 * With name mode 2, languageExists() given an array must return true when
 * every code is known and false as soon as one entry is unknown.
 */
function testLanguageExistsArrayNameMode2()
{
    $this->x->setNameMode(2);

    $allKnown = array('en', 'de');
    $this->assertTrue($this->x->languageExists($allKnown));

    $oneUnknown = array('en', 'doesnotexist');
    $this->assertFalse($this->x->languageExists($oneUnknown));
}
/**
 * Passing a float to languageExists() must raise the exception announced
 * by the annotations below.
 *
 * @expectedException Text_LanguageDetect_Exception
 * @expectedExceptionMessage Unsupported parameter type passed to languageExists()
 */
function testLanguageExistsUnsupportedType()
{
    $unsupported = 1.23;
    $this->x->languageExists($unsupported);
}
/**
 * getLanguages() must list at least 'english' and 'swedish'.
 */
function testGetLanguages()
{
    $available = $this->x->getLanguages();

    foreach (array('english', 'swedish') as $expected) {
        $this->assertContains($expected, $available);
    }
}
/**
 * With name mode 2, getLanguages() must list at least the codes 'en' and
 * 'sv'.
 */
function testGetLanguagesNameMode2()
{
    $this->x->setNameMode(2);
    $available = $this->x->getLanguages();

    foreach (array('en', 'sv') as $expected) {
        $this->assertContains($expected, $available);
    }
}
/**
 * detect() must return an array with more than five entries whose first
 * key is 'german' for a German sample sentence.
 */
function testDetect()
{
    $scores = $this->x->detect('Das ist ein kleiner Text für euch alle');
    $this->assertInternalType('array', $scores);
    $this->assertGreaterThan(5, count($scores));

    // each() is deprecated since PHP 7.2 and removed in PHP 8.0; read the
    // first key through the array pointer functions instead.
    reset($scores);
    $this->assertEquals('german', key($scores), 'text is german');
}
/**
 * With name mode 2, the first key of the detect() result for a German
 * sample sentence must be the code 'de'.
 */
function testDetectNameMode2()
{
    $this->x->setNameMode(2);
    $scores = $this->x->detect('Das ist ein kleiner Text für euch alle');

    // each() is deprecated since PHP 7.2 and removed in PHP 8.0; read the
    // first key through the array pointer functions instead.
    reset($scores);
    $this->assertEquals('de', key($scores), 'text is german');
}
/**
 * With name mode 2 and 1 passed as second argument to detect(), the first
 * key of the result for a German sample sentence must be the code 'de'.
 */
function testDetectNameMode2Limit()
{
    $this->x->setNameMode(2);
    $scores = $this->x->detect('Das ist ein kleiner Text für euch alle', 1);

    // each() is deprecated since PHP 7.2 and removed in PHP 8.0; read the
    // first key through the array pointer functions instead.
    reset($scores);
    $this->assertEquals('de', key($scores), 'text is german');
}
/**
 * detectSimple() must return the string 'german' for a German sample
 * sentence.
 */
function testDetectSimple()
{
    $sample = 'Das ist ein kleiner Text für euch alle';
    $detected = $this->x->detectSimple($sample);

    $this->assertInternalType('string', $detected);
    $this->assertEquals('german', $detected, 'text is german');
}
/**
 * With name mode 2, detectSimple() must return the string 'de' for a
 * German sample sentence.
 */
function testDetectSimpleNameMode2()
{
    $this->x->setNameMode(2);

    $sample = 'Das ist ein kleiner Text für euch alle';
    $detected = $this->x->detectSimple($sample);

    $this->assertInternalType('string', $detected);
    $this->assertEquals('de', $detected, 'text is german');
}
/**
 * Calls detectSimple() after omitLanguages() has been applied to 'english'
 * twice, once with true and once with false as second argument.
 *
 * NOTE(review): judging by the test name the two calls are presumably meant
 * to leave no language to detect against — confirm against omitLanguages().
 * NOTE(review): the detectSimple() line carries no semicolon, so it looks
 * like the argument of an assertion that is not visible here — confirm
 * which result is expected.
 */
1482 function testDetectSimpleNoLanguages()
1484 $this->x->omitLanguages('english', true);
1485 $this->x->omitLanguages('english', false);
1487 $this->x->detectSimple('Das ist ein kleiner Text für euch alle')
/**
 * Compares the similarity of English, Norwegian and Swedish to Danish in
 * both score modes, then checks the one-argument and zero-argument forms
 * of languageSimilarity().
 */
function testLanguageSimilarity()
{
    $toDanish = array();

    // Perl-compatible mode: a lower value means more similar
    $this->x->setPerlCompatible(true);
    foreach (array('english', 'norwegian', 'swedish') as $language) {
        $toDanish[$language] = $this->x->languageSimilarity($language, 'danish');
    }

    // english is less similar to danish than norwegian is
    $this->assertTrue($toDanish['english'] > $toDanish['norwegian']);
    // english is less similar to danish than swedish is
    $this->assertTrue($toDanish['english'] > $toDanish['swedish']);
    // norwegian is more similar to danish than swedish
    $this->assertTrue($toDanish['norwegian'] < $toDanish['swedish']);

    // the value must stay within 0..300
    $this->assertTrue($toDanish['english'] <= 300, $toDanish['english']);
    $this->assertTrue($toDanish['english'] >= 0, $toDanish['english']);

    // same comparisons with Perl compatibility switched off:
    // now a higher value means more similar
    $this->x->setPerlCompatible(false);
    foreach (array('english', 'norwegian', 'swedish') as $language) {
        $toDanish[$language] = $this->x->languageSimilarity($language, 'danish');
    }

    $this->assertTrue($toDanish['english'] < $toDanish['norwegian']);
    $this->assertTrue($toDanish['english'] < $toDanish['swedish']);
    $this->assertTrue($toDanish['norwegian'] > $toDanish['swedish']);

    // the value must stay within 0..1
    $this->assertTrue($toDanish['english'] <= 1, $toDanish['english']);
    $this->assertTrue($toDanish['english'] >= 0, $toDanish['english']);

    $this->x->setPerlCompatible(true);

    // one argument: english compared with every other language
    $fromEnglish = $this->x->languageSimilarity('english');
    $this->assertEquals($this->x->getLanguageCount() - 1, count($fromEnglish));
    $this->assertFalse(isset($fromEnglish['english']));

    $this->assertTrue($fromEnglish['italian'] < $fromEnglish['turkish']);
    $this->assertTrue($fromEnglish['french'] < $fromEnglish['kyrgyz']);

    // no argument: every language compared with every other language
    $matrix = $this->x->languageSimilarity();
    $this->assertFalse(isset($matrix['english']['english']));
    $this->assertTrue($matrix['french']['spanish'] < $matrix['french']['mongolian']);
    $this->assertTrue($matrix['spanish']['latin'] < $matrix['hindi']['finnish']);
    $this->assertTrue($matrix['russian']['uzbek'] < $matrix['russian']['english']);
}
/**
 * With name mode 2 and Perl-compatible scores, English must be less
 * similar to Danish than Norwegian is.
 *
 * Danish is addressed as 'da', its ISO 639-1 language code; 'dk' is the
 * ISO 3166 country code of Denmark and not a language code.
 */
function testLanguageSimilarityNameMode2()
{
    $this->x->setNameMode(2);
    $this->x->setPerlCompatible(true);
    $eng_dan = $this->x->languageSimilarity('en', 'da');
    $nor_dan = $this->x->languageSimilarity('no', 'da');

    // remember, lower means more similar
    $this->assertTrue($eng_dan > $nor_dan); // english is less similar to danish than norwegian is
}
/**
 * languageSimilarity() must return null when its only argument names an
 * unknown language.
 */
function testLanguageSimilarityUnknownLanguage()
{
    $similarity = $this->x->languageSimilarity('doesnotexist');
    $this->assertNull($similarity);
}
/**
 * languageSimilarity() must return null when the second argument names an
 * unknown language.
 */
function testLanguageSimilarityUnknownLanguage2()
{
    $similarity = $this->x->languageSimilarity('english', 'doesnotexist');
    $this->assertNull($similarity);
}
/**
 * Runs detectConfidence() on an English sentence with Perl compatibility
 * switched off and on, and checks the detected language and the value
 * ranges of similarity and confidence in both modes.
 */
1560 function test_compatibility ()
1562 $str = "I am the very model of a modern major general.";
1565 $this->x->setPerlCompatible(false);
1566 $result = $this->x->detectConfidence($str);
1568 $this->assertTrue(!is_null($result));
1569 $this->assertTrue(is_array($result));
// NOTE(review): $language, $similarity and $confidence are presumably
// created from $result by an extract() call like the one further down —
// confirm, the statement is not visible here.
1571 $this->assertEquals('english', $language);
// with Perl compatibility off, similarity is expected within 0..1
1572 $this->assertTrue($similarity <= 1 && $similarity >= 0, $similarity);
1573 $this->assertTrue($confidence <= 1 && $confidence >= 0, $confidence);
1575 $this->x->setPerlCompatible(true);
1576 $result = $this->x->detectConfidence($str);
// turns the keys of $result into local variables, replacing existing ones
1577 extract($result, EXTR_OVERWRITE);
1579 $this->assertEquals('english', $language);
1581 // technically the lowest possible score is 0 but it's extremely unlikely to hit that
// with Perl compatibility on, similarity is expected within 1..300
1582 $this->assertTrue($similarity <= 300 && $similarity >= 1, $similarity);
1583 $this->assertTrue($confidence <= 1 && $confidence >= 0, $confidence);
/**
 * detectConfidence() must return null for an empty string.
 */
function testDetectConfidenceNoText()
{
    $confidence = $this->x->detectConfidence('');
    $this->assertNull($confidence);
}
/**
 * detectSimple() must recognise an English text on a fresh detector and
 * return null once every language has been omitted.
 */
function test_omit_error()
{
    $sample = 'On January 29, 1737, Thomas Paine was born in Thetford, England. His father, a corseter, had grand visions for his son, but by the age of 12, Thomas had failed out of school. The young Paine began apprenticing for his father, but again, he failed.';

    $detector = new Text_LanguageDetect;

    $before = $detector->detectSimple($sample);
    $this->assertEquals('english', $before);

    // omit every language the detector knows; no result is expected then
    $everyLanguage = $detector->getLanguages();
    $detector->omitLanguages($everyLanguage);

    $after = $detector->detectSimple($sample);

    $this->assertNull($after, gettype($after));
}
/**
 * Reads an upper-case and a lower-case Cyrillic alphabet string character
 * by character through _next_char() and expects both to yield the same
 * characters.
 */
1609 function test_cyrillic ()
1611 // tests whether the cyrillic lower-casing works
1613 $uppercased = 'А Б В Г Д Е Ж З И Й К Л М Н О П'
1614 . 'Р С Т У Ф Х Ц Ч Ш Щ Ъ Ы Ь Э Ю Я';
1616 $lowercased = 'а б в г д е ж з и й к л м н о п'
1617 . 'р с т у ф х ц ч ш щ ъ ы ь э ю я';
// strlen() counts bytes, so this compares the encoded lengths
1619 $this->assertEquals(strlen($uppercased), strlen($lowercased));
// NOTE(review): $i, $j and $new_u are initialised and updated by statements
// that are not visible here; the third argument true presumably asks
// _next_char() to lower-case the character — confirm.
1624 while ($i < strlen($uppercased)) {
1625 $u = Text_LanguageDetect::_next_char($uppercased, $i, true);
1626 $l = Text_LanguageDetect::_next_char($lowercased, $j, true);
1627 $this->assertEquals($u, $l);
// both offsets must have advanced equally and reached the end of the string
1632 $this->assertEquals($i, $j);
1633 $this->assertEquals($i, strlen($lowercased));
// cross-check against mbstring only when that extension is available
1634 if (function_exists('mb_strtolower')) {
1635 $this->assertEquals($new_u, mb_strtolower($uppercased, 'UTF-8'));
/**
 * Checks detectUnicodeBlocks() on a mixed-script string and
 * unicodeBlockName() on single characters from several code point ranges.
 */
1639 function test_block_detection()
1641 $exp_output = <<<EOF
1645 [CJK Unified Ideographs] => 2
1647 [Latin-1 Supplement] => 4
1650 $teststr = 'lsdkfj あ 葉 叶 slskdfj s Åj;sdklf ÿjs;kdjåf î';
1651 $result = $this->x->detectUnicodeBlocks($teststr, false);
// the captured output buffer is compared with the expected text, ignoring
// leading and trailing whitespace
1656 $str_result = ob_get_contents();
1658 $this->assertEquals(trim($exp_output), trim($str_result));
1660 // test whether skipping the spaces reduces the basic latin count
1661 $result2 = $this->x->detectUnicodeBlocks($teststr, true);
1662 $this->assertTrue($result2['Basic Latin'] < $result['Basic Latin']);
1664 $result3 = $this->x->unicodeBlockName('и');
1665 $this->assertEquals('Cyrillic', $result3);
1667 $this->assertEquals('Basic Latin', $this->x->unicodeBlockName('A'));
1669 // see what happens when you try an unassigned range
1670 $utf8 = $this->code2utf(0x0800);
1672 $this->assertEquals(false, $this->x->unicodeBlockName($utf8));
1674 // try unicode vals in several different ranges
// maps the expected block name to one code point inside that block
1675 $unicode['Supplementary Private Use Area-A'] = 0xF0001;
1676 $unicode['Supplementary Private Use Area-B'] = 0x100001;
1677 $unicode['CJK Unified Ideographs Extension B'] = 0x20001;
1678 $unicode['Ugaritic'] = 0x10381;
1679 $unicode['Gothic'] = 0x10331;
1680 $unicode['Low Surrogates'] = 0xDC01;
1681 $unicode['CJK Unified Ideographs'] = 0x4E00;
1682 $unicode['Glagolitic'] = 0x2C00;
1683 $unicode['Latin Extended Additional'] = 0x1EFF;
1684 $unicode['Devanagari'] = 0x0900;
1685 $unicode['Hebrew'] = 0x0590;
1686 $unicode['Latin Extended-B'] = 0x024F;
1687 $unicode['Latin-1 Supplement'] = 0x00FF;
1688 $unicode['Basic Latin'] = 0x007F;
1690 foreach ($unicode as $range => $codepoint) {
// code2utf() turns the code point into the UTF-8 character that is looked up
1691 $result = $this->x->unicodeBlockName($this->code2utf($codepoint));
1692 $this->assertEquals($range, $result, $codepoint);
/**
 * Passing a string of several characters to unicodeBlockName() must raise
 * the exception announced by the annotations below.
 *
 * @expectedException Text_LanguageDetect_Exception
 * @expectedExceptionMessage Pass a single char only to this method
 */
function testUnicodeBlockNameParamString()
{
    $severalChars = 'foo bar baz';
    $this->x->unicodeBlockName($severalChars);
}
/**
 * Passing a float to unicodeBlockName() must raise the exception announced
 * by the annotations below.
 *
 * @expectedException Text_LanguageDetect_Exception
 * @expectedExceptionMessage Input must be of type string or int
 */
function testUnicodeBlockNameUnsupportedParamType()
{
    $unsupported = 1.23;
    $this->x->unicodeBlockName($unsupported);
}
1716 // found in http://www.php.net/manual/en/function.utf8-encode.php#49336
/**
 * Test helper that builds the UTF-8 byte string for a numeric code point.
 *
 * The branches visible here produce two bytes for values below 2048, three
 * bytes for values below 65536 and four bytes for values below 2097152.
 * NOTE(review): the first and the last branch are not visible here —
 * confirm what is returned for values below the 2-byte range and for
 * values of 2097152 and above.
 *
 * @param int $num code point to encode
 *
 * @return string UTF-8 encoded character
 */
1717 function code2utf($num)
1722 } elseif ($num < 2048) {
// 2 bytes: lead byte 192 (0xC0) plus the bits above the low 6,
// continuation byte 128 (0x80) plus the low 6 bits
1723 return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
1725 } elseif ($num < 65536) {
// 3 bytes: lead byte 224 (0xE0), then two continuation bytes of 6 bits each
1726 return chr(($num >> 12) + 224) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
1728 } elseif ($num < 2097152) {
// 4 bytes: lead byte 240 (0xF0), then three continuation bytes of 6 bits each
1729 return chr(($num >> 18) + 240) . chr((($num >> 12) & 63) + 128) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
/**
 * Checks that utf8strlen() counts characters rather than bytes: the
 * accented sample word must be reported as 20 characters long.
 *
 * NOTE(review): utf8_decode(), used here only for the failure message, is
 * deprecated as of PHP 8.2.
 */
1735 function test_utf8len()
1737 $str = 'Iñtërnâtiônàlizætiøn';
1738 $this->assertEquals(20, $this->x->utf8strlen($str), utf8_decode($str));
// NOTE(review): $str is presumably reassigned to a three-character sample
// before this assertion — the assignment is not visible here, confirm.
1741 $this->assertEquals(3, $this->x->utf8strlen($str), utf8_decode($str));
/**
 * _utf8char2unicode() must return the expected code point for UTF-8
 * characters taken from a range of scripts.
 */
function test_unicode()
{
    // UTF-8 character => code point expected for it
    $expectedCodepoints = array(
        'ת' => 0x5EA,
        'ç' => 0x00E7,
        'a' => 0x0061,
        'Φ' => 0x03A6,
        'И' => 0x0418,
        'ڰ' => 0x6B0,
        'Ụ' => 0x1EE4,
        '놔' => 0xB194,
        '遮' => 0x906E,
        '怀' => 0x6000,
        'ฤ' => 0x0E24,
        'Я' => 0x042F,
        'ü' => 0x00FC,
        'Đ' => 0x0110,
        'א' => 0x05D0,
    );

    foreach ($expectedCodepoints as $char => $codepoint) {
        $actual = $this->x->_utf8char2unicode($char);
        $this->assertEquals($codepoint, $actual, $char);
    }
}
/**
 * detectConfidence() must give the same result for an English sentence
 * whether useUnicodeBlocks() is switched on or off.
 *
 * This does not reveal whether unicode narrowing was actually applied.
 */
function test_unicode_off()
{
    $detector = new Text_LanguageDetect;

    $sample = 'This is a delightful sample of English text';

    $detector->useUnicodeBlocks(true);
    $withBlocks = $detector->detectConfidence($sample);

    $detector->useUnicodeBlocks(false);
    $withoutBlocks = $detector->detectConfidence($sample);

    $this->assertEquals($withBlocks, $withoutBlocks);
}
/**
 * Runs detectSimple() on one sample sentence per language and expects the
 * language name used as array key to be returned for each of them.
 */
1805 function test_detection()
1808 // WARNING: the below lines may make your terminal go ape! be warned
1832 // test strings from the test module used by perl's Language::Guess
// NOTE(review): 'romanian' and 'albanian' occur twice below; if both
// occurrences sit in the same array literal the later one silently
// replaces the earlier — confirm against the surrounding statements.
1835 "english" => "This is a test of the language checker",
1836 "french" => "Verifions que le détecteur de langues marche",
1837 "polish" => "Sprawdźmy, czy odgadywacz języków pracuje",
1838 "russian" => "Давай проверим узнает ли нашь угадыватель русский язык",
1839 "spanish" => "La respuesta de los acreedores a la oferta argentina para salir del default no ha sido muy positiv",
1840 "romanian" => "în acest sens aparţinînd Adunării Generale a organizaţiei, în ciuda faptului că mai multe dintre solicitările organizaţiei privind organizarea scrutinului nu au fost soluţionate",
1841 "albanian" => "kaluan ditën e fundit të fushatës në shtetet kryesore për të siguruar sa më shumë votues.",
1842 "danish" => "På denne side bringer vi billeder fra de mange forskellige forberedelser til arrangementet, efterhånden som vi får dem ",
1843 "swedish" => "Vi säger att Frälsningen är en gåva till alla, fritt och för intet. Men som vi nämnt så finns det två villkor som måste",
1844 "norwegian" => "Nominasjonskomiteen i Akershus KrF har skviset ut Einar Holstad fra stortingslisten. Ytre Enebakk-mannen har plass p Stortinget s lenge Valgerd Svarstad Haugland sitter i",
1845 "finnish" => "on julkishallinnon verkkopalveluiden yhteinen osoite. Kansalaisten arkielämää helpottavaa tietoa on koottu eri aihealueisiin",
1846 "estonian" => "Ennetamaks reisil ebameeldivaid vahejuhtumeid vii end kurssi reisidokumentide ja viisade reeglitega ning muu praktilise informatsiooniga",
1847 "hungarian" => "Hiába jön létre az önkéntes magyar haderő, hiába nem lesz többé bevonulás, változatlanul fennmarad a hadkötelezettség intézménye",
1848 "uzbek" => "милиция ва уч солиқ идораси ходимлари яраланган. Шаҳарда хавфсизлик чоралари кучайтирилган.",
1851 "czech" => "Francouzský ministr financí zmírnil výhrady vůči nízkým firemním daním v nových členských státech EU",
1852 "dutch" => "Die kritiek was volgens hem bitter hard nodig, omdat Nederland binnen een paar jaar in een soort Belfast zou dreigen te nderen",
1854 "croatian" => "biće prilično izjednačena, sugerišu najnovije ankete. Oba kandidata tvrde da su sposobni da dobiju rat protiv terorizma",
1856 "romanian" => "în acest sens aparţinînd Adunării Generale a organizaţiei, în ciuda faptului că mai multe dintre solicitările organizaţiei ivind organizarea scrutinului nu au fost soluţionate",
1858 "turkish" => "yakın tarihin en çekişmeli başkanlık seçiminde oy verme işlemi sürerken, katılımda rekor bekleniyor.",
1860 "kyrgyz" => "көрбөгөндөй элдик толкундоо болуп, Кокон шаарынын көчөлөрүндө бир нече миң киши нааразылык билдирди.",
1863 "albanian" => "kaluan ditën e fundit të fushatës në shtetet kryesore për të siguruar sa më shumë votues.",
1866 "azeri" => "Daxil olan xəbərlərdə deyilir ki, 6 nəfər Bağdadın mərkəzində yerləşən Təhsil Nazirliyinin binası yaxınlığında baş vermiş partlayış zamanı həlak olub.",
1869 "macedonian" => "на јавното мислење покажуваат дека трката е толку тесна, што се очекува двајцата соперници да ја прекршат традицијата и да се појават и на самиот изборен ден.",
1873 "kazakh" => "Сайлау нәтижесінде дауыстардың басым бөлігін ел премьер министрі Виктор Янукович пен оның қарсыласы, оппозиция жетекшісі Виктор Ющенко алды.",
1876 "bulgarian" => " е готов да даде гаранции, че няма да прави ядрено оръжие, ако му се разреши мирна атомна програма",
1879 "arabic" => " ملايين الناخبين الأمريكيين يدلون بأصواتهم وسط إقبال قياسي على انتخابات هي الأشد تنافسا منذ عقود",
1907 // should be safe at this point
// first make sure every language used as a key is known to the detector
1910 $languages = $this->x->getLanguages();
1911 foreach (array_keys($testarr) as $key) {
1912 $this->assertTrue(in_array($key, $languages), "$key was not in known languages");
// then expect each sample to be detected as the language it is keyed by
1915 foreach ($testarr as $key=>$value) {
1916 $this->assertEquals($key, $this->x->detectSimple($value));
/**
 * Exercises _convertFromNameMode() with the language name 'english'.
 *
 * No setNameMode() call is visible in this test.
 * NOTE(review): the expected value handed to assertEquals() is not visible
 * here; presumably the name comes back unchanged as 'english' - confirm.
 */
1921 public function test_convertFromNameMode0()
1923 $this->assertEquals(
1925 $this->x->_convertFromNameMode('english')
/**
 * Switches the Text_LanguageDetect instance to name mode 2 and exercises
 * _convertFromNameMode() with the two-letter code 'en' passed as a string.
 *
 * NOTE(review): the expected value handed to assertEquals() is not visible
 * here; presumably 'english', as in test_convertFromNameMode2ArrayVal -
 * confirm.
 */
1929 public function test_convertFromNameMode2String()
1931 $this->x->setNameMode(2);
1932 $this->assertEquals(
1934 $this->x->_convertFromNameMode('en')
/**
 * Switches the Text_LanguageDetect instance to name mode 3 and exercises
 * _convertFromNameMode() with the three-letter code 'eng' passed as a
 * string.
 *
 * NOTE(review): the expected value handed to assertEquals() is not visible
 * here; presumably 'english', as in test_convertFromNameMode3ArrayVal -
 * confirm.
 */
1938 public function test_convertFromNameMode3String()
1940 $this->x->setNameMode(3);
1941 $this->assertEquals(
1943 $this->x->_convertFromNameMode('eng')
/**
 * In name mode 2, a list of two-letter codes must be converted into a list
 * of language names: array('en', 'de') becomes array('english', 'german').
 */
1947 public function test_convertFromNameMode2ArrayVal()
1949 $this->x->setNameMode(2);
1950 $this->assertEquals(
1951 array('english', 'german'),
1952 $this->x->_convertFromNameMode(array('en', 'de'))
/**
 * In name mode 2, two-letter codes used as array keys must be converted
 * into language names while the values ('foo', 'test') stay untouched.
 *
 * NOTE(review): the array argument is followed by a comma, so a further
 * argument is passed to _convertFromNameMode(); presumably a flag that
 * requests key conversion - confirm.
 */
1956 public function test_convertFromNameMode2ArrayKey()
1958 $this->x->setNameMode(2);
1959 $this->assertEquals(
1960 array('english' => 'foo', 'german' => 'test'),
1961 $this->x->_convertFromNameMode(
1962 array('en' => 'foo', 'de' => 'test'),
/**
 * In name mode 3, a list of three-letter codes must be converted into a
 * list of language names: array('eng', 'deu') becomes
 * array('english', 'german').
 */
1968 public function test_convertFromNameMode3ArrayVal()
1970 $this->x->setNameMode(3);
1971 $this->assertEquals(
1972 array('english', 'german'),
1973 $this->x->_convertFromNameMode(array('eng', 'deu'))
/**
 * In name mode 3, three-letter codes used as array keys must be converted
 * into language names while the values ('foo', 'test') stay untouched.
 *
 * NOTE(review): the array argument is followed by a comma, so a further
 * argument is passed to _convertFromNameMode(); presumably a flag that
 * requests key conversion - confirm.
 */
1977 public function test_convertFromNameMode3ArrayKey()
1979 $this->x->setNameMode(3);
1980 $this->assertEquals(
1981 array('english' => 'foo', 'german' => 'test'),
1982 $this->x->_convertFromNameMode(
1983 array('eng' => 'foo', 'deu' => 'test'),
/**
 * Exercises _convertToNameMode() with the language name 'english'.
 *
 * No setNameMode() call is visible in this test.
 * NOTE(review): the expected value handed to assertEquals() is not visible
 * here; presumably the name comes back unchanged as 'english' - confirm.
 */
1989 public function test_convertToNameMode0()
1991 $this->assertEquals(
1993 $this->x->_convertToNameMode('english')
/**
 * Switches the Text_LanguageDetect instance to name mode 2 and exercises
 * _convertToNameMode() with the language name 'english' passed as a string.
 *
 * NOTE(review): the expected value handed to assertEquals() is not visible
 * here; presumably the two-letter code 'en', as in
 * test_convertToNameMode2ArrayKey - confirm.
 */
1997 public function test_convertToNameMode2String()
1999 $this->x->setNameMode(2);
2000 $this->assertEquals(
2002 $this->x->_convertToNameMode('english')
/**
 * Switches the Text_LanguageDetect instance to name mode 3 and exercises
 * _convertToNameMode() with the language name 'english' passed as a string.
 *
 * NOTE(review): the expected value handed to assertEquals() is not visible
 * here; presumably the three-letter code 'eng', as in
 * test_convertToNameMode3ArrayVal - confirm.
 */
2006 public function test_convertToNameMode3String()
2008 $this->x->setNameMode(3);
2009 $this->assertEquals(
2011 $this->x->_convertToNameMode('english')
/**
 * Switches the Text_LanguageDetect instance to name mode 2 and exercises
 * _convertToNameMode() with the list array('english', 'german').
 *
 * NOTE(review): the expected array handed to assertEquals() is not visible
 * here; presumably array('en', 'de'), mirroring the keys expected in
 * test_convertToNameMode2ArrayKey - confirm.
 */
2015 public function test_convertToNameMode2ArrayVal()
2017 $this->x->setNameMode(2);
2018 $this->assertEquals(
2020 $this->x->_convertToNameMode(array('english', 'german'))
/**
 * In name mode 2, language names used as array keys must be converted into
 * two-letter codes ('english' => 'en', 'german' => 'de') while the values
 * ('foo', 'test') stay untouched.
 *
 * NOTE(review): the array argument is followed by a comma, so a further
 * argument is passed to _convertToNameMode(); presumably a flag that
 * requests key conversion - confirm.
 */
2024 public function test_convertToNameMode2ArrayKey()
2026 $this->x->setNameMode(2);
2027 $this->assertEquals(
2028 array('en' => 'foo', 'de' => 'test'),
2029 $this->x->_convertToNameMode(
2030 array('english' => 'foo', 'german' => 'test'),
/**
 * In name mode 3, a list of language names must be converted into a list
 * of three-letter codes: array('english', 'german') becomes
 * array('eng', 'deu').
 */
2036 public function test_convertToNameMode3ArrayVal()
2038 $this->x->setNameMode(3);
2039 $this->assertEquals(
2040 array('eng', 'deu'),
2041 $this->x->_convertToNameMode(array('english', 'german'))
2045 public function test_convertToNameMode3ArrayKey()
2047 $this->x->setNameMode(3);
2048 $this->assertEquals(
2049 array('eng' => 'foo', 'deu' => 'test'),
2050 $this->x->_convertToNameMode(
2051 array('english' => 'foo', 'german' => 'test'),