4 * @package Text_LanguageDetect
8 __DIR__ . '/../' . PATH_SEPARATOR . get_include_path()
10 error_reporting(E_ALL|E_STRICT);
12 require_once 'Text/LanguageDetect.php';
13 require_once __DIR__ . '/PrivProxy.php';
// PHPUnit test case for Text_LanguageDetect (extends PHPUnit_Framework_TestCase).
15 class Text_LanguageDetectTest extends PHPUnit_Framework_TestCase {
// Fixture lines: magic_quotes_runtime is switched off, a fresh detector is
// stored in $this->x, and $this->xproxy wraps that detector in a PrivProxy.
// The tests use the proxy to reach underscore-prefixed members of the detector.
// NOTE(review): the header of the method containing these three statements is
// not visible in this excerpt; presumably it is the per-test set-up hook.
19 ini_set('magic_quotes_runtime', 0);
20 $this->x = new Text_LanguageDetect();
21 $this->xproxy = new PrivProxy($this->x);
// Calls _get_data_loc() through the proxy with the absolute path
// '/path/to/file'.
// NOTE(review): the assertion wrapping this call and its expected value are
// not visible in this excerpt.
29 function test_get_data_locAbsolute()
33 $this->xproxy->_get_data_loc('/path/to/file')
// Sets the detector's _data_dir to '/path/to/pear/data' and then resolves the
// relative name 'file' with _get_data_loc().
// NOTE(review): the string on the third line is presumably the expected value
// of an assertion whose opening line is not visible in this excerpt.
37 function test_get_data_locPearPath()
39 $this->xproxy->_data_dir = '/path/to/pear/data';
41 '/path/to/pear/data/Text_LanguageDetect/file',
42 $this->xproxy->_get_data_loc('file')
// Passing a name that does not exist to _readdb() is expected to raise
// Text_LanguageDetect_Exception with a "does not exist" message, as declared
// by the two annotation lines below.
47 * @expectedException Text_LanguageDetect_Exception
48 * @expectedExceptionMessage Language database does not exist:
50 function test_readdbNonexistingFile()
52 $this->xproxy->_readdb('thisfiledoesnotexist');
// Obtains a temporary file name in the system temp directory and hands it to
// _readdb(); the annotation lines below declare that this must raise
// Text_LanguageDetect_Exception with a "not readable" message.
// NOTE(review): the statement between the tempnam() call and the _readdb()
// call (presumably the one that makes the file unreadable) is not visible in
// this excerpt, and no visible statement removes the temporary file again.
56 * @expectedException Text_LanguageDetect_Exception
57 * @expectedExceptionMessage Language database is not readable:
59 function test_readdbUnreadableFile()
61 $name = tempnam(sys_get_temp_dir(), 'unittest-Text_LanguageDetect-');
63 $this->xproxy->_readdb($name);
// An empty array passed to _checkTrigram() is expected to raise
// Text_LanguageDetect_Exception with the "has no elements" message declared
// by the annotation lines below.
67 * @expectedException Text_LanguageDetect_Exception
68 * @expectedExceptionMessage Language database has no elements.
70 function test_checkTrigramEmpty()
72 $this->xproxy->_checkTrigram(array());
// A non-array value (the string 'foo') passed to _checkTrigram() is expected
// to raise Text_LanguageDetect_Exception with the "is not an array" message
// declared by the annotation lines below.
76 * @expectedException Text_LanguageDetect_Exception
77 * @expectedExceptionMessage Language database is not an array
79 function test_checkTrigramNoArray()
81 $this->xproxy->_checkTrigram('foo');
// Same non-array input as the previous test, but with magic_quotes_runtime
// switched on first; the annotation lines below declare that the exception
// message must then advise turning magic_quotes_runtime off.
// The test is marked as skipped when the running PHP version compares as
// 5.4.0-dev or newer, because (per the skip message) magic quotes no longer
// exist there.
85 * @expectedException Text_LanguageDetect_Exception
86 * @expectedExceptionMessage Error loading database. Try turning magic_quotes_runtime off
88 function test_checkTrigramNoArrayMagicQuotes()
90 if (version_compare(PHP_VERSION, '5.4.0-dev') >= 0) {
91 $this->markTestSkipped('5.4.0 has no magic quotes anymore');
93 ini_set('magic_quotes_runtime', 1);
94 $this->xproxy->_checkTrigram('foo');
// Checks the trigram counts that _trigram() produces for three inputs.
// NOTE(review): the assignments of the first and third input strings are not
// visible in this excerpt; only 'aa aa whatever' is shown.
97 function test_splitter ()
101 $result = $this->xproxy->_trigram($str);
// First input: exactly these five trigrams, each counted once (the leading and
// trailing keys contain a space).
103 $this->assertEquals(array(' he' => 1, 'hel' => 1, 'ell' => 1, 'llo' => 1, 'lo ' => 1), $result);
105 $str = 'aa aa whatever';
107 $result = $this->xproxy->_trigram($str);
// Repeated trigrams must be counted: ' aa' and 'aa ' twice, 'a a' once.
108 $this->assertEquals(2, $result[' aa']);
109 $this->assertEquals(2, $result['aa ']);
110 $this->assertEquals(1, $result['a a']);
113 $result = $this->xproxy->_trigram($str);
// Third input: no two-character keys may appear in the result.
114 $this->assertArrayNotHasKey(' a', $result, ' a');
115 $this->assertArrayNotHasKey('a ', $result, 'a ');
// Checks _trigram() on text containing an accented character: the trigrams
// 'mé ' and 'umé' must be present and the two-character key 'é ' must not.
// The same three checks are then repeated for a second input.
// NOTE(review): neither input string assignment is visible in this excerpt.
// The failure message of the third check in each group is 'é' although the
// key being tested is 'é ' (with a trailing space).
118 function test_splitter2 ()
122 $result = $this->xproxy->_trigram($str);
124 $this->assertTrue(isset($result['mé ']), 'mé ');
125 $this->assertTrue(isset($result['umé']), 'umé');
126 $this->assertTrue(!isset($result['é ']), 'é');
128 // tests lower-casing accented characters
131 $result = $this->xproxy->_trigram($str);
133 $this->assertTrue(isset($result['mé ']),'mé ');
134 $this->assertTrue(isset($result['umé']),'umé');
135 $this->assertTrue(!isset($result['é ']),'é');
/**
 * Checks that _bub_sort() leaves the sample trigram-count map in the order
 * b, c, a: the two entries with count 2 first, then the entry with count 1.
 *
 * The array is placed in the proxy's argument list by reference so that the
 * result of the sort is visible in $arr afterwards.
 */
function test_sort ()
{
    $arr = array('a' => 1, 'b' => 2, 'c' => 2);
    $this->xproxy->__call('_bub_sort',[&$arr]);

    $final_arr = array('b' => 2, 'c' => 2, 'a' => 1);

    // The sort must not add, drop or alter any key/value pair.
    $this->assertEquals($final_arr, $arr);

    // assertEquals() on associative arrays does not take element order into
    // account, so on its own it would also pass for an unsorted array.
    // Comparing the key sequences pins down the ordering itself.
    $this->assertSame(array_keys($final_arr), array_keys($arr));
}
// Feeds detectSimple() inputs that cannot be classified and checks that each
// result is falsy: the empty string, a lone newline, one- and two-character
// strings, and a run of identical letters. Only the last check compares the
// result against null (loosely, via assertEquals).
148 function test_error ()
150 // this test passes the object a series of bad strings to see how it handles them
152 $result = $this->x->detectSimple("");
154 $this->assertTrue(!$result);
156 $result = $this->x->detectSimple("\n");
158 $this->assertTrue(!$result);
160 // should fail on extremely short strings
161 $result = $this->x->detectSimple("a");
163 $this->assertTrue(!$result);
165 $result = $this->x->detectSimple("aa");
167 $this->assertTrue(!$result);
169 $result = $this->x->detectSimple('xxxxxxxxxxxxxxxxxxx');
171 $this->assertEquals(null, $result);
// Exercises omitLanguages() in two modes, each on its own detector instance
// and with the same English sample text.
174 function testOmitLanguages()
176 $str = 'This function may return Boolean FALSE, but may also return a non-Boolean value which evaluates to FALSE, such as 0 or "". Please read the section on Booleans for more information. Use the === operator for testing the return value of this function.';
178 $myobj = new Text_LanguageDetect;
179 $myobjproxy = new PrivProxy($myobj);
// The _use_unicode_narrowing flag is switched off on the first instance only.
181 $myobjproxy->_use_unicode_narrowing = false;
// Part 1: omitting 'english' must return 1 and reduce the language count by
// exactly one; the English sample must then be detected as something else.
183 $count = $myobj->getLanguageCount();
184 $returnval = $myobj->omitLanguages('english');
185 $newcount = $myobj->getLanguageCount();
187 $this->assertEquals(1, $returnval);
188 $this->assertEquals(1, $count - $newcount);
190 $result = strtolower($myobj->detectSimple($str));
192 $this->assertTrue($result != 'english', $result);
// Part 2: with true as second argument the listed languages are the only ones
// that remain afterwards (detect() must return exactly 'danish' and
// 'italian'). The return value must equal the drop in language count.
194 $myobj = new Text_LanguageDetect;
196 $count = $myobj->getLanguageCount();
197 $returnval = $myobj->omitLanguages(array('danish', 'italian'), true);
198 $newcount = $myobj->getLanguageCount();
200 $this->assertEquals($count - $newcount, $returnval);
201 $this->assertEquals($count - $returnval, $newcount);
203 $result = strtolower($myobj->detectSimple($str));
205 $this->assertTrue($result == 'danish' || $result == 'italian', $result);
207 $result = $myobj->detect($str);
209 $this->assertEquals(2, count($result));
210 $this->assertTrue(isset($result['danish']));
211 $this->assertTrue(isset($result['italian']));
// After setNameMode(2), omitLanguages() must accept the short name 'en' and
// report one omitted language.
// NOTE(review): presumably mode 2 selects two-letter language codes - confirm
// against setNameMode().
216 function testOmitLanguagesNameMode2()
218 $this->x->setNameMode(2);
219 $this->assertEquals(1, $this->x->omitLanguages('en'));
// Calls omitLanguages() with a single language name (a string, not an array)
// and true as second argument: more than one language must be reported as
// omitted, and 'english' must be the only language left in getLanguages().
222 function testOmitLanguagesIncludeString()
224 $this->assertGreaterThan(1, $this->x->omitLanguages('english', true));
225 $langs = $this->x->getLanguages();
226 $this->assertEquals(1, count($langs));
227 $this->assertContains('english', $langs);
// Checks that omitLanguages() resets the detector's _clusters member:
// it is null after the first omitLanguages() call, non-null once
// clusterLanguages() has run, and null again after a further
// omitLanguages() call.
230 function testOmitLanguagesClearsClusterCache()
232 $this->x->omitLanguages(array('english', 'german'), true);
233 $this->assertNull($this->xproxy->_clusters);
234 $this->x->clusterLanguages();
235 $this->assertNotNull($this->xproxy->_clusters);
236 $this->x->omitLanguages('german');
237 $this->assertNull($this->xproxy->_clusters, 'cluster cache be empty now');
// With Perl-compatible mode enabled, the trigram list for the test text must
// not contain the key ' he'.
// NOTE(review): the assignment of $testtext is not visible in this excerpt.
240 function test_perl_compatibility()
242 // if this test fails, then many of the others will
244 $this->x->setPerlCompatible(true);
248 $result = $this->xproxy->_trigram($testtext);
250 $this->assertTrue(!isset($result[' he']));
// Compares the loaded French trigram model against a reference table that
// maps each trigram to its rank (0 to 299).
// NOTE(review): the line that opens the reference array (iterated below as
// $safe_model) is not visible in this excerpt.
253 function test_french_db ()
257 "es " => 0, " de" => 1, "de " => 2, " le" => 3, "ent" => 4,
258 "le " => 5, "nt " => 6, "la " => 7, "s d" => 8, " la" => 9,
259 "ion" => 10, "on " => 11, "re " => 12, " pa" => 13, "e l" => 14,
260 "e d" => 15, " l'" => 16, "e p" => 17, " co" => 18, " pr" => 19,
261 "tio" => 20, "ns " => 21, " en" => 22, "ne " => 23, "que" => 24,
262 "r l" => 25, "les" => 26, "ur " => 27, "en " => 28, "ati" => 29,
263 "ue " => 30, " po" => 31, " d'" => 32, "par" => 33, " a " => 34,
264 "et " => 35, "it " => 36, " qu" => 37, "men" => 38, "ons" => 39,
265 "te " => 40, " et" => 41, "t d" => 42, " re" => 43, "des" => 44,
266 " un" => 45, "ie " => 46, "s l" => 47, " su" => 48, "pou" => 49,
267 " au" => 50, " à " => 51, "con" => 52, "er " => 53, " no" => 54,
268 "ait" => 55, "e c" => 56, "se " => 57, "té " => 58, "du " => 59,
269 " du" => 60, " dé" => 61, "ce " => 62, "e e" => 63, "is " => 64,
270 "n d" => 65, "s a" => 66, " so" => 67, "e r" => 68, "e s" => 69,
271 "our" => 70, "res" => 71, "ssi" => 72, "eur" => 73, " se" => 74,
272 "eme" => 75, "est" => 76, "us " => 77, "sur" => 78, "ant" => 79,
273 "iqu" => 80, "s p" => 81, "une" => 82, "uss" => 83, "l'a" => 84,
274 "pro" => 85, "ter" => 86, "tre" => 87, "end" => 88, "rs " => 89,
275 " ce" => 90, "e a" => 91, "t p" => 92, "un " => 93, " ma" => 94,
276 " ru" => 95, " ré" => 96, "ous" => 97, "ris" => 98, "rus" => 99,
277 "sse" => 100, "ans" => 101, "ar " => 102, "com" => 103, "e m" => 104,
278 "ire" => 105, "nce" => 106, "nte" => 107, "t l" => 108, " av" => 109,
279 " mo" => 110, " te" => 111, "il " => 112, "me " => 113, "ont" => 114,
280 "ten" => 115, "a p" => 116, "dan" => 117, "pas" => 118, "qui" => 119,
281 "s e" => 120, "s s" => 121, " in" => 122, "ist" => 123, "lle" => 124,
282 "nou" => 125, "pré" => 126, "'un" => 127, "air" => 128, "d'a" => 129,
283 "ir " => 130, "n e" => 131, "rop" => 132, "ts " => 133, " da" => 134,
284 "a s" => 135, "as " => 136, "au " => 137, "den" => 138, "mai" => 139,
285 "mis" => 140, "ori" => 141, "out" => 142, "rme" => 143, "sio" => 144,
286 "tte" => 145, "ux " => 146, "a d" => 147, "ien" => 148, "n a" => 149,
287 "ntr" => 150, "omm" => 151, "ort" => 152, "ouv" => 153, "s c" => 154,
288 "son" => 155, "tes" => 156, "ver" => 157, "ère" => 158, " il" => 159,
289 " m " => 160, " sa" => 161, " ve" => 162, "a r" => 163, "ais" => 164,
290 "ava" => 165, "di " => 166, "n p" => 167, "sti" => 168, "ven" => 169,
291 " mi" => 170, "ain" => 171, "enc" => 172, "for" => 173, "ité" => 174,
292 "lar" => 175, "oir" => 176, "rem" => 177, "ren" => 178, "rro" => 179,
293 "rés" => 180, "sie" => 181, "t a" => 182, "tur" => 183, " pe" => 184,
294 " to" => 185, "d'u" => 186, "ell" => 187, "err" => 188, "ers" => 189,
295 "ide" => 190, "ine" => 191, "iss" => 192, "mes" => 193, "por" => 194,
296 "ran" => 195, "sit" => 196, "st " => 197, "t r" => 198, "uti" => 199,
297 "vai" => 200, "é l" => 201, "ési" => 202, " di" => 203, " n'" => 204,
298 " ét" => 205, "a c" => 206, "ass" => 207, "e t" => 208, "in " => 209,
299 "nde" => 210, "pre" => 211, "rat" => 212, "s m" => 213, "ste" => 214,
300 "tai" => 215, "tch" => 216, "ui " => 217, "uro" => 218, "ès " => 219,
301 " es" => 220, " fo" => 221, " tr" => 222, "'ad" => 223, "app" => 224,
302 "aux" => 225, "e à" => 226, "ett" => 227, "iti" => 228, "lit" => 229,
303 "nal" => 230, "opé" => 231, "r d" => 232, "ra " => 233, "rai" => 234,
304 "ror" => 235, "s r" => 236, "tat" => 237, "uté" => 238, "à l" => 239,
305 " af" => 240, "anc" => 241, "ara" => 242, "art" => 243, "bre" => 244,
306 "ché" => 245, "dre" => 246, "e f" => 247, "ens" => 248, "lem" => 249,
307 "n r" => 250, "n t" => 251, "ndr" => 252, "nne" => 253, "onn" => 254,
308 "pos" => 255, "s t" => 256, "tiq" => 257, "ure" => 258, " tu" => 259,
309 "ale" => 260, "and" => 261, "ave" => 262, "cla" => 263, "cou" => 264,
310 "e n" => 265, "emb" => 266, "ins" => 267, "jou" => 268, "mme" => 269,
311 "rie" => 270, "rès" => 271, "sem" => 272, "str" => 273, "t i" => 274,
312 "ues" => 275, "uni" => 276, "uve" => 277, "é d" => 278, "ée " => 279,
313 " ch" => 280, " do" => 281, " eu" => 282, " fa" => 283, " lo" => 284,
314 " ne" => 285, " ra" => 286, "arl" => 287, "att" => 288, "ec " => 289,
315 "ica" => 290, "l a" => 291, "l'o" => 292, "l'é" => 293, "mmi" => 294,
316 "nta" => 295, "orm" => 296, "ou " => 297, "r u" => 298, "rle" => 299
// The model under test is read from the detector's _lang_db member.
320 $my_arr = $this->xproxy->_lang_db['french'];
// Every reference trigram must exist in the loaded model with the same rank.
// The check runs in this direction only: trigrams that exist solely in the
// loaded model are not detected here.
322 foreach ($safe_model as $key => $value) {
323 $this->assertTrue(isset($my_arr[$key]),$key);
324 if (isset($my_arr[$key])) {
325 $this->assertEquals($value, $my_arr[$key], $key);
// Compares the loaded English trigram model against a reference table that
// maps each trigram to its rank (0 to 299), checking both directions.
// NOTE(review): the line that opens the reference array (iterated below as
// $realdb) is not visible in this excerpt.
330 function test_english_db ()
334 " th" => 0, "the" => 1, "he " => 2, "ed " => 3, " to" => 4,
335 " in" => 5, "er " => 6, "ing" => 7, "ng " => 8, " an" => 9,
336 "nd " => 10, " of" => 11, "and" => 12, "to " => 13, "of " => 14,
337 " co" => 15, "at " => 16, "on " => 17, "in " => 18, " a " => 19,
338 "d t" => 20, " he" => 21, "e t" => 22, "ion" => 23, "es " => 24,
339 " re" => 25, "re " => 26, "hat" => 27, " sa" => 28, " st" => 29,
340 " ha" => 30, "her" => 31, "tha" => 32, "tio" => 33, "or " => 34,
341 " ''" => 35, "en " => 36, " wh" => 37, "e s" => 38, "ent" => 39,
342 "n t" => 40, "s a" => 41, "as " => 42, "for" => 43, "is " => 44,
343 "t t" => 45, " be" => 46, "ld " => 47, "e a" => 48, "rs " => 49,
344 " wa" => 50, "ut " => 51, "ve " => 52, "ll " => 53, "al " => 54,
345 " ma" => 55, "e i" => 56, " fo" => 57, "'s " => 58, "an " => 59,
346 "est" => 60, " hi" => 61, " mo" => 62, " se" => 63, " pr" => 64,
347 "s t" => 65, "ate" => 66, "st " => 67, "ter" => 68, "ere" => 69,
348 "ted" => 70, "nt " => 71, "ver" => 72, "d a" => 73, " wi" => 74,
349 "se " => 75, "e c" => 76, "ect" => 77, "ns " => 78, " on" => 79,
350 "ly " => 80, "tol" => 81, "ey " => 82, "r t" => 83, " ca" => 84,
351 "ati" => 85, "ts " => 86, "all" => 87, " no" => 88, "his" => 89,
352 "s o" => 90, "ers" => 91, "con" => 92, "e o" => 93, "ear" => 94,
353 "f t" => 95, "e w" => 96, "was" => 97, "ons" => 98, "sta" => 99,
354 "'' " => 100, "sti" => 101, "n a" => 102, "sto" => 103, "t h" => 104,
355 " we" => 105, "id " => 106, "th " => 107, " it" => 108, "ce " => 109,
356 " di" => 110, "ave" => 111, "d h" => 112, "cou" => 113, "pro" => 114,
357 "ad " => 115, "oll" => 116, "ry " => 117, "d s" => 118, "e m" => 119,
358 " so" => 120, "ill" => 121, "cti" => 122, "te " => 123, "tor" => 124,
359 "eve" => 125, "g t" => 126, "it " => 127, " ch" => 128, " de" => 129,
360 "hav" => 130, "oul" => 131, "ty " => 132, "uld" => 133, "use" => 134,
361 " al" => 135, "are" => 136, "ch " => 137, "me " => 138, "out" => 139,
362 "ove" => 140, "wit" => 141, "ys " => 142, "chi" => 143, "t a" => 144,
363 "ith" => 145, "oth" => 146, " ab" => 147, " te" => 148, " wo" => 149,
364 "s s" => 150, "res" => 151, "t w" => 152, "tin" => 153, "e b" => 154,
365 "e h" => 155, "nce" => 156, "t s" => 157, "y t" => 158, "e p" => 159,
366 "ele" => 160, "hin" => 161, "s i" => 162, "nte" => 163, " li" => 164,
367 "le " => 165, " do" => 166, "aid" => 167, "hey" => 168, "ne " => 169,
368 "s w" => 170, " as" => 171, " fr" => 172, " tr" => 173, "end" => 174,
369 "sai" => 175, " el" => 176, " ne" => 177, " su" => 178, "'t " => 179,
370 "ay " => 180, "hou" => 181, "ive" => 182, "lec" => 183, "n't" => 184,
371 " ye" => 185, "but" => 186, "d o" => 187, "o t" => 188, "y o" => 189,
372 " ho" => 190, " me" => 191, "be " => 192, "cal" => 193, "e e" => 194,
373 "had" => 195, "ple" => 196, " at" => 197, " bu" => 198, " la" => 199,
374 "d b" => 200, "s h" => 201, "say" => 202, "t i" => 203, " ar" => 204,
375 "e f" => 205, "ght" => 206, "hil" => 207, "igh" => 208, "int" => 209,
376 "not" => 210, "ren" => 211, " is" => 212, " pa" => 213, " sh" => 214,
377 "ays" => 215, "com" => 216, "n s" => 217, "r a" => 218, "rin" => 219,
378 "y a" => 220, " un" => 221, "n c" => 222, "om " => 223, "thi" => 224,
379 " mi" => 225, "by " => 226, "d i" => 227, "e d" => 228, "e n" => 229,
380 "t o" => 230, " by" => 231, "e r" => 232, "eri" => 233, "old" => 234,
381 "ome" => 235, "whe" => 236, "yea" => 237, " gr" => 238, "ar " => 239,
382 "ity" => 240, "mpl" => 241, "oun" => 242, "one" => 243, "ow " => 244,
383 "r s" => 245, "s f" => 246, "tat" => 247, " ba" => 248, " vo" => 249,
384 "bou" => 250, "sam" => 251, "tim" => 252, "vot" => 253, "abo" => 254,
385 "ant" => 255, "ds " => 256, "ial" => 257, "ine" => 258, "man" => 259,
386 "men" => 260, " or" => 261, " po" => 262, "amp" => 263, "can" => 264,
387 "der" => 265, "e l" => 266, "les" => 267, "ny " => 268, "ot " => 269,
388 "rec" => 270, "tes" => 271, "tho" => 272, "ica" => 273, "ild" => 274,
389 "ir " => 275, "nde" => 276, "ose" => 277, "ous" => 278, "pre" => 279,
390 "ste" => 280, "era" => 281, "per" => 282, "r o" => 283, "red" => 284,
391 "rie" => 285, " bo" => 286, " le" => 287, "ali" => 288, "ars" => 289,
392 "ore" => 290, "ric" => 291, "s m" => 292, "str" => 293, " fa" => 294,
393 "ess" => 295, "ie " => 296, "ist" => 297, "lat" => 298, "uri" => 299,
// The model under test is read from the detector's _lang_db member.
396 $mod = $this->xproxy->_lang_db['english'];
// Direction 1: every reference trigram exists in the loaded model with the
// same rank.
398 foreach ($realdb as $key => $value) {
399 $this->assertTrue(isset($mod[$key]), $key);
400 if (isset($mod[$key])) {
401 $this->assertEquals($value, $mod[$key], $key);
// Direction 2: every trigram of the loaded model exists in the reference
// table with the same rank. The existence check here carries no failure
// message, unlike the one in the first loop.
405 foreach ($mod as $key => $value) {
406 $this->assertTrue(isset($realdb[$key]));
407 if (isset($realdb[$key])) {
408 $this->assertEquals($value, $realdb[$key], $key);
// Checks the shape and value ranges of detectConfidence() for an English
// sample: the result must have exactly three entries ('language',
// 'similarity', 'confidence'), the language must be 'english', the similarity
// must lie between 0 and 300 and the confidence between 0 and 1 (bounds
// inclusive).
413 function test_confidence ()
415 $str = 'The next thing to notice is the Content-length header. The Content-length header notifies the server of the size of the data that you intend to send. This prevents unexpected end-of-data errors from the server when dealing with binary data, because the server will read the specified number of bytes from the data stream regardless of any spurious end-of-data characters.';
417 $result = $this->x->detectConfidence($str);
419 $this->assertEquals(3, count($result));
420 $this->assertTrue(isset($result['language']), 'language');
421 $this->assertTrue(isset($result['similarity']), 'similarity');
422 $this->assertTrue(isset($result['confidence']), 'confidence');
423 $this->assertEquals('english', $result['language']);
424 $this->assertTrue($result['similarity'] <= 300 && $result['similarity'] >= 0, $result['similarity']);
425 $this->assertTrue($result['confidence'] <= 1 && $result['confidence'] >= 0, $result['confidence']);
427 // todo: tests for Danish and Norwegian should have lower confidence
430 function test_long_example ()
432 // an example that is more than 300 trigrams long
433 $str = 'The Italian Renaissance began the opening phase of the Renaissance, a period of great cultural change and achievement from the 14th to the 16th century. The word renaissance means "rebirth," and the era is best known for the renewed interest in the culture of classical antiquity. The Italian Renaissance began in northern Italy, centering in Florence. It then spread south, having an especially significant impact on Rome, which was largely rebuilt by the Renaissance popes. The Italian Renaissance is best known for its cultural achievements. This includes works of literature by such figures as Petrarch, Castiglione, and Machiavelli; artists such as Michaelangelo and Leonardo da Vinci, and great works of architecture such as The Duomo in Florence and St. Peter\'s Basilica in Rome. At the same time, present-day historians also see the era as one of economic regression and of little progress in science. Furthermore, some historians argue that the lot of the peasants and urban poor, the majority of the population, worsened during this period.';
435 $this->x->setPerlCompatible();
436 $tri = $this->xproxy->_trigram($str);
952 $differences = array_diff(array_keys($tri), $exp_tri);
953 $this->assertEquals(0, count($differences));
954 $this->assertEquals(0, count(array_diff($exp_tri, array_keys($tri))));
955 $this->assertEquals(count($exp_tri), count($tri));
956 //print_r(array_diff($exp_tri, array_keys($tri)));
957 //print_r(array_diff(array_keys($tri), $exp_tri));
959 // tests the bubble sort mechanism
960 $this->xproxy->__call('_bub_sort', [&$tri]);
961 $this->assertEquals($exp_tri, array_keys($tri));
963 $true_differences = array(
964 "cas" => array('change' => 300, 'baserank' => 265, 'refrank' => null), "s i" => array('change' => 21, 'baserank' => 183, 'refrank' => 162),
965 "e b" => array('change' => 88, 'baserank' => 66, 'refrank' => 154), "ent" => array('change' => 12, 'baserank' => 27, 'refrank' => 39),
966 "ome" => array('change' => 152, 'baserank' => 83, 'refrank' => 235), "ral" => array('change' => 300, 'baserank' => 176, 'refrank' => null),
967 "ita" => array('change' => 300, 'baserank' => 44, 'refrank' => null), "bas" => array('change' => 300, 'baserank' => 258, 'refrank' => null),
968 " ar" => array('change' => 148, 'baserank' => 56, 'refrank' => 204), " in" => array('change' => 5, 'baserank' => 10, 'refrank' => 5),
969 " ti" => array('change' => 300, 'baserank' => 227, 'refrank' => null), "ty " => array('change' => 61, 'baserank' => 193, 'refrank' => 132),
970 "tur" => array('change' => 300, 'baserank' => 23, 'refrank' => null), "iss" => array('change' => 300, 'baserank' => 20, 'refrank' => null),
971 "ria" => array('change' => 300, 'baserank' => 179, 'refrank' => null), " me" => array('change' => 25, 'baserank' => 216, 'refrank' => 191),
972 "t k" => array('change' => 300, 'baserank' => 189, 'refrank' => null), " es" => array('change' => 300, 'baserank' => 207, 'refrank' => null),
973 "ren" => array('change' => 202, 'baserank' => 9, 'refrank' => 211), "in " => array('change' => 1, 'baserank' => 19, 'refrank' => 18),
974 "ly " => array('change' => 0, 'baserank' => 80, 'refrank' => 80), "st " => array('change' => 18, 'baserank' => 49, 'refrank' => 67),
975 "ne " => array('change' => 8, 'baserank' => 161, 'refrank' => 169), "all" => array('change' => 154, 'baserank' => 241, 'refrank' => 87),
976 "vin" => array('change' => 300, 'baserank' => 196, 'refrank' => null), " op" => array('change' => 300, 'baserank' => 219, 'refrank' => null),
977 "chi" => array('change' => 107, 'baserank' => 36, 'refrank' => 143), "e w" => array('change' => 197, 'baserank' => 293, 'refrank' => 96),
978 " ro" => array('change' => 300, 'baserank' => 113, 'refrank' => null), "act" => array('change' => 300, 'baserank' => 237, 'refrank' => null),
979 "d r" => array('change' => 300, 'baserank' => 280, 'refrank' => null), "nt " => array('change' => 11, 'baserank' => 82, 'refrank' => 71),
980 "can" => array('change' => 0, 'baserank' => 264, 'refrank' => 264), "rea" => array('change' => 300, 'baserank' => 88, 'refrank' => null),
981 "ssa" => array('change' => 300, 'baserank' => 22, 'refrank' => null), " fo" => array('change' => 47, 'baserank' => 104, 'refrank' => 57),
982 "eas" => array('change' => 300, 'baserank' => 296, 'refrank' => null), "mic" => array('change' => 300, 'baserank' => 157, 'refrank' => null),
983 "cul" => array('change' => 300, 'baserank' => 65, 'refrank' => null), " an" => array('change' => 6, 'baserank' => 3, 'refrank' => 9),
984 "n t" => array('change' => 120, 'baserank' => 160, 'refrank' => 40), "arg" => array('change' => 300, 'baserank' => 118, 'refrank' => null),
985 " it" => array('change' => 93, 'baserank' => 15, 'refrank' => 108), "ebi" => array('change' => 300, 'baserank' => 297, 'refrank' => null),
986 " re" => array('change' => 21, 'baserank' => 4, 'refrank' => 25), "res" => array('change' => 120, 'baserank' => 31, 'refrank' => 151),
987 " be" => array('change' => 13, 'baserank' => 33, 'refrank' => 46), "rom" => array('change' => 300, 'baserank' => 89, 'refrank' => null),
988 "'s " => array('change' => 175, 'baserank' => 233, 'refrank' => 58), "arc" => array('change' => 300, 'baserank' => 117, 'refrank' => null),
989 " su" => array('change' => 119, 'baserank' => 59, 'refrank' => 178), "s p" => array('change' => 300, 'baserank' => 184, 'refrank' => null),
990 "ich" => array('change' => 300, 'baserank' => 145, 'refrank' => null), "d d" => array('change' => 300, 'baserank' => 275, 'refrank' => null),
991 "cal" => array('change' => 70, 'baserank' => 263, 'refrank' => 193), "ci " => array('change' => 300, 'baserank' => 266, 'refrank' => null),
992 "ssi" => array('change' => 300, 'baserank' => 186, 'refrank' => null), "bes" => array('change' => 300, 'baserank' => 120, 'refrank' => null),
993 "des" => array('change' => 300, 'baserank' => 285, 'refrank' => null), "e s" => array('change' => 91, 'baserank' => 129, 'refrank' => 38),
994 "ch " => array('change' => 111, 'baserank' => 26, 'refrank' => 137), "san" => array('change' => 300, 'baserank' => 14, 'refrank' => null),
995 "asi" => array('change' => 300, 'baserank' => 249, 'refrank' => null), "ajo" => array('change' => 300, 'baserank' => 240, 'refrank' => null),
996 "ase" => array('change' => 300, 'baserank' => 248, 'refrank' => null), " wa" => array('change' => 181, 'baserank' => 231, 'refrank' => 50),
997 "vem" => array('change' => 300, 'baserank' => 195, 'refrank' => null), "ed " => array('change' => 128, 'baserank' => 131, 'refrank' => 3),
998 "ant" => array('change' => 191, 'baserank' => 64, 'refrank' => 255), "a p" => array('change' => 300, 'baserank' => 235, 'refrank' => null),
999 "lor" => array('change' => 300, 'baserank' => 155, 'refrank' => null), "kno" => array('change' => 300, 'baserank' => 151, 'refrank' => null),
1000 "ais" => array('change' => 300, 'baserank' => 16, 'refrank' => null), " pe" => array('change' => 300, 'baserank' => 24, 'refrank' => null),
1001 "or " => array('change' => 51, 'baserank' => 85, 'refrank' => 34), "e i" => array('change' => 19, 'baserank' => 37, 'refrank' => 56),
1002 " sp" => array('change' => 300, 'baserank' => 225, 'refrank' => null), "ad " => array('change' => 123, 'baserank' => 238, 'refrank' => 115),
1003 " kn" => array('change' => 300, 'baserank' => 108, 'refrank' => null), "ega" => array('change' => 300, 'baserank' => 132, 'refrank' => null),
1004 " ba" => array('change' => 46, 'baserank' => 202, 'refrank' => 248), "d t" => array('change' => 261, 'baserank' => 281, 'refrank' => 20),
1005 "ork" => array('change' => 300, 'baserank' => 169, 'refrank' => null), "lia" => array('change' => 300, 'baserank' => 78, 'refrank' => null),
1006 "ard" => array('change' => 300, 'baserank' => 245, 'refrank' => null), "iev" => array('change' => 300, 'baserank' => 146, 'refrank' => null),
1007 "of " => array('change' => 6, 'baserank' => 8, 'refrank' => 14), " cu" => array('change' => 300, 'baserank' => 57, 'refrank' => null),
1008 "day" => array('change' => 300, 'baserank' => 284, 'refrank' => null), "cen" => array('change' => 300, 'baserank' => 122, 'refrank' => null),
1009 "re " => array('change' => 21, 'baserank' => 47, 'refrank' => 26), "ist" => array('change' => 220, 'baserank' => 77, 'refrank' => 297),
1010 " fl" => array('change' => 300, 'baserank' => 103, 'refrank' => null), "anc" => array('change' => 300, 'baserank' => 17, 'refrank' => null),
1011 "at " => array('change' => 19, 'baserank' => 35, 'refrank' => 16), "rch" => array('change' => 300, 'baserank' => 177, 'refrank' => null),
1012 "ang" => array('change' => 300, 'baserank' => 116, 'refrank' => null), " mi" => array('change' => 8, 'baserank' => 217, 'refrank' => 225),
1013 "y s" => array('change' => 300, 'baserank' => 198, 'refrank' => null), "ca " => array('change' => 300, 'baserank' => 262, 'refrank' => null),
1014 " ma" => array('change' => 55, 'baserank' => 110, 'refrank' => 55), " lo" => array('change' => 300, 'baserank' => 215, 'refrank' => null),
1015 "rin" => array('change' => 39, 'baserank' => 180, 'refrank' => 219), " im" => array('change' => 300, 'baserank' => 212, 'refrank' => null),
1016 " er" => array('change' => 300, 'baserank' => 102, 'refrank' => null), "ce " => array('change' => 103, 'baserank' => 6, 'refrank' => 109),
1017 "bui" => array('change' => 300, 'baserank' => 260, 'refrank' => null), "lit" => array('change' => 300, 'baserank' => 154, 'refrank' => null),
1018 "iod" => array('change' => 300, 'baserank' => 148, 'refrank' => null), "ame" => array('change' => 300, 'baserank' => 244, 'refrank' => null),
1019 "ter" => array('change' => 17, 'baserank' => 51, 'refrank' => 68), "e a" => array('change' => 78, 'baserank' => 126, 'refrank' => 48),
1020 "f l" => array('change' => 300, 'baserank' => 137, 'refrank' => null), "eri" => array('change' => 162, 'baserank' => 71, 'refrank' => 233),
1021 "ra " => array('change' => 300, 'baserank' => 175, 'refrank' => null), "ng " => array('change' => 38, 'baserank' => 46, 'refrank' => 8),
1022 "d i" => array('change' => 50, 'baserank' => 277, 'refrank' => 227), "asa" => array('change' => 300, 'baserank' => 247, 'refrank' => null),
1023 "wn " => array('change' => 300, 'baserank' => 197, 'refrank' => null), " at" => array('change' => 4, 'baserank' => 201, 'refrank' => 197),
1024 "now" => array('change' => 300, 'baserank' => 163, 'refrank' => null), " by" => array('change' => 133, 'baserank' => 98, 'refrank' => 231),
1025 "n s" => array('change' => 58, 'baserank' => 159, 'refrank' => 217), " li" => array('change' => 55, 'baserank' => 109, 'refrank' => 164),
1026 "l a" => array('change' => 300, 'baserank' => 153, 'refrank' => null), "da " => array('change' => 300, 'baserank' => 283, 'refrank' => null),
1027 "ean" => array('change' => 300, 'baserank' => 295, 'refrank' => null), "tal" => array('change' => 300, 'baserank' => 50, 'refrank' => null),
1028 "d a" => array('change' => 201, 'baserank' => 274, 'refrank' => 73), "ct " => array('change' => 300, 'baserank' => 272, 'refrank' => null),
1029 "ali" => array('change' => 226, 'baserank' => 62, 'refrank' => 288), "ian" => array('change' => 300, 'baserank' => 28, 'refrank' => null),
1030 " sa" => array('change' => 193, 'baserank' => 221, 'refrank' => 28), "do " => array('change' => 300, 'baserank' => 286, 'refrank' => null),
1031 "t o" => array('change' => 40, 'baserank' => 190, 'refrank' => 230), "ure" => array('change' => 300, 'baserank' => 54, 'refrank' => null),
1032 "e c" => array('change' => 213, 'baserank' => 289, 'refrank' => 76), "ing" => array('change' => 35, 'baserank' => 42, 'refrank' => 7),
1033 "d o" => array('change' => 63, 'baserank' => 124, 'refrank' => 187), " ha" => array('change' => 181, 'baserank' => 211, 'refrank' => 30),
1034 "ts " => array('change' => 33, 'baserank' => 53, 'refrank' => 86), "rth" => array('change' => 300, 'baserank' => 90, 'refrank' => null),
1035 "cla" => array('change' => 300, 'baserank' => 269, 'refrank' => null), " ac" => array('change' => 300, 'baserank' => 97, 'refrank' => null),
1036 "th " => array('change' => 55, 'baserank' => 52, 'refrank' => 107), "rio" => array('change' => 300, 'baserank' => 181, 'refrank' => null),
1037 "al " => array('change' => 7, 'baserank' => 61, 'refrank' => 54), "sto" => array('change' => 84, 'baserank' => 187, 'refrank' => 103),
1038 "e o" => array('change' => 55, 'baserank' => 38, 'refrank' => 93), "bir" => array('change' => 300, 'baserank' => 259, 'refrank' => null),
1039 " pr" => array('change' => 48, 'baserank' => 112, 'refrank' => 64), " le" => array('change' => 73, 'baserank' => 214, 'refrank' => 287),
1040 "nai" => array('change' => 300, 'baserank' => 21, 'refrank' => null), "t i" => array('change' => 15, 'baserank' => 188, 'refrank' => 203),
1041 " po" => array('change' => 204, 'baserank' => 58, 'refrank' => 262), "f t" => array('change' => 21, 'baserank' => 74, 'refrank' => 95),
1042 "ban" => array('change' => 300, 'baserank' => 257, 'refrank' => null), "an " => array('change' => 46, 'baserank' => 13, 'refrank' => 59),
1043 "wor" => array('change' => 300, 'baserank' => 55, 'refrank' => null), "pet" => array('change' => 300, 'baserank' => 172, 'refrank' => null),
1044 "ael" => array('change' => 300, 'baserank' => 239, 'refrank' => null), "ura" => array('change' => 300, 'baserank' => 194, 'refrank' => null),
1045 "eve" => array('change' => 11, 'baserank' => 136, 'refrank' => 125), "ion" => array('change' => 53, 'baserank' => 76, 'refrank' => 23),
1046 "nge" => array('change' => 300, 'baserank' => 162, 'refrank' => null), "cha" => array('change' => 300, 'baserank' => 123, 'refrank' => null),
1047 "ity" => array('change' => 90, 'baserank' => 150, 'refrank' => 240), " se" => array('change' => 160, 'baserank' => 223, 'refrank' => 63),
1048 " on" => array('change' => 32, 'baserank' => 111, 'refrank' => 79), "s b" => array('change' => 300, 'baserank' => 91, 'refrank' => null),
1049 "ans" => array('change' => 300, 'baserank' => 63, 'refrank' => null), "own" => array('change' => 300, 'baserank' => 170, 'refrank' => null),
1050 " si" => array('change' => 300, 'baserank' => 224, 'refrank' => null), "e r" => array('change' => 165, 'baserank' => 67, 'refrank' => 232),
1051 "est" => array('change' => 13, 'baserank' => 73, 'refrank' => 60), "hie" => array('change' => 300, 'baserank' => 144, 'refrank' => null),
1052 "aly" => array('change' => 300, 'baserank' => 243, 'refrank' => null), "and" => array('change' => 1, 'baserank' => 11, 'refrank' => 12),
1053 "beg" => array('change' => 300, 'baserank' => 119, 'refrank' => null), "dur" => array('change' => 300, 'baserank' => 288, 'refrank' => null),
1054 "reb" => array('change' => 300, 'baserank' => 178, 'refrank' => null), "e e" => array('change' => 67, 'baserank' => 127, 'refrank' => 194),
1055 "men" => array('change' => 104, 'baserank' => 156, 'refrank' => 260), " la" => array('change' => 14, 'baserank' => 213, 'refrank' => 199),
1056 "con" => array('change' => 179, 'baserank' => 271, 'refrank' => 92), " fu" => array('change' => 300, 'baserank' => 210, 'refrank' => null),
1057 "e l" => array('change' => 26, 'baserank' => 292, 'refrank' => 266), "s a" => array('change' => 7, 'baserank' => 48, 'refrank' => 41),
1058 "art" => array('change' => 300, 'baserank' => 246, 'refrank' => null), "ltu" => array('change' => 300, 'baserank' => 79, 'refrank' => null),
1059 "a i" => array('change' => 300, 'baserank' => 115, 'refrank' => null), "ctu" => array('change' => 300, 'baserank' => 273, 'refrank' => null),
1060 "tor" => array('change' => 68, 'baserank' => 192, 'refrank' => 124), "ach" => array('change' => 300, 'baserank' => 60, 'refrank' => null),
1061 "d g" => array('change' => 300, 'baserank' => 276, 'refrank' => null), "od " => array('change' => 300, 'baserank' => 166, 'refrank' => null),
1062 "nte" => array('change' => 1, 'baserank' => 164, 'refrank' => 163), "ena" => array('change' => 300, 'baserank' => 18, 'refrank' => null),
1063 "d l" => array('change' => 300, 'baserank' => 278, 'refrank' => null), "ene" => array('change' => 300, 'baserank' => 134, 'refrank' => null),
1064 "e h" => array('change' => 136, 'baserank' => 291, 'refrank' => 155), "era" => array('change' => 211, 'baserank' => 70, 'refrank' => 281),
1065 "on " => array('change' => 67, 'baserank' => 84, 'refrank' => 17), " ce" => array('change' => 300, 'baserank' => 99, 'refrank' => null),
1066 "ay " => array('change' => 76, 'baserank' => 256, 'refrank' => 180), " da" => array('change' => 300, 'baserank' => 100, 'refrank' => null),
1067 "ori" => array('change' => 300, 'baserank' => 87, 'refrank' => null), "atu" => array('change' => 300, 'baserank' => 253, 'refrank' => null),
1068 "ave" => array('change' => 143, 'baserank' => 254, 'refrank' => 111), "rks" => array('change' => 300, 'baserank' => 182, 'refrank' => null),
1069 "e d" => array('change' => 62, 'baserank' => 290, 'refrank' => 228), "ns " => array('change' => 3, 'baserank' => 81, 'refrank' => 78),
1070 " ca" => array('change' => 119, 'baserank' => 203, 'refrank' => 84), "d s" => array('change' => 7, 'baserank' => 125, 'refrank' => 118),
1071 "uch" => array('change' => 300, 'baserank' => 95, 'refrank' => null), "a v" => array('change' => 300, 'baserank' => 236, 'refrank' => null),
1072 "nce" => array('change' => 149, 'baserank' => 7, 'refrank' => 156), "his" => array('change' => 48, 'baserank' => 41, 'refrank' => 89),
1073 "flo" => array('change' => 300, 'baserank' => 138, 'refrank' => null), "ead" => array('change' => 300, 'baserank' => 294, 'refrank' => null),
1074 " vi" => array('change' => 300, 'baserank' => 230, 'refrank' => null), "me " => array('change' => 109, 'baserank' => 29, 'refrank' => 138),
1075 "suc" => array('change' => 300, 'baserank' => 93, 'refrank' => null), "e p" => array('change' => 120, 'baserank' => 39, 'refrank' => 159),
1076 "eci" => array('change' => 300, 'baserank' => 299, 'refrank' => null), "eme" => array('change' => 300, 'baserank' => 133, 'refrank' => null),
1077 "sen" => array('change' => 300, 'baserank' => 185, 'refrank' => null), "ks " => array('change' => 300, 'baserank' => 152, 'refrank' => null),
1078 " to" => array('change' => 224, 'baserank' => 228, 'refrank' => 4), " gr" => array('change' => 133, 'baserank' => 105, 'refrank' => 238),
1079 " ch" => array('change' => 76, 'baserank' => 204, 'refrank' => 128), "ati" => array('change' => 167, 'baserank' => 252, 'refrank' => 85),
1080 " th" => array('change' => 0, 'baserank' => 0, 'refrank' => 0), " ec" => array('change' => 300, 'baserank' => 206, 'refrank' => null),
1081 " wo" => array('change' => 115, 'baserank' => 34, 'refrank' => 149), "ope" => array('change' => 300, 'baserank' => 168, 'refrank' => null),
1082 " a " => array('change' => 180, 'baserank' => 199, 'refrank' => 19), "one" => array('change' => 76, 'baserank' => 167, 'refrank' => 243),
1083 "n f" => array('change' => 300, 'baserank' => 45, 'refrank' => null), "eat" => array('change' => 300, 'baserank' => 130, 'refrank' => null),
1084 "ica" => array('change' => 198, 'baserank' => 75, 'refrank' => 273), "inc" => array('change' => 300, 'baserank' => 147, 'refrank' => null),
1085 "enc" => array('change' => 300, 'baserank' => 69, 'refrank' => null), "ore" => array('change' => 204, 'baserank' => 86, 'refrank' => 290),
1086 "is " => array('change' => 1, 'baserank' => 43, 'refrank' => 44), " as" => array('change' => 139, 'baserank' => 32, 'refrank' => 171),
1087 "nts" => array('change' => 300, 'baserank' => 165, 'refrank' => null), "d m" => array('change' => 300, 'baserank' => 279, 'refrank' => null),
1088 "her" => array('change' => 112, 'baserank' => 143, 'refrank' => 31), " al" => array('change' => 65, 'baserank' => 200, 'refrank' => 135),
1089 " is" => array('change' => 105, 'baserank' => 107, 'refrank' => 212), "e t" => array('change' => 46, 'baserank' => 68, 'refrank' => 22),
1090 "c r" => array('change' => 300, 'baserank' => 261, 'refrank' => null), " hi" => array('change' => 45, 'baserank' => 106, 'refrank' => 61),
1091 "cia" => array('change' => 300, 'baserank' => 267, 'refrank' => null), " fr" => array('change' => 37, 'baserank' => 209, 'refrank' => 172),
1092 "ult" => array('change' => 300, 'baserank' => 96, 'refrank' => null), "e m" => array('change' => 9, 'baserank' => 128, 'refrank' => 119),
1093 "ass" => array('change' => 300, 'baserank' => 250, 'refrank' => null), "s o" => array('change' => 2, 'baserank' => 92, 'refrank' => 90),
1094 "pop" => array('change' => 300, 'baserank' => 173, 'refrank' => null), "nd " => array('change' => 2, 'baserank' => 12, 'refrank' => 10),
1095 "the" => array('change' => 0, 'baserank' => 1, 'refrank' => 1), " st" => array('change' => 197, 'baserank' => 226, 'refrank' => 29),
1096 " no" => array('change' => 130, 'baserank' => 218, 'refrank' => 88), "ast" => array('change' => 300, 'baserank' => 251, 'refrank' => null),
1097 " fi" => array('change' => 300, 'baserank' => 208, 'refrank' => null), "ess" => array('change' => 160, 'baserank' => 135, 'refrank' => 295),
1098 "gre" => array('change' => 300, 'baserank' => 40, 'refrank' => null), "h a" => array('change' => 300, 'baserank' => 142, 'refrank' => null),
1099 "duo" => array('change' => 300, 'baserank' => 287, 'refrank' => null), " so" => array('change' => 6, 'baserank' => 114, 'refrank' => 120),
1100 "es " => array('change' => 48, 'baserank' => 72, 'refrank' => 24), "for" => array('change' => 96, 'baserank' => 139, 'refrank' => 43),
1101 "gan" => array('change' => 300, 'baserank' => 140, 'refrank' => null), "per" => array('change' => 111, 'baserank' => 171, 'refrank' => 282),
1102 "thi" => array('change' => 33, 'baserank' => 191, 'refrank' => 224), " of" => array('change' => 6, 'baserank' => 5, 'refrank' => 11),
1103 " cl" => array('change' => 300, 'baserank' => 205, 'refrank' => null), " sc" => array('change' => 300, 'baserank' => 222, 'refrank' => null),
1104 "t t" => array('change' => 49, 'baserank' => 94, 'refrank' => 45), "als" => array('change' => 300, 'baserank' => 242, 'refrank' => null),
1105 "avi" => array('change' => 300, 'baserank' => 255, 'refrank' => null), "cie" => array('change' => 300, 'baserank' => 268, 'refrank' => null),
1106 " du" => array('change' => 300, 'baserank' => 101, 'refrank' => null), "pre" => array('change' => 105, 'baserank' => 174, 'refrank' => 279),
1107 "as " => array('change' => 17, 'baserank' => 25, 'refrank' => 42), "a a" => array('change' => 300, 'baserank' => 234, 'refrank' => null),
1108 "gel" => array('change' => 300, 'baserank' => 141, 'refrank' => null), "ite" => array('change' => 300, 'baserank' => 149, 'refrank' => null),
1109 "n r" => array('change' => 300, 'baserank' => 30, 'refrank' => null), "by " => array('change' => 105, 'baserank' => 121, 'refrank' => 226),
1110 "d u" => array('change' => 300, 'baserank' => 282, 'refrank' => null), "clu" => array('change' => 300, 'baserank' => 270, 'refrank' => null),
1111 " ur" => array('change' => 300, 'baserank' => 229, 'refrank' => null), "ebu" => array('change' => 300, 'baserank' => 298, 'refrank' => null),
1112 "n i" => array('change' => 300, 'baserank' => 158, 'refrank' => null), "he " => array('change' => 0, 'baserank' => 2, 'refrank' => 2),
1113 " wh" => array('change' => 195, 'baserank' => 232, 'refrank' => 37), " ph" => array('change' => 300, 'baserank' => 220, 'refrank' => null),
1116 $ranked = $this->xproxy->_arr_rank($this->xproxy->_trigram($str));
1117 $results = $this->x->detect($str);
1119 $count = count($ranked);
1122 //foreach ($this->xproxy->_lang_db['english'] as $key => $value) {
1123 foreach ($ranked as $key => $value) {
1124 if (isset($ranked[$key]) && isset($this->xproxy->_lang_db['english'][$key])) {
1125 $difference = abs($this->xproxy->_lang_db['english'][$key] - $ranked[$key]);
1130 $this->assertTrue(isset($true_differences[$key]), "'$key'");
1131 if (isset($true_differences[$key])) {
1132 $this->assertEquals($true_differences[$key]['change'], $difference, "'$key'");
1134 $sum += $difference;
1137 $this->assertEquals(300, $count);
1138 $this->assertEquals(59490, $sum);
1140 $this->assertEquals('english', key($results));
1141 $this->assertEquals(198, floor(current($results)));
1143 $this->assertEquals('italian', key($results));
1144 $this->assertEquals(228, floor(current($results)));
/**
 * Walks a French sample sentence through trigram extraction, ranking and
 * distance computation against the 'french' language database, then checks
 * the scores returned by detect().
 *
 * NOTE(review): several short lines of this method (the contents of
 * $correct_ranks, the opening of the $distances array and the
 * initialisation of $sumchange / the fallback value of $difference) are
 * not visible in this excerpt - confirm against the full file.
 */
1147 function test_french ()
1149 $this->x->setPerlCompatible();
1150 $str = "Verifions que le détecteur de langues marche";
1152 $trigrams = $this->xproxy->_trigram($str);
1153 $this->assertEquals(42, count($trigrams));
1154 // verified in Language::Guess
1156 $ranked = $this->xproxy->_arr_rank($trigrams);
1157 $this->assertEquals(0, $ranked['e l']);
1159 $correct_ranks = array(
1205 $this->assertEquals(count($correct_ranks), count($ranked), "different number of trigrams found");
// Expected per-trigram values, looked up below as $distances[$key]:
//   baserank - compared with the rank of the trigram in $ranked
//   refrank  - compared with the rank in the french database; null means
//              the trigram must not be present there
//   change   - compared with the computed $difference; every entry with a
//              null refrank carries 300 here
1208 ' de' => array('change' => 0, 'baserank' => 1, 'refrank' => 1),
1209 'éte' => array('change' => 300, 'baserank' => 41, 'refrank' => null),
1210 'dét' => array('change' => 300, 'baserank' => 12, 'refrank' => null),
1211 'fio' => array('change' => 300, 'baserank' => 18, 'refrank' => null),
1212 'de ' => array('change' => 9, 'baserank' => 11, 'refrank' => 2),
1213 'ons' => array('change' => 11, 'baserank' => 28, 'refrank' => 39),
1214 'ect' => array('change' => 300, 'baserank' => 14, 'refrank' => null),
1215 'le ' => array('change' => 19, 'baserank' => 24, 'refrank' => 5),
1216 'arc' => array('change' => 300, 'baserank' => 8, 'refrank' => null),
1217 'lan' => array('change' => 300, 'baserank' => 23, 'refrank' => null),
1218 'es ' => array('change' => 16, 'baserank' => 16, 'refrank' => 0),
1219 'mar' => array('change' => 300, 'baserank' => 25, 'refrank' => null),
1220 ' dé' => array('change' => 59, 'baserank' => 2, 'refrank' => 61),
1221 'ifi' => array('change' => 300, 'baserank' => 21, 'refrank' => null),
1222 'gue' => array('change' => 300, 'baserank' => 19, 'refrank' => null),
1223 'ur ' => array('change' => 12, 'baserank' => 39, 'refrank' => 27),
1224 'rch' => array('change' => 300, 'baserank' => 31, 'refrank' => null),
1225 'ang' => array('change' => 300, 'baserank' => 7, 'refrank' => null),
1226 'que' => array('change' => 5, 'baserank' => 29, 'refrank' => 24),
1227 'ngu' => array('change' => 300, 'baserank' => 26, 'refrank' => null),
1228 'e d' => array('change' => 2, 'baserank' => 13, 'refrank' => 15),
1229 'rif' => array('change' => 300, 'baserank' => 32, 'refrank' => null),
1230 ' ma' => array('change' => 89, 'baserank' => 5, 'refrank' => 94),
1231 'tec' => array('change' => 300, 'baserank' => 35, 'refrank' => null),
1232 'ns ' => array('change' => 6, 'baserank' => 27, 'refrank' => 21),
1233 ' la' => array('change' => 6, 'baserank' => 3, 'refrank' => 9),
1234 ' le' => array('change' => 1, 'baserank' => 4, 'refrank' => 3),
1235 'r d' => array('change' => 202, 'baserank' => 30, 'refrank' => 232),
1236 'e l' => array('change' => 14, 'baserank' => 0, 'refrank' => 14),
1237 'che' => array('change' => 300, 'baserank' => 9, 'refrank' => null),
1238 's m' => array('change' => 180, 'baserank' => 33, 'refrank' => 213),
1239 'ue ' => array('change' => 7, 'baserank' => 37, 'refrank' => 30),
1240 'ver' => array('change' => 117, 'baserank' => 40, 'refrank' => 157),
1241 'teu' => array('change' => 300, 'baserank' => 36, 'refrank' => null),
1242 'eri' => array('change' => 300, 'baserank' => 15, 'refrank' => null),
1243 'cte' => array('change' => 300, 'baserank' => 10, 'refrank' => null),
1244 'ues' => array('change' => 237, 'baserank' => 38, 'refrank' => 275),
1245 's q' => array('change' => 300, 'baserank' => 34, 'refrank' => null),
1246 'eur' => array('change' => 56, 'baserank' => 17, 'refrank' => 73),
1247 ' qu' => array('change' => 31, 'baserank' => 6, 'refrank' => 37),
1248 'he ' => array('change' => 300, 'baserank' => 20, 'refrank' => null),
1249 'ion' => array('change' => 12, 'baserank' => 22, 'refrank' => 10),
// Reference ranks for French, read straight from the detector's database.
1254 $french_ranks = $this->xproxy->_lang_db['french'];
// Recompute the distance trigram by trigram and compare every intermediate
// value with the fixture above.
1257 foreach ($ranked as $key => $value) {
1258 if (isset($french_ranks[$key])) {
1259 $difference = abs($french_ranks[$key] - $ranked[$key]);
1263 $this->assertTrue(isset($distances[$key]), $key);
1264 if (isset($distances[$key])) {
1265 $this->assertEquals($distances[$key]['baserank'], $ranked[$key], "baserank for $key");
1266 if ($distances[$key]['refrank'] === null) {
1267 $this->assertArrayNotHasKey($key, $french_ranks);
1269 $this->assertEquals($distances[$key]['refrank'], $french_ranks[$key], "refrank for $key");
1271 $this->assertEquals($distances[$key]['change'], $difference, "difference for $key");
1274 $sumchange += $difference;
// The hand-computed sum must equal what _distance() returns.
1277 $actual_result = $this->xproxy->_distance($french_ranks, $ranked);
1278 $this->assertEquals($sumchange, $actual_result);
1279 $this->assertEquals(7091, $actual_result);
1280 $this->assertEquals(168, floor($sumchange/count($trigrams)));
// The public API must report the same (floored) score for French.
1282 $final_result = $this->x->detect($str);
1283 $this->assertEquals(168, floor($final_result['french']));
1284 $this->assertEquals(211, $final_result['spanish']);
/**
 * Walks a Russian sample sentence through trigram extraction, ranking and
 * distance computation against the 'russian' language database, then checks
 * the score returned by detect().
 *
 * Each $correct_ranks entry holds the expected values for one trigram:
 *   baserank - rank of the trigram within the sample
 *   refrank  - rank within the russian database, null when it is absent
 *   change   - distance contribution: abs(refrank - baserank), or 300 when
 *              the trigram is absent from the database
 */
function test_russian()
{
    $str = 'авай проверить узнает ли наш угадатель русски язык';

    $this->x->setPerlCompatible();
    $trigrams = $this->xproxy->_trigram($str);
    $ranked = $this->xproxy->_arr_rank($trigrams);

    $correct_ranks = array(
        ' ру' => array('change' => 300, 'baserank' => 3, 'refrank' => null),
        'ай ' => array('change' => 300, 'baserank' => 10, 'refrank' => null),
        'ада' => array('change' => 300, 'baserank' => 8, 'refrank' => null),
        ' пр' => array('change' => 1, 'baserank' => 2, 'refrank' => 1),
        ' яз' => array('change' => 300, 'baserank' => 6, 'refrank' => null),
        'ить' => array('change' => 300, 'baserank' => 24, 'refrank' => null),
        ' на' => array('change' => 1, 'baserank' => 1, 'refrank' => 0),
        'зна' => array('change' => 153, 'baserank' => 20, 'refrank' => 173),
        'вай' => array('change' => 300, 'baserank' => 13, 'refrank' => null),
        'ш у' => array('change' => 300, 'baserank' => 44, 'refrank' => null),
        'ль ' => array('change' => 300, 'baserank' => 28, 'refrank' => null),
        ' ли' => array('change' => 300, 'baserank' => 0, 'refrank' => null),
        'сск' => array('change' => 300, 'baserank' => 37, 'refrank' => null),
        'ть ' => array('change' => 31, 'baserank' => 40, 'refrank' => 9),
        'ава' => array('change' => 300, 'baserank' => 7, 'refrank' => null),
        'про' => array('change' => 18, 'baserank' => 32, 'refrank' => 14),
        'гад' => array('change' => 300, 'baserank' => 15, 'refrank' => null),
        'усс' => array('change' => 300, 'baserank' => 43, 'refrank' => null),
        'ык ' => array('change' => 300, 'baserank' => 45, 'refrank' => null),
        'ель' => array('change' => 64, 'baserank' => 17, 'refrank' => 81),
        'язы' => array('change' => 300, 'baserank' => 47, 'refrank' => null),
        ' уг' => array('change' => 300, 'baserank' => 4, 'refrank' => null),
        'ате' => array('change' => 152, 'baserank' => 11, 'refrank' => 163),
        'и н' => array('change' => 63, 'baserank' => 22, 'refrank' => 85),
        'и я' => array('change' => 300, 'baserank' => 23, 'refrank' => null),
        'ает' => array('change' => 152, 'baserank' => 9, 'refrank' => 161),
        'узн' => array('change' => 300, 'baserank' => 42, 'refrank' => null),
        'ери' => array('change' => 300, 'baserank' => 18, 'refrank' => null),
        'ли ' => array('change' => 23, 'baserank' => 27, 'refrank' => 4),
        'т л' => array('change' => 300, 'baserank' => 38, 'refrank' => null),
        ' уз' => array('change' => 300, 'baserank' => 5, 'refrank' => null),
        'дат' => array('change' => 203, 'baserank' => 16, 'refrank' => 219),
        'зык' => array('change' => 300, 'baserank' => 21, 'refrank' => null),
        'ров' => array('change' => 59, 'baserank' => 34, 'refrank' => 93),
        'рит' => array('change' => 300, 'baserank' => 33, 'refrank' => null),
        'ь р' => array('change' => 300, 'baserank' => 46, 'refrank' => null),
        'ет ' => array('change' => 19, 'baserank' => 19, 'refrank' => 38),
        'ки ' => array('change' => 116, 'baserank' => 26, 'refrank' => 142),
        'рус' => array('change' => 300, 'baserank' => 35, 'refrank' => null),
        'тел' => array('change' => 16, 'baserank' => 39, 'refrank' => 23),
        'нае' => array('change' => 300, 'baserank' => 29, 'refrank' => null),
        'й п' => array('change' => 300, 'baserank' => 25, 'refrank' => null),
        'наш' => array('change' => 300, 'baserank' => 30, 'refrank' => null),
        'уга' => array('change' => 300, 'baserank' => 41, 'refrank' => null),
        'ове' => array('change' => 214, 'baserank' => 31, 'refrank' => 245),
        'ски' => array('change' => 112, 'baserank' => 36, 'refrank' => 148),
        'вер' => array('change' => 31, 'baserank' => 14, 'refrank' => 45),
        'аш ' => array('change' => 300, 'baserank' => 12, 'refrank' => null),
    );

    $this->assertEquals(48, count($ranked));

    // Reference ranks for Russian, read straight from the detector's database.
    $russian = $this->xproxy->_lang_db['russian'];

    $sumchange = 0;
    foreach ($ranked as $key => $value) {
        if (isset($russian[$key])) {
            $difference = abs($russian[$key] - $ranked[$key]);
        } else {
            // trigram unknown to the database: maximum penalty, matching
            // the 'change' value of every null-refrank fixture entry
            $difference = 300;
        }

        // The trigram itself is the failure message; it must be the second
        // argument of assertTrue(), not a second argument of isset().
        $this->assertTrue(isset($correct_ranks[$key]), $key);
        if (isset($correct_ranks[$key])) {
            $this->assertEquals($correct_ranks[$key]['baserank'], $ranked[$key], "baserank for $key");
            if ($correct_ranks[$key]['refrank'] === null) {
                $this->assertArrayNotHasKey($key, $russian);
            } else {
                $this->assertEquals($correct_ranks[$key]['refrank'], $russian[$key], "refrank for $key");
            }
            $this->assertEquals($correct_ranks[$key]['change'], $difference, "difference for $key");
        }

        $sumchange += $difference;
    }

    // The hand-computed sum must equal what _distance() returns.
    $actual_result = $this->xproxy->_distance($russian, $ranked);
    $this->assertEquals($sumchange, $actual_result);
    $this->assertEquals(10428, $actual_result);
    $this->assertEquals(217, floor($sumchange/count($trigrams)));

    // The public API must report the same (floored) score for Russian.
    $final_result = $this->x->detect($str);
    $this->assertEquals(217, floor($final_result['russian']));
}
/**
 * Ranks the trigrams of a sample string with _arr_rank() and checks that
 * the trigram 's i' receives rank 0.
 *
 * NOTE(review): the assignment of $str is on a line not visible in this
 * excerpt - confirm the sample text against the full file.
 */
1381 function test_ranker ()
1385 $result = $this->xproxy->_arr_rank($this->xproxy->_trigram($str));
1387 $this->assertEquals(0, $result['s i']);
/**
 * getLanguageCount() must agree with the size of getLanguages(), and every
 * listed language must be reported as existing.
 */
function test_count()
{
    $names = $this->x->getLanguages();
    $reported = $this->x->getLanguageCount();

    $this->assertEquals(count($names), $reported);

    foreach ($names as $name) {
        $known = $this->x->languageExists($name);
        $this->assertTrue($known, $name);
    }
}
/**
 * In name mode 2 languageExists() accepts the two-letter code 'en' and
 * rejects the full name 'english'.
 */
function testLanguageExistsNameMode2()
{
    $detector = $this->x;
    $detector->setNameMode(2);

    $byCode = $detector->languageExists('en');
    $this->assertTrue($byCode);

    $byName = $detector->languageExists('english');
    $this->assertFalse($byName);
}
/**
 * In name mode 2 languageExists() given an array is true only when every
 * listed code is known.
 */
function testLanguageExistsArrayNameMode2()
{
    $detector = $this->x;
    $detector->setNameMode(2);

    $allKnown = array('en', 'de');
    $this->assertTrue($detector->languageExists($allKnown));

    $oneUnknown = array('en', 'doesnotexist');
    $this->assertFalse($detector->languageExists($oneUnknown));
}
/**
 * Passing a float to languageExists() must raise the library exception.
 *
 * @expectedException Text_LanguageDetect_Exception
 * @expectedExceptionMessage Unsupported parameter type passed to languageExists()
 */
function testLanguageExistsUnsupportedType()
{
    $unsupported = 1.23;
    $this->x->languageExists($unsupported);
}
/**
 * getLanguages() must list at least 'english' and 'swedish' by full name.
 */
function testGetLanguages()
{
    $available = $this->x->getLanguages();

    foreach (array('english', 'swedish') as $expected) {
        $this->assertContains($expected, $available);
    }
}
/**
 * In name mode 2 getLanguages() must list the two-letter codes 'en' and 'sv'.
 */
function testGetLanguagesNameMode2()
{
    $this->x->setNameMode(2);
    $available = $this->x->getLanguages();

    foreach (array('en', 'sv') as $expected) {
        $this->assertContains($expected, $available);
    }
}
/**
 * detect() must return an array with more than five scores whose first
 * entry is 'german' for a German sample sentence.
 */
function testDetect()
{
    $scores = $this->x->detect('Das ist ein kleiner Text für euch alle');
    $this->assertInternalType('array', $scores);
    $this->assertGreaterThan(5, count($scores));

    // each() is deprecated since PHP 7.2 and removed in PHP 8.0;
    // reset() + key() reads the first key without it.
    reset($scores);
    $this->assertEquals('german', key($scores), 'text is german');
}
/**
 * In name mode 2 the first key returned by detect() for a German sample
 * sentence must be the two-letter code 'de'.
 */
function testDetectNameMode2()
{
    $this->x->setNameMode(2);
    $scores = $this->x->detect('Das ist ein kleiner Text für euch alle');

    // each() is deprecated since PHP 7.2 and removed in PHP 8.0;
    // reset() + key() reads the first key without it.
    reset($scores);
    $this->assertEquals('de', key($scores), 'text is german');
}
/**
 * Same as testDetectNameMode2, but passes 1 as the second argument of
 * detect(); the first key must still be 'de'.
 */
function testDetectNameMode2Limit()
{
    $this->x->setNameMode(2);
    $scores = $this->x->detect('Das ist ein kleiner Text für euch alle', 1);

    // each() is deprecated since PHP 7.2 and removed in PHP 8.0;
    // reset() + key() reads the first key without it.
    reset($scores);
    $this->assertEquals('de', key($scores), 'text is german');
}
/**
 * detectSimple() must return the string 'german' for a German sample
 * sentence.
 */
function testDetectSimple()
{
    $sample = 'Das ist ein kleiner Text für euch alle';
    $detected = $this->x->detectSimple($sample);

    $this->assertInternalType('string', $detected);
    $this->assertEquals('german', $detected, 'text is german');
}
/**
 * In name mode 2 detectSimple() must return the string 'de' for a German
 * sample sentence.
 */
function testDetectSimpleNameMode2()
{
    $this->x->setNameMode(2);

    $sample = 'Das ist ein kleiner Text für euch alle';
    $detected = $this->x->detectSimple($sample);

    $this->assertInternalType('string', $detected);
    $this->assertEquals('de', $detected, 'text is german');
}
/**
 * Calls omitLanguages('english', ...) twice, once with true and once with
 * false as the second argument, and then runs detectSimple() on a German
 * sample.
 *
 * NOTE(review): the statement that wraps the detectSimple() call (most
 * likely an assertion on its return value) is on lines not visible in this
 * excerpt - confirm against the full file.
 */
1483 function testDetectSimpleNoLanguages()
1485 $this->x->omitLanguages('english', true);
1486 $this->x->omitLanguages('english', false);
1488 $this->x->detectSimple('Das ist ein kleiner Text für euch alle')
/**
 * Exercises languageSimilarity() with two, one and zero arguments.
 *
 * With perl compatibility switched on a lower score means "more similar"
 * and the score is expected within 0..300; with it switched off a higher
 * score means "more similar" and the score is expected within 0..1.
 */
function testLanguageSimilarity()
{
    $this->x->setPerlCompatible(true);
    $eng_dan = $this->x->languageSimilarity('english', 'danish');
    $nor_dan = $this->x->languageSimilarity('norwegian', 'danish');
    $swe_dan = $this->x->languageSimilarity('swedish', 'danish');

    // remember, lower means more similar
    $this->assertGreaterThan($nor_dan, $eng_dan); // english is less similar to danish than norwegian is
    $this->assertGreaterThan($swe_dan, $eng_dan); // english is less similar to danish than swedish is
    $this->assertLessThan($swe_dan, $nor_dan); // norwegian is more similar to danish than swedish

    // test the range of the results
    $this->assertLessThanOrEqual(300, $eng_dan);
    $this->assertGreaterThanOrEqual(0, $eng_dan);

    // now test it with perl compatibility switched off
    $this->x->setPerlCompatible(false);

    $eng_dan = $this->x->languageSimilarity('english', 'danish');
    $nor_dan = $this->x->languageSimilarity('norwegian', 'danish');
    $swe_dan = $this->x->languageSimilarity('swedish', 'danish');

    // now higher is more similar
    $this->assertLessThan($nor_dan, $eng_dan);
    $this->assertLessThan($swe_dan, $eng_dan);
    $this->assertGreaterThan($swe_dan, $nor_dan);

    $this->assertLessThanOrEqual(1, $eng_dan);
    $this->assertGreaterThanOrEqual(0, $eng_dan);

    $this->x->setPerlCompatible(true);

    // one argument: similarity of english to every other language
    $eng_all = $this->x->languageSimilarity('english');
    $this->assertEquals($this->x->getLanguageCount() - 1, count($eng_all));
    $this->assertFalse(isset($eng_all['english']));

    $this->assertLessThan($eng_all['turkish'], $eng_all['italian']);
    $this->assertLessThan($eng_all['kyrgyz'], $eng_all['french']);

    // no argument: full matrix, without self-comparisons
    $all = $this->x->languageSimilarity();
    $this->assertFalse(isset($all['english']['english']));
    $this->assertLessThan($all['french']['mongolian'], $all['french']['spanish']);
    $this->assertLessThan($all['hindi']['finnish'], $all['spanish']['latin']);
    $this->assertLessThan($all['russian']['english'], $all['russian']['uzbek']);
}
/**
 * languageSimilarity() must also accept two-letter codes in name mode 2.
 *
 * NOTE(review): 'dk' is used as the code for Danish here; ISO 639-1 lists
 * Danish as 'da' - confirm against the library's code table.
 */
function testLanguageSimilarityNameMode2()
{
    $detector = $this->x;
    $detector->setNameMode(2);
    $detector->setPerlCompatible(true);

    $englishVsDanish = $detector->languageSimilarity('en', 'dk');
    $norwegianVsDanish = $detector->languageSimilarity('no', 'dk');

    // lower means more similar, so norwegian has to score below english
    $this->assertTrue($norwegianVsDanish < $englishVsDanish);
}
/**
 * languageSimilarity() returns null when its only argument is an unknown
 * language.
 */
function testLanguageSimilarityUnknownLanguage()
{
    $similarity = $this->x->languageSimilarity('doesnotexist');
    $this->assertNull($similarity);
}
/**
 * languageSimilarity() returns null when the second language is unknown.
 */
function testLanguageSimilarityUnknownLanguage2()
{
    $similarity = $this->x->languageSimilarity('english', 'doesnotexist');
    $this->assertNull($similarity);
}
/**
 * Runs detectConfidence() on an English sentence with perl compatibility
 * off and then on, and checks the reported language plus the value ranges
 * of similarity and confidence in each mode.
 *
 * NOTE(review): $language, $similarity and $confidence are created by
 * extract() in the second half; the line that defines them for the first
 * half is not visible in this excerpt - confirm against the full file.
 */
1561 function test_compatibility ()
1563 $str = "I am the very model of a modern major general.";
// perl compatibility off: similarity is asserted to lie within 0..1
1566 $this->x->setPerlCompatible(false);
1567 $result = $this->x->detectConfidence($str);
1569 $this->assertTrue(!is_null($result));
1570 $this->assertTrue(is_array($result));
1572 $this->assertEquals('english', $language);
1573 $this->assertTrue($similarity <= 1 && $similarity >= 0, $similarity);
1574 $this->assertTrue($confidence <= 1 && $confidence >= 0, $confidence);
// perl compatibility on: similarity is asserted to lie within 1..300
1576 $this->x->setPerlCompatible(true);
1577 $result = $this->x->detectConfidence($str);
// EXTR_OVERWRITE replaces any variables of the same name already in scope
1578 extract($result, EXTR_OVERWRITE);
1580 $this->assertEquals('english', $language);
1582 // technically the lowest possible score is 0 but it's extremely unlikely to hit that
1583 $this->assertTrue($similarity <= 300 && $similarity >= 1, $similarity);
1584 $this->assertTrue($confidence <= 1 && $confidence >= 0, $confidence);
/**
 * detectConfidence() returns null for an empty string.
 */
function testDetectConfidenceNoText()
{
    $confidence = $this->x->detectConfidence('');
    $this->assertNull($confidence);
}
/**
 * detectSimple() recognises an English paragraph on a fresh detector and
 * returns null once every known language has been omitted.
 */
function test_omit_error()
{
    $sample = 'On January 29, 1737, Thomas Paine was born in Thetford, England. His father, a corseter, had grand visions for his son, but by the age of 12, Thomas had failed out of school. The young Paine began apprenticing for his father, but again, he failed.';

    $detector = new Text_LanguageDetect();

    $before = $detector->detectSimple($sample);
    $this->assertEquals('english', $before);

    // with every language omitted there is nothing left to match against
    $everything = $detector->getLanguages();
    $detector->omitLanguages($everything);

    $after = $detector->detectSimple($sample);

    $this->assertNull($after, gettype($after));
}
/**
 * Reads an upper-case and a lower-case Cyrillic alphabet string character
 * by character through the non-public _next_char() method and checks that
 * both yield the same characters.
 *
 * NOTE(review): the initialisation of $i, $j and $new_u and the tail of
 * the while loop are on lines not visible in this excerpt - confirm
 * against the full file.
 */
1610 function test_cyrillic ()
1612 // tests whether the cyrillic lower-casing works
1614 $uppercased = 'А Б В Г Д Е Ж З И Й К Л М Н О П'
1615 . 'Р С Т У Ф Х Ц Ч Ш Щ Ъ Ы Ь Э Ю Я';
1617 $lowercased = 'а б в г д е ж з и й к л м н о п'
1618 . 'р с т у ф х ц ч ш щ ъ ы ь э ю я';
// strlen() counts bytes, so this compares the encoded byte lengths
1620 $this->assertEquals(strlen($uppercased), strlen($lowercased));
// _next_char() is not public; reflection makes it callable from the test
1625 $rm = new ReflectionMethod('Text_LanguageDetect', '_next_char');
1626 $rm->setAccessible(true);
1627 while ($i < strlen($uppercased)) {
// the offsets are passed by reference (&$i / &$j)
1628 $u = $rm->invokeArgs($this->x, [$uppercased, &$i, true]);
1629 $l = $rm->invokeArgs($this->x, [$lowercased, &$j, true]);
1630 $this->assertEquals($u, $l);
// both offsets must have reached the end of their (equally long) strings
1635 $this->assertEquals($i, $j);
1636 $this->assertEquals($i, strlen($lowercased));
// cross-check against mbstring when the extension is available
1637 if (function_exists('mb_strtolower')) {
1638 $this->assertEquals($new_u, mb_strtolower($uppercased, 'UTF-8'));
/**
 * Checks detectUnicodeBlocks() on a string that mixes several scripts and
 * unicodeBlockName() on single characters from a number of Unicode ranges.
 *
 * NOTE(review): most of the heredoc body and the statements that capture
 * the printed result into the output buffer are on lines not visible in
 * this excerpt - confirm against the full file.
 */
1642 function test_block_detection()
1644 $exp_output = <<<EOF
1648 [CJK Unified Ideographs] => 2
1650 [Latin-1 Supplement] => 4
1653 $teststr = 'lsdkfj あ 葉 叶 slskdfj s Åj;sdklf ÿjs;kdjåf î';
1654 $result = $this->x->detectUnicodeBlocks($teststr, false);
// compare the text captured from the output buffer with the heredoc above
1659 $str_result = ob_get_contents();
1661 $this->assertEquals(trim($exp_output), trim($str_result));
1663 // test whether skipping the spaces reduces the basic latin count
1664 $result2 = $this->x->detectUnicodeBlocks($teststr, true);
1665 $this->assertTrue($result2['Basic Latin'] < $result['Basic Latin']);
1667 $result3 = $this->x->unicodeBlockName('и');
1668 $this->assertEquals('Cyrillic', $result3);
1670 $this->assertEquals('Basic Latin', $this->x->unicodeBlockName('A'));
1672 // see what happens when you try an unassigned range
1673 $utf8 = $this->code2utf(0x0800);
// assertEquals compares loosely, so any value equal to false passes here
1675 $this->assertEquals(false, $this->x->unicodeBlockName($utf8));
1677 // try unicode vals in several different ranges
// expected block name => one code point inside that block
1678 $unicode['Supplementary Private Use Area-A'] = 0xF0001;
1679 $unicode['Supplementary Private Use Area-B'] = 0x100001;
1680 $unicode['CJK Unified Ideographs Extension B'] = 0x20001;
1681 $unicode['Ugaritic'] = 0x10381;
1682 $unicode['Gothic'] = 0x10331;
1683 $unicode['Low Surrogates'] = 0xDC01;
1684 $unicode['CJK Unified Ideographs'] = 0x4E00;
1685 $unicode['Glagolitic'] = 0x2C00;
1686 $unicode['Latin Extended Additional'] = 0x1EFF;
1687 $unicode['Devanagari'] = 0x0900;
1688 $unicode['Hebrew'] = 0x0590;
1689 $unicode['Latin Extended-B'] = 0x024F;
1690 $unicode['Latin-1 Supplement'] = 0x00FF;
1691 $unicode['Basic Latin'] = 0x007F;
// encode each code point with code2utf() and look its block name up
1693 foreach ($unicode as $range => $codepoint) {
1694 $result = $this->x->unicodeBlockName($this->code2utf($codepoint));
1695 $this->assertEquals($range, $result, $codepoint);
/**
 * unicodeBlockName() must raise the library exception when handed a
 * multi-character string.
 *
 * @expectedException Text_LanguageDetect_Exception
 * @expectedExceptionMessage Pass a single char only to this method
 */
function testUnicodeBlockNameParamString()
{
    $tooLong = 'foo bar baz';
    $this->x->unicodeBlockName($tooLong);
}
/**
 * unicodeBlockName() must raise the library exception when handed a float.
 *
 * @expectedException Text_LanguageDetect_Exception
 * @expectedExceptionMessage Input must be of type string or int
 */
function testUnicodeBlockNameUnsupportedParamType()
{
    $unsupported = 1.23;
    $this->x->unicodeBlockName($unsupported);
}
// Test helper: turns a numeric Unicode code point ($num) into the byte
// string of its UTF-8 encoding, built with chr().
//
// NOTE(review): the first branch of the if-chain and whatever follows the
// last elseif are on lines not visible in this excerpt - confirm the
// handling of values below 128 and of values >= 2097152 in the full file.
1719 // found in http://www.php.net/manual/en/function.utf8-encode.php#49336
1720 function code2utf($num)
// below 2048 (0x800): two bytes, lead byte offset 192 (0xC0)
1725 } elseif ($num < 2048) {
1726 return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
// below 65536 (0x10000): three bytes, lead byte offset 224 (0xE0)
1728 } elseif ($num < 65536) {
1729 return chr(($num >> 12) + 224) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
// below 2097152 (0x200000): four bytes, lead byte offset 240 (0xF0)
1731 } elseif ($num < 2097152) {
1732 return chr(($num >> 18) + 240) . chr((($num >> 12) & 63) + 128) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
/**
 * utf8strlen() must report 20 for 'Iñtërnâtiônàlizætiøn' and 3 for a
 * second sample string.
 *
 * NOTE(review): the assignment of the second sample to $str is on a line
 * not visible in this excerpt - confirm against the full file.
 */
1738 function test_utf8len()
1740 $str = 'Iñtërnâtiônàlizætiøn';
// utf8_decode($str) is only the failure message shown when the count is off
1741 $this->assertEquals(20, $this->x->utf8strlen($str), utf8_decode($str));
1744 $this->assertEquals(3, $this->x->utf8strlen($str), utf8_decode($str));
/**
 * Feeds single UTF-8 characters to the non-public _utf8char2unicode()
 * method (through the proxy) and compares the result with the expected
 * Unicode code point.
 *
 * NOTE(review): the lines between the $chars entries are not visible in
 * this excerpt (possibly blank lines or short comments) - confirm against
 * the full file.
 */
1747 function test_unicode()
1749 // test whether it can get the right unicode values for utf8 chars
// $chars maps a UTF-8 character to the code point expected for it
1751 $chars['ת'] = 0x5EA;
1753 $chars['ç'] = 0x00E7;
1755 $chars['a'] = 0x0061;
1757 $chars['Φ'] = 0x03A6;
1759 $chars['И'] = 0x0418;
1761 $chars['ڰ'] = 0x6B0;
1763 $chars['Ụ'] = 0x1EE4;
1765 $chars['놔'] = 0xB194;
1767 $chars['遮'] = 0x906E;
1769 $chars['怀'] = 0x6000;
1771 $chars['ฤ'] = 0x0E24;
1773 $chars['Я'] = 0x042F;
1775 $chars['ü'] = 0x00FC;
1777 $chars['Đ'] = 0x0110;
1779 $chars['א'] = 0x05D0;
// the character itself is passed as the failure message
1782 foreach ($chars as $utf8 => $unicode) {
1783 $this->assertEquals($unicode, $this->xproxy->_utf8char2unicode($utf8), $utf8);
/**
 * detectConfidence() must give the same result for plain English text
 * whether useUnicodeBlocks() is switched on or off.
 */
function test_unicode_off()
{
    // see what happens when you turn the unicode setting off
    $detector = new Text_LanguageDetect();
    $sample = 'This is a delightful sample of English text';

    $detector->useUnicodeBlocks(true);
    $withBlocks = $detector->detectConfidence($sample);

    $detector->useUnicodeBlocks(false);
    $withoutBlocks = $detector->detectConfidence($sample);

    $this->assertEquals($withBlocks, $withoutBlocks);

    // note this test doesn't tell if unicode narrowing was actually used or not
}
/**
 * End-to-end detection check: each sample sentence below is keyed by a
 * language name, and detectSimple() on the sentence is expected to return
 * that key. Before detecting, every key is checked against getLanguages().
 */
1808 function test_detection()
1811 // WARNING: the below lines may make your terminal go ape! be warned
1835 // test strings from the test module used by perl's Language::Guess
1838 "english" => "This is a test of the language checker",
1839 "french" => "Verifions que le détecteur de langues marche",
1840 "polish" => "Sprawdźmy, czy odgadywacz języków pracuje",
1841 "russian" => "Давай проверим узнает ли нашь угадыватель русский язык",
1842 "spanish" => "La respuesta de los acreedores a la oferta argentina para salir del default no ha sido muy positiv",
1843 "romanian" => "în acest sens aparţinînd Adunării Generale a organizaţiei, în ciuda faptului că mai multe dintre solicitările organizaţiei privind organizarea scrutinului nu au fost soluţionate",
1844 "albanian" => "kaluan ditën e fundit të fushatës në shtetet kryesore për të siguruar sa më shumë votues.",
1845 "danish" => "På denne side bringer vi billeder fra de mange forskellige forberedelser til arrangementet, efterhånden som vi får dem ",
1846 "swedish" => "Vi säger att Frälsningen är en gåva till alla, fritt och för intet. Men som vi nämnt så finns det två villkor som måste",
1847 "norwegian" => "Nominasjonskomiteen i Akershus KrF har skviset ut Einar Holstad fra stortingslisten. Ytre Enebakk-mannen har plass p Stortinget s lenge Valgerd Svarstad Haugland sitter i",
1848 "finnish" => "on julkishallinnon verkkopalveluiden yhteinen osoite. Kansalaisten arkielämää helpottavaa tietoa on koottu eri aihealueisiin",
1849 "estonian" => "Ennetamaks reisil ebameeldivaid vahejuhtumeid vii end kurssi reisidokumentide ja viisade reeglitega ning muu praktilise informatsiooniga",
1850 "hungarian" => "Hiába jön létre az önkéntes magyar haderő, hiába nem lesz többé bevonulás, változatlanul fennmarad a hadkötelezettség intézménye",
1851 "uzbek" => "милиция ва уч солиқ идораси ходимлари яраланган. Шаҳарда хавфсизлик чоралари кучайтирилган.",
1854 "czech" => "Francouzský ministr financí zmírnil výhrady vůči nízkým firemním daním v nových členských státech EU",
1855 "dutch" => "Die kritiek was volgens hem bitter hard nodig, omdat Nederland binnen een paar jaar in een soort Belfast zou dreigen te nderen",
1857 "croatian" => "biće prilično izjednačena, sugerišu najnovije ankete. Oba kandidata tvrde da su sposobni da dobiju rat protiv terorizma",
// NOTE(review): second romanian key. If it sits in the same array literal as
// the romanian entry above, PHP keeps only this later value, and the two
// sentences differ (privind above, ivind here), so the earlier sentence would
// never be tested. Confirm and drop or rename one of them.
1859 "romanian" => "în acest sens aparţinînd Adunării Generale a organizaţiei, în ciuda faptului că mai multe dintre solicitările organizaţiei ivind organizarea scrutinului nu au fost soluţionate",
1861 "turkish" => "yakın tarihin en çekişmeli başkanlık seçiminde oy verme işlemi sürerken, katılımda rekor bekleniyor.",
1863 "kyrgyz" => "көрбөгөндөй элдик толкундоо болуп, Кокон шаарынын көчөлөрүндө бир нече миң киши нааразылык билдирди.",
// NOTE(review): second albanian key with the same sentence as the albanian
// entry above; looks redundant. Confirm before removing.
1866 "albanian" => "kaluan ditën e fundit të fushatës në shtetet kryesore për të siguruar sa më shumë votues.",
1869 "azeri" => "Daxil olan xəbərlərdə deyilir ki, 6 nəfər Bağdadın mərkəzində yerləşən Təhsil Nazirliyinin binası yaxınlığında baş vermiş partlayış zamanı həlak olub.",
1872 "macedonian" => "на јавното мислење покажуваат дека трката е толку тесна, што се очекува двајцата соперници да ја прекршат традицијата и да се појават и на самиот изборен ден.",
1876 "kazakh" => "Сайлау нәтижесінде дауыстардың басым бөлігін ел премьер министрі Виктор Янукович пен оның қарсыласы, оппозиция жетекшісі Виктор Ющенко алды.",
1879 "bulgarian" => " е готов да даде гаранции, че няма да прави ядрено оръжие, ако му се разреши мирна атомна програма",
1882 "arabic" => " ملايين الناخبين الأمريكيين يدلون بأصواتهم وسط إقبال قياسي على انتخابات هي الأشد تنافسا منذ عقود",
1910 // should be safe at this point
// Sanity check first: every sample key must be one of the names returned by
// getLanguages(); the failure message names the missing key.
1913 $languages = $this->x->getLanguages();
1914 foreach (array_keys($testarr) as $key) {
1915 $this->assertTrue(in_array($key, $languages), "$key was not in known languages");
// Actual detection: the sentence must be detected as the language named by its key.
1918 foreach ($testarr as $key=>$value) {
1919 $this->assertEquals($key, $this->x->detectSimple($value));
/**
 * Asserts the result of _convertFromNameMode('english'), called through
 * $this->xproxy (the PrivProxy built around the detector in setup).
 * This test does not call setNameMode() itself; presumably name mode 0 is
 * the default, as the test name suggests (TODO confirm).
 */
1924 public function test_convertFromNameMode0()
1926 $this->assertEquals(
1928 $this->xproxy->_convertFromNameMode('english')
/**
 * With name mode 2 set on the detector, asserts the result of
 * _convertFromNameMode() for the single string 'en'.
 */
1932 public function test_convertFromNameMode2String()
1934 $this->x->setNameMode(2);
1935 $this->assertEquals(
1937 $this->xproxy->_convertFromNameMode('en')
/**
 * With name mode 3 set on the detector, asserts the result of
 * _convertFromNameMode() for the single string 'eng'.
 */
1941 public function test_convertFromNameMode3String()
1943 $this->x->setNameMode(3);
1944 $this->assertEquals(
1946 $this->xproxy->_convertFromNameMode('eng')
/**
 * With name mode 2 set, a list of codes passed to _convertFromNameMode()
 * is expected to come back with its values converted:
 * ('en', 'de') becomes ('english', 'german').
 */
1950 public function test_convertFromNameMode2ArrayVal()
1952 $this->x->setNameMode(2);
1953 $this->assertEquals(
1954 array('english', 'german'),
1955 $this->xproxy->_convertFromNameMode(array('en', 'de'))
/**
 * With name mode 2 set, an array keyed by codes passed to
 * _convertFromNameMode() is expected to come back with its keys converted
 * ('en' to 'english', 'de' to 'german') and its values ('foo', 'test') unchanged.
 */
1959 public function test_convertFromNameMode2ArrayKey()
1961 $this->x->setNameMode(2);
1962 $this->assertEquals(
1963 array('english' => 'foo', 'german' => 'test'),
1964 $this->xproxy->_convertFromNameMode(
1965 array('en' => 'foo', 'de' => 'test'),
/**
 * With name mode 3 set, a list of codes passed to _convertFromNameMode()
 * is expected to come back with its values converted:
 * ('eng', 'deu') becomes ('english', 'german').
 */
1971 public function test_convertFromNameMode3ArrayVal()
1973 $this->x->setNameMode(3);
1974 $this->assertEquals(
1975 array('english', 'german'),
1976 $this->xproxy->_convertFromNameMode(array('eng', 'deu'))
/**
 * With name mode 3 set, an array keyed by codes passed to
 * _convertFromNameMode() is expected to come back with its keys converted
 * ('eng' to 'english', 'deu' to 'german') and its values ('foo', 'test') unchanged.
 */
1980 public function test_convertFromNameMode3ArrayKey()
1982 $this->x->setNameMode(3);
1983 $this->assertEquals(
1984 array('english' => 'foo', 'german' => 'test'),
1985 $this->xproxy->_convertFromNameMode(
1986 array('eng' => 'foo', 'deu' => 'test'),
/**
 * Asserts the result of _convertToNameMode('english'), called through
 * $this->xproxy (the PrivProxy built around the detector in setup).
 * This test does not call setNameMode() itself; presumably name mode 0 is
 * the default, as the test name suggests (TODO confirm).
 */
1992 public function test_convertToNameMode0()
1994 $this->assertEquals(
1996 $this->xproxy->_convertToNameMode('english')
/**
 * With name mode 2 set on the detector, asserts the result of
 * _convertToNameMode() for the single string 'english'.
 */
2000 public function test_convertToNameMode2String()
2002 $this->x->setNameMode(2);
2003 $this->assertEquals(
2005 $this->xproxy->_convertToNameMode('english')
/**
 * With name mode 3 set on the detector, asserts the result of
 * _convertToNameMode() for the single string 'english'.
 */
2009 public function test_convertToNameMode3String()
2011 $this->x->setNameMode(3);
2012 $this->assertEquals(
2014 $this->xproxy->_convertToNameMode('english')
/**
 * With name mode 2 set on the detector, asserts the result of
 * _convertToNameMode() for the list ('english', 'german').
 */
2018 public function test_convertToNameMode2ArrayVal()
2020 $this->x->setNameMode(2);
2021 $this->assertEquals(
2023 $this->xproxy->_convertToNameMode(array('english', 'german'))
/**
 * With name mode 2 set, an array keyed by language names passed to
 * _convertToNameMode() is expected to come back with its keys converted
 * ('english' to 'en', 'german' to 'de') and its values ('foo', 'test') unchanged.
 */
2027 public function test_convertToNameMode2ArrayKey()
2029 $this->x->setNameMode(2);
2030 $this->assertEquals(
2031 array('en' => 'foo', 'de' => 'test'),
2032 $this->xproxy->_convertToNameMode(
2033 array('english' => 'foo', 'german' => 'test'),
/**
 * With name mode 3 set, a list of language names passed to
 * _convertToNameMode() is expected to come back with its values converted:
 * ('english', 'german') becomes ('eng', 'deu').
 */
2039 public function test_convertToNameMode3ArrayVal()
2041 $this->x->setNameMode(3);
2042 $this->assertEquals(
2043 array('eng', 'deu'),
2044 $this->xproxy->_convertToNameMode(array('english', 'german'))
/**
 * With name mode 3 set, an array keyed by language names passed to
 * _convertToNameMode() is expected to come back with its keys converted
 * ('english' to 'eng', 'german' to 'deu') and its values ('foo', 'test') unchanged.
 */
2048 public function test_convertToNameMode3ArrayKey()
2050 $this->x->setNameMode(3);
2051 $this->assertEquals(
2052 array('eng' => 'foo', 'deu' => 'test'),
2053 $this->xproxy->_convertToNameMode(
2054 array('english' => 'foo', 'german' => 'test'),