X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=application%2Fhub%2Fmain%2Ffilter%2Ftask%2Fcrawler%2Fclass_CrawlerTaskHandlerInitializerFilter.php;fp=application%2Fhub%2Fmain%2Ffilter%2Ftask%2Fcrawler%2Fclass_CrawlerTaskHandlerInitializerFilter.php;h=cb2a1383c6e7b86cfea2e35ddf8d132a571fe1a7;hb=4fa56f38f1a42be96a93cbffadda4d7eb31851e1;hp=43a7be491f7b6c994a411a0216c65e3f296d5cfd;hpb=a5f1bd1c1a8fa9b506492e92b910947c21891c89;p=hub.git diff --git a/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php b/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php index 43a7be491..cb2a1383c 100644 --- a/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php +++ b/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php @@ -81,6 +81,26 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_job_publisher_task_class'); $handlerInstance->registerTask('crawler_remote_job_publisher', $taskInstance); + // 5) MIME sniffer + $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_mime_sniffer_task_class'); + $handlerInstance->registerTask('crawler_mime_sniffer', $taskInstance); + + // 6) Document parser (converts document to meta format) + $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_document_parser_task_class'); + $handlerInstance->registerTask('crawler_document_parser', $taskInstance); + + // 7) Document structure analyzer + $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_structure_analyzer_task_class'); + $handlerInstance->registerTask('crawler_structure_analyzer', $taskInstance); + + // 8) Snippet extractor + $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_snippet_extractor_task_class'); + $handlerInstance->registerTask('crawler_snippet_extractor', $taskInstance); + + // 9) Node ping + $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_ping_task_class'); + $handlerInstance->registerTask('crawler_ping', $taskInstance); + // Put the task handler in registry Registry::getRegistry()->addInstance('task_handler', $handlerInstance); }