# robots.txt
#
# This file is to prevent the crawling and indexing of certain parts
# of your site by web crawlers and spiders run by sites like Yahoo!
# and Google. By telling these "robots" where not to go on your site,
# you save bandwidth and server resources.
#
# This file will be ignored unless it is at the root of your host:
# Used:    https://www.teflexpress.co.uk/robots.txt
# Ignored: https://www.teflexpress.co.uk/site/robots.txt
#
# Format notes:
#   * One directive per line. A "#" starts a comment that runs to the end
#     of the line, so directives must never share a line with a comment
#     that precedes them.
#   * Do not insert blank lines inside a User-agent group; some older
#     parsers treat a blank line as the end of the group.
#
# NOTE(review): this file is publicly readable. Several entries below look
# like admin, database or backup locations; listing them here advertises
# them and does not protect them. Confirm they are access-controlled at
# the server level.

# Website Sitemap
Sitemap: https://www.teflexpress.co.uk/sitemap

# Crawlers Setup
User-agent: *
Disallow: /CVS
Disallow: /*.svn$
Disallow: /*.idea$
Disallow: /*.sql$
Disallow: /*.tgz$
## Do not crawl links with session location
Disallow: /*?location=
Disallow: /uploads/
Disallow: /userImages/
Disallow: /images/
Disallow: /images1/
Disallow: /images_new/
Disallow: /includes/
Disallow: /includes_new/
Disallow: /is-integration/
Disallow: /js/
Disallow: /noname_config/
Disallow: /paypal/
Disallow: /pdf-cert/
Disallow: /promos/
Disallow: /promotions/
Disallow: /revealpopup/
Disallow: /taskImages/
Disallow: /.well-known/
Disallow: /__jrjlog/
Disallow: /TeflCambridge_cert/
Disallow: /Scripts/
Disallow: /admin/
Disallow: /cambridgeAdminMyPHP/
Disallow: /cgi-bin/
Disallow: /config/
Disallow: /courseIcons/
Disallow: /css/
Disallow: /css1/
Disallow: /data/
Disallow: /demo/
Disallow: /designtemplate/
Disallow: /dev/
Disallow: /font/
Disallow: /fpdf/
Disallow: /helps/
Disallow: /EF-Reports/
Disallow: /Mail-1.2.0/
Disallow: /Newsletters/
Disallow: /PHPExcel/
Disallow: /TEFLNCM_new/
Disallow: /TEFL_DB/
Disallow: /accredited-tefl-courses/
Disallow: /admin-old/
Disallow: /blog/
Disallow: /blog-old/
Disallow: /bugtracker/
Disallow: /captcha/
Disallow: /companyLogo/
Disallow: /controllers/
Disallow: /courseIcons/
Disallow: /course_images/
Disallow: /cron-jobs/
Disallow: /cspu/
Disallow: /db-restore-lv/
Disallow: /dump/
Disallow: /eleexpres/
Disallow: /email_signature/
Disallow: /email_template/
Disallow: /flash/
Disallow: /fpdf/
Disallow: /htm5videocompatibility/
Disallow: /i-to-itesol.com/
Disallow: /is-integration-te/
Disallow: /jplayer/
Disallow: /js1/
Disallow: /landing.teflexpress.co.uk/
Disallow: /libs/
Disallow: /mailfiles/
Disallow: /pdf-cert/
Disallow: /pdf-transcript/
Disallow: /pdfs/
Disallow: /picture_library/
Disallow: /player/
Disallow: /plesk-stat/
Disallow: /podcast/
Disallow: /practicum-integrated-course/
Disallow: /prettyphoto/
Disallow: /q76RIO9NphpMyAdmin/
Disallow: /res/
Disallow: /restore/
Disallow: /skin/
Disallow: /sql/
Disallow: /tcpdf/
Disallow: /te/
Disallow: /te-newhome/
Disallow: /teaching-english-to-young-learners/
Disallow: /tefl-dbaccess/
Disallow: /teflexpr_wblog/
Disallow: /teflexpress.co.uk/
Disallow: /teflexpressinternational/
Disallow: /teflnewsletter/
Disallow: /tesolexpressonline/
Disallow: /test/
Disallow: /vc-demo/
Disallow: /vci/
Disallow: /video/
Disallow: /vtigercrm/
Disallow: /webinar_help_downloads/
Disallow: /webinar_resources/
Disallow: /wiziq-test-api/
Disallow: /wiziq_download_record/
Disallow: /wiziqapi/
Disallow: /podcast/
Disallow: /newversion/
Disallow: /shop.php?*
Disallow: /online_enroll1_com.php?*
Disallow: /ContactEnquire.php*
Disallow: /*.swf
Disallow: /enroll1.php?*
Disallow: /weekendCourse_reg.php?*
Disallow: /jobsapply/
Disallow: /requestCallback.php
Disallow: /OnlineCourses-*
Disallow: /coursePageAudioPlay.php*
Disallow: /maintenance.html
Disallow: /index.php?succ=1
Disallow: /logoutUser.php
Disallow: /JobsAll-*
Disallow: /forgotPasswd.php
Disallow: /grammarpdf_enroll.php
Disallow: /lessonplanning_enroll.php
Disallow: /podcast/wp-content/
Disallow: /podcast/wp-admin/
Disallow: /podcast/wp-includes/
Disallow: /podcast/page/*
Disallow: /blog/wp-admin/
Disallow: /Shop-*
Disallow: /efstudents
Disallow: /online-tefl-course-new/*
Disallow: /dev
Disallow: /accredited-tefl-courses/tefl-courses-description-EE.php
Allow: /wp-admin/admin-ajax.php

# Some bots are known to be trouble, particularly those designed to copy
# entire sites or download them for offline viewing.
#
User-agent: sitecheck.internetseer.com
Disallow: /

User-agent: Zealbot
Disallow: /

User-agent: MSIECrawler
Disallow: /

User-agent: SiteSnagger
Disallow: /

User-agent: WebStripper
Disallow: /

User-agent: WebCopier
Disallow: /

User-agent: Fetch
Disallow: /

User-agent: Offline Explorer
Disallow: /

User-agent: Teleport
Disallow: /

User-agent: TeleportPro
Disallow: /

User-agent: WebZIP
Disallow: /

User-agent: linko
Disallow: /

User-agent: HTTrack
Disallow: /

# Xenu Crawler
User-agent: Xenu
Disallow: /

User-agent: larbin
Disallow: /

# W3C Crawler
User-agent: libwww
Disallow: /

# LookSmart Crawler
User-agent: ZyBORG
Disallow: /

User-agent: Download Ninja
Disallow: /

User-agent: Nutch
Disallow: /

User-agent: spock
Disallow: /

User-agent: OmniExplorer_Bot
Disallow: /

User-agent: TurnitinBot
Disallow: /

User-agent: BecomeBot
Disallow: /

User-agent: genieBot
Disallow: /

User-agent: dotbot
Disallow: /

User-agent: MLBot
Disallow: /

User-agent: 80bot
Disallow: /

User-agent: Linguee Bot
Disallow: /

User-agent: aiHitBot
Disallow: /

User-agent: Exabot
Disallow: /

User-agent: SBIder/Nutch
Disallow: /

User-agent: SBIder
Disallow: /

User-agent: Jyxobot
Disallow: /

User-agent: mAgent
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: Speedy Spider
Disallow: /

User-agent: ShopWiki
Disallow: /

User-agent: Huasai
Disallow: /

User-agent: DataCha0s
Disallow: /

User-agent: Baiduspider
Disallow: /

User-agent: Atomic_Email_Hunter
Disallow: /

User-agent: Mp3Bot
Disallow: /

User-agent: WinHttp
Disallow: /

User-agent: betaBot
Disallow: /

User-agent: core-project
Disallow: /

User-agent: panscient.com
Disallow: /

User-agent: Java
Disallow: /

User-agent: libwww-perl
Disallow: /

User-agent: wget
Disallow: /

User-agent: grub-client
Disallow: /

User-agent: k2spider
Disallow: /

# Hits many times per second, not acceptable
User-agent: NPBot
Disallow: /

User-agent: Microsoft.URL.Control
Disallow: /

User-agent: Orthogaffe
Disallow: /

# Crawlers that are kind enough to obey, but which we'd rather not have
# unless they're feeding search engines.
User-agent: UbiCrawler
Disallow: /

User-agent: DOC
Disallow: /

User-agent: Zao
Disallow: /

User-agent: Twiceler
Disallow: /

# A capture bot, downloads gazillions of pages with no public benefit
User-agent: WebReaper
Disallow: /