extra_toc

Блокировка ботов с помощью Nginx - наиболее эффективное решение. Но есть и неудобство: доступ к настройкам Nginx есть только у владельцев VPS и выделенных серверов, на виртуальном хостинге доступа к подобным настройкам нет, но и в этом случае можно осуществлять блокировку с помощью .htaccess. Забегая вперед, скажем, что на Джихост блокировка нежелательных ботов включена по умолчанию для всех тарифных планов.

Блокировка ботов с помощью Nginx

Для начала создадим конфигурационный файл в каталоге /etc/nginx, назовем его bot_block.conf и наполним следующим содержимым (список можно скорректировать под себя, добавив или исключив ботов):

# bot_block.conf - User-Agent deny list.
# Each rule runs a case-insensitive regex match (~*) of its pattern against the
# request's User-Agent header ($http_user_agent) and answers 403 on a match.
# Patterns are unanchored, so they match anywhere in the header value; a "."
# inside a pattern is a regex wildcard, not a literal dot.
# Intended to be pulled into a server {} block with `include`.
# NOTE(review): the list also covers search-engine crawlers (bingbot, Slurp,
# Baiduspider, sogou) - drop those rules if that traffic is wanted.
if ($http_user_agent ~* "Aboundex"){ return 403; }
if ($http_user_agent ~* "80legs"){ return 403; }
if ($http_user_agent ~* "360Spider"){ return 403; }
if ($http_user_agent ~* "Cogentbot"){ return 403; }
if ($http_user_agent ~* "Alexibot"){ return 403; }
if ($http_user_agent ~* "asterias"){ return 403; }
if ($http_user_agent ~* "attach"){ return 403; }
if ($http_user_agent ~* "BackDoorBot"){ return 403; }
if ($http_user_agent ~* "BackWeb"){ return 403; }
if ($http_user_agent ~* "Bandit"){ return 403; }
if ($http_user_agent ~* "BatchFTP"){ return 403; }
if ($http_user_agent ~* "Bigfoot"){ return 403; }
if ($http_user_agent ~* "Black.Hole"){ return 403; }
if ($http_user_agent ~* "BlackWidow"){ return 403; }
if ($http_user_agent ~* "BlowFish"){ return 403; }
if ($http_user_agent ~* "BotALot"){ return 403; }
if ($http_user_agent ~* "Buddy"){ return 403; }
if ($http_user_agent ~* "BuiltBotTough"){ return 403; }
if ($http_user_agent ~* "Bullseye"){ return 403; }
if ($http_user_agent ~* "BunnySlippers"){ return 403; }
if ($http_user_agent ~* "Cegbfeieh"){ return 403; }
if ($http_user_agent ~* "CheeseBot"){ return 403; }
if ($http_user_agent ~* "CherryPicker"){ return 403; }
if ($http_user_agent ~* "ChinaClaw"){ return 403; }
if ($http_user_agent ~* "Collector"){ return 403; }
if ($http_user_agent ~* "Copier"){ return 403; }
if ($http_user_agent ~* "CopyRightCheck"){ return 403; }
if ($http_user_agent ~* "cosmos"){ return 403; }
if ($http_user_agent ~* "Crescent"){ return 403; }
if ($http_user_agent ~* "Custo"){ return 403; }
if ($http_user_agent ~* "AIBOT"){ return 403; }
if ($http_user_agent ~* "DISCo"){ return 403; }
if ($http_user_agent ~* "DIIbot"){ return 403; }
if ($http_user_agent ~* "DittoSpyder"){ return 403; }
if ($http_user_agent ~* "Download Demon"){ return 403; }
if ($http_user_agent ~* "Download Devil"){ return 403; }
if ($http_user_agent ~* "Download Wonder"){ return 403; }
if ($http_user_agent ~* "dragonfly"){ return 403; }
if ($http_user_agent ~* "Drip"){ return 403; }
if ($http_user_agent ~* "eCatch"){ return 403; }
if ($http_user_agent ~* "EasyDL"){ return 403; }
if ($http_user_agent ~* "ebingbong"){ return 403; }
if ($http_user_agent ~* "EirGrabber"){ return 403; }
if ($http_user_agent ~* "EmailCollector"){ return 403; }
if ($http_user_agent ~* "EmailSiphon"){ return 403; }
if ($http_user_agent ~* "EmailWolf"){ return 403; }
if ($http_user_agent ~* "EroCrawler"){ return 403; }
if ($http_user_agent ~* "Exabot"){ return 403; }
if ($http_user_agent ~* "Express WebPictures"){ return 403; }
if ($http_user_agent ~* "Extractor"){ return 403; }
if ($http_user_agent ~* "EyeNetIE"){ return 403; }
if ($http_user_agent ~* "Foobot"){ return 403; }
if ($http_user_agent ~* "flunky"){ return 403; }
if ($http_user_agent ~* "FrontPage"){ return 403; }
if ($http_user_agent ~* "Go-Ahead-Got-It"){ return 403; }
if ($http_user_agent ~* "gotit"){ return 403; }
if ($http_user_agent ~* "GrabNet"){ return 403; }
if ($http_user_agent ~* "Grafula"){ return 403; }
if ($http_user_agent ~* "Harvest"){ return 403; }
if ($http_user_agent ~* "hloader"){ return 403; }
if ($http_user_agent ~* "HMView"){ return 403; }
if ($http_user_agent ~* "HTTrack"){ return 403; }
if ($http_user_agent ~* "humanlinks"){ return 403; }
if ($http_user_agent ~* "IlseBot"){ return 403; }
if ($http_user_agent ~* "Image Stripper"){ return 403; }
if ($http_user_agent ~* "Image Sucker"){ return 403; }
if ($http_user_agent ~* "Indy Library"){ return 403; }
if ($http_user_agent ~* "InfoNavibot"){ return 403; }
if ($http_user_agent ~* "InfoTekies"){ return 403; }
if ($http_user_agent ~* "Intelliseek"){ return 403; }
if ($http_user_agent ~* "InterGET"){ return 403; }
if ($http_user_agent ~* "Internet Ninja"){ return 403; }
if ($http_user_agent ~* "Iria"){ return 403; }
if ($http_user_agent ~* "Jakarta"){ return 403; }
if ($http_user_agent ~* "JennyBot"){ return 403; }
if ($http_user_agent ~* "JetCar"){ return 403; }
if ($http_user_agent ~* "JOC"){ return 403; }
if ($http_user_agent ~* "JustView"){ return 403; }
if ($http_user_agent ~* "Jyxobot"){ return 403; }
if ($http_user_agent ~* "Kenjin.Spider"){ return 403; }
if ($http_user_agent ~* "Keyword.Density"){ return 403; }
if ($http_user_agent ~* "larbin"){ return 403; }
if ($http_user_agent ~* "LexiBot"){ return 403; }
if ($http_user_agent ~* "libWeb/clsHTTP"){ return 403; }
if ($http_user_agent ~* "likse"){ return 403; }
if ($http_user_agent ~* "LinkextractorPro"){ return 403; }
if ($http_user_agent ~* "LinkScan/8.1a.Unix"){ return 403; }
if ($http_user_agent ~* "LNSpiderguy"){ return 403; }
if ($http_user_agent ~* "LinkWalker"){ return 403; }
if ($http_user_agent ~* "lwp-trivial"){ return 403; }
if ($http_user_agent ~* "LWP::Simple"){ return 403; }
if ($http_user_agent ~* "Magnet"){ return 403; }
if ($http_user_agent ~* "Mag-Net"){ return 403; }
if ($http_user_agent ~* "MarkWatch"){ return 403; }
if ($http_user_agent ~* "Mass Downloader"){ return 403; }
if ($http_user_agent ~* "Mata.Hari"){ return 403; }
if ($http_user_agent ~* "Microsoft.URL"){ return 403; }
if ($http_user_agent ~* "Microsoft URL Control"){ return 403; }
if ($http_user_agent ~* "MIDown tool"){ return 403; }
if ($http_user_agent ~* "MIIxpc"){ return 403; }
if ($http_user_agent ~* "Mirror"){ return 403; }
if ($http_user_agent ~* "Missigua Locator"){ return 403; }
if ($http_user_agent ~* "Mister PiX"){ return 403; }
if ($http_user_agent ~* "moget"){ return 403; }
if ($http_user_agent ~* "Mozilla/3.Mozilla/2.01"){ return 403; }
if ($http_user_agent ~* "Mozilla.*NEWT"){ return 403; }
if ($http_user_agent ~* "NAMEPROTECT"){ return 403; }
if ($http_user_agent ~* "Navroad"){ return 403; }
if ($http_user_agent ~* "NearSite"){ return 403; }
if ($http_user_agent ~* "NetAnts"){ return 403; }
if ($http_user_agent ~* "Netcraft"){ return 403; }
if ($http_user_agent ~* "NetMechanic"){ return 403; }
if ($http_user_agent ~* "NetSpider"){ return 403; }
if ($http_user_agent ~* "Net Vampire"){ return 403; }
if ($http_user_agent ~* "NetZIP"){ return 403; }
if ($http_user_agent ~* "NextGenSearchBot"){ return 403; }
if ($http_user_agent ~* "NICErsPRO"){ return 403; }
if ($http_user_agent ~* "niki-bot"){ return 403; }
if ($http_user_agent ~* "NimbleCrawler"){ return 403; }
if ($http_user_agent ~* "Ninja"){ return 403; }
if ($http_user_agent ~* "NPbot"){ return 403; }
if ($http_user_agent ~* "Octopus"){ return 403; }
if ($http_user_agent ~* "Offline Explorer"){ return 403; }
if ($http_user_agent ~* "Offline Navigator"){ return 403; }
if ($http_user_agent ~* "Openfind"){ return 403; }
if ($http_user_agent ~* "OutfoxBot"){ return 403; }
if ($http_user_agent ~* "PageGrabber"){ return 403; }
if ($http_user_agent ~* "Papa Foto"){ return 403; }
if ($http_user_agent ~* "pavuk"){ return 403; }
if ($http_user_agent ~* "pcBrowser"){ return 403; }
if ($http_user_agent ~* "PHP version tracker"){ return 403; }
if ($http_user_agent ~* "Pockey"){ return 403; }
if ($http_user_agent ~* "ProPowerBot/2.14"){ return 403; }
if ($http_user_agent ~* "ProWebWalker"){ return 403; }
if ($http_user_agent ~* "psbot"){ return 403; }
if ($http_user_agent ~* "Pump"){ return 403; }
if ($http_user_agent ~* "QueryN.Metasearch"){ return 403; }
if ($http_user_agent ~* "RealDownload"){ return 403; }
if ($http_user_agent ~* "Reaper"){ return 403; }
if ($http_user_agent ~* "Recorder"){ return 403; }
if ($http_user_agent ~* "ReGet"){ return 403; }
if ($http_user_agent ~* "RepoMonkey"){ return 403; }
if ($http_user_agent ~* "Siphon"){ return 403; }
if ($http_user_agent ~* "SiteSnagger"){ return 403; }
if ($http_user_agent ~* "SlySearch"){ return 403; }
if ($http_user_agent ~* "SmartDownload"){ return 403; }
if ($http_user_agent ~* "Snake"){ return 403; }
if ($http_user_agent ~* "Snapbot"){ return 403; }
if ($http_user_agent ~* "Snoopy"){ return 403; }
if ($http_user_agent ~* "sogou"){ return 403; }
if ($http_user_agent ~* "SpaceBison"){ return 403; }
if ($http_user_agent ~* "SpankBot"){ return 403; }
if ($http_user_agent ~* "spanner"){ return 403; }
if ($http_user_agent ~* "Sqworm"){ return 403; }
if ($http_user_agent ~* "Stripper"){ return 403; }
if ($http_user_agent ~* "Sucker"){ return 403; }
if ($http_user_agent ~* "SuperBot"){ return 403; }
if ($http_user_agent ~* "SuperHTTP"){ return 403; }
if ($http_user_agent ~* "Surfbot"){ return 403; }
if ($http_user_agent ~* "suzuran"){ return 403; }
if ($http_user_agent ~* "Szukacz/1.4"){ return 403; }
if ($http_user_agent ~* "tAkeOut"){ return 403; }
if ($http_user_agent ~* "Teleport"){ return 403; }
if ($http_user_agent ~* "Telesoft"){ return 403; }
if ($http_user_agent ~* "TurnitinBot/1.5"){ return 403; }
if ($http_user_agent ~* "The.Intraformant"){ return 403; }
if ($http_user_agent ~* "TheNomad"){ return 403; }
if ($http_user_agent ~* "TightTwatBot"){ return 403; }
if ($http_user_agent ~* "Titan"){ return 403; }
if ($http_user_agent ~* "True_bot"){ return 403; }
if ($http_user_agent ~* "turingos"){ return 403; }
if ($http_user_agent ~* "TurnitinBot"){ return 403; }
if ($http_user_agent ~* "URLy.Warning"){ return 403; }
if ($http_user_agent ~* "Vacuum"){ return 403; }
if ($http_user_agent ~* "VoidEYE"){ return 403; }
if ($http_user_agent ~* "Web Image Collector"){ return 403; }
if ($http_user_agent ~* "Web Sucker"){ return 403; }
if ($http_user_agent ~* "WebAuto"){ return 403; }
if ($http_user_agent ~* "WebBandit"){ return 403; }
if ($http_user_agent ~* "Webclipping.com"){ return 403; }
if ($http_user_agent ~* "WebCopier"){ return 403; }
if ($http_user_agent ~* "WebEnhancer"){ return 403; }
if ($http_user_agent ~* "WebFetch"){ return 403; }
if ($http_user_agent ~* "WebGo IS"){ return 403; }
if ($http_user_agent ~* "Web.Image.Collector"){ return 403; }
if ($http_user_agent ~* "WebLeacher"){ return 403; }
if ($http_user_agent ~* "WebmasterWorldForumBot"){ return 403; }
if ($http_user_agent ~* "WebReaper"){ return 403; }
if ($http_user_agent ~* "WebSauger"){ return 403; }
if ($http_user_agent ~* "Website eXtractor"){ return 403; }
if ($http_user_agent ~* "Website Quester"){ return 403; }
if ($http_user_agent ~* "Webster"){ return 403; }
if ($http_user_agent ~* "WebStripper"){ return 403; }
if ($http_user_agent ~* "WebWhacker"){ return 403; }
if ($http_user_agent ~* "WebZIP"){ return 403; }
if ($http_user_agent ~* "Whacker"){ return 403; }
if ($http_user_agent ~* "Widow"){ return 403; }
if ($http_user_agent ~* "WISENutbot"){ return 403; }
if ($http_user_agent ~* "WWWOFFLE"){ return 403; }
if ($http_user_agent ~* "WWW-Collector-E"){ return 403; }
if ($http_user_agent ~* "Xaldon"){ return 403; }
if ($http_user_agent ~* "Zeus"){ return 403; }
if ($http_user_agent ~* "ZmEu"){ return 403; }
if ($http_user_agent ~* "Zyborg"){ return 403; }
if ($http_user_agent ~* "AhrefsBot"){ return 403; }
if ($http_user_agent ~* "archive.org_bot"){ return 403; }
if ($http_user_agent ~* "bingbot"){ return 403; }
if ($http_user_agent ~* "Wget"){ return 403; }
if ($http_user_agent ~* "Acunetix"){ return 403; }
if ($http_user_agent ~* "FHscan"){ return 403; }
if ($http_user_agent ~* "BLEXBot"){ return 403; }
if ($http_user_agent ~* "MJ12bot"){ return 403; }
if ($http_user_agent ~* "SemrushBot"){ return 403; }
if ($http_user_agent ~* "Baiduspider"){ return 403; }
if ($http_user_agent ~* "Slurp"){ return 403; }
if ($http_user_agent ~* "DotBot"){ return 403; }

Как видно, блокировка происходит по User-Agent. Информация о User-Agent передается в запросе к сайту поисковым ботом или другим устройством, с помощью которого происходит запрос, и мы блокируем обращения к сайту, содержащие определенное вхождение.

Далее нужно добавить параметр в секцию server {} нужного сайта:

# Pull the shared User-Agent deny rules into this server {} block.
include /etc/nginx/bot_block.conf;

И перезагрузить конфигурацию Nginx:

# Check the configuration syntax first; reload only if the check passes.
nginx -t && service nginx reload

Блокировка ботов с помощью .htaccess

Если на вашем хостинге нет подобной блокировки, то ее можно организовать с помощью файла .htaccess, для этого внесите в него следующий текст:

# Tag requests from unwanted bots: SetEnvIfNoCase sets the bot_block
# environment variable when the User-Agent header matches the regex
# (case-insensitively). One directive per line.
# Patterns are deliberately unanchored (no leading "^") so they also match
# crawlers whose token sits in the middle of the header, e.g.
# "Mozilla/5.0 (compatible; bingbot/2.0; ...)".
SetEnvIfNoCase User-Agent "Aboundex" bot_block
SetEnvIfNoCase User-Agent "80legs" bot_block
SetEnvIfNoCase User-Agent "360Spider" bot_block
SetEnvIfNoCase User-Agent "Cogentbot" bot_block
SetEnvIfNoCase User-Agent "Alexibot" bot_block
SetEnvIfNoCase User-Agent "asterias" bot_block
SetEnvIfNoCase User-Agent "attach" bot_block
SetEnvIfNoCase User-Agent "BackDoorBot" bot_block
SetEnvIfNoCase User-Agent "BackWeb" bot_block
SetEnvIfNoCase User-Agent "Bandit" bot_block
SetEnvIfNoCase User-Agent "BatchFTP" bot_block
SetEnvIfNoCase User-Agent "Bigfoot" bot_block
SetEnvIfNoCase User-Agent "Black.Hole" bot_block
SetEnvIfNoCase User-Agent "BlackWidow" bot_block
SetEnvIfNoCase User-Agent "BlowFish" bot_block
SetEnvIfNoCase User-Agent "BotALot" bot_block
SetEnvIfNoCase User-Agent "Buddy" bot_block
SetEnvIfNoCase User-Agent "BuiltBotTough" bot_block
SetEnvIfNoCase User-Agent "Bullseye" bot_block
SetEnvIfNoCase User-Agent "BunnySlippers" bot_block
SetEnvIfNoCase User-Agent "Cegbfeieh" bot_block
SetEnvIfNoCase User-Agent "CheeseBot" bot_block
SetEnvIfNoCase User-Agent "CherryPicker" bot_block
SetEnvIfNoCase User-Agent "ChinaClaw" bot_block
SetEnvIfNoCase User-Agent "Collector" bot_block
SetEnvIfNoCase User-Agent "Copier" bot_block
SetEnvIfNoCase User-Agent "CopyRightCheck" bot_block
SetEnvIfNoCase User-Agent "cosmos" bot_block
SetEnvIfNoCase User-Agent "Crescent" bot_block
SetEnvIfNoCase User-Agent "Custo" bot_block
SetEnvIfNoCase User-Agent "AIBOT" bot_block
SetEnvIfNoCase User-Agent "DISCo" bot_block
SetEnvIfNoCase User-Agent "DIIbot" bot_block
SetEnvIfNoCase User-Agent "DittoSpyder" bot_block
SetEnvIfNoCase User-Agent "Download Demon" bot_block
SetEnvIfNoCase User-Agent "Download Devil" bot_block
SetEnvIfNoCase User-Agent "Download Wonder" bot_block
SetEnvIfNoCase User-Agent "dragonfly" bot_block
SetEnvIfNoCase User-Agent "Drip" bot_block
SetEnvIfNoCase User-Agent "eCatch" bot_block
SetEnvIfNoCase User-Agent "EasyDL" bot_block
SetEnvIfNoCase User-Agent "ebingbong" bot_block
SetEnvIfNoCase User-Agent "EirGrabber" bot_block
SetEnvIfNoCase User-Agent "EmailCollector" bot_block
SetEnvIfNoCase User-Agent "EmailSiphon" bot_block
SetEnvIfNoCase User-Agent "EmailWolf" bot_block
SetEnvIfNoCase User-Agent "EroCrawler" bot_block
SetEnvIfNoCase User-Agent "Exabot" bot_block
SetEnvIfNoCase User-Agent "Express WebPictures" bot_block
SetEnvIfNoCase User-Agent "Extractor" bot_block
SetEnvIfNoCase User-Agent "EyeNetIE" bot_block
SetEnvIfNoCase User-Agent "Foobot" bot_block
SetEnvIfNoCase User-Agent "flunky" bot_block
SetEnvIfNoCase User-Agent "FrontPage" bot_block
SetEnvIfNoCase User-Agent "Go-Ahead-Got-It" bot_block
SetEnvIfNoCase User-Agent "gotit" bot_block
SetEnvIfNoCase User-Agent "GrabNet" bot_block
SetEnvIfNoCase User-Agent "Grafula" bot_block
SetEnvIfNoCase User-Agent "Harvest" bot_block
SetEnvIfNoCase User-Agent "hloader" bot_block
SetEnvIfNoCase User-Agent "HMView" bot_block
SetEnvIfNoCase User-Agent "HTTrack" bot_block
SetEnvIfNoCase User-Agent "humanlinks" bot_block
SetEnvIfNoCase User-Agent "IlseBot" bot_block
SetEnvIfNoCase User-Agent "Image Stripper" bot_block
SetEnvIfNoCase User-Agent "Image Sucker" bot_block
SetEnvIfNoCase User-Agent "Indy Library" bot_block
SetEnvIfNoCase User-Agent "InfoNavibot" bot_block
SetEnvIfNoCase User-Agent "InfoTekies" bot_block
SetEnvIfNoCase User-Agent "Intelliseek" bot_block
SetEnvIfNoCase User-Agent "InterGET" bot_block
SetEnvIfNoCase User-Agent "Internet Ninja" bot_block
SetEnvIfNoCase User-Agent "Iria" bot_block
SetEnvIfNoCase User-Agent "Jakarta" bot_block
SetEnvIfNoCase User-Agent "JennyBot" bot_block
SetEnvIfNoCase User-Agent "JetCar" bot_block
SetEnvIfNoCase User-Agent "JOC" bot_block
SetEnvIfNoCase User-Agent "JustView" bot_block
SetEnvIfNoCase User-Agent "Jyxobot" bot_block
SetEnvIfNoCase User-Agent "Kenjin.Spider" bot_block
SetEnvIfNoCase User-Agent "Keyword.Density" bot_block
SetEnvIfNoCase User-Agent "larbin" bot_block
SetEnvIfNoCase User-Agent "LexiBot" bot_block
SetEnvIfNoCase User-Agent "libWeb/clsHTTP" bot_block
SetEnvIfNoCase User-Agent "likse" bot_block
SetEnvIfNoCase User-Agent "LinkextractorPro" bot_block
SetEnvIfNoCase User-Agent "LinkScan/8.1a.Unix" bot_block
SetEnvIfNoCase User-Agent "LNSpiderguy" bot_block
SetEnvIfNoCase User-Agent "LinkWalker" bot_block
SetEnvIfNoCase User-Agent "lwp-trivial" bot_block
SetEnvIfNoCase User-Agent "LWP::Simple" bot_block
SetEnvIfNoCase User-Agent "Magnet" bot_block
SetEnvIfNoCase User-Agent "Mag-Net" bot_block
SetEnvIfNoCase User-Agent "MarkWatch" bot_block
SetEnvIfNoCase User-Agent "Mass Downloader" bot_block
SetEnvIfNoCase User-Agent "Mata.Hari" bot_block
SetEnvIfNoCase User-Agent "Microsoft.URL" bot_block
SetEnvIfNoCase User-Agent "Microsoft URL Control" bot_block
SetEnvIfNoCase User-Agent "MIDown tool" bot_block
SetEnvIfNoCase User-Agent "MIIxpc" bot_block
SetEnvIfNoCase User-Agent "Mirror" bot_block
SetEnvIfNoCase User-Agent "Missigua Locator" bot_block
SetEnvIfNoCase User-Agent "Mister PiX" bot_block
SetEnvIfNoCase User-Agent "moget" bot_block
SetEnvIfNoCase User-Agent "Mozilla/3.Mozilla/2.01" bot_block
SetEnvIfNoCase User-Agent "Mozilla.*NEWT" bot_block
SetEnvIfNoCase User-Agent "NAMEPROTECT" bot_block
SetEnvIfNoCase User-Agent "Navroad" bot_block
SetEnvIfNoCase User-Agent "NearSite" bot_block
SetEnvIfNoCase User-Agent "NetAnts" bot_block
SetEnvIfNoCase User-Agent "Netcraft" bot_block
SetEnvIfNoCase User-Agent "NetMechanic" bot_block
SetEnvIfNoCase User-Agent "NetSpider" bot_block
SetEnvIfNoCase User-Agent "Net Vampire" bot_block
SetEnvIfNoCase User-Agent "NetZIP" bot_block
SetEnvIfNoCase User-Agent "NextGenSearchBot" bot_block
SetEnvIfNoCase User-Agent "NICErsPRO" bot_block
SetEnvIfNoCase User-Agent "niki-bot" bot_block
SetEnvIfNoCase User-Agent "NimbleCrawler" bot_block
SetEnvIfNoCase User-Agent "Ninja" bot_block
SetEnvIfNoCase User-Agent "NPbot" bot_block
SetEnvIfNoCase User-Agent "Octopus" bot_block
SetEnvIfNoCase User-Agent "Offline Explorer" bot_block
SetEnvIfNoCase User-Agent "Offline Navigator" bot_block
SetEnvIfNoCase User-Agent "Openfind" bot_block
SetEnvIfNoCase User-Agent "OutfoxBot" bot_block
SetEnvIfNoCase User-Agent "PageGrabber" bot_block
SetEnvIfNoCase User-Agent "Papa Foto" bot_block
SetEnvIfNoCase User-Agent "pavuk" bot_block
SetEnvIfNoCase User-Agent "pcBrowser" bot_block
SetEnvIfNoCase User-Agent "PHP version tracker" bot_block
SetEnvIfNoCase User-Agent "Pockey" bot_block
SetEnvIfNoCase User-Agent "ProPowerBot/2.14" bot_block
SetEnvIfNoCase User-Agent "ProWebWalker" bot_block
SetEnvIfNoCase User-Agent "psbot" bot_block
SetEnvIfNoCase User-Agent "Pump" bot_block
SetEnvIfNoCase User-Agent "QueryN.Metasearch" bot_block
SetEnvIfNoCase User-Agent "RealDownload" bot_block
SetEnvIfNoCase User-Agent "Reaper" bot_block
SetEnvIfNoCase User-Agent "Recorder" bot_block
SetEnvIfNoCase User-Agent "ReGet" bot_block
SetEnvIfNoCase User-Agent "RepoMonkey" bot_block
SetEnvIfNoCase User-Agent "Siphon" bot_block
SetEnvIfNoCase User-Agent "SiteSnagger" bot_block
SetEnvIfNoCase User-Agent "SlySearch" bot_block
SetEnvIfNoCase User-Agent "SmartDownload" bot_block
SetEnvIfNoCase User-Agent "Snake" bot_block
SetEnvIfNoCase User-Agent "Snapbot" bot_block
SetEnvIfNoCase User-Agent "Snoopy" bot_block
SetEnvIfNoCase User-Agent "sogou" bot_block
SetEnvIfNoCase User-Agent "SpaceBison" bot_block
SetEnvIfNoCase User-Agent "SpankBot" bot_block
SetEnvIfNoCase User-Agent "spanner" bot_block
SetEnvIfNoCase User-Agent "Sqworm" bot_block
SetEnvIfNoCase User-Agent "Stripper" bot_block
SetEnvIfNoCase User-Agent "Sucker" bot_block
SetEnvIfNoCase User-Agent "SuperBot" bot_block
SetEnvIfNoCase User-Agent "SuperHTTP" bot_block
SetEnvIfNoCase User-Agent "Surfbot" bot_block
SetEnvIfNoCase User-Agent "suzuran" bot_block
SetEnvIfNoCase User-Agent "Szukacz/1.4" bot_block
SetEnvIfNoCase User-Agent "tAkeOut" bot_block
SetEnvIfNoCase User-Agent "Teleport" bot_block
SetEnvIfNoCase User-Agent "Telesoft" bot_block
SetEnvIfNoCase User-Agent "TurnitinBot/1.5" bot_block
SetEnvIfNoCase User-Agent "The.Intraformant" bot_block
SetEnvIfNoCase User-Agent "TheNomad" bot_block
SetEnvIfNoCase User-Agent "TightTwatBot" bot_block
SetEnvIfNoCase User-Agent "Titan" bot_block
SetEnvIfNoCase User-Agent "True_bot" bot_block
SetEnvIfNoCase User-Agent "turingos" bot_block
SetEnvIfNoCase User-Agent "TurnitinBot" bot_block
SetEnvIfNoCase User-Agent "URLy.Warning" bot_block
SetEnvIfNoCase User-Agent "Vacuum" bot_block
SetEnvIfNoCase User-Agent "VoidEYE" bot_block
SetEnvIfNoCase User-Agent "Web Image Collector" bot_block
SetEnvIfNoCase User-Agent "Web Sucker" bot_block
SetEnvIfNoCase User-Agent "WebAuto" bot_block
SetEnvIfNoCase User-Agent "WebBandit" bot_block
SetEnvIfNoCase User-Agent "Webclipping.com" bot_block
SetEnvIfNoCase User-Agent "WebCopier" bot_block
SetEnvIfNoCase User-Agent "WebEnhancer" bot_block
SetEnvIfNoCase User-Agent "WebFetch" bot_block
SetEnvIfNoCase User-Agent "WebGo IS" bot_block
SetEnvIfNoCase User-Agent "Web.Image.Collector" bot_block
SetEnvIfNoCase User-Agent "WebLeacher" bot_block
SetEnvIfNoCase User-Agent "WebmasterWorldForumBot" bot_block
SetEnvIfNoCase User-Agent "WebReaper" bot_block
SetEnvIfNoCase User-Agent "WebSauger" bot_block
SetEnvIfNoCase User-Agent "Website eXtractor" bot_block
SetEnvIfNoCase User-Agent "Website Quester" bot_block
SetEnvIfNoCase User-Agent "Webster" bot_block
SetEnvIfNoCase User-Agent "WebStripper" bot_block
SetEnvIfNoCase User-Agent "WebWhacker" bot_block
SetEnvIfNoCase User-Agent "WebZIP" bot_block
SetEnvIfNoCase User-Agent "Whacker" bot_block
SetEnvIfNoCase User-Agent "Widow" bot_block
SetEnvIfNoCase User-Agent "WISENutbot" bot_block
SetEnvIfNoCase User-Agent "WWWOFFLE" bot_block
SetEnvIfNoCase User-Agent "WWW-Collector-E" bot_block
SetEnvIfNoCase User-Agent "Xaldon" bot_block
SetEnvIfNoCase User-Agent "Zeus" bot_block
SetEnvIfNoCase User-Agent "ZmEu" bot_block
SetEnvIfNoCase User-Agent "Zyborg" bot_block
SetEnvIfNoCase User-Agent "AhrefsBot" bot_block
SetEnvIfNoCase User-Agent "archive.org_bot" bot_block
SetEnvIfNoCase User-Agent "bingbot" bot_block
SetEnvIfNoCase User-Agent "Wget" bot_block
SetEnvIfNoCase User-Agent "Acunetix" bot_block
SetEnvIfNoCase User-Agent "FHscan" bot_block
SetEnvIfNoCase User-Agent "BLEXBot" bot_block
SetEnvIfNoCase User-Agent "MJ12bot" bot_block
SetEnvIfNoCase User-Agent "SemrushBot" bot_block
SetEnvIfNoCase User-Agent "Baiduspider" bot_block
SetEnvIfNoCase User-Agent "Slurp" bot_block
SetEnvIfNoCase User-Agent "DotBot" bot_block

# Refuse GET/POST/HEAD requests that were tagged with bot_block above.
# NOTE(review): Order/Allow/Deny is the Apache 2.2 access syntax; on Apache 2.4
# it presumably requires mod_access_compat - confirm with your hosting.
<Limit GET POST HEAD>
Order Allow,Deny
Allow from all
Deny from env=bot_block
</Limit>

Если вы не знаете User-Agent бота, то на любом хостинге есть access-логи, в которых записываются все обращения к сайту; там можно найти нежелательные запросы и добавить блокировку для них.