Skip to content

Instantly share code, notes, and snippets.

@devzom
Last active January 19, 2022 09:08
Show Gist options
  • Save devzom/f55c092aa013ef81b6e99f74eb2e5ce8 to your computer and use it in GitHub Desktop.
Save devzom/f55c092aa013ef81b6e99f74eb2e5ce8 to your computer and use it in GitHub Desktop.

Revisions

  1. devzom revised this gist Jan 19, 2022. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion generate-seo-robots-agents-sitemap.js
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,5 @@
    /*
    * Created: 18/12/2021
    * @created: 18/12/2021
    * @author Jakub [dev.zomerfeld@gmail.com]
    * @summary Utility function to generate an array of
    * UserAgents provided in structured schema
  2. devzom revised this gist Jan 19, 2022. 1 changed file with 2 additions and 1 deletion.
    3 changes: 2 additions & 1 deletion generate-seo-robots-agents-sitemap.js
    Original file line number Diff line number Diff line change
    @@ -1,4 +1,5 @@
    /**
    /*
    * Created: 18/12/2021
    * @author Jakub [dev.zomerfeld@gmail.com]
    * @summary Utility function to generate an array of
    * UserAgents provided in structured schema
  3. devzom revised this gist Jan 19, 2022. 1 changed file with 3 additions and 4 deletions.
    7 changes: 3 additions & 4 deletions generate-seo-robots-agents-sitemap.js
    Original file line number Diff line number Diff line change
    @@ -1,9 +1,8 @@
    /**
    * @author Jakub [dev.zomerfeld@gmail.com]
    * Robots Nuxt module
    * https://github.com/nuxt-community/robots-module
    * @summary Utility function to generate an array of
    * UserAgents provided in structured schema
    * to use with Robots Nuxt module https://github.com/nuxt-community/robots-module
    */

    const agentsArray = [
  4. devzom created this gist Jan 19, 2022.
    237 changes: 237 additions & 0 deletions generate-seo-robots-agents-sitemap.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,237 @@
    /**
    * @author Jakub [dev.zomerfeld@gmail.com]
    * Robots Nuxt module
    * https://github.com/nuxt-community/robots-module
    */

    /**
     * User-agent names that get a blanket `Disallow: '/'` rule.
     *
     * Each entry is used verbatim as the `UserAgent` value of a generated rule,
     * so every name must appear exactly once; a repeated name would produce a
     * repeated rule in the exported list.
     */
    const agentsArray = [
      'Abonti',
      'aggregator',
      'asterias',
      'BDCbot',
      'BLEXBot',
      'BuiltBotTough',
      'Bullseye',
      'BunnySlippers',
      'ca-crawler',
      'CCBot',
      'Cegbfeieh',
      'CheeseBot',
      'CherryPicker',
      'coccoc',
      'CopyRightCheck',
      'cosmos',
      'Crescent',
      'discobot',
      'DittoSpyder',
      'DOC',
      'DotBot',
      'Download Ninja',
      'EasouSpider',
      'EmailCollector',
      'EmailSiphon',
      'EmailWolf',
      'EroCrawler',
      'Exabot',
      'ExtractorPro',
      'Ezooms',
      'Fasterfox',
      'FeedBooster',
      'Fetch',
      'Foobot',
      'Genieo',
      'gigabot',
      'Harvest',
      'hloader',
      'HTTrack',
      'humanlinks',
      'ia_archiver',
      'archive.org_bot',
      'ia_archiver-web.archive.org',
      'ichiro',
      'ieautodiscovery',
      'Incutio',
      'InfoNaviRobot',
      'IstellaBot',
      'Java/1.',
      'JennyBot',
      'k2spider',
      'Kenjin Spider',
      'Keyword Density/0.9',
      'larbin',
      'LexiBot',
      'LexxeBot',
      'libWeb',
      'LinkextractorPro',
      'linko',
      'LinkScan/8.1a Unix',
      'LinkWalker',
      'lmspider',
      'LNSpiderguy',
      'lwp-trivial',
      'magpie',
      'Mata Hari',
      'MaxPointCrawler',
      'MegaIndex',
      'memoryBot',
      'Microsoft URL Control',
      'MIIxpc',
      'Mippin',
      'Missigua Locator',
      'Mister PiX',
      'MJ12bot',
      'moget',
      'MSIECrawler',
      'NetAnts',
      'NICErsPRO',
      'Niki-Bot',
      'NPBot',
      'Nutch',
      'Offline Explorer',
      'Openfind',
      'panscient.com',
      'PHP/5.{',
      'ProPowerBot/2.14',
      'ProWebWalker',
      'Python-urllib',
      'QueryN Metasearch',
      'RepoMonkey',
      'RMA',
      'rogerbot',
      'SemrushBot',
      'serf',
      'SISTRIX',
      'sitebot',
      'sitecheck.Internetseer.com',
      'SiteSnagger',
      'SnapPreviewBot',
      'Sogou',
      'SpankBot',
      'spanner',
      'spbot',
      'Spinn3r',
      'suzuran',
      'Szukacz/1.4',
      'Teleport',
      'TeleportPro',
      'Telesoft',
      'The Intraformant',
      'TheNomad',
      'TightTwatBot',
      'Titan',
      'toCrawl/UrlDispatcher',
      'trovitBot',
      'True_Robot',
      'turingos',
      'TurnitinBot',
      'Twiceler',
      'UbiCrawler',
      'UnisterBot',
      'Unknown',
      'uptime files',
      'URLy Warning',
      'VCI',
      'WBSearchBot',
      'Web Downloader',
      'Web Image Collector',
      'WebAuto',
      'WebBandit',
      'WebCopier',
      'WebEnhancer',
      'WebmasterWorldForumBot',
      'WebReaper',
      'WebSauger',
      'Website Quester',
      'Webster Pro',
      'WebStripper',
      'WebZip',
      'wget',
      'Wotbox',
      'wsr-agent',
      'WWW-Collector-E',
      'Zao',
      'Zealbot',
      'Zeus',
      'ZyBORG'
    ]

    /**
     * Builds one rule object per user-agent name, each disallowing the whole site.
     *
     * The function returns its result instead of reassigning a module-level
     * variable, so it has no side effects and can be called with any list.
     *
     * @param {string[]} [agents=agentsArray] - User-agent names to block.
     * @returns {{ UserAgent: string, Disallow: string }[]} One
     *   `{ UserAgent, Disallow: '/' }` object per input name, in input order.
     */
    function generateAgentObjectForDisallowAll(agents = agentsArray) {
      return agents.map(UserAgent => ({
        UserAgent,
        Disallow: '/'
      }))
    }

    // Computed once at module load; spread into the exported rule list below.
    const generatedUserAgentObjectsArrayWithDisallowedAll =
      generateAgentObjectForDisallowAll()

    /**
     * Paths disallowed for every crawler (`UserAgent: '*'`).
     * Each path becomes its own rule object, in the order listed here.
     */
    const globalDisallowedPaths = [
      '/*?utm_source=*',
      '/?showContact=true',
      '/?cat=*',
      '/announcement-manager/',
      '/create/',
      '/page/',
      '/*?page=*',
      '/profile/',
      '/span['
    ]

    /**
     * Rule list for the Nuxt robots module, in order:
     *   1. the sitemap location and crawl delay for all agents,
     *   2. the global disallowed paths,
     *   3. explicitly allowed agents,
     *   4. the agents that are blocked from the whole site.
     */
    export default [
      {
        UserAgent: '*',
        // Resolved lazily from the environment each time the function is called.
        Sitemap: () => process.env.SITEMAP_URL,
        // Wait a minimum of this many seconds before requesting another URL.
        CrawlDelay: 15
      },

      // GLOBAL: one rule per disallowed path.
      ...globalDisallowedPaths.map(path => ({ UserAgent: '*', Disallow: path })),

      // SPECIFIC UserAgents
      { UserAgent: 'Mediapartners-Google', Allow: '/' },

      // Agents disallowed from everything.
      ...generatedUserAgentObjectsArrayWithDisallowedAll
    ]