sitemap.xml.builder 4.93 KB
Newer Older
hellekin's avatar
hellekin committed
1 2 3 4 5 6 7 8 9 10 11
# encoding: utf-8
# Start the document with the XML declaration (Builder's `instruct!`).
xml.instruct!

# Drop this file in your source directory
#
# Uses the builder gem.
# Add
# gem 'builder', '~> 2.0' 
# to the Gemfile, and run bundle install
#
# Add this line to config.rb: 
hellekin's avatar
hellekin committed
12
# page "/os/sitemap.xml", :layout => false  
hellekin's avatar
hellekin committed
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
# 
# To tell Google and other search services about the sitemap, add
# Sitemap: http://www.your-domain.com/sitemap.xml   
# to the robots.txt file. Or see
# http://support.google.com/webmasters/bin/answer.py?hl=en&answer=183669&topic=8476&ctx=topic for other options.
#
# For details on how to construct a site map, see
# http://www.sitemaps.org/protocol.html
# http://support.google.com/webmasters/bin/answer.py?hl=en&answer=156184&topic=8476&ctx=topic
#
# Remember to exchange the domain used in the <loc> URL below for your own domain
# and to set an appropriate update frequency (changefreq) for your site.

# Builds the <urlset> element: one <url> entry for every resource in the
# sitemap that is not filtered out below.
#
# Each entry gets:
#   loc        - the base URL plus the page URL with a trailing "index.html"
#                or ".html" stripped
#   changefreq - only for the front page ('/index.html')
#   lastmod    - for every other page, when a date can be determined
#   priority   - only when `sitemap_priority` is set in the frontmatter and
#                differs from the default of 0.5
#
# Raises RuntimeError when more than 50 000 pages would be listed, or when a
# page declares a `sitemap_priority` outside the range 0..1.
xml.urlset "xmlns" => "http://www.sitemaps.org/schemas/sitemap/0.9" do
  site_url   = 'https://devuan.org'   # Prefix for every <loc>; exchange for your own domain
  max_pages  = 50_000                 # Upper bound on entries in a single sitemap file
  page_count = 0

  sitemap.resources.each do |page|
    # catch/throw is used instead of `next` so that a page can also be skipped
    # from inside a nested block (see the optional year loop below).
    catch :next_page do

      # If all you want the sitemap to cover is normal html pages, use this:
      # throw :next_page unless page.url.end_with?('.html')
      #
      # If you need more detailed control, this is a starting point:
      throw :next_page if page.url.start_with?('/ui/')
      throw :next_page if page.url.start_with?('/layouts/')
      throw :next_page if page.url.start_with?('/apple-touch-icon')
      throw :next_page if page.url.end_with?('.xml')
      throw :next_page if page.url.start_with?('/.')   # .htaccess, .DS_Store, .git etc.
      throw :next_page if page.url == '/robots.txt'
      throw :next_page if page.url == '/favicon.ico'
      throw :next_page if page.url == '/humans.txt'

      # Frontmatter of the page; an empty hash when the resource has none, so
      # the lookups below never have to re-check for its presence.
      data = (page.data if defined?(page.data)) || {}

      # Exclude drafts
      # (explicit comparisons: a missing key is nil and must not count as false/true)
      throw :next_page if data['published'] == false
      throw :next_page if data['draft'] == true
      # Exclude pages dated in the future
      throw :next_page if data['date'] && data['date'].to_time > Time.now

      # Exclude archive indexes
      #     I prefer not to have the calendar archive index pages in the sitemap.
      #     Remove this if you want them.
#      (2000..2030).each do |year|
#        # Exclude year index, directories and subdirectories
#        throw :next_page if page.url == "/#{year}.html" || page.url == "/#{year}" || page.url.start_with?("/#{year}/")
#      end

      # Count pages
      page_count += 1
      raise "Over 50 000 pages, too much for a single sitemap" if page_count > max_pages

      # Build xml of sitemap
      xml.url do
        xml.loc "#{site_url}#{page.url.sub(/(index)?\.html$/, '')}"

        # As usual, it's hard to get any solid information on how the search engines are
        # using the information in the site map. lastmod (last modified) and changefreq
        # (change frequency) seem to overlap each other. My guess is that it's best to
        # use lastmod for pages where you know when the file changed, and changefreq for
        # dynamic pages, like the first page of a blog and archive pages, where it's
        # hard to know when the dynamic information was changed.
        #
        # Possible values for changefreq
        # always
        # hourly
        # daily
        # weekly
        # monthly
        # yearly
        # never

        case page.url
        when '/index.html'
          xml.changefreq 'weekly'    # Set this to a suitable value for your site
        else
          # Setting last modified (lastmod)
          #
          # Checks date in frontmatter and modification date of file, and uses the most recent.
          # (Note: Will not be able to get the correct date for pages with dynamic content)
          candidates = []
          candidates << data['date'].to_time.getlocal if data['date']

          # Only stat the source file when the resource actually has one on disk.
          source = page.source_file if defined?(page.source_file)
          candidates << File.mtime(source).getlocal if source && File.exist?(source)

          # lastmod is left out entirely when no date could be determined,
          # instead of failing on a nil date.
          most_recent = candidates.max
          xml.lastmod most_recent.strftime("%Y-%m-%d") if most_recent  # Using only date, stripping clock
        end

        # Priority (priority)
        #
        # To adjust the priority of a page, set sitemap_priority: in the frontmatter.
        raw_priority = data['sitemap_priority']
        if raw_priority
          priority = raw_priority.to_f
          unless priority.between?(0.0, 1.0)
            raise "Incorrect sitemap_priority #{raw_priority} for #{page.destination_path}"
          end
          xml.priority priority unless priority == 0.5 # (No use setting the default priority)
        end

      end
    end
  end
end