"Fossies" - the Fresh Open Source Software Archive

Member "example_config.xml" (19 Jul 2007, 6511 Bytes) of package /linux/www/old/sitemap_gen_1.5.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) XML source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here.

    1 <?xml version="1.0" encoding="UTF-8"?>
    2 <!--
    3   sitemap_gen.py example configuration script
    4 
    5   This file specifies a set of sample input parameters for the
    6   sitemap_gen.py client.
    7 
    8   You should copy this file into "config.xml" and modify it for
    9   your server.
   10 
   11 
   12   ********************************************************* -->
   13 
   14 
   15 <!-- ** MODIFY **
   16   The "site" node describes your basic web site.
   17 
   18   Required attributes:
   19     base_url   - the top-level URL of the site being mapped
   20     store_into - the webserver path to the desired output file.
   21                  This should end in '.xml' or '.xml.gz'
   22                  (the script will create this file)
   23 
   24   Optional attributes:
   25     verbose    - an integer from 0 (quiet) to 3 (noisy) for
   26                  how much diagnostic output the script gives
   27     suppress_search_engine_notify="1"
   28                - disables notifying search engines about the new map
   29                  (same as the "testing" command-line argument.)
   30     default_encoding
   31                - names a character encoding to use for URLs and
   32                  file paths.  (Example: "UTF-8")
   33     sitemap_type
   34                - declares the Sitemap type.  Common values are
   35                  web, mobile and news.  The default is "web".
   36                  (Example: sitemap_type="news")
   37 -->
   38  <site
   39   base_url="http://www.example.com/"
   40   store_into="/var/www/docroot/sitemap.xml"
   41   verbose="1"
   42   sitemap_type="web"
   43  >
   44  
   45  <!--
   46  <site
   47   base_url="http://www.example.com/"
   48   store_into="/var/www/docroot/sitemap.xml"
   49   verbose="2"
   50   sitemap_type="news"
   51  >
   52  
   53  -->
   54  
   55 
   56   <!-- ********************************************************
   57           INPUTS
   58 
   59   All the various nodes in this section control where the script
   60   looks to find URLs.
   61 
   62   MODIFY or DELETE these entries as appropriate for your server.
   63   ********************************************************* -->
   64 
   65   <!-- ** MODIFY or DELETE **
   66     "url" nodes specify individual URLs to include in the map.
   67 
   68     Required attributes:
   69       href       - the URL
   70 
   71     Optional attributes:
   72       lastmod    - timestamp of last modification (ISO8601 format)
   73       changefreq - how often content at this URL is usually updated
   74       priority   - value 0.0 to 1.0 of relative importance in your site
   75   -->
   76 
   77 <!--
   78   <url  href="http://www.example.com/stats?q=name"  />
   79   <url
   80      href="http://www.example.com/stats?q=age"
   81      lastmod="2004-11-14T01:00:00-07:00"
   82      changefreq="yearly"
   83      priority="0.3"
   84   />
   85 -->
   86 
   87 
   88   <!-- ** MODIFY or DELETE **
   89     "urllist" nodes name text files with lists of URLs.
   90     An example file "example_urllist.txt" is provided.
   91 
   92     Required attribute for all Sitemap types:
   93       path       - path to the file
   94     
   95     Required attribute for News Sitemaps:
   96       tag_order  - News Sitemaps metatag order, comma-separated.
   97                    (Example: tag_order="loc, changefreq, lastmod, 
   98                    publication_date, keywords")   
   99 
  100     Optional attributes:
  101       encoding   - encoding of the file if not US-ASCII
  102                      
  103   -->
  104   
  105 <!--
  106   <urllist 
  107     path="news_input.txt" 
  108     encoding="UTF-8"
  109     tag_order="loc, changefreq, priority, lastmod, publication_date,
  110                keywords, stock_tickers"
  111    />
  112    
  113   <urllist path="web_urls.txt" encoding="UTF-8" /> 
  114 -->
  115 
  116   <!-- ** MODIFY or DELETE **
  117     "directory" nodes tell the script to walk the file system
  118     and include all files and directories in the Sitemap.
  119 
  120     Required attributes:
  121       path       - path to begin walking from
  122       url        - URL equivalent of that path
  123 
  124     Optional attributes:
  125       default_file - name of the index or default file for directory URLs
  126       remove_empty_directories - Values are true or false.  Default is false.
  127                                  true=remove empty directories
  128   -->
  129 
  130 <!--
  131   <directory  path="/var/www/icons"    url="http://www.example.com/images/" />
  132   <directory
  133      path="/var/www/docroot"
  134      url="http://www.example.com/"
  135      default_file="index.html"
  136      remove_empty_directories="true"
  137   />
  138 -->
  139   
  140   <!-- ** MODIFY or DELETE **
  141     "accesslog" nodes tell the script to scan webserver log files to
  142     extract URLs on your site.  Both Common Logfile Format (Apache's default
  143     logfile) and Extended Logfile Format (IIS's default logfile) can be read.
  144 
  145     Required attributes:
  146       path       - path to the file
  147 
  148     Optional attributes:
  149       encoding   - encoding of the file if not US-ASCII
  150   -->
  151   
  152 <!--
  153   <accesslog  path="/etc/httpd/logs/access.log"       encoding="UTF-8"  />
  154   <accesslog  path="/etc/httpd/logs/access.log.0"     encoding="UTF-8"  />
  155   <accesslog  path="/etc/httpd/logs/access.log.1.gz"  encoding="UTF-8"  />
  156 -->
  157   
  158 
  159   <!-- ********************************************************
  160           FILTERS
  161 
  162   Filters specify wild-card patterns that the script compares
  163   against all URLs it finds.  Filters can be used to exclude
  164   certain URLs from your Sitemap, for instance if you have
  165   hidden content that you hope the search engines don't find.
  166 
  167   Filters can be either type="wildcard", which means standard
  168   path wildcards (* and ?) are used to compare against URLs,
  169   or type="regexp", which means regular expressions are used
  170   to compare.
  171 
  172   Filters are applied in the order specified in this file.
  173 
  174   An action="drop" filter causes exclusion of matching URLs.
  175   An action="pass" filter causes inclusion of matching URLs,
  176   shortcutting any other later filters that might also match.
  177   If no filter at all matches a URL, the URL will be included.
  178   Together you can build up fairly complex rules.
  179 
  180   The default action is "drop".
  181   The default type is "wildcard".
  182 
  183   You can MODIFY or DELETE these entries as appropriate for
  184   your site.  However, unlike above, the example entries in
  185   this section are not contrived and may be useful to you as
  186   they are.
  187   ********************************************************* -->
  188 
  189   <!-- Exclude URLs that end with a '~'   (e.g. emacs backup files)     -->
  190   <filter  action="drop"  type="wildcard"  pattern="*~"           />
  191 
  192   <!-- Exclude URLs within UNIX-style hidden files or directories       -->
  193   <filter action="drop" type="regexp"   pattern="/\.[^/]*"     />
  194   <filter action="drop" type="wildcard" pattern="*icons*" />
  195   <filter action="drop" type="wildcard" pattern="*logos*" />
  196   <filter action="drop" type="wildcard" pattern="*todo*" />
  197   <filter action="drop" type="wildcard" pattern="*Easter*" />
  198   <filter action="drop" type="wildcard" pattern="*/help/help/*" />
  199   <filter action="drop" type="wildcard" pattern="*/press/*.gif" />
  200   
  201 </site>