<?xml version='1.0' encoding='utf-8'?>
<rfc xmlns:xi="http://www.w3.org/2001/XInclude" version="3" category="std" consensus="true" docName="draft-koster-rep-12" indexInclude="true" ipr="trust200902" number="9309" prepTime="2022-09-09T17:39:23" scripts="Common,Latin" sortRefs="true" submissionType="IETF" symRefs="true" tocDepth="4" tocInclude="true" xml:lang="en">
  <link href="https://datatracker.ietf.org/doc/draft-koster-rep-12" rel="prev"/>
  <link href="https://dx.doi.org/10.17487/rfc9309" rel="alternate"/>
  <link href="urn:issn:2070-1721" rel="alternate"/>
  <front>
    <title abbrev="Robots Exclusion Protocol (REP)">Robots Exclusion Protocol</title>
    <seriesInfo name="RFC" value="9309" stream="IETF"/>
    <author initials="M." surname="Koster" fullname="Martijn Koster">
      <address>
        <postal>
          <extaddr>Stalworthy Manor Farm</extaddr>
          <street>Suton Lane</street>
          <city>Wymondham, Norfolk</city>
          <code>NR18 9JG</code>
          <country>United Kingdom</country>
        </postal>
        <email>m.koster@greenhills.co.uk</email>
      </address>
    </author>
    <author initials="G." surname="Illyes" fullname="Gary Illyes">
      <organization showOnFrontPage="true">Google LLC</organization>
      <address>
        <postal>
          <street>Brandschenkestrasse 110</street>
          <city>Zürich</city>
          <code>8002</code>
          <country>Switzerland</country>
        </postal>
        <email>garyillyes@google.com</email>
      </address>
    </author>
    <author initials="H." surname="Zeller" fullname="Henner Zeller">
      <organization showOnFrontPage="true">Google LLC</organization>
      <address>
        <postal>
          <street>1600 Amphitheatre Pkwy</street>
          <city>Mountain View</city>
          <region>CA</region>
          <code>94043</code>
          <country>United States of America</country>
        </postal>
        <email>henner@google.com</email>
      </address>
    </author>
    <author initials="L." surname="Sassman" fullname="Lizzi Sassman">
      <organization showOnFrontPage="true">Google LLC</organization>
      <address>
        <postal>
          <street>Brandschenkestrasse 110</street>
          <city>Zürich</city>
          <code>8002</code>
          <country>Switzerland</country>
        </postal>
        <email>lizzi@google.com</email>
      </address>
    </author>
    <date month="09" year="2022"/>
    <keyword>robot</keyword>
    <keyword>crawler</keyword>
    <keyword>robots.txt</keyword>
    <abstract pn="section-abstract">
      <t indent="0" pn="section-abstract-1"> This document specifies and extends the "Robots Exclusion Protocol"
          method originally defined by Martijn Koster in 1994 for service owners
          to control how content served by their services may be accessed, if at
          all, by automatic clients known as crawlers. Specifically, it adds
          definition language for the protocol, instructions for handling
          errors, and instructions for caching. </t>
    </abstract>
    <boilerplate>
      <section anchor="status-of-memo" numbered="false" removeInRFC="false" toc="exclude" pn="section-boilerplate.1">
        <name slugifiedName="name-status-of-this-memo">Status of This Memo</name>
        <t indent="0" pn="section-boilerplate.1-1">
            This is an Internet Standards Track document.
        </t>
        <t indent="0" pn="section-boilerplate.1-2">
            This document is a product of the Internet Engineering Task Force
            (IETF).  It represents the consensus of the IETF community.  It has
            received public review and has been approved for publication by
            the Internet Engineering Steering Group (IESG).  Further
            information on Internet Standards is available in Section 2 of 
            RFC 7841.
        </t>
        <t indent="0" pn="section-boilerplate.1-3">
            Information about the current status of this document, any
            errata, and how to provide feedback on it may be obtained at
            <eref target="https://www.rfc-editor.org/info/rfc9309" brackets="none"/>.
        </t>
      </section>
      <section anchor="copyright" numbered="false" removeInRFC="false" toc="exclude" pn="section-boilerplate.2">
        <name slugifiedName="name-copyright-notice">Copyright Notice</name>
        <t indent="0" pn="section-boilerplate.2-1">
            Copyright (c) 2022 IETF Trust and the persons identified as the
            document authors. All rights reserved.
        </t>
        <t indent="0" pn="section-boilerplate.2-2">
            This document is subject to BCP 78 and the IETF Trust's Legal
            Provisions Relating to IETF Documents
            (<eref target="https://trustee.ietf.org/license-info" brackets="none"/>) in effect on the date of
            publication of this document. Please review these documents
            carefully, as they describe your rights and restrictions with
            respect to this document. Code Components extracted from this
            document must include Revised BSD License text as described in
            Section 4.e of the Trust Legal Provisions and are provided without
            warranty as described in the Revised BSD License.
        </t>
      </section>
    </boilerplate>
    <toc>
      <section anchor="toc" numbered="false" removeInRFC="false" toc="exclude" pn="section-toc.1">
        <name slugifiedName="name-table-of-contents">Table of Contents</name>
        <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1">
          <li pn="section-toc.1-1.1">
            <t indent="0" keepWithNext="true" pn="section-toc.1-1.1.1"><xref derivedContent="1" format="counter" sectionFormat="of" target="section-1"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-introduction">Introduction</xref></t>
            <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1.1.2">
              <li pn="section-toc.1-1.1.2.1">
                <t indent="0" keepWithNext="true" pn="section-toc.1-1.1.2.1.1"><xref derivedContent="1.1" format="counter" sectionFormat="of" target="section-1.1"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-requirements-language">Requirements Language</xref></t>
              </li>
            </ul>
          </li>
          <li pn="section-toc.1-1.2">
            <t indent="0" pn="section-toc.1-1.2.1"><xref derivedContent="2" format="counter" sectionFormat="of" target="section-2"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-specification">Specification</xref></t>
            <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1.2.2">
              <li pn="section-toc.1-1.2.2.1">
                <t indent="0" keepWithNext="true" pn="section-toc.1-1.2.2.1.1"><xref derivedContent="2.1" format="counter" sectionFormat="of" target="section-2.1"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-protocol-definition">Protocol Definition</xref></t>
              </li>
              <li pn="section-toc.1-1.2.2.2">
                <t indent="0" pn="section-toc.1-1.2.2.2.1"><xref derivedContent="2.2" format="counter" sectionFormat="of" target="section-2.2"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-formal-syntax">Formal Syntax</xref></t>
                <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1.2.2.2.2">
                  <li pn="section-toc.1-1.2.2.2.2.1">
                    <t indent="0" pn="section-toc.1-1.2.2.2.2.1.1"><xref derivedContent="2.2.1" format="counter" sectionFormat="of" target="section-2.2.1"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-the-user-agent-line">The User-Agent Line</xref></t>
                  </li>
                  <li pn="section-toc.1-1.2.2.2.2.2">
                    <t indent="0" pn="section-toc.1-1.2.2.2.2.2.1"><xref derivedContent="2.2.2" format="counter" sectionFormat="of" target="section-2.2.2"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-the-allow-and-disallow-line">The "Allow" and "Disallow" Lines</xref></t>
                  </li>
                  <li pn="section-toc.1-1.2.2.2.2.3">
                    <t indent="0" pn="section-toc.1-1.2.2.2.2.3.1"><xref derivedContent="2.2.3" format="counter" sectionFormat="of" target="section-2.2.3"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-special-characters">Special Characters</xref></t>
                  </li>
                  <li pn="section-toc.1-1.2.2.2.2.4">
                    <t indent="0" pn="section-toc.1-1.2.2.2.2.4.1"><xref derivedContent="2.2.4" format="counter" sectionFormat="of" target="section-2.2.4"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-other-records">Other Records</xref></t>
                  </li>
                </ul>
              </li>
              <li pn="section-toc.1-1.2.2.3">
                <t indent="0" pn="section-toc.1-1.2.2.3.1"><xref derivedContent="2.3" format="counter" sectionFormat="of" target="section-2.3"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-access-method">Access Method</xref></t>
                <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1.2.2.3.2">
                  <li pn="section-toc.1-1.2.2.3.2.1">
                    <t indent="0" pn="section-toc.1-1.2.2.3.2.1.1"><xref derivedContent="2.3.1" format="counter" sectionFormat="of" target="section-2.3.1"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-access-results">Access Results</xref></t>
                    <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1.2.2.3.2.1.2">
                      <li pn="section-toc.1-1.2.2.3.2.1.2.1">
                        <t indent="0" pn="section-toc.1-1.2.2.3.2.1.2.1.1"><xref derivedContent="2.3.1.1" format="counter" sectionFormat="of" target="section-2.3.1.1"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-successful-access">Successful Access</xref></t>
                      </li>
                      <li pn="section-toc.1-1.2.2.3.2.1.2.2">
                        <t indent="0" pn="section-toc.1-1.2.2.3.2.1.2.2.1"><xref derivedContent="2.3.1.2" format="counter" sectionFormat="of" target="section-2.3.1.2"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-redirects">Redirects</xref></t>
                      </li>
                      <li pn="section-toc.1-1.2.2.3.2.1.2.3">
                        <t indent="0" pn="section-toc.1-1.2.2.3.2.1.2.3.1"><xref derivedContent="2.3.1.3" format="counter" sectionFormat="of" target="section-2.3.1.3"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-unavailable-status">"Unavailable" Status</xref></t>
                      </li>
                      <li pn="section-toc.1-1.2.2.3.2.1.2.4">
                        <t indent="0" pn="section-toc.1-1.2.2.3.2.1.2.4.1"><xref derivedContent="2.3.1.4" format="counter" sectionFormat="of" target="section-2.3.1.4"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-unreachable-status">"Unreachable" Status</xref></t>
                      </li>
                      <li pn="section-toc.1-1.2.2.3.2.1.2.5">
                        <t indent="0" pn="section-toc.1-1.2.2.3.2.1.2.5.1"><xref derivedContent="2.3.1.5" format="counter" sectionFormat="of" target="section-2.3.1.5"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-parsing-errors">Parsing Errors</xref></t>
                      </li>
                    </ul>
                  </li>
                </ul>
              </li>
              <li pn="section-toc.1-1.2.2.4">
                <t indent="0" pn="section-toc.1-1.2.2.4.1"><xref derivedContent="2.4" format="counter" sectionFormat="of" target="section-2.4"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-caching">Caching</xref></t>
              </li>
              <li pn="section-toc.1-1.2.2.5">
                <t indent="0" pn="section-toc.1-1.2.2.5.1"><xref derivedContent="2.5" format="counter" sectionFormat="of" target="section-2.5"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-limits">Limits</xref></t>
              </li>
            </ul>
          </li>
          <li pn="section-toc.1-1.3">
            <t indent="0" pn="section-toc.1-1.3.1"><xref derivedContent="3" format="counter" sectionFormat="of" target="section-3"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-security-considerations">Security Considerations</xref></t>
          </li>
          <li pn="section-toc.1-1.4">
            <t indent="0" pn="section-toc.1-1.4.1"><xref derivedContent="4" format="counter" sectionFormat="of" target="section-4"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-iana-considerations">IANA Considerations</xref></t>
          </li>
          <li pn="section-toc.1-1.5">
            <t indent="0" pn="section-toc.1-1.5.1"><xref derivedContent="5" format="counter" sectionFormat="of" target="section-5"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-examples">Examples</xref></t>
            <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1.5.2">
              <li pn="section-toc.1-1.5.2.1">
                <t indent="0" pn="section-toc.1-1.5.2.1.1"><xref derivedContent="5.1" format="counter" sectionFormat="of" target="section-5.1"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-simple-example">Simple Example</xref></t>
              </li>
              <li pn="section-toc.1-1.5.2.2">
                <t indent="0" pn="section-toc.1-1.5.2.2.1"><xref derivedContent="5.2" format="counter" sectionFormat="of" target="section-5.2"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-longest-match">Longest Match</xref></t>
              </li>
            </ul>
          </li>
          <li pn="section-toc.1-1.6">
            <t indent="0" pn="section-toc.1-1.6.1"><xref derivedContent="6" format="counter" sectionFormat="of" target="section-6"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-references">References</xref></t>
            <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1.6.2">
              <li pn="section-toc.1-1.6.2.1">
                <t indent="0" pn="section-toc.1-1.6.2.1.1"><xref derivedContent="6.1" format="counter" sectionFormat="of" target="section-6.1"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-normative-references">Normative References</xref></t>
              </li>
              <li pn="section-toc.1-1.6.2.2">
                <t indent="0" pn="section-toc.1-1.6.2.2.1"><xref derivedContent="6.2" format="counter" sectionFormat="of" target="section-6.2"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-informative-references">Informative References</xref></t>
              </li>
            </ul>
          </li>
          <li pn="section-toc.1-1.7">
            <t indent="0" pn="section-toc.1-1.7.1"><xref derivedContent="" format="none" sectionFormat="of" target="section-appendix.a"/><xref derivedContent="" format="title" sectionFormat="of" target="name-authors-addresses">Authors' Addresses</xref></t>
          </li>
        </ul>
      </section>
    </toc>
  </front>
  <middle>
    <section anchor="introduction" numbered="true" toc="include" removeInRFC="false" pn="section-1">
      <name slugifiedName="name-introduction">Introduction</name>
      <t indent="0" pn="section-1-1"> This document applies to services that provide resources that clients
          can access through URIs as defined in <xref target="RFC3986" format="default" sectionFormat="of" derivedContent="RFC3986"/>. For example,
          in the context of HTTP, a browser is a client that displays the content of a
          web page. </t>
      <t indent="0" pn="section-1-2"> Crawlers are automated clients. Search engines, for instance, have crawlers to
          recursively traverse links for indexing as defined in
          <xref target="RFC8288" format="default" sectionFormat="of" derivedContent="RFC8288"/>. </t>
      <t indent="0" pn="section-1-3"> It may be inconvenient for service owners if crawlers visit the entirety of
          their URI space. This document specifies the rules originally defined by
          the "Robots Exclusion Protocol" <xref target="ROBOTSTXT" format="default" sectionFormat="of" derivedContent="ROBOTSTXT"/> that crawlers
          are requested to honor when accessing URIs. </t>
      <t indent="0" pn="section-1-4"> These rules are not a form of access authorization. </t>
      <section anchor="requirements-language" numbered="true" toc="include" removeInRFC="false" pn="section-1.1">
        <name slugifiedName="name-requirements-language">Requirements Language</name>
        <t indent="0" pn="section-1.1-1">The key words "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>",
        "<bcp14>REQUIRED</bcp14>", "<bcp14>SHALL</bcp14>",
        "<bcp14>SHALL NOT</bcp14>", "<bcp14>SHOULD</bcp14>",
        "<bcp14>SHOULD NOT</bcp14>",
        "<bcp14>RECOMMENDED</bcp14>", "<bcp14>NOT RECOMMENDED</bcp14>",
        "<bcp14>MAY</bcp14>", and "<bcp14>OPTIONAL</bcp14>" in this document
        are to be interpreted as described in BCP 14
        <xref target="RFC2119" format="default" sectionFormat="of" derivedContent="RFC2119"/> <xref target="RFC8174" format="default" sectionFormat="of" derivedContent="RFC8174"/> when, and only
        when, they appear in all capitals, as shown here.</t>
      </section>
    </section>
    <section anchor="specification" numbered="true" toc="include" removeInRFC="false" pn="section-2">
      <name slugifiedName="name-specification">Specification</name>
      <section anchor="protocol-definition" numbered="true" toc="include" removeInRFC="false" pn="section-2.1">
        <name slugifiedName="name-protocol-definition">Protocol Definition</name>
        <t indent="0" pn="section-2.1-1"> The protocol language consists of rule(s) and group(s) that the service
            makes available in a file named "robots.txt" as described in
            <xref target="access-method" format="default" sectionFormat="of" derivedContent="Section 2.3"/>: </t>
        <dl spacing="normal" indent="3" newline="false" pn="section-2.1-2">
          <dt pn="section-2.1-2.1"> Rule:</dt>
          <dd pn="section-2.1-2.2"> A line with a key-value pair that defines how a
                crawler may access URIs. See
                <xref target="the-allow-and-disallow-lines" format="default" sectionFormat="of" derivedContent="Section 2.2.2"/>. </dd>
          <dt pn="section-2.1-2.3"> Group:</dt>
          <dd pn="section-2.1-2.4"> One or more user-agent lines that are followed by
                one or more rules. The group is terminated by a user-agent line
                or end of file. See <xref target="the-user-agent-line" format="default" sectionFormat="of" derivedContent="Section 2.2.1"/>.
                The last group may have no rules, which means it implicitly
                allows everything. </dd>
        </dl>
      </section>
      <section anchor="formal-syntax" numbered="true" toc="include" removeInRFC="false" pn="section-2.2">
        <name slugifiedName="name-formal-syntax">Formal Syntax</name>
        <t indent="0" pn="section-2.2-1"> Below is an Augmented Backus-Naur Form (ABNF) description, as described
            in <xref target="RFC5234" format="default" sectionFormat="of" derivedContent="RFC5234"/>. </t>
        <sourcecode name="" type="abnf" markers="false" pn="section-2.2-2">
 robotstxt = *(group / emptyline)
 group = startgroupline                ; We start with a user-agent
                                       ; line
        *(startgroupline / emptyline)  ; ... and possibly more
                                       ; user-agent lines
        *(rule / emptyline)            ; followed by rules relevant
                                       ; for the preceding
                                       ; user-agent lines

 startgroupline = *WS "user-agent" *WS ":" *WS product-token EOL

 rule = *WS ("allow" / "disallow") *WS ":"
       *WS (path-pattern / empty-pattern) EOL

 ; parser implementors: define additional lines you need (for
 ; example, Sitemaps).

 product-token = identifier / "*"
 path-pattern = "/" *UTF8-char-noctl ; valid URI path pattern
 empty-pattern = *WS

 identifier = 1*(%x2D / %x41-5A / %x5F / %x61-7A)
 comment = "#" *(UTF8-char-noctl / WS / "#")
 emptyline = EOL
 EOL = *WS [comment] NL ; end-of-line may have
                        ; optional trailing comment
 NL = %x0D / %x0A / %x0D.0A
 WS = %x20 / %x09

 ; UTF8 derived from RFC 3629, but excluding control characters

 UTF8-char-noctl = UTF8-1-noctl / UTF8-2 / UTF8-3 / UTF8-4
 UTF8-1-noctl = %x21 / %x22 / %x24-7E ; excluding control, space, "#"
 UTF8-2 = %xC2-DF UTF8-tail
 UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2UTF8-tail /
          %xED %x80-9F UTF8-tail / %xEE-EF 2UTF8-tail
 UTF8-4 = %xF0 %x90-BF 2UTF8-tail / %xF1-F3 3UTF8-tail /
          %xF4 %x80-8F 2UTF8-tail

 UTF8-tail = %x80-BF
</sourcecode>
        <section anchor="the-user-agent-line" numbered="true" toc="include" removeInRFC="false" pn="section-2.2.1">
          <name slugifiedName="name-the-user-agent-line">The User-Agent Line</name>
          <t indent="0" pn="section-2.2.1-1"> Crawlers set their own name, which is called a product token, to find
              relevant groups. The product token <bcp14>MUST</bcp14> contain only
              uppercase and lowercase letters ("a-z" and "A-Z"),
              underscores ("_"), and hyphens ("-").
              The product token <bcp14>SHOULD</bcp14>
              be a substring of the identification string that the crawler sends to
              the service. For example, in the case of HTTP
              <xref target="RFC9110" format="default" sectionFormat="of" derivedContent="RFC9110"/>, the product token
              <bcp14>SHOULD</bcp14> be a substring in the User-Agent header.
              The identification string <bcp14>SHOULD</bcp14> describe the purpose of
              the crawler. Here's an example of a User-Agent HTTP request header
              with a link pointing to a page describing the purpose of the
              ExampleBot crawler, which appears as a substring in the User-Agent HTTP
              header and as a product token in the robots.txt user-agent line: </t>
          <figure anchor="fig-1" align="left" suppress-title="false" pn="figure-1">
            <name slugifiedName="name-example-of-a-user-agent-htt">Example of a User-Agent HTTP header and
                  robots.txt user-agent line for the ExampleBot product token</name>
            <artwork name="" type="" align="center" alt="" pn="section-2.2.1-2.1">
+==========================================+========================+
| User-Agent HTTP header                   | robots.txt user-agent  |
|                                          | line                   |
+==========================================+========================+
| User-Agent: Mozilla/5.0 (compatible;     | user-agent: ExampleBot |
| ExampleBot/0.1;                          |                        |
| https://www.example.com/bot.html)        |                        |
+------------------------------------------+------------------------+
</artwork>
          </figure>
          <t indent="0" pn="section-2.2.1-3"> Note that the product token (ExampleBot) is a substring of
            the User-Agent HTTP header.</t>
          <t indent="0" pn="section-2.2.1-4"> Crawlers <bcp14>MUST</bcp14> use case-insensitive matching
              to find the group that matches the product token and then
              obey the rules of the group. If there is more than one
              group matching the user-agent, the matching groups' rules
              <bcp14>MUST</bcp14> be combined into one group and parsed
              according to
              <xref target="the-allow-and-disallow-lines" format="default" sectionFormat="of" derivedContent="Section 2.2.2"/>.</t>
          <figure anchor="fig-2" align="left" suppress-title="false" pn="figure-2">
            <name slugifiedName="name-example-of-how-to-merge-two">Example of how to merge two robots.txt
                  groups that match the same product token</name>
            <artwork name="" type="" align="center" alt="" pn="section-2.2.1-5.1">
+========================================+========================+
| Two groups that match the same product | Merged group           |
| token exactly                          |                        |
+========================================+========================+
| user-agent: ExampleBot                 | user-agent: ExampleBot |
| disallow: /foo                         | disallow: /foo         |
| disallow: /bar                         | disallow: /bar         |
|                                        | disallow: /baz         |
| user-agent: ExampleBot                 |                        |
| disallow: /baz                         |                        |
+----------------------------------------+------------------------+
</artwork>
          </figure>
          <t indent="0" pn="section-2.2.1-6"> If no matching group exists, crawlers <bcp14>MUST</bcp14> obey the group
              with a user-agent line with the "*" value, if present. </t>
          <figure anchor="fig-3" align="left" suppress-title="false" pn="figure-3">
            <name slugifiedName="name-example-of-no-matching-grou">Example of no matching groups other than the "*"
                  for the ExampleBot product token</name>
            <artwork name="" type="" align="center" alt="" pn="section-2.2.1-7.1">
+==================================+======================+
| Two groups that don't explicitly | Applicable group for |
| match ExampleBot                 | ExampleBot           |
+==================================+======================+
| user-agent: *                    | user-agent: *        |
| disallow: /foo                   | disallow: /foo       |
| disallow: /bar                   | disallow: /bar       |
|                                  |                      |
| user-agent: BazBot               |                      |
| disallow: /baz                   |                      |
+----------------------------------+----------------------+
</artwork>
          </figure>
          <t indent="0" pn="section-2.2.1-8"> If no group matches the product token and there is no group with a user-agent
              line with the "*" value, or no groups are present at all, no
              rules apply. </t>
        </section>
        <section anchor="the-allow-and-disallow-lines" numbered="true" toc="include" removeInRFC="false" pn="section-2.2.2">
          <name slugifiedName="name-the-allow-and-disallow-line">The "Allow" and "Disallow" Lines</name>
          <t indent="0" pn="section-2.2.2-1"> These lines indicate whether accessing a URI that matches the
              corresponding path is allowed or disallowed. </t>
          <t indent="0" pn="section-2.2.2-2"> To evaluate if access to a URI is allowed, a crawler <bcp14>MUST</bcp14>
              match the paths in "allow" and "disallow" rules against the URI.
              The matching <bcp14>SHOULD</bcp14> be case sensitive. The matching
              <bcp14>MUST</bcp14> start with the first octet of the path. The most
              specific match found <bcp14>MUST</bcp14> be used. The most specific
              match is the match that has the most octets. Duplicate rules in a
              group <bcp14>MAY</bcp14> be deduplicated. If an "allow" rule and a "disallow"
              rule are equivalent, then the "allow" rule <bcp14>SHOULD</bcp14> be used. If no
              match is found amongst the rules in a group for a matching user-agent
              or there are no rules in the group, the URI is allowed. The
              /robots.txt URI is implicitly allowed. </t>
          <t indent="0" pn="section-2.2.2-3"> Octets in the URI and robots.txt paths outside the range of the
              ASCII coded character set, and those in the reserved range defined
              by <xref target="RFC3986" format="default" sectionFormat="of" derivedContent="RFC3986"/>, <bcp14>MUST</bcp14> be percent-encoded as
              defined by <xref target="RFC3986" format="default" sectionFormat="of" derivedContent="RFC3986"/> prior to comparison. </t>
          <t indent="0" pn="section-2.2.2-4"> If a percent-encoded ASCII octet is encountered in the URI, it
              <bcp14>MUST</bcp14> be unencoded prior to comparison, unless it is a
              reserved character in the URI as defined by <xref target="RFC3986" format="default" sectionFormat="of" derivedContent="RFC3986"/>
              or the character is outside the unreserved character range. The match
              evaluates positively if and only if the end of the path from the rule
              is reached before a difference in octets is encountered. </t>
          <t indent="0" pn="section-2.2.2-5"> For example: </t>
          <figure anchor="fig-4" align="left" suppress-title="false" pn="figure-4">
            <name slugifiedName="name-examples-of-matching-percen">Examples of matching percent-encoded URI components</name>
            <artwork name="" type="" align="center" alt="" pn="section-2.2.2-6.1">
+==================+=======================+=======================+
| Path             | Encoded Path          | Path to Match         |
+==================+=======================+=======================+
| /foo/bar?baz=quz | /foo/bar?baz=quz      | /foo/bar?baz=quz      |
+------------------+-----------------------+-----------------------+
| /foo/bar?baz=    | /foo/bar?baz=         | /foo/bar?baz=         |
| https://foo.bar  | https%3A%2F%2Ffoo.bar | https%3A%2F%2Ffoo.bar |
+------------------+-----------------------+-----------------------+
| /foo/bar/        | /foo/bar/%E3%83%84    | /foo/bar/%E3%83%84    |
| U+30C4           |                       |                       |
+------------------+-----------------------+-----------------------+
| /foo/            | /foo/bar/%E3%83%84    | /foo/bar/%E3%83%84    |
| bar/%E3%83%84    |                       |                       |
+------------------+-----------------------+-----------------------+
| /foo/            | /foo/bar/%62%61%7A    | /foo/bar/baz          |
| bar/%62%61%7A    |                       |                       |
+------------------+-----------------------+-----------------------+
</artwork>
          </figure>
          <t indent="0" pn="section-2.2.2-7"> The crawler <bcp14>SHOULD</bcp14> ignore "disallow" and
              "allow" rules that are not in any group (for example, any
              rule that precedes the first user-agent line). </t>
          <t indent="0" pn="section-2.2.2-8"> Implementors <bcp14>MAY</bcp14> bridge encoding mismatches if they
              detect that the robots.txt file is not UTF-8 encoded. </t>
        </section>
        <section anchor="special-characters" numbered="true" toc="include" removeInRFC="false" pn="section-2.2.3">
          <name slugifiedName="name-special-characters">Special Characters</name>
          <t indent="0" pn="section-2.2.3-1"> Crawlers <bcp14>MUST</bcp14> support the following special characters: </t>
          <figure anchor="fig-5" align="left" suppress-title="false" pn="figure-5">
            <name slugifiedName="name-list-of-special-characters-">List of special characters in robots.txt files</name>
            <artwork name="" type="" align="center" alt="" pn="section-2.2.3-2.1">
+===========+===================+==============================+
| Character | Description       | Example                      |
+===========+===================+==============================+
| #         | Designates a line | allow: / # comment in line   |
|           | comment.          |                              |
|           |                   | # comment on its own line    |
+-----------+-------------------+------------------------------+
| $         | Designates the    | allow: /this/path/exactly$   |
|           | end of the match  |                              |
|           | pattern.          |                              |
+-----------+-------------------+------------------------------+
| *         | Designates 0 or   | allow: /this/*/exactly       |
|           | more instances of |                              |
|           | any character.    |                              |
+-----------+-------------------+------------------------------+
</artwork>
          </figure>
          <t indent="0" pn="section-2.2.3-3"> If crawlers match special characters verbatim in the URI, crawlers
              <bcp14>SHOULD</bcp14> use "%" encoding. For example: </t>
          <figure anchor="fig-6" align="left" suppress-title="false" pn="figure-6">
            <name slugifiedName="name-example-of-percent-encoding">Example of percent-encoding</name>
            <artwork name="" type="" align="center" alt="" pn="section-2.2.3-4.1">
+============================+====================================+
| Percent-encoded Pattern    | URI                                |
+============================+====================================+
| /path/file-with-a-%2A.html | https://www.example.com/path/      |
|                            | file-with-a-*.html                 |
+----------------------------+------------------------------------+
| /path/foo-%24              | https://www.example.com/path/foo-$ |
+----------------------------+------------------------------------+
</artwork>
          </figure>
        </section>
        <section anchor="other-records" numbered="true" toc="include" removeInRFC="false" pn="section-2.2.4">
          <name slugifiedName="name-other-records">Other Records</name>
          <t indent="0" pn="section-2.2.4-1"> Crawlers <bcp14>MAY</bcp14> interpret other records that are not
              part of the robots.txt protocol -- for example, "Sitemaps"
              <xref target="SITEMAPS" format="default" sectionFormat="of" derivedContent="SITEMAPS"/>. Crawlers <bcp14>MAY</bcp14> be lenient when
              interpreting other records. For example, crawlers may accept
              common misspellings of the record. </t>
          <t indent="0" pn="section-2.2.4-2"> Parsing of other records
              <bcp14>MUST NOT</bcp14> interfere with the parsing of explicitly
              defined records in <xref target="specification" format="default" sectionFormat="of" derivedContent="Section 2"/>.
              For example, a "Sitemaps" record <bcp14>MUST NOT</bcp14> terminate a
              group. </t>
        </section>
      </section>
      <section anchor="access-method" numbered="true" toc="include" removeInRFC="false" pn="section-2.3">
        <name slugifiedName="name-access-method">Access Method</name>
        <t indent="0" pn="section-2.3-1"> The rules <bcp14>MUST</bcp14> be accessible in a file named
          "/robots.txt" (all lowercase) in the top-level path of
          the service. The file <bcp14>MUST</bcp14> be UTF-8 encoded (as
          defined in <xref target="RFC3629" format="default" sectionFormat="of" derivedContent="RFC3629"/>) and Internet Media Type
          "text/plain"
          (as defined in <xref target="RFC2046" format="default" sectionFormat="of" derivedContent="RFC2046"/>). </t>
        <t indent="0" pn="section-2.3-2"> As per <xref target="RFC3986" format="default" sectionFormat="of" derivedContent="RFC3986"/>, the URI of the robots.txt file is: </t>
        <t indent="0" pn="section-2.3-3"> "scheme:[//authority]/robots.txt" </t>
        <t indent="0" pn="section-2.3-4"> For example, in the context of HTTP or FTP, the URI is: </t>
        <artwork name="" type="" align="left" alt="" pn="section-2.3-5">
          https://www.example.com/robots.txt

          ftp://ftp.example.com/robots.txt
          </artwork>
        <section anchor="access-results" numbered="true" toc="include" removeInRFC="false" pn="section-2.3.1">
          <name slugifiedName="name-access-results">Access Results</name>
          <section anchor="successful-access" numbered="true" toc="include" removeInRFC="false" pn="section-2.3.1.1">
            <name slugifiedName="name-successful-access">Successful Access</name>
            <t indent="0" pn="section-2.3.1.1-1"> If the crawler successfully downloads the robots.txt file, the
              crawler <bcp14>MUST</bcp14> follow the parseable rules. </t>
          </section>
          <section anchor="redirects" numbered="true" toc="include" removeInRFC="false" pn="section-2.3.1.2">
            <name slugifiedName="name-redirects">Redirects</name>
            <t indent="0" pn="section-2.3.1.2-1"> It's possible that a server responds to a robots.txt fetch
              request with a redirect, such as HTTP 301 or HTTP 302 in the
              case of HTTP. The crawlers <bcp14>SHOULD</bcp14> follow at
              least five consecutive redirects, even across authorities
              (for example, hosts in the case of HTTP). </t>
            <t indent="0" pn="section-2.3.1.2-2"> If a robots.txt file is reached within five consecutive
              redirects, the robots.txt file <bcp14>MUST</bcp14> be fetched,
              parsed, and its rules followed in the context of the initial
              authority. </t>
            <t indent="0" pn="section-2.3.1.2-3"> If there are more than five consecutive redirects, crawlers
              <bcp14>MAY</bcp14> assume that the robots.txt file is
              unavailable. </t>
          </section>
          <section anchor="unavailable-status" numbered="true" toc="include" removeInRFC="false" pn="section-2.3.1.3">
            <name slugifiedName="name-unavailable-status">"Unavailable" Status</name>
            <t indent="0" pn="section-2.3.1.3-1"> "Unavailable" means the crawler tries to fetch the robots.txt file
              and the server responds with status codes indicating that the resource in question is unavailable. For
              example, in the context of HTTP, such status codes are
              in the 400-499 range. </t>
            <t indent="0" pn="section-2.3.1.3-2"> If a server status code indicates that the robots.txt file is
              unavailable to the crawler, then the crawler <bcp14>MAY</bcp14> access any
              resources on the server. </t>
          </section>
          <section anchor="unreachable-status" numbered="true" toc="include" removeInRFC="false" pn="section-2.3.1.4">
            <name slugifiedName="name-unreachable-status">"Unreachable" Status</name>
            <t indent="0" pn="section-2.3.1.4-1"> If the robots.txt file is unreachable due to server or network
              errors, this means the robots.txt file is undefined and the crawler
              <bcp14>MUST</bcp14> assume complete disallow. For example, in
              the context of HTTP, server errors are identified by status codes
              in the 500-599 range. </t>
            <t indent="0" pn="section-2.3.1.4-2"> If the robots.txt file is undefined for a reasonably long period of
              time (for example, 30 days), crawlers <bcp14>MAY</bcp14> assume that
              the robots.txt file is unavailable as defined in
              <xref target="unavailable-status" format="default" sectionFormat="of" derivedContent="Section 2.3.1.3"/> or continue to use a cached
              copy. </t>
          </section>
          <section anchor="parsing-errors" numbered="true" toc="include" removeInRFC="false" pn="section-2.3.1.5">
            <name slugifiedName="name-parsing-errors">Parsing Errors</name>
            <t indent="0" pn="section-2.3.1.5-1"> Crawlers <bcp14>MUST</bcp14> try to parse each line of the
              robots.txt file. Crawlers <bcp14>MUST</bcp14> use the parseable
              rules. </t>
          </section>
        </section>
      </section>
      <section anchor="caching" numbered="true" toc="include" removeInRFC="false" pn="section-2.4">
        <name slugifiedName="name-caching">Caching</name>
        <t indent="0" pn="section-2.4-1"> Crawlers <bcp14>MAY</bcp14> cache the fetched robots.txt file's
          contents. Crawlers <bcp14>MAY</bcp14> use standard cache control as
          defined in <xref target="RFC9111" format="default" sectionFormat="of" derivedContent="RFC9111"/>. Crawlers
          <bcp14>SHOULD NOT</bcp14> use the cached version for more than 24
          hours, unless the robots.txt file is unreachable. </t>
      </section>
      <section anchor="limits" numbered="true" toc="include" removeInRFC="false" pn="section-2.5">
        <name slugifiedName="name-limits">Limits</name>
        <t indent="0" pn="section-2.5-1"> Crawlers <bcp14>SHOULD</bcp14> impose a parsing limit to protect their systems;
          see <xref target="security" format="default" sectionFormat="of" derivedContent="Section 3"/>. The parsing limit <bcp14>MUST</bcp14> be at least
          500 kibibytes <xref target="KiB" format="default" sectionFormat="of" derivedContent="KiB"/>. </t>
      </section>
    </section>
    <section anchor="security" numbered="true" toc="include" removeInRFC="false" pn="section-3">
      <name slugifiedName="name-security-considerations">Security Considerations</name>
      <t indent="0" pn="section-3-1"> The Robots Exclusion Protocol is not a substitute for valid
          content security measures. Listing paths in the robots.txt file
          exposes them publicly and thus makes the paths discoverable. To
          control access to the URI paths in a robots.txt file, users of
          the protocol should employ a valid security measure relevant to
          the application layer on which the robots.txt file is served --
          for example, in the case of HTTP, HTTP Authentication as defined in
          <xref target="RFC9110" format="default" sectionFormat="of" derivedContent="RFC9110"/>. </t>
      <t indent="0" pn="section-3-2"> To protect their systems against attacks, implementors
          of robots.txt parsing and matching logic should take the
          following considerations into account: </t>
      <dl spacing="normal" indent="3" newline="false" pn="section-3-3">
        <dt pn="section-3-3.1"> Memory management:</dt>
        <dd pn="section-3-3.2">
          <xref target="limits" format="default" sectionFormat="of" derivedContent="Section 2.5"/> defines the lower
              limit of bytes that must be processed, which inherently also
              protects the parser from out-of-memory scenarios. </dd>
        <dt pn="section-3-3.3"> Invalid characters:</dt>
        <dd pn="section-3-3.4">
          <xref target="formal-syntax" format="default" sectionFormat="of" derivedContent="Section 2.2"/> defines
              a set of characters that parsers and matchers can expect in
              robots.txt files. Out-of-bound characters should be rejected
              as invalid, which limits the available attack vectors that
              attempt to compromise the system. </dd>
        <dt pn="section-3-3.5"> Untrusted content:</dt>
        <dd pn="section-3-3.6"> Implementors should treat the content of
              a robots.txt file as untrusted content, as defined by the
              specification of the application layer used. For example,
              in the context of HTTP, implementors should follow the
              Security Considerations section of
              <xref target="RFC9110" format="default" sectionFormat="of" derivedContent="RFC9110"/>. </dd>
      </dl>
    </section>
    <section anchor="IANA" numbered="true" toc="include" removeInRFC="false" pn="section-4">
      <name slugifiedName="name-iana-considerations">IANA Considerations</name>
      <t indent="0" pn="section-4-1"> This document has no IANA actions. </t>
    </section>
    <section anchor="examples" numbered="true" toc="include" removeInRFC="false" pn="section-5">
      <name slugifiedName="name-examples">Examples</name>
      <section anchor="simple-example" numbered="true" toc="include" removeInRFC="false" pn="section-5.1">
        <name slugifiedName="name-simple-example">Simple Example</name>
        <t indent="0" pn="section-5.1-1"> The following example shows: </t>
        <dl spacing="normal" indent="3" newline="false" pn="section-5.1-2">
          <dt pn="section-5.1-2.1"> *:</dt>
          <dd pn="section-5.1-2.2"> A group that's relevant to all user agents that
                don't have an explicitly defined matching group. It allows
                access to the URLs with the /publications/ path prefix, and it
                restricts access to the URLs with the /example/ path prefix
                and to all URLs with a .gif suffix. The "*" character designates
                any character, including the otherwise-required forward
                slash; see <xref target="formal-syntax" format="default" sectionFormat="of" derivedContent="Section 2.2"/>. </dd>
          <dt pn="section-5.1-2.3"> foobot:</dt>
          <dd pn="section-5.1-2.4"> A regular case. A single user agent followed
                by rules. The crawler only has access to two URL path
                prefixes on the site -- /example/page.html and
                /example/allowed.gif. The rules of the group are missing
                the optional space character, which is acceptable as
                defined in <xref target="formal-syntax" format="default" sectionFormat="of" derivedContent="Section 2.2"/>. </dd>
          <dt pn="section-5.1-2.5"> barbot and bazbot:</dt>
          <dd pn="section-5.1-2.6"> A group that's relevant for more
                than one user agent. The crawlers are not allowed to access
                the URLs with the /example/page.html path prefix but
                otherwise have unrestricted access to the rest of the URLs
                on the site. </dd>
          <dt pn="section-5.1-2.7"> quxbot:</dt>
          <dd pn="section-5.1-2.8"> An empty group at the end of the file. The crawler has
                unrestricted access to the URLs on the site. </dd>
        </dl>
        <artwork name="" type="" align="left" alt="" pn="section-5.1-3">
            User-Agent: *
            Disallow: *.gif$
            Disallow: /example/
            Allow: /publications/

            User-Agent: foobot
            Disallow:/
            Allow:/example/page.html
            Allow:/example/allowed.gif

            User-Agent: barbot
            User-Agent: bazbot
            Disallow: /example/page.html

            User-Agent: quxbot

            EOF
          </artwork>
      </section>
      <section anchor="longest-match" numbered="true" toc="include" removeInRFC="false" pn="section-5.2">
        <name slugifiedName="name-longest-match">Longest Match</name>
        <t indent="0" pn="section-5.2-1"> The following example shows that in the case of two rules, the
            longest one is used for matching. In the following case,
            /example/page/disallowed.gif <bcp14>MUST</bcp14> be used for
            the URI example.com/example/page/disallowed.gif. </t>
        <artwork name="" type="" align="left" alt="" pn="section-5.2-2">
            User-Agent: foobot
            Allow: /example/page/
            Disallow: /example/page/disallowed.gif
          </artwork>
      </section>
    </section>
  </middle>
  <back>
    <references pn="section-6">
      <name slugifiedName="name-references">References</name>
      <references pn="section-6.1">
        <name slugifiedName="name-normative-references">Normative References</name>
        <reference anchor="RFC2046" target="https://www.rfc-editor.org/info/rfc2046" quoteTitle="true" derivedAnchor="RFC2046">
          <front>
            <title>Multipurpose Internet Mail Extensions (MIME) Part Two: Media Types</title>
            <author fullname="N. Freed" initials="N." surname="Freed"/>
            <author fullname="N. Borenstein" initials="N." surname="Borenstein"/>
            <date month="November" year="1996"/>
            <abstract>
              <t indent="0">This second document defines the general structure of the MIME media typing system and defines an initial set of media types. [STANDARDS-TRACK]</t>
            </abstract>
          </front>
          <seriesInfo name="RFC" value="2046"/>
          <seriesInfo name="DOI" value="10.17487/RFC2046"/>
        </reference>
        <reference anchor="RFC2119" target="https://www.rfc-editor.org/info/rfc2119" quoteTitle="true" derivedAnchor="RFC2119">
          <front>
            <title>Key words for use in RFCs to Indicate Requirement Levels</title>
            <author fullname="S. Bradner" initials="S." surname="Bradner"/>
            <date month="March" year="1997"/>
            <abstract>
              <t indent="0">In many standards track documents several words are used to signify the requirements in the specification.  These words are often capitalized.  This document defines these words as they should be interpreted in IETF documents.  This document specifies an Internet Best Current Practices for the Internet Community, and requests discussion and suggestions for improvements.</t>
            </abstract>
          </front>
          <seriesInfo name="BCP" value="14"/>
          <seriesInfo name="RFC" value="2119"/>
          <seriesInfo name="DOI" value="10.17487/RFC2119"/>
        </reference>
        <reference anchor="RFC3629" target="https://www.rfc-editor.org/info/rfc3629" quoteTitle="true" derivedAnchor="RFC3629">
          <front>
            <title>UTF-8, a transformation format of ISO 10646</title>
            <author fullname="F. Yergeau" initials="F." surname="Yergeau"/>
            <date month="November" year="2003"/>
            <abstract>
              <t indent="0">ISO/IEC 10646-1 defines a large character set called the Universal Character Set (UCS) which encompasses most of the world's writing systems.  The originally proposed encodings of the UCS, however, were not compatible with many current applications and protocols, and this has led to the development of UTF-8, the object of this memo.  UTF-8 has the characteristic of preserving the full US-ASCII range, providing compatibility with file systems, parsers and other software that rely on US-ASCII values but are transparent to other values.  This memo obsoletes and replaces RFC 2279.</t>
            </abstract>
          </front>
          <seriesInfo name="STD" value="63"/>
          <seriesInfo name="RFC" value="3629"/>
          <seriesInfo name="DOI" value="10.17487/RFC3629"/>
        </reference>
        <reference anchor="RFC3986" target="https://www.rfc-editor.org/info/rfc3986" quoteTitle="true" derivedAnchor="RFC3986">
          <front>
            <title>Uniform Resource Identifier (URI): Generic Syntax</title>
            <author fullname="T. Berners-Lee" initials="T." surname="Berners-Lee"/>
            <author fullname="R. Fielding" initials="R." surname="Fielding"/>
            <author fullname="L. Masinter" initials="L." surname="Masinter"/>
            <date month="January" year="2005"/>
            <abstract>
              <t indent="0">A Uniform Resource Identifier (URI) is a compact sequence of characters that identifies an abstract or physical resource.  This specification defines the generic URI syntax and a process for resolving URI references that might be in relative form, along with guidelines and security considerations for the use of URIs on the Internet.  The URI syntax defines a grammar that is a superset of all valid URIs, allowing an implementation to parse the common components of a URI reference without knowing the scheme-specific requirements of every possible identifier.  This specification does not define a generative grammar for URIs; that task is performed by the individual specifications of each URI scheme. [STANDARDS-TRACK]</t>
            </abstract>
          </front>
          <seriesInfo name="STD" value="66"/>
          <seriesInfo name="RFC" value="3986"/>
          <seriesInfo name="DOI" value="10.17487/RFC3986"/>
        </reference>
        <reference anchor="RFC5234" target="https://www.rfc-editor.org/info/rfc5234" quoteTitle="true" derivedAnchor="RFC5234">
          <front>
            <title>Augmented BNF for Syntax Specifications: ABNF</title>
            <author fullname="D. Crocker" initials="D." role="editor" surname="Crocker"/>
            <author fullname="P. Overell" initials="P." surname="Overell"/>
            <date month="January" year="2008"/>
            <abstract>
              <t indent="0">Internet technical specifications often need to define a formal syntax.  Over the years, a modified version of Backus-Naur Form (BNF), called Augmented BNF (ABNF), has been popular among many Internet specifications.  The current specification documents ABNF.  It balances compactness and simplicity with reasonable representational power.  The differences between standard BNF and ABNF involve naming rules, repetition, alternatives, order-independence, and value ranges.  This specification also supplies additional rule definitions and encoding for a core lexical analyzer of the type common to several Internet specifications. [STANDARDS-TRACK]</t>
            </abstract>
          </front>
          <seriesInfo name="STD" value="68"/>
          <seriesInfo name="RFC" value="5234"/>
          <seriesInfo name="DOI" value="10.17487/RFC5234"/>
        </reference>
        <reference anchor="RFC8174" target="https://www.rfc-editor.org/info/rfc8174" quoteTitle="true" derivedAnchor="RFC8174">
          <front>
            <title>Ambiguity of Uppercase vs Lowercase in RFC 2119 Key Words</title>
            <author fullname="B. Leiba" initials="B." surname="Leiba"/>
            <date month="May" year="2017"/>
            <abstract>
              <t indent="0">RFC 2119 specifies common key words that may be used in protocol specifications.  This document aims to reduce the ambiguity by clarifying that only UPPERCASE usage of the key words have the defined special meanings.</t>
            </abstract>
          </front>
          <seriesInfo name="BCP" value="14"/>
          <seriesInfo name="RFC" value="8174"/>
          <seriesInfo name="DOI" value="10.17487/RFC8174"/>
        </reference>
        <reference anchor="RFC8288" target="https://www.rfc-editor.org/info/rfc8288" quoteTitle="true" derivedAnchor="RFC8288">
          <front>
            <title>Web Linking</title>
            <author fullname="M. Nottingham" initials="M." surname="Nottingham"/>
            <date month="October" year="2017"/>
            <abstract>
              <t indent="0">This specification defines a model for the relationships between resources on the Web ("links") and the type of those relationships ("link relation types").</t>
              <t indent="0">It also defines the serialisation of such links in HTTP headers with the Link header field.</t>
            </abstract>
          </front>
          <seriesInfo name="RFC" value="8288"/>
          <seriesInfo name="DOI" value="10.17487/RFC8288"/>
        </reference>
        <reference anchor="RFC9110" target="https://www.rfc-editor.org/info/rfc9110" quoteTitle="true" derivedAnchor="RFC9110">
          <front>
            <title>HTTP Semantics</title>
            <author fullname="R. Fielding" initials="R." role="editor" surname="Fielding"/>
            <author fullname="M. Nottingham" initials="M." role="editor" surname="Nottingham"/>
            <author fullname="J. Reschke" initials="J." role="editor" surname="Reschke"/>
            <date month="June" year="2022"/>
            <abstract>
              <t indent="0">The Hypertext Transfer Protocol (HTTP) is a stateless application-level protocol for distributed, collaborative, hypertext information systems. This document describes the overall architecture of HTTP, establishes common terminology, and defines aspects of the protocol that are shared by all versions. In this definition are core protocol elements, extensibility mechanisms, and the "http" and "https" Uniform Resource Identifier (URI) schemes.</t>
              <t indent="0">This document updates RFC 3864 and obsoletes RFCs 2818, 7231, 7232, 7233, 7235, 7538, 7615, 7694, and portions of 7230.</t>
            </abstract>
          </front>
          <seriesInfo name="STD" value="97"/>
          <seriesInfo name="RFC" value="9110"/>
          <seriesInfo name="DOI" value="10.17487/RFC9110"/>
        </reference>
        <reference anchor="RFC9111" target="https://www.rfc-editor.org/info/rfc9111" quoteTitle="true" derivedAnchor="RFC9111">
          <front>
            <title>HTTP Caching</title>
            <author fullname="R. Fielding" initials="R." role="editor" surname="Fielding"/>
            <author fullname="M. Nottingham" initials="M." role="editor" surname="Nottingham"/>
            <author fullname="J. Reschke" initials="J." role="editor" surname="Reschke"/>
            <date month="June" year="2022"/>
            <abstract>
              <t indent="0">The Hypertext Transfer Protocol (HTTP) is a stateless application-level protocol for distributed, collaborative, hypertext information systems. This document defines HTTP caches and the associated header fields that control cache behavior or indicate cacheable response messages.</t>
              <t indent="0">This document obsoletes RFC 7234.</t>
            </abstract>
          </front>
          <seriesInfo name="STD" value="98"/>
          <seriesInfo name="RFC" value="9111"/>
          <seriesInfo name="DOI" value="10.17487/RFC9111"/>
        </reference>
      </references>
      <references pn="section-6.2">
        <name slugifiedName="name-informative-references">Informative References</name>
        <reference anchor="KiB" target="https://simple.wikipedia.org/wiki/Kibibyte" quoteTitle="true" derivedAnchor="KiB">
          <front>
            <title>Kibibyte</title>
            <author>
              <organization showOnFrontPage="true"/>
            </author>
            <date day="17" month="September" year="2020"/>
          </front>
          <refcontent>Simple English Wikipedia, the free encyclopedia</refcontent>
        </reference>
        <reference anchor="ROBOTSTXT" target="https://www.robotstxt.org/" quoteTitle="true" derivedAnchor="ROBOTSTXT">
          <front>
            <title>The Web Robots Pages (including /robots.txt)</title>
            <author>
              <organization showOnFrontPage="true"/>
            </author>
            <date>2007</date>
          </front>
        </reference>
        <reference anchor="SITEMAPS" target="https://www.sitemaps.org/index.html" quoteTitle="true" derivedAnchor="SITEMAPS">
          <front>
            <title>What are Sitemaps? (Sitemap protocol)</title>
            <author>
              <organization showOnFrontPage="true"/>
            </author>
            <date>April 2020</date>
          </front>
        </reference>
      </references>
    </references>
    <section anchor="authors-addresses" numbered="false" removeInRFC="false" toc="include" pn="section-appendix.a">
      <name slugifiedName="name-authors-addresses">Authors' Addresses</name>
      <author initials="M." surname="Koster" fullname="Martijn Koster">
        <address>
          <postal>
            <extaddr>Stalworthy Manor Farm</extaddr>
            <street>Suton Lane</street>
            <city>Wymondham, Norfolk</city>
            <code>NR18 9JG</code>
            <country>United Kingdom</country>
          </postal>
          <email>m.koster@greenhills.co.uk</email>
        </address>
      </author>
      <author initials="G." surname="Illyes" fullname="Gary Illyes">
        <organization showOnFrontPage="true">Google LLC</organization>
        <address>
          <postal>
            <street>Brandschenkestrasse 110</street>
            <city>Zürich</city>
            <code>8002</code>
            <country>Switzerland</country>
          </postal>
          <email>garyillyes@google.com</email>
        </address>
      </author>
      <author initials="H." surname="Zeller" fullname="Henner Zeller">
        <organization showOnFrontPage="true">Google LLC</organization>
        <address>
          <postal>
            <street>1600 Amphitheatre Pkwy</street>
            <city>Mountain View</city>
            <region>CA</region>
            <code>94043</code>
            <country>United States of America</country>
          </postal>
          <email>henner@google.com</email>
        </address>
      </author>
      <author initials="L." surname="Sassman" fullname="Lizzi Sassman">
        <organization showOnFrontPage="true">Google LLC</organization>
        <address>
          <postal>
            <street>Brandschenkestrasse 110</street>
            <city>Zürich</city>
            <code>8002</code>
            <country>Switzerland</country>
          </postal>
          <email>lizzi@google.com</email>
        </address>
      </author>
    </section>
  </back>
</rfc>
