<?xml version="1.0" encoding="US-ASCII"?>
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!ENTITY RFC2119 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC8174 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8174.xml">
<!ENTITY RFC7432 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7432.xml">
<!ENTITY RFC8365 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8365.xml">
<!ENTITY RFC8584 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8584.xml">
<!ENTITY RFC9136 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.9136.xml">
<!ENTITY RFC9252 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.9252.xml">
<!ENTITY RFC4364 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4364.xml">
<!ENTITY RFC8214 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8214.xml">
<!ENTITY RFC7348 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7348.xml">
<!ENTITY RFC8926 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8926.xml">
<!ENTITY RFC7510 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7510.xml">
<!ENTITY RFC8986 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8986.xml">
<!ENTITY RFC9012 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.9012.xml">
<!ENTITY RFC7938 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7938.xml">
<!ENTITY RFC9469 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.9469.xml">
<!ENTITY I-D.ietf-bess-rfc7432bis SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-bess-rfc7432bis.xml">
<!ENTITY I-D.ietf-bess-evpn-mh-split-horizon SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-bess-evpn-mh-split-horizon.xml">
<!ENTITY I-D.ietf-bess-evpn-virtual-eth-segment SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-bess-evpn-virtual-eth-segment.xml">
<!ENTITY I-D.ietf-bess-evpn-unequal-lb SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-bess-evpn-unequal-lb.xml">
<!ENTITY I-D.ietf-bess-mvpn-evpn-aggregation-label SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-bess-mvpn-evpn-aggregation-label.xml">
<!ENTITY I-D.ietf-bess-evpn-ip-aliasing SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-bess-evpn-ip-aliasing.xml">
<!ENTITY I-D.burdet-bess-evpn-fast-reroute SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.burdet-bess-evpn-fast-reroute.xml">
]>
<?rfc toc="yes"?>
<?rfc tocompact="yes"?>
<?rfc tocdepth="3"?>
<?rfc tocindent="yes"?>
<?rfc symrefs="yes"?>
<?rfc sortrefs="yes"?>
<?rfc comments="yes"?>
<?rfc inline="yes"?>
<?rfc compact="yes"?>
<?rfc subcompact="no"?>
<rfc category="std" docName="draft-rabnag-bess-evpn-anycast-aliasing-01"
     ipr="trust200902" submissionType="IETF">
  <!---->

  <?rfc strict="yes"?>

  <?rfc compact="yes"?>

  <?rfc subcompact="no"?>

  <?rfc symrefs="yes"?>

  <?rfc sortrefs="no"?>

  <?rfc text-list-symbols="-o+*"?>

  <?rfc toc="yes"?>

  <front>
    <title abbrev="EVPN Anycast Aliasing">EVPN Anycast Aliasing For
    Multi-Homing</title>

    <author fullname="Jorge Rabadan" initials="J." role="editor"
            surname="Rabadan">
      <organization>Nokia</organization>

      <address>
        <postal>
          <street>520 Almanor Avenue</street>

          <city>Sunnyvale</city>

          <region>CA</region>

          <code>94085</code>

          <country>USA</country>
        </postal>

        <email>jorge.rabadan@nokia.com</email>
      </address>
    </author>

    <author fullname="Kiran Nagaraj" initials="K." surname="Nagaraj">
      <organization>Nokia</organization>

      <address>
        <postal>
          <street>520 Almanor Avenue</street>

          <city>Sunnyvale</city>

          <region>CA</region>

          <code>94085</code>

          <country>USA</country>
        </postal>

        <email>kiran.nagaraj@nokia.com</email>
      </address>
    </author>

    <author fullname="Alex Nichol" initials="A." surname="Nichol">
      <organization>Arista</organization>

      <address>
        <postal>
          <street/>

          <city/>

          <region/>

          <code/>

          <country/>
        </postal>

        <phone/>

        <facsimile/>

        <email>anichol@arista.com</email>

        <uri/>
      </address>
    </author>

    <author fullname="Nick Morris" initials="N." surname="Morris">
      <organization>Verizon</organization>

      <address>
        <postal>
          <street/>

          <city/>

          <region/>

          <code/>

          <country/>
        </postal>

        <phone/>

        <facsimile/>

        <email>nicklous.morris@verizonwireless.com</email>

        <uri/>
      </address>
    </author>

    <date day="7" month="February" year="2024"/>

    <workgroup>BESS Workgroup</workgroup>

    <abstract>
      <t>The current Ethernet Virtual Private Network (EVPN) all-active
      multi-homing procedures in Network Virtualization Over Layer-3 (NVO3)
      networks provide the required Split Horizon filtering, Designated
      Forwarder Election and Aliasing functions that the network needs in
      order to handle the traffic to and from the multi-homed CE in an
      efficient way. In particular, the Aliasing function addresses the load
      balancing of unicast packets from remote Network Virtualization Edge
      (NVE) devices to the NVEs that are multi-homed to the same CE,
      irrespective of the learning of the CE's MAC/IP information on the NVEs.
      This document describes an optional optimization of the EVPN
      multi-homing Aliasing function - EVPN Anycast Aliasing - that is
      specific to the use of EVPN with NVO3 tunnels (i.e., IP tunnels) and, in
      typical Data Center designs, may provide savings in terms of data plane
      and control plane resources in the routers.</t>
    </abstract>
  </front>

  <middle>
    <section anchor="sect-1" title="Introduction">
      <t>Ethernet Virtual Private Network (EVPN) is the de-facto standard
      control plane in Network Virtualization Over Layer-3 (NVO3) networks
      deployed in multi-tenant Data Centers <xref target="RFC8365"/><xref
      target="RFC9469"/>. EVPN provides Network Virtualization Edge (NVE)
      auto-discovery, tenant MAC/IP dissemination and advanced features
      required by Network Virtualization Over Layer-3 (NVO3) networks, such as
      all-active multi-homing. The current EVPN all-active multi-homing
      procedures in NVO3 networks provide the required Split Horizon
      filtering, Designated Forwarder Election and Aliasing functions that the
      network needs in order to handle the traffic to and from the multi-homed
      CE in an efficient way. In particular, the Aliasing function addresses
      the load balancing of unicast packets from remote NVEs to the NVEs that
      are multi-homed to the same CE, irrespective of the learning of the CE's
      MAC/IP information on the NVEs. This document describes an optional
      optimization of the EVPN multi-homing Aliasing function - EVPN Anycast
      Aliasing - that is specific to the use of EVPN with NVO3 tunnels (i.e.,
      IP tunnels) and, in typical Data Center designs, may provide some
      savings in terms of data plane and control plane resources in the
      routers.</t>

      <section anchor="sect-1.1" title="Terminology and Conventions">
        <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
        "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and
        "OPTIONAL" in this document are to be interpreted as described in BCP
        14 <xref target="RFC2119"/> <xref target="RFC8174"/> when, and only
        when, they appear in all capitals, as shown here.</t>

        <t><list style="symbols">
            <t>A-D per EVI route: EVPN route type 1, Auto-Discovery per EVPN
            Instance route. Route used for aliasing or backup signaling in
            EVPN multi-homing procedures <xref target="RFC7432"/>.</t>

            <t>A-D per ES route: EVPN route type 1, Auto-Discovery per
            Ethernet Segment route. Route used for mass withdraw in EVPN
            multi-homing procedures <xref target="RFC7432"/>.</t>

            <t>BUM traffic: Broadcast, Unknown unicast and Multicast
            traffic.</t>

            <t>CE: Customer Edge, e.g., a host, router, or switch.</t>

            <t>Clos: a multistage network topology described in <xref
            target="CLOS1953"/>, where all the edge nodes (or Leaf routers)
            are connected to all the core nodes (or Spines). Typically used in
            Data Centers.</t>

            <t>ECMP: Equal Cost Multi-Path.</t>

            <t>ES: Ethernet Segment. When a Tenant System (TS) is connected to
            one or more NVEs via a set of Ethernet links, then that set of
            links is referred to as an 'Ethernet segment'. Each ES is
            represented by a unique Ethernet Segment Identifier (ESI) in the
            NVO3 network and the ESI is used in EVPN routes that are specific
            to that ES.</t>

            <t>EVI: or EVPN Instance. It is a Layer-2 Virtual Network that
            uses an EVPN control-plane to exchange reachability information
            among the member NVEs. It corresponds to a set of MAC-VRFs of the
            same tenant. See MAC-VRF in this section.</t>

            <t>GENEVE: Generic Network Virtualization Encapsulation, an NVO3
            encapsulation defined in <xref target="RFC8926"/>.</t>

            <t>IP-VRF: an IP Virtual Routing and Forwarding table, as defined
            in <xref target="RFC4364"/>. It stores IP Prefixes that are part
            of the tenant's IP space, and are distributed among NVEs of the
            same tenant by EVPN. Route Distinguisher (RD) and Route Target(s)
            (RTs) are required properties of an IP-VRF. An IP-VRF is
            instantiated in an NVE for a given tenant, if the NVE is attached
            to multiple subnets of the tenant and local
            inter-subnet-forwarding is required across those subnets.</t>

            <t>IRB: Integrated Routing and Bridging interface. It refers to
            the logical interface that connects a Broadcast Domain instance
            (or a BT) to an IP-VRF and allows to forward packets with
            destination in a different subnet.</t>

            <t>MAC-VRF: a MAC Virtual Routing and Forwarding table, as defined
            in <xref target="RFC7432"/>. The instantiation of an EVI (EVPN
            Instance) in an NVE. Route Distinguisher (RD) and Route Target(s)
            (RTs) are required properties of a MAC-VRF and they are normally
            different from the ones defined in the associated IP-VRF (if the
            MAC-VRF has an IRB interface).</t>

            <t>MPLS and non-MPLS NVO3 tunnels: refer to Multi-Protocol Label
            Switching (or the absence of it) Network Virtualization Overlay
            tunnels. Network Virtualization Overlay tunnels use an IP
            encapsulation for overlay frames, where the source IP address
            identifies the ingress NVE and the destination IP address the
            egress NVE.</t>

            <t>NLRI: BGP Network Layer Reachability Information.</t>

            <t>NVE: Network Virtualization Edge device, a network entity that
            sits at the edge of an underlay network and implements Layer-2
            and/or Layer-3 network virtualization functions. The
            network-facing side of the NVE uses the underlying Layer-3 network
            to tunnel tenant frames to and from other NVEs. The tenant-facing
            side of the NVE sends and receives Ethernet frames to and from
            individual Tenant Systems. In this document, an NVE could be
            implemented as a virtual switch within a hypervisor, a switch or a
            router, and runs EVPN in the control-plane. This document uses the
            terms NVE and "Leaf router" interchangeably.</t>

            <t>NVO3 tunnels: Network Virtualization Over Layer-3 tunnels. In
            this document, NVO3 tunnels refer to a way to encapsulate tenant
            frames or packets into IP packets whose IP Source Addresses (SA)
            or Destination Addresses (DA) belong to the underlay IP address
            space, and identify NVEs connected to the same underlay network.
            Examples of NVO3 tunnel encapsulations are VXLAN <xref
            target="RFC7348"/>, GENEVE <xref target="RFC8926"/> or MPLSoUDP
            <xref target="RFC7510"/>.</t>

            <t>SRv6: Segment routing with an IPv6 data plane, <xref
            target="RFC8986"/>.</t>

            <t>TS: Tenant System. A physical or virtual system that can play
            the role of a host or a forwarding element such as a router,
            switch, firewall, etc. It belongs to a single tenant and connects
            to one or more Broadcast Domains of that tenant.</t>

            <t>VNI: Virtual Network Identifier. Irrespective of the NVO3
            encapsulation, the tunnel header always includes a VNI that is
            added at the ingress NVE (based on the mapping table lookup) and
            identifies the BT at the egress NVE. This VNI is called VNI in
            VXLAN or GENEVE, VSID in nvGRE or Label in MPLSoGRE or MPLSoUDP.
            This document will refer to VNI as a generic Virtual Network
            Identifier for any NVO3 encapsulation.</t>

            <t>VTEP: VXLAN Termination End Point. A loopback IP address of the
            destination NVE that is used in the outer destination IP address
            of VXLAN packets directed to that NVE.</t>

            <t>VXLAN: Virtual eXtensible Local Area Network, an NVO3
            encapsulation defined in <xref target="RFC7348"/>.</t>
          </list></t>
      </section>

      <section anchor="sect-1.2" title="Problem Statement">
        <t><xref target="Figure1"/> depicts the typical Clos topology in
        multi-tenant Data Centers, only simplified to show three Leaf routers
        and two Spines, forming a 3-stage Clos topology. The NVEs or Leaf
        routers run EVPN for NVO3 tunnels, as in <xref target="RFC8365"/>. We
        assume VXLAN is used as the NVO3 tunnel, given that VXLAN is highly
        prevalent in multi-tenant Data Centers. This diagram is used as a
        reference throughout this document. In very large scale Data Centers
        though, the number of Tenant Systems, Leaf routers and Spines (in
        multiple layers) may be significant.</t>

        <t><figure anchor="Figure1"
            title="Simplified Clos topology in Data Centers">
            <artwork><![CDATA[          +-------+   +-------+
          |Spine-1|   |Spine-2|
          |       |   |       |
          +-------+   +-------+
           |  |  |     |  |  |
       +---+  |  |     |  |  +---+
       |      |  |     |  |      |
       |  +------------+  |      |
       |  |   |  |        |      |
       |  |   |  +------------+  |
       |  |   |           |   |  |
       |  |   +---+  +----+   |  |
   L1  |  |    L2 |  |     L3 |  |
    +-------+   +-------+   +-------+
    | +---+ |   | +---+ |   | +---+ |
    | |BD1| |   | |BD1| |   | |BD1| |
    | +---+ |   | +---+ |   | +---+ |
    +-------+   +-------+   +-------+
       | |         | |          |
       | +---+ +---+ |          |
       |     | |     |          |
       |    +---+    |        +---+
       |    |TS1|    |        |TS3|
       |    +---+    |        +---+
       |    ES-1     |
       +-----+ +-----+
             | |
            +---+
            |TS2|
            +---+
            ES-2

]]></artwork>
          </figure></t>

        <t>In the example of <xref target="Figure1"/> the Tenant Systems TS1
        and TS2 are multi-homed to Leaf routers L1 and L2, and Ethernet
        Segment Identifiers ESI-1 and ESI-2 are the representation of TS1 and
        TS2 Ethernet Segments in the EVPN control plane for the Split Horizon
        filtering, Designated Forwarder and Aliasing functions <xref
        target="RFC8365"/>.</t>

        <t>Taking Tenant Systems TS1 and TS3 as an example, the EVPN
        all-active multi-homing procedures guarantee that, when TS3 sends
        unicast traffic to TS1, Leaf L3 does per-flow load balancing towards
        Leaf routers L1 and L2. As explained in <xref target="RFC7432"/> and
        <xref target="RFC8365"/> this is possible due to L1 and/or L2 Leaf
        routers advertising TS1's MAC address in an EVPN MAC/IP Advertisement
        route that includes ESI-1 in the Ethernet Segment Identifier field.
        When the route is imported in Leaf L3, TS1's MAC address is programmed
        with a destination associated to ESI-1 next hop list. This ESI-1 next
        hop list is created based on the reception of the EVPN A-D per ES and
        A-D per EVI routes for ESI-1 received from Leaf routers L1 and L2.
        Assuming Ethernet Segment ES-1 links are operationally active, Leaf
        routers L1 and L2 advertise the EVPN A-D per ES/EVI routes for ESI-1
        and Leaf L3 adds L1 and L2 to its next hop list for ESI-1. Unicast
        flows from TS3 to TS1 are therefore load balanced to Leaf routers L1
        and L2, and L3's ESI-1 next hop list is what we refer to as the
        "overlay ECMP-set" for ESI-1 in Leaf L3. In addition, once Leaf L3
        selects one of the next hops in the overlay ECMP-set, e.g. L1, Leaf L3
        does a route lookup of the L1 address in the Base router route table.
        The lookup yields a list of two next hops, Spine-1 and Spine-2, which
        we refer to as the "underlay ECMP-set". Therefore, for a given unicast
        flow to TS1, Leaf L3 does per flow load balancing at two levels: a
        next hop in the overlay ECMP-set is selected first, e.g., L1, and then
        a next hop in the underlay ECMP-set is selected, e.g., Spine-1.</t>

        <t>While aliasing <xref target="RFC7432"/> provides an efficient
        method to load balance unicast traffic to the Leaf routers attached to
        the same all-active Ethernet Segment, there are some challenges in
        very large Data Centers where the number of Ethernet Segments and Leaf
        routers is significant:</t>

        <t><list style="letters">
            <t>Control Plane Scale: In a large Data Center environment, the
            number of multi-homed compute nodes can grow significantly to the
            1000s range, where each compute node requires a unique ES and
            hosts 10s of EVIs per ES. In the aliasing model defined within
            <xref target="RFC7432"/>, there is a requirement to advertise EVPN
            A-D per EVI routes for each active EVI on each ethernet segment.
            The resultant EVPN state that Route Reflectors, Data Center
            Gateways and Leaf routers need to process becomes significant
            and will only grow as the number of Ethernet Segments, Broadcast
            Domains and Leaf routers are added. Removing the need to advertise
            the EVPN A-D per EVI routes would therefore offer a considerable
            advantage to the overall route scale and processing overhead.</t>

            <t>Convergence and Processing overhead: In accordance with <xref
            target="RFC8365"/> each node of an Ethernet Segment acts as an
            independent VTEP and therefore EVPN next hop. In a typical Data
            Center leaf-spine topology this results in ECMP being performed in
            both the underlay ECMP-set and also the overlay ECMP-set.
            Consequently, convergence at scale during a failure can be slow
            and CPU intensive as all leaf routers are required to process the
            overlay state change caused by the EVPN route(s) being withdrawn
            at the point of failure and update their overlay ECMP-set
            accordingly. Performing the load-balancing with just the underlay
            ECMP-set, offers the potential to dramatically reduce this network
            wide state-churn and processing overhead, while providing faster
            convergence at scale by limiting the scope of the re-convergence
            to just the intermediate Spine nodes.</t>

            <t>Hardware Resource consumption: As described in "b", the use of
            EVPN Aliasing procedures on the Leaf routers, requires the
            creation of both overlay and underlay ECMP-sets which typically
            utilize the same hardware resources. If the number of remote Leaf
            routers and Ethernet Segments grow significantly, the capacity to
            support both overlay and underlay ECMP-set in hardware can become
            a restricting factor.</t>

            <t>Inefficient forwarding during a failure: A further consequence
            of ECMP being performed in the overlay ECMP-set is the potential
            for in-flight packets sent by remote Leaf routers being rerouted
            in an inefficient way. Some examples follow:<list style="symbols">
                <t>Suppose the link L1-to-Spine-1 in <xref target="Figure1"/>
                fails. In-flight VXLAN packets already sent from L3 with
                destination VTEP equal L1 arrive at Spine-1 and are rerouted
                via e.g., L2-&gt;Spine-2-&gt;L1-&gt;TS1, while they could go
                directly via L2-&gt;TS1, since TS1 is also connected to Leaf
                L2. After the underlay routing protocol converges, all VXLAN
                packets with destination VTEP L1 are correctly sent to Spine-2
                and Leaf L3 removes Spine-1 from the underlay ECMP-set for
                Leaf L1.</t>

                <t>In a different example for the same diagram, suppose the
                link TS1-to-L1 fails. In-flight VXLAN packets already sent
                from L3 with destination VTEP equal L1 arrive at Leaf L1, and
                if the inner destination MAC address is TS1, the frame has to
                be encapsulated in a VXLAN packet again and rerouted to VTEP
                equal to L2. Eventually, the MP_UNREACH_NLRI messages for the
                ES-1 A-D routes make it to Leaf L3 and Leaf L3 starts sending
                the VXLAN packets to Leaf L2. The rerouting of in-flight
                packets following the path
                L3-&gt;Spine-1-&gt;L1-&gt;Spine-2-&gt;L2-&gt;TS1 is what we
                know as "Fast-Reroute" and procedures to avoid micro loops are
                described in <xref
                target="I-D.burdet-bess-evpn-fast-reroute"/>.</t>
              </list></t>
          </list></t>

        <t>There are existing proprietary multi-chassis Link Aggregation Group
        implementations, collectively and commonly known as MC-LAG, that
        attempt to work around the above challenges by using the concept of
        "Anycast VTEPs", or the use of a shared loopback IP address that the
        Leaf routers attached to the same multi-homed Tenant System can use to
        terminate VXLAN packets. As an example in <xref target="Figure1"/>, if
        Leaf routers L1 and L2 used an Anycast VTEP address "anycast-IP1" to
        identify VXLAN packets to Tenant System TS1:</t>

        <t><list style="symbols">
            <t>Leaf L3 would not need to create an overlay ECMP-set for
            packets to TS1, since the use of anycast-IP1 in the underlay
            ECMP-set would guarantee the per-flow load balancing to the two
            Leaf routers.</t>

            <t>In the same failure example as above for link L1-to-Spine-1
            failure, Spine-1 would reroute VXLAN packets directly to Leaf L2,
            since L2 also advertises the anycast-IP1 address that is used from
            Leaf L3 to send packets to TS1.</t>

            <t>In the same example as above for a TS1-to-L1 failure, Leaf L1
            could withdraw the anycast-IP1 address and Spine-1 would quickly
            reroute VXLAN packets directly to Leaf L2 without the need for
             Fast-Reroute.</t>

            <t>In addition, if Leaf routers L1 and L2 used proprietary MC-LAG
            techniques, no EVPN A-D per EVI routes would be needed, hence the
            number of EVPN routes would be significantly decreased in a large
            scale Data Center.</t>
          </list>However, the use of proprietary MC-LAG technologies in EVPN
        NVO3 networks is being abandoned due to the superior functionality of
        EVPN Multi-Homing, including mass withdraw <xref target="RFC7432"/>,
        advanced Designated Forwarding election <xref target="RFC8584"/> or
        weighted load balancing <xref
        target="I-D.ietf-bess-evpn-unequal-lb"/>, to name a few features.</t>
      </section>

      <section anchor="sect-1.3" title="Solution Overview">
        <t>This document specifies an EVPN Anycast Aliasing extension that can
        be used as an alternative to EVPN Aliasing <xref target="RFC7432"/>.
        EVPN Anycast Aliasing replaces the per-flow overlay ECMP
        load-balancing with a simplified per-flow underlay ECMP load
        balancing, in a similar way to how proprietary MC-LAG solutions do it,
        but in a standard way and keeping the superior advantages of EVPN
        Multi-Homing, such as the Designated Forwarder Election, Split Horizon
        filtering or the mass withdraw function, all of them described in
        <xref target="RFC8365"/> and <xref target="RFC7432"/>. The solution
        uses the A-D per ES routes to advertise the Anycast VTEP address to be
        used when sending traffic to the Ethernet Segment and suppresses the
        use of A-D per EVI routes for the Ethernet Segments configured in this
        mode. This solution addresses the challenges outlined in <xref
        target="sect-1.2"/>.</t>

        <t>The solution is valid for all NVO3 tunnels, or even for IP tunnels
        in general. Sometimes the description uses VXLAN as an example, given
        that VXLAN is highly prevalent in multi-tenant Data Centers. However,
        the examples and procedures are valid for any NVO3 tunnel type.</t>
      </section>
    </section>

    <section anchor="sect-2" title="BGP EVPN Extensions">
      <t>This specification makes use of two BGP extensions that are used
      along with the A-D per ES routes <xref target="RFC7432"/>.</t>

      <t>The first extension is the flag "A" or "Anycast Aliasing mode";
      IANA is requested to allocate it in bit 2 of the EVPN ESI
      Multihoming Attributes registry for the 1-octet Flags field in the ESI
      Label Extended Community, as follows:</t>

      <figure anchor="Figure2" title="ESI Label Extended Community and Flags">
        <artwork><![CDATA[   0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   | Type=0x06     | Sub-Type=0x01 | Flags(1 octet)|  Reserved=0   |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |  Reserved=0   |          ESI Label                            |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

Flags field:

        0 1 2 3 4 5 6 7
       +-+-+-+-+-+-+-+-+
       |SHT|A|     |RED|   
       +-+-+-+-+-+-+-+-+

]]></artwork>
      </figure>

      <t>Where the following Flags are defined:</t>

      <texttable style="headers" suppress-title="true" title="Flags Field">
        <ttcol>Name</ttcol>

        <ttcol>Meaning</ttcol>

        <ttcol>Reference</ttcol>

        <c>RED</c>

        <c>Multihomed site redundancy mode</c>

        <c><xref target="I-D.ietf-bess-rfc7432bis"/></c>

        <c>SHT</c>

        <c>Split Horizon type</c>

        <c><xref target="I-D.ietf-bess-evpn-mh-split-horizon"/></c>

        <c>A</c>

        <c>Anycast Aliasing mode</c>

        <c>This document</c>
      </texttable>

      <t>When the NVE advertises an A-D per ES route with the A flag set, it
      indicates the Ethernet Segment is working in Anycast Aliasing mode. The
      A flag is set only if the RED = 00 (All-Active redundancy mode), and
      MUST NOT be set if RED is different from 00.</t>

      <t>The second extension that this document introduces is the encoding of
      the "Anycast VTEP" address in the BGP Tunnel Encapsulation Attribute,
      Tunnel Egress Endpoint Sub-TLV (code point 6) <xref target="RFC9012"/>.
      NOTE from the authors: a new Sub-TLV may also be considered in future
      versions of this document, depending on the feedback of the Working
      Group.</t>
    </section>

    <section anchor="sect-3" title="Anycast Aliasing Solution">
      <t>This document proposes an OPTIONAL "EVPN Anycast Aliasing" procedure
      that provides a solution to optimize the behavior in case the challenges
      described in <xref target="sect-1.2"/> become a problem. The description
      makes use of the terms "Ingress NVE" and "Egress NVE". In this document,
      Egress NVE refers to an NVE that is attached to an Ethernet Segment
      working in Anycast Aliasing mode, whereas Ingress NVE refers to the NVE
      that transmits unicast traffic to a MAC address that is associated to a
      remote Ethernet Segment that works in Anycast Aliasing mode. In
      addition, the concepts of Unicast VTEP and Anycast VTEP are used. A
      Unicast VTEP is a loopback IP address that is unique in the Data Center
      fabric and it is owned by a single NVE terminating VXLAN (or NVO3)
      traffic. An Anycast VTEP is a loopback IP address that is shared among
      the NVEs attached to the same Ethernet Segment and it is used to
      terminate VXLAN (or NVO3) traffic on those NVEs. An Anycast VTEP in this
      document MUST NOT be used as BGP next hop of any EVPN route NLRI. This
      is due to the need for the Multi-Homing procedures to uniquely identify
      the originator of the EVPN routes via their NLRI next hops.</t>

      <t>The solution consists of the following alternative modifications of
      the <xref target="RFC7432"/> EVPN Aliasing function:</t>

      <t><list style="numbers">
          <t>The <xref target="RFC8365"/> Designated Forwarder and Split
          Horizon filtering procedures remain unmodified. Only the Aliasing
          procedure is modified in this Anycast Aliasing mode.</t>

          <t>The forwarding of BUM traffic and related procedures are not
          modified by this document. Only the procedures related to the
          forwarding of unicast traffic to a remote Ethernet Segment are
          modified.</t>

          <t>Any two Egress NVEs attached to the same Ethernet Segment working
          in Anycast Aliasing mode MUST use the same VNI or label to identify
          the Broadcast Domain that makes use of the Ethernet Segment. For
          non-MPLS NVO3 tunnels, using the same VNI is implicit if global VNIs
          are used (<xref target="RFC8365"/> section 5.1.1). If locally
          significant values are used for the VNIs, at least all the Egress
          NVEs sharing Ethernet Segments MUST use the same VNI for the
          Broadcast Domain. For MPLS NVO3 tunnels, the Egress NVEs sharing
          Anycast Aliasing Ethernet Segments MUST use Domain-wide Common Block
          labels <xref target="I-D.ietf-bess-mvpn-evpn-aggregation-label"/> so
          that all can be configured with the same unicast label for the same
          Broadcast Domain. Note that this rule only affects unicast labels or
          the labels advertised with the EVPN MAC/IP Advertisement routes and
          not the Ingress Replication labels for BUM traffic advertised in the
          EVPN Inclusive Multicast Ethernet Tag routes.</t>

          <t>The default behavior for an Egress NVE attached to an Ethernet
          Segment follows <xref target="RFC8365"/>. The Anycast Aliasing mode
          MUST be explicitly configured for a given all-active Ethernet
          Segment. When the Egress NVE Ethernet Segment is configured to
          follow the Anycast Aliasing behavior, the egress NVE:<list
              style="letters">
              <t>Allocates an Anycast VTEP for the Ethernet Segment, which is
              shared by all egress NVEs attached to the Ethernet Segment. The
              egress NVE is assumed to advertise reachability for the Anycast
              VTEP in the underlay routing protocol, via an advertisement of
              an exact match route for the Anycast VTEP (mask /32 for IPv4 and
              /128 for IPv6) or a prefix of shorter length that covers the
              Anycast VTEP IP address.</t>

              <t>Advertises EVPN A-D per ES routes for the Ethernet Segment
              with:<list style="symbols">
                  <t>an "Anycast Aliasing" flag that indicates to the remote
                  NVEs that the EVPN MAC/IP Advertisement routes with matching
                  Ethernet Segment Identifier are resolved by only A-D per ES
                  routes for the Ethernet Segment. In other words, this flag
                  indicates to the ingress NVE that no A-D per EVI routes are
                  advertised for the Ethernet Segment.</t>

                  <t>an Anycast VTEP that identifies the Ethernet Segment and
                  is encoded in a BGP tunnel encapsulation attribute <xref
                  target="RFC9012"/> attached to the route.</t>
                </list></t>

              <t>Does not modify the procedures for the EVPN MAC/IP
              Advertisement routes.</t>

              <t>Suppresses the advertisement of the A-D per EVI routes for
              the Ethernet Segment configured in Anycast Aliasing mode.</t>

              <t>In case of a failure on the Ethernet Segment link, the Egress
              NVE withdraws the A-D per ES route(s), as well as the ES route
              for the Ethernet Segment. In addition, the Egress NVE withdraws
              the Anycast VTEP from the underlay routing protocol to avoid
              attracting traffic for the Ethernet Segment.</t>

              <t>In case only a subset of Broadcast Domains on the Ethernet
              Segment fails (due to a mis-configuration), the Ingress NVE
              continues sending traffic for the failed Broadcast Domains, only
              to be dropped at the Egress NVE. This is due to the Egress NVE
              only withdrawing the Anycast VTEP underlay route on a complete
              Ethernet Segment link failure.</t>
            </list></t>

          <t>The Ingress NVE that supports this document:<list style="letters">
              <t>Follows the regular <xref target="RFC8365"/> Aliasing
              procedures for the Ethernet Segments of the received A-D per
              ES routes without the Anycast Aliasing flag.</t>

              <t>Identifies the imported EVPN A-D per ES routes with the
              Anycast Aliasing flag and processes them for Anycast Aliasing.</t>

              <t>Upon receiving and importing (on a Broadcast Domain) an EVPN
              MAC/IP Advertisement route for MAC-1 with a non-zero Ethernet
              Segment Identifier ESI-1, the NVE looks for an A-D per ES route
              with the same Ethernet Segment Identifier ESI-1 imported in the
              same Broadcast Domain. If there is at least one A-D per ES route
              for ESI-1, the NVE checks if the Anycast Aliasing flag is set.
              If not, the ingress NVE follows the procedures in <xref
              target="RFC8365"/>. If the Anycast Aliasing flag is set, the
              ingress NVE programs MAC-1 associated to destination ESI-1. The
              ESI-1 destination is resolved to the Ethernet Segment Anycast
              VTEP that is extracted from the A-D per ES routes, and the VNI,
              e.g., VNI-1, that was received in the MAC/IP Advertisement
              route.</t>

              <t>When the Ingress NVE receives a frame with destination MAC
              address MAC-1 on any of the Attachment Circuits of the Broadcast
              Domain, the destination MAC lookup yields ESI-1 as destination.
              The frame is then encapsulated into a VXLAN (or NVO3) packet
              where the destination VTEP is the Anycast VTEP and the VNI is
              VNI-1. Since all the Egress NVEs attached to the Ethernet
              Segment previously announced reachability to the Anycast VTEP,
              the ingress NVE has an underlay ECMP-set created for the Anycast
              VTEP and per flow load balancing is accomplished.</t>

              <t>The Ingress NVE MUST NOT use an Anycast VTEP as the outer
              source IP address of the VXLAN (or NVO3) tunnel, unless the
              Ingress NVE is also an Egress NVE that re-encapsulates the
              traffic into a tunnel for the purpose of Fast Reroute (<xref
              target="sect-5"/>).</t>

              <t>The reception of one or more MP_UNREACH_NLRI messages for the
              A-D per ES routes for Ethernet Segment Identifier ESI-1 does not
              change the programming of the MAC addresses associated to ESI-1
              as long as there is at least one valid A-D per ES route for
              ESI-1 in the Broadcast Domain. The reception of the MP_UNREACH_NLRI
              message for the last A-D per ES route for ESI-1 triggers the
              mass withdraw procedures for all MACs pointing at ESI-1.</t>
            </list></t>

          <t>The procedures on the Ingress NVE for Anycast Aliasing assume
          that all the Egress NVEs attached to the same Ethernet Segment
          advertise the same Anycast Aliasing flag value and Anycast VTEP in
          their A-D per ES routes for the Ethernet Segment. Inconsistency in
          any of those two received values makes the Ingress NVE fall back to
          the <xref target="RFC8365"/> behavior, which means that the MAC
          address will be programmed with the Unicast VTEP derived from the
          MAC/IP Advertisement route next hop.</t>
        </list>Non-upgraded NVEs ignore the Anycast Aliasing flag value and
      the BGP tunnel encapsulation attribute.</t>

      <section anchor="sect-3.1" title="Anycast Aliasing Example">
        <t>Consider the example of <xref target="Figure4"/> where three Leaf
        routers run EVPN over VXLAN tunnels. Suppose Leaf routers L1, L2 and
        L3 support Anycast Aliasing as per <xref target="sect-3"/> and
        Ethernet Segment ES-1 is configured as an Anycast Aliasing Ethernet
        Segment, all-active mode, with Anycast VTEP IP12. The three Leaf
        routers use VNI-1 to identify the Broadcast Domain BD1. Leaf routers
        L1 and L2 both advertise an A-D per ES route for ESI-1 with the
        Anycast Aliasing flag set and Anycast VTEP IP12. Suppose only Leaf L1
        learns TS1 MAC address, hence only L1 advertises a MAC/IP
        Advertisement route for TS1 MAC with ESI-1.</t>

        <t><figure anchor="Figure4" title="Anycast Aliasing Example">
            <artwork><![CDATA[          +-------+   +-------+         
          |Spine-1|   |Spine-2|         
          |       |   |       |        
          +-------+   +-------+       
           |  |  |     |  |  |           
       +---+  |  |     |  |  +---+        
       |      |  |     |  |      |       
       |  +------------+  |      |       
       |  |   |  |        |      |        
       |  |   |  +------------+  |           
       |  |   |           |   |  |         
       |  |   +---+  +----+   |  |         
   L1  |  |    L2 |  |     L3 |  |       
    +-------+   +-------+   +-------+      
    | +---+ |   | +---+ |   | +---+ |      
    | |BD1| |   | |BD1| |   | |BD1| |     
    | +---+ |   | +---+ |   | +---+ |        
    +-------+   +-------+   +-------+      
         | Anycast |            |         
         |  IP12   |            |        
         +---+ +---+            |        
             | |                |         
            +---+             +---+        
            |TS1|             |TS3|      
            +---+             +---+      
            ES-1                        

]]></artwork>
          </figure>In this example:<list style="symbols">
            <t>Leaf L3 has Anycast VTEP IP12 programmed in its route table
            against an underlay ECMP-set composed of Spine-1 and Spine-2.
            Tenant System TS1 MAC address is programmed with a destination
            ESI-1, which is resolved to Anycast VTEP IP12.</t>

            <t>When Tenant System TS3 sends unicast traffic to Tenant System
            TS1, Leaf L3 encapsulates the frames into VXLAN packets with
            destination VTEP being the Anycast VTEP IP12. Leaf L3 can perform
            per-flow load balancing just by using the ECMP resources in the
            underlay, and without the need to create an overlay ECMP-set. All
            the A-D per EVI routes for ES-1 are also suppressed.</t>

            <t>Spine-1 and Spine-2 also create underlay ECMP-sets for Anycast
            VTEP IP12 with next hops L1 and L2. Therefore, in case of:<list
                style="symbols">
                <t>A failure on the link L1-to-Spine-1, Spine-1 immediately
                removes L1 from the ECMP-set for IP12 and packets are rerouted
                faster than in the case where regular Aliasing is used.</t>

                <t>A failure on the Ethernet Segment link TS1-to-L1, Leaf L1
                immediately withdraws its reachability to the Anycast VTEP
                IP12 from the underlay routing protocol, and Spine-1 and
                Spine-2 can remove L1 from their ECMP-sets to Anycast VTEP
                IP12. This results in much faster convergence compared to
                having to wait for the ingress Leaf L3 to remove Leaf L1 from
                the overlay ECMP-set for ESI-1 (which would be the required
                event in case of regular EVPN Aliasing).</t>
              </list></t>
          </list></t>
      </section>

      <section anchor="sect-3.2" title="Underlay Scale Impact">
        <t>While the solution described in <xref target="sect-3"/> suppresses
        the advertisement of an A-D per EVI route per Ethernet Segment per
        Broadcast Domain, it also requires the underlay routing protocol to
        advertise an additional Anycast VTEP IP address per Ethernet Segment.
        In very large scale Data Centers, the injection of as many /32 or /128
        prefixes as Ethernet Segments may have a significant impact in the
        Forwarding Information Base tables of the Leaf and Spine routers.
        Therefore the use of Anycast Aliasing becomes a trade-off between the
        number of A-D per EVI routes in regular EVPN Aliasing and the number
        of additional Anycast VTEP loopback addresses injected in the underlay
        routing protocol in the case of Anycast Aliasing. As an example,
        suppose two Leaf routers L1 and L2 are attached to the same 128
        Ethernet Segments and each Ethernet Segment has four Attachment
        Circuits (in four different Broadcast Domains). In this case:<list
            style="symbols">
            <t>If all the Ethernet Segments work in Anycast Aliasing mode, no
            A-D per EVI routes are advertised by Leaf routers L1 and L2. 128
            additional loopback addresses are advertised from L1/L2 into the
            underlay routing protocol.</t>

            <t>If all the Ethernet Segments work in regular Aliasing mode, 512
            A-D per EVI routes are advertised by each Leaf, L1 and L2, 1024 in
            total. However no additional loopback addresses are advertised
            into the underlay routing protocol.</t>
          </list></t>

        <t><xref target="sect-4"/> discusses solutions to minimize the impact
        of Anycast Aliasing into the underlay Forwarding tables. We refer to
        those solutions as Multi Ethernet Segment Anycast (MESA) Aliasing.</t>
      </section>
    </section>

    <section anchor="sect-4"
             title="Multi Ethernet Segment Anycast Aliasing Solution">
      <t>The procedures described in this section minimize the impact of
      Anycast Aliasing into the underlay, while preserving the benefits of the
      solution. The additional extensions build upon the procedure described
      in <xref target="sect-3"/>, with some modifications as follows:</t>

      <t><list style="numbers">
          <t>On the Egress NVEs:<list style="letters">
              <t>Instead of allocating an Anycast VTEP address per Ethernet
              Segment as in <xref target="sect-3"/>, a single Anycast VTEP
              address is allocated for all the Anycast Aliasing Ethernet
              Segments shared among the same group of Egress NVEs. That is the
              only additional address for which reachability needs to be
              announced in the underlay routing protocol.</t>

              <t>If "m" Egress NVEs are attached to the same "n" Ethernet
              Segments, all the "m" Egress NVEs advertise the same Anycast
              VTEP address in the A-D per ES routes for the "n" Ethernet
              Segments.</t>

              <t>Upon a link failure on one of the Ethernet Segments, the
              Egress NVE cannot withdraw the Anycast VTEP address from the
              underlay routing protocol, as long as there is at least one
              Ethernet Segment left that makes use of the Anycast VTEP. Only
              in case of a failure on the entire Egress NVE (or all the
              Ethernet Segments sharing the Anycast VTEP) will the Anycast
              VTEP be withdrawn from the Egress NVE.</t>

              <t>Unicast traffic for a failed local Ethernet Segment may still
              be attracted by the Egress NVE, given that the Anycast VTEP
              address is still advertised in the underlay routing protocol. In
              this case, the Egress NVE SHOULD support the procedures in <xref
              target="sect-5"/> so that unicast traffic can be rerouted to
              another Egress NVE attached to the Ethernet Segment.</t>
            </list></t>

          <t>On the Ingress NVEs:<list style="letters">
              <t>An "anycast-aliasing-threshold" and a "collect-timer" are
              configured. The "anycast-aliasing-threshold" represents the
              number of active Egress NVEs per Ethernet Segment under which
              the ingress PE no longer uses the Anycast VTEP address to
              resolve the Ethernet Segment destination (and uses the Unicast
              VTEP instead, derived from the MAC/IP Advertisement route next
              hop). The "collect-timer" is triggered upon the creation of the
              Ethernet Segment destination, and it is needed to settle on the
              number of Egress NVEs for the Ethernet Segment against which the
              "anycast-aliasing-threshold" is compared.</t>

              <t>Upon expiration of the "collect-timer", the Ingress NVE
              computes the number of Egress NVEs for the Ethernet Segment
              based on the next hop count of the received A-D per ES routes.
              If the number of Egress NVEs for the Ethernet Segment is greater
              than or equal to the "anycast-aliasing-threshold" integer, the
              Ethernet Segment destination is resolved to the Anycast VTEP
              address. If lower than the threshold, the Ethernet Segment
              destination is resolved to the unicast VTEP address.</t>
            </list></t>
        </list>In most of the use cases in multi-tenant Data Centers, there
      are two Leaf routers per rack that share all the Ethernet Segments of
      Tenant Systems in the rack. In this case, a single Anycast VTEP address
      per rack is injected in the underlay routing protocol, making the
      solution highly scalable. In addition, in this common use case the
      "anycast-aliasing-threshold" is set to 2. In case of link failure on the
      Ethernet Segment, this limits the amount of "fast-rerouted" traffic to
      only the in-flight packets.</t>

      <section anchor="sect-4.1"
               title="Multi Ethernet Segment Anycast Aliasing Example">
        <t>Consider the example of <xref target="Figure1"/>. Suppose Leaf
        routers L1, L2 and L3 support Multi Ethernet Segment Anycast Aliasing
        as per <xref target="sect-4"/>. Leaf routers L1 and L2 both advertise
        an A-D per ES route for ESI-1, and an A-D per ES route for ESI-2. Both
        routes will carry the Anycast Aliasing flag set and the same Anycast
        VTEP IP12. Following the described procedure, Leaf L3 is configured
        with anycast-aliasing-threshold = 2 and collect-timer = t. Upon
        receiving MAC/IP Advertisement routes for the two Ethernet Segments
        and the expiration of "t" seconds, Leaf L3 determines that the number
        of NVEs for ESI-1 and ESI-2 is equal to the threshold. Therefore, when
        sending unicast packets to Tenant Systems TS1 or TS2, L3 uses the
        Anycast VTEP address as outer IP address.</t>

        <t>Suppose now that the link TS1-L1 fails. Leaf L1 then sends an
        MP_UNREACH_NLRI for the A-D per ES route for ESI-1. Upon reception of
        the message, Leaf L3 changes the resolution of the ESI-1 destination
        from the Anycast VTEP to the Unicast VTEP derived from the MAC/IP
        Advertisement route next hop. Packets sent to Tenant System TS2 (on
        ES-2) still use the Anycast VTEP. In-flight packets sent to TS1 but
        still arriving at Leaf L1 are "fast-rerouted" to Leaf L2 as per <xref
        target="sect-5"/>.</t>
      </section>

      <section anchor="sect-4.2"
               title="Multi Ethernet Segment Anycast Aliasing Alternative Option">
        <t>The proposal in <xref target="sect-4"/> uses a shared VTEP for all
        the Ethernet Segments in a common Egress NVE group. In case the number
        of Egress NVEs sharing the group of Ethernet Segments is limited to
        two, an alternative proposal is to still use a different Anycast VTEP
        per Ethernet Segment, however allocate all those Anycast VTEP
        addresses from the same subnet. A single IP Prefix for such a subnet is
        announced in the underlay routing protocol by the Egress NVEs. The
        benefit of this proposal is that, in case of link failure in one
        individual Ethernet Segment, e.g., link TS1-L1 in <xref
        target="Figure1"/>, Leaf L2 detects the failure (based on the withdrawal
        of the A-D per ES and ES routes) and can immediately announce the
        specific Anycast VTEP address (/32 or /128) into the underlay. Based
        on a Longest Prefix Match when routing NVO3 packets, Spines can
        immediately reroute packets (with destination the Anycast VTEP for
        ESI-1) to Leaf L2. This may reduce the amount of fast-rerouted VXLAN
        packets and spares the Ingress NVE from having to change the
        resolution of the Ethernet Segment destination from the Anycast VTEP
        to the Unicast VTEP.</t>
      </section>
    </section>

    <section anchor="sect-5"
             title="EVPN Fast Reroute Extensions For Anycast Aliasing">
      <t>The procedures in <xref target="sect-3"/> and <xref target="sect-4"/>
      may lead to some temporary situations in which traffic destined to an
      Anycast VTEP for an Ethernet Segment arrives at an Egress NVE where the
      Ethernet Segment link is in a failed state. In that case, the Egress NVE
      SHOULD re-encapsulate the traffic into a NVO3 tunnel following the
      procedures described in <xref
      target="I-D.burdet-bess-evpn-fast-reroute"/>, section 7.1, with the
      following modifications:<list style="numbers">
          <t>The Egress NVEs in this document do not advertise A-D per EVI
          routes, therefore there is no signaling of specific redirect labels
          or VNIs. The Egress NVE uses the global VNI or Domain-wide Common
          Block label of the Ethernet Segment NVEs when re-encapsulating the
          traffic into an NVO3 tunnel (<xref target="sect-3"/>, point 3).</t>

          <t>In addition, when rerouting traffic, the Egress NVE uses the
          Anycast VTEP of the Ethernet Segment as outer source IP address of
          the NVO3 tunnel. Note this is the only case in this document where
          the use of the Anycast VTEP as source IP address is allowed. When an
          Egress NVE receives NVO3-encapsulated packets where the source VTEP
          matches a local Anycast VTEP, there are two implicit behaviors on
          the Egress NVE:<list style="letters">
              <t>The packets pass the Local Bias Split Horizon filtering
              (which is based on the Unicast VTEP of the Ethernet Segment
              peers, and not the Anycast VTEP).</t>

              <t>Receiving NVO3-encapsulated packets with a local Anycast VTEP
              is an indication for the NVE that those packets have been
              "fast-rerouted", hence they MUST NOT be forwarded to another
              tunnel.</t>
            </list></t>
        </list></t>
    </section>

    <section anchor="sect-6"
             title="Applicability of Anycast Aliasing to IP Aliasing">
      <t>The procedures described in this document are applicable also to IP
      Aliasing use cases in <xref target="I-D.ietf-bess-evpn-ip-aliasing"/>.
      Details will be added in future versions of this document.</t>
    </section>

    <section anchor="sect-7"
             title="Applicability of Anycast Aliasing to SRv6 tunnels">
      <t>To be added.</t>
    </section>

    <section anchor="sect-8" title="Operational Considerations">
      <t>"Underlay convergence", or network convergence processed by the
      underlay routing protocol in case of a failure, is normally considered
      to be faster than "overlay convergence" (or network convergence
      processed by EVPN in case of failures). The use of Anycast Aliasing is
      extremely valuable in cases where the operator wants to optimize the
      convergence, since a failure on an Ethernet Segment Egress NVE simply
      means that the underlay routing protocol reroutes the traffic to another
      Egress NVE that uses the same Anycast VTEP. This underlay rerouting to a
      different owner of the Anycast VTEP is extremely fast and efficient,
      especially when used in Data Center designs that make use of BGP in the
      underlay and the Autonomous System allocation recommended in <xref
      target="RFC7938"/> for loop protection. To illustrate this statement,
      suppose a link failure on the link L1-Spine-1 (<xref target="Figure1"/>),
      while Spine-1 and Spine-2 are assigned the same Autonomous System Number
      for their underlay BGP peering sessions, and no "Allowas-in" is
      configured <xref target="RFC7938"/>. If packets with destination Anycast
      VTEP IP12 are received on Spine-1, and the link L1-Spine-1 fails, the
      packets are immediately rerouted to L2. In the same example, if unicast
      VTEPs are used (as in regular all-active Ethernet Segments) and
      in-flight packets with destination unicast VTEP L1 get to Spine-1,
      packets would be dropped if link L1-Spine-1 is not available. This
      translates into a much faster convergence in the case of Anycast
      Aliasing.</t>

      <t>Another benefit of Anycast Aliasing is the reduction of EVPN control
      plane pressure (due to the suppression of the A-D per EVI routes).</t>

      <t>However, an operator must take into account the following operational
      considerations before deploying this solution:</t>

      <t><list style="numbers">
          <t>Troubleshooting Anycast Aliasing Ethernet Segments is different
          from troubleshooting regular all-active Ethernet Segments. Operators
          use an A-D per EVI route withdrawal as an indication that the
          Ethernet Segment has failed in a particular Broadcast Domain
          associated with that A-D per EVI route. The suppression of the A-D
          per EVI routes for the Anycast Aliasing Ethernet Segment means that
          logical failures on a subset of Broadcast Domains of the Ethernet
          Segment (while other Broadcast Domains are still operational) are
          more challenging to detect.</t>

          <t>Anycast Aliasing Ethernet Segments MUST NOT be used in the
          following cases:<list style="letters">
              <t>If the Ethernet Segment multi-homing redundancy mode is
              different from All-Active mode.</t>

              <t>If the Ethernet Segment is used on EVPN VPWS Attachment
              Circuits <xref target="RFC8214"/>.</t>

              <t>If the Attachment Circuit Influenced Designated Forwarder
              capability is needed in the Ethernet Segment <xref
              target="RFC8584"/>.</t>

              <t>If advanced multi-homing features that make use of the
              signaling in EVPN A-D per EVI routes are needed. An example
              would be per EVI mass withdraw.</t>

              <t>If unequal load balancing is needed <xref
              target="I-D.ietf-bess-evpn-unequal-lb"/>.</t>

              <t>If the tunnels used by EVPN in the Broadcast Domains that use
              the Ethernet Segment are not IP tunnels, i.e., not NVO3
              tunnels.</t>

              <t>If the NVEs attached to the Ethernet Segment do not use the
              same VNI or label to identify the same Broadcast Domain.</t>
            </list></t>

          <t>The use of Multi Ethernet Segment Anycast Aliasing on Ethernet
          Segments (<xref target="sect-4"/>) attached to more than two Egress
          NVEs has to be carefully analyzed. Using this procedure when more
          than two Egress NVEs are multi-homed to the same set of CEs may mean
          that packets are permanently fast rerouted in case of a failure. To
          illustrate this, suppose three Egress NVEs attached to ES-1: L1, L2
          and L3. Suppose that the ingress NVE is configured with
          "anycast-aliasing-threshold"=2. In this case, a failure on ES-1 on
          L1 does not prevent the network from sending packets to L1 with
          destination the Anycast VTEP. Upon receiving those packets, L1
          re-encapsulates the packets and sends them to e.g., L2. This
          rerouting persists as long as ES-1 on L1 is in failed state. In
          these cases, the operator may consider direct inter-node links on
          the egress NVEs to optimize the fast rerouting forwarding. That is,
          in the previous example, packets are more efficiently rerouted if
          L1, L2 and L3 are directly connected. It is important to understand
          that this inefficient rerouting (in case of a failing state) does
          not occur in case an Anycast VTEP per Ethernet Segment is allocated
          (<xref target="sect-3"/>), or in case there are only two Egress NVEs
          attached to the Ethernet Segment and the procedures of <xref
          target="sect-4"/> are applied.</t>
        </list></t>
    </section>

    <section anchor="sect-9" title="Security Considerations">
      <t>To be added.</t>
    </section>

    <section anchor="sect-10" title="IANA Considerations">
      <t>IANA is requested to allocate the flag "A" or "Anycast Aliasing mode"
      in bit 2 of the EVPN ESI Multihoming Attributes registry for the
      1-octet Flags field in the ESI Label Extended Community.</t>
    </section>

    <section title="Contributors">
      <t/>
    </section>

    <section title="Acknowledgments">
      <t>The authors would like to thank Jeff Tantsura for his comments.</t>
    </section>
  </middle>

  <back>
    <references title="Normative References">
      &RFC2119;

      &RFC8174;

      &RFC7432;

      &RFC8365;

      &I-D.ietf-bess-rfc7432bis;

      &I-D.ietf-bess-mvpn-evpn-aggregation-label;

      &RFC8584;

      &RFC9012;
    </references>

    <references title="Informative References">
      &RFC7348;

      &RFC8926;

      &RFC4364;

      &RFC7510;

      &RFC8986;

      &RFC8214;

      &RFC7938;

      &RFC9469;

      &I-D.ietf-bess-evpn-ip-aliasing;

      &I-D.ietf-bess-evpn-unequal-lb;

      &I-D.burdet-bess-evpn-fast-reroute;

      &I-D.ietf-bess-evpn-mh-split-horizon;

      <reference anchor="CLOS1953">
        <front>
          <title>A Study of Non-Blocking Switching Networks</title>

          <author fullname="C. Clos" initials="C." surname="Clos">
            <organization>The Bell System Technical Journal, Vol. 32(2), DOI
            10.1002/j.1538- 7305.1953.tb01433.x</organization>
          </author>

          <date month="March" year="1953"/>
        </front>
      </reference>
    </references>
  </back>
</rfc>
