#!/usr/bin/python

#-------------------------------------------------------------------------------
#
# Copyright 2014 Cumulus Networks, inc  all rights reserved
#
#-------------------------------------------------------------------------------

#
#   Import the necessary modules
#
try:
    import argparse
    import clag.clagnetlink
    import clag.clagthread
    import clag.cmdsrv
    import clag.fdbsync
    import clag.healthcheck
    import clag.iff
    import clag.lacpsync
    import clag.mdbsync
    import clag.vlansync
    import clag.vxlansync
    import copy
    import daemon
    import fcntl
    import glob
    import json
    import lockfile
    import os
    import Queue
    import re
    import select
    import signal
    import socket
    import struct
    import subprocess
    import sys
    import syslog
    import tempfile
    import threading
    import time
    import traceback
    import clag.util as util
    import clag.reloadconfig as reloadconfig
    import xml.etree.ElementTree as ET
    import cumulus.sdnotify
except ImportError, e:
    raise ImportError (str(e) + "- required module not found")

#
#   Define constants
#
# Kernel sysfs/procfs directories (network interfaces, VLANs, bonding) and the
# Cumulus porttab file. None of these is referenced in this part of the file.
_InterfacePath          = "/sys/class/net/"
_VLANPath               = "/proc/net/vlan/"
_BondPath               = "/proc/net/bonding/"
_PortTabFile            = "/var/lib/cumulus/porttab"

#
#   Global Variables
#
# Except for the initialization routine these are read-only.
# They start out as None; the initialization routine that fills them in is
# not in this part of the file.
Log                     = None      # logger: Log.log / Log.log_debug / Log.log_error
Parser                  = None      # configuration and protocol state (Parser.args, ...)
Daemon                  = None      # daemon control: TouchKeepAlive, SignalAllThreadsToStop
Intf                    = None      # interface/bond state and peer-alive bookkeeping
Cmd                     = None      # command channel used to notify listeners

LacpSync                = None
VxLanSync               = None
HealthCheck             = None      # backup-link health check; may remain None
ClagNetLink             = None
# Objects whose CollectLocalInfo / EncodeXmlInfo / DecodeXmlInfo /
# ClearPeerInfo methods are invoked by the functions below.
DataSyncs               = []

#
#   Events for waking up threads
#
# The thread loops below sleep with stopEvent.wait(<timeout>).
stopEvent               = clag.clagthread.Event()

#
#   Misc - These are miscellaneous global variables. It is possible that these
#   should be protected by a threading lock, but they are not.
#
serverSock              = None      # listening socket; created in PeerRecv, dropped in PeerIsNotActive
ourPeerIp               = None      # our address on the peer interface; set in PeerRecv
clientSock              = None      # outgoing connection to the peer; created in PeerSend
sendGoodbye             = False     # set by do_exit; stops PeerSend and ParseXmlMessage processing
keep_going              = True      # loop condition of every thread function below
peerSendQueue           = Queue.Queue()     # XML strings waiting to be sent by PeerSend
watchintv = 0                       # not referenced in this part of the file


#-------------------------------------------------------------------------------
#
#   Threads - The clagd daemon is implemented with six threads, each handling
#   a specific task. Communication between the threads is handled using events
#   and the global data structures, above. The threads are:
#
#   CollectSysInfo - Periodically runs, getting current information on this
#   system, like LACP partner MACs and dynamically learned MACs.  Signals the
#   PeerSend thread after reading data by setting the peerSendEvent.  When   
#   the LACP information changes the FindDualConnectedIfs thread is signaled 
#   by setting the newLacpDataEvent.                                         
#
#   PeerSend - Waits for the CollectSysInfo thread to retrieve data, opens a    
#   socket to the peer, sends our information to the peer, and closes the       
#   socket.                                                                     
#
#   PeerRecv - Periodically attempts to create a socket with the peer to allow  
#   the peer to connect with us. Once successful, waits for the peer to open
#   a connection. Reads data from that connection, decodes the data, and closes
#   that connection. Then signals the PeerTimeout thread to let it know that
#   the peer is still alive, and if the data changed from the last time we
#   received it from the peer, signals the FindDualConnectedIfs thread or 
#   UpdateMacsFromPeer thread.
#
#   PeerTimeout - Waits for a peerRecvEvent from the PeerRecv thread or times
#   out waiting for that event. If the event is received, but we had previously
#   timed out (peer has just become alive) then change the sysMac of all bonds
#   to the common value. If we timed out, and had previously not timed out,
#   undo everything and act like a standalone switch.
#
#   FindDualConnectedIfs - Waits for a newLacpDataEvent from either the PeerRecv 
#   or CollectSysInfo threads. Compares the partner MAC addresses we collected
#   from our bonds to those received from our peer. Any new matches place the
#   bond in the dual-connected state. Any former matches that are no longer
#   matching, take the bond out of the dual connected state.
#
#   UpdateMacsFromPeer - Waits for a newMacDataEvent from the PeerRecv thread.
#   Compare the MACs we've learned to the MACs the peer has learned and add or
#   remove MACs from our bonds to keep in sync with the peer's dynamically
#   learned MAC addresses.
#
#-------------------------------------------------------------------------------

def SendInitialSyncDone():
    '''
    Queue a one-time "sync_done" notification for the peer.

    Nothing happens unless the peer is alive and the notification has not yet
    been queued (Parser.syncDoneToPeer). The peer uses this message as its
    handshake-done indication.
    '''
    if not Intf.isPeerAlive() or Parser.syncDoneToPeer:
        return

    # Mark the notification as sent before queueing it so it is only ever
    # queued once per peer session.
    Parser.syncDoneToPeer = True

    message = ET.Element("clag_data", {'version' : Parser._ClagDataVersion})
    ET.SubElement(message, "sync_done")
    peerSendQueue.put(ET.tostring(message))
    util.queueSetMaxLen('peerSendQueue')

    Log.log("Initial data sync to peer done.")

def CollectSysInfo():
    '''
    Thread body: periodically gather local state and queue it for the peer.

    On every pass each registered data sync collects its local information,
    the combined XML message is placed on peerSendQueue, the initial
    "sync_done" handshake is queued if it is due, and the daemon keep-alive
    is touched. The thread then sleeps on stopEvent for the current LACP poll
    interval. The loop runs for as long as keep_going is true.
    '''
    debugFlag = Log._LOG_DEBUG_COLLECT_SYS
    Log.log_debug(debugFlag, "Beginning execution of the thread CollectSysInfo")

    while keep_going:
        # Refresh every data sync's view of the local switch
        for dataSync in DataSyncs:
            dataSync.CollectLocalInfo()

        # Hand the freshly built message to the PeerSend thread
        peerSendQueue.put(MakeXmlMessage())
        util.queueSetMaxLen('peerSendQueue')

        SendInitialSyncDone()

        # Indicate that we are alive
        Daemon.TouchKeepAlive()

        waitMsg = "Waiting %d seconds before continuing execution of the thread CollectSysInfo" % (Parser.getCurrLacpPoll(),)
        Log.log_debug(debugFlag, waitMsg)
        stopEvent.wait(Parser.getCurrLacpPoll())
        Log.log_debug(debugFlag, "Executing the thread CollectSysInfo")

    Log.log_debug(debugFlag, "Finished execution of the thread CollectSysInfo")


def PeerSend():
    '''
    Thread body: deliver queued XML messages to the peer switch over TCP.

    Each pass blocks on peerSendQueue for up to Parser.getPeriodicRun()
    seconds. When a message is available and no client socket exists, a
    connection to Parser.args.peerIp:Parser.args.peerPort is attempted on each
    address returned by getaddrinfo until one succeeds. The message, followed
    by any further messages already waiting in the queue, is then sent; each
    message is terminated by a NUL byte.

    The connection is reused by later passes. Within this function it is only
    torn down through Parser.CloseClientSock() after a connect or send
    failure. If no connection could be established the dequeued message is
    dropped.

    The loop ends when keep_going becomes false or sendGoodbye becomes true.
    '''
    global clientSock
    Log.log_debug(Log._LOG_DEBUG_INFORM_PEER, "Beginning execution of the thread PeerSend")

    clientSock = None
    while keep_going and not sendGoodbye:

        # Get the next message to send to the peer
        Log.log_debug(Log._LOG_DEBUG_INFORM_PEER, "Waiting for an event before continuing execution of the thread PeerSend")
        queueEmpty = False
        try:
            sendStr = peerSendQueue.get(True, Parser.getPeriodicRun())
        except Queue.Empty:
            # Nothing to send this period; sendStr is not (re)assigned, so the
            # code below must test queueEmpty before touching it.
            queueEmpty = True
        Log.log_debug(Log._LOG_DEBUG_INFORM_PEER, "Executing the thread PeerSend.")

        # Create a socket to the peer switch if necessary
        if not queueEmpty and keep_going and clientSock is None:
            # Try every candidate address until one connects
            for (family, socktype, proto, canonname, sockaddr) in \
                socket.getaddrinfo(Parser.args.peerIp, Parser.args.peerPort, socket.AF_UNSPEC,
                                   socket.SOCK_STREAM, 0, socket.AI_PASSIVE):

                try:
                    clientSock = socket.socket(family, socktype, proto)
                except socket.error:
                    clientSock = None
                    Log.log_debug(Log._LOG_DEBUG_INFORM_PEER, "Unable to create socket for %s:%d" % (Parser.args.peerIp, Parser.args.peerPort))
                    continue
                # Attempt to connect to the peer
                try:
                    # Socket option 25 is SO_BINDTODEVICE on Linux: restrict
                    # the socket to the peer interface.
                    clientSock.setsockopt(socket.SOL_SOCKET, 25, Parser.args.peerIf)
                    # Use the connect timeout while connecting, then switch to
                    # the send timeout for the rest of the socket's life.
                    clientSock.settimeout(Parser.args.peerConnect)
                    clientSock.connect(sockaddr)
                    clientSock.settimeout(Parser.args.sendTimeout)
                    clientSock.setsockopt(socket.SOL_SOCKET, socket.SO_SNDBUF, Parser.args.sendBufSize)
                except:
                    # NOTE(review): CloseClientSock is defined outside this
                    # part of the file; presumably it closes the module-level
                    # clientSock and resets it to None - confirm.
                    if Parser:
                        Parser.CloseClientSock()
                    Log.log_debug(Log._LOG_DEBUG_INFORM_PEER, "Unable to connect socket to %s:%d" % (Parser.args.peerIp, Parser.args.peerPort))
                    continue

                Log.log_debug(Log._LOG_DEBUG_INFORM_PEER, "Opened a socket to the peer switch")
                break

        # Send the current message and then drain whatever else is queued
        while not queueEmpty and clientSock is not None and sendStr is not None:
            # Format the data in XML
            Log.log_debug(Log._LOG_DEBUG_INFORM_PEER, "Sending data to peer (%d): %s" % (len(sendStr), sendStr[:100]))

            # Send the data to the peer
            # A NUL byte marks the end of each message on the wire
            sendStr += chr(0)
            while sendStr:
                try:
                    numSent = clientSock.send(sendStr)
                except:
                    Log.log_debug(Log._LOG_DEBUG_INFORM_PEER, "There was an error sending data to the peer switch.")
                    numSent = 0
                if numSent == 0:
                    # Connection is broken: discard the unsent remainder of
                    # this message so the inner loop terminates.
                    numSent = len(sendStr)
                    if Parser:
                        Parser.CloseClientSock()
                    Log.log_debug(Log._LOG_DEBUG_INFORM_PEER, "Error sending data to peer, connection broken.")
                sendStr = sendStr[numSent:]

            # Pick up the next queued message, if any, without blocking for long
            try:
                sendStr = peerSendQueue.get(True, 0.001)
            except Queue.Empty:
                queueEmpty = True

    Log.log_debug(Log._LOG_DEBUG_INFORM_PEER, "Finished execution of the thread PeerSend")


def PeerRecv():
    '''
    Thread body: accept connections from the peer and process what it sends.

    While keep_going is true the thread makes sure a listening socket
    (serverSock) exists on our address in the peer's network, then multiplexes
    that socket and every accepted connection with select():

      * Any socket activity while the peer link is up marks the peer as alive
        (PeerAliveProc).
      * A select timeout, a socket error, or a connection closed by the peer
        triggers PeerIsNotActiveGraceful if the peer is currently alive.
      * Received bytes are accumulated per connection; every NUL-terminated
        message is handed to ParseXmlMessage. Data is ignored while a peer
        death is pending.

    The select timeout is Parser.getPeerTimeout(), or six times the current
    LACP poll interval when no timeout is configured. When the listening
    socket is lost, all accepted connections are closed and the thread waits
    Parser.args.peerConnect seconds before trying to recreate it.
    '''
    global serverSock
    global ourPeerIp

    Log.log_debug(Log._LOG_DEBUG_RECV_PEER, "Beginning execution of the thread PeerRecv")

    # Initially, all bonds have unique system IDs
    Intf.SetAllClagBondsSysMac("00:00:00:00:00:00", [Parser.args.peerIf])

    # Per-connection receive buffers, keyed by socket object
    myPeerXmlStr = {}
    while keep_going:
        # If we don't have a socket accepting connections from the peer, create one.
        if serverSock is None:
            ourPeerIp = Intf.GetIpAddressInSameNet(Parser.args.peerIf, Parser.args.peerIp)
            Log.log_debug(Log._LOG_DEBUG_RECV_PEER, "The IP address of the %s interface is %s." % (Parser.args.peerIf, ourPeerIp))

            if ourPeerIp:
                for (family, socktype, proto, canonname, sockaddr) in \
                    socket.getaddrinfo(ourPeerIp, Parser.args.peerPort, socket.AF_UNSPEC,
                                       socket.SOCK_STREAM, 0, socket.AI_PASSIVE):

                    try:
                        serverSock = socket.socket(family, socktype, proto)
                    except socket.error:
                        serverSock = None
                        Log.log_debug(Log._LOG_DEBUG_RECV_PEER, "Unable to create socket for %s:%d" % (ourPeerIp, Parser.args.peerPort))
                        continue

                    try:
                        serverSock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                        # Socket option 25 is SO_BINDTODEVICE on Linux:
                        # only accept traffic arriving on the peer interface.
                        serverSock.setsockopt(socket.SOL_SOCKET, 25, Parser.args.peerIf)
                        serverSock.bind(sockaddr)
                        serverSock.listen(5)
                    except socket.error as msg:
                        serverSock.close()
                        serverSock = None
                        Log.log_debug(Log._LOG_DEBUG_RECV_PEER, "Unable to bind socket to %s:%d (%s) %s" % (ourPeerIp, Parser.args.peerPort, sockaddr, msg))
                        continue

                    Log.log_debug(Log._LOG_DEBUG_RECV_PEER, "Server socket to %s:%d was created" % (ourPeerIp, Parser.args.peerPort))
                    break

        if serverSock is not None:
            inputs  = [ serverSock ]
            # serverSock may be reset to None by PeerIsNotActive (possibly
            # from another thread), which also ends this inner loop.
            while inputs and keep_going and serverSock:

                # Wait until there is a socket to read or an error
                Log.log_debug(Log._LOG_DEBUG_RECV_PEER, "Waiting for something to happen")

                # If the user supplied a timeout, use it
                currTimeout = Parser.getPeerTimeout()
                if not currTimeout:
                    currTimeout = Parser.getCurrLacpPoll() * 6

                try:
                    (readable, writable, exceptional) = select.select(inputs, [], inputs, currTimeout)
                except (select.error, socket.error) as e:
                    Log.log_debug(Log._LOG_DEBUG_RECV_PEER, "An error occurred: %s" % (e,))
                    (readable, writable, exceptional) = ([],[],[])
                else:
                    if keep_going and (readable, writable, exceptional) != ([],[],[]):
                        # isPeerLinkDown is a method and must be called; the
                        # bare attribute is always truthy, which previously
                        # made this branch unreachable.
                        if not Intf.isPeerLinkDown():
                            # Did the peer just become alive?
                            PeerAliveProc()
                    else:
                        # Has the peer just gone silent?
                        if Intf.isPeerAlive():
                            PeerIsNotActiveGraceful()

                # An error occurred on a socket, gracefully shut it down
                for errSock in exceptional:
                    Log.log_debug(Log._LOG_DEBUG_RECV_PEER, "Closing a socket because of an error")
                    inputs.remove(errSock)
                    errSock.close()
                    if errSock in readable:
                        readable.remove(errSock)
                    if errSock in myPeerXmlStr:
                        del myPeerXmlStr[errSock]
                        if Intf.isPeerAlive():
                            PeerIsNotActiveGraceful()
                    if errSock is serverSock:
                        Log.log_debug(Log._LOG_DEBUG_RECV_PEER, "   The socket was the server socket")
                        serverSock = None

                if keep_going:
                    for readSock in readable:
                        if readSock is serverSock:
                            # Accept a new socket connection
                            Log.log_debug(Log._LOG_DEBUG_RECV_PEER, "Accepting a new peer connection")
                            (newSock, clientAddr) = readSock.accept()
                            # Add the new socket to our list, and clear its receive data buffer
                            inputs.append(newSock)
                            myPeerXmlStr[newSock] = ""
                        else:
                            # Read data from this socket
                            Log.log_debug(Log._LOG_DEBUG_RECV_PEER, "Reading data from the peer")
                            try:
                                myPeerData = readSock.recv(2048)
                            except socket.error:
                                # Treat a failed read like a closed connection
                                myPeerData = 0
                            if myPeerData:
                                if Intf.GetPeerDeathPending():
                                    # ignore the data - peer conn. is gone; we
                                    # don't want to accidentally mark the peer
                                    # as alive
                                    Log.log_debug(Log._LOG_DEBUG_RECV_PEER,\
                                                 "Ignoring data from the peer")
                                else:
                                    # Add the data read to this socket's receive buffer
                                    myPeerXmlStr[readSock] += myPeerData
                                    # Process every complete (NUL-terminated)
                                    # message; keep any partial tail buffered.
                                    while chr(0) in myPeerXmlStr[readSock]:
                                        inEnd = myPeerXmlStr[readSock].find(chr(0))
                                        inCmd = myPeerXmlStr[readSock][:inEnd]
                                        myPeerXmlStr[readSock] = myPeerXmlStr[readSock][inEnd+1:]
                                        Log.log_debug(Log._LOG_DEBUG_RECV_PEER, "PeerData: (%d) %s" % (len(inCmd), inCmd[:100],))
                                        ParseXmlMessage(inCmd)
                            else:
                                # If no data was available, the socket has been closed, end of message.
                                Log.log_debug(Log._LOG_DEBUG_RECV_PEER, "Closing a socket because there's nothing to read")
                                inputs.remove(readSock)
                                readSock.close()
                                myPeerXmlStr.pop(readSock, None)
                                if Intf.isPeerAlive():
                                    PeerIsNotActiveGraceful()

            # The listening socket is gone: drop every remaining connection
            if keep_going and not serverSock:
                for inputSock in inputs:
                    myPeerXmlStr.pop(inputSock, None)
                    inputSock.close()

        if keep_going:
            Log.log_debug(Log._LOG_DEBUG_RECV_PEER, "Waiting %d seconds before continuing execution of the thread PeerRecv" % (Parser.args.peerConnect,))
            stopEvent.wait(Parser.args.peerConnect)
            Log.log_debug(Log._LOG_DEBUG_RECV_PEER, "Executing the thread PeerRecv.")

    Log.log_debug(Log._LOG_DEBUG_RECV_PEER, "Finished execution of the thread PeerRecv")


def PeerAliveProc():
    '''
    Transition to the "peer alive" state if the peer is not already alive.

    Under Intf.peerDownLock this marks the peer alive, notifies listeners
    ("add clag-state = up"), programs the configured system MAC on all clag
    bonds except the peer interface, completes the initial handshake
    immediately for peers that cannot send a sync_done message, and clears
    any pending peer-death state.

    The lock is released in a finally clause so that an exception raised by
    any of these steps cannot leave it held.
    '''
    if Intf.isPeerAlive():
        return

    Intf.peerDownLock.acquire()
    try:
        Log.log("The peer switch is active.")
        Log.log_debug(Log._LOG_DEBUG_FSM, "toListeners: add clag-state = up")
        Intf.SetPeerAlive(True)
        Cmd.CmdGetAllOutput("add clag-state = up")
        Intf.SetAllClagBondsSysMac(Parser.args.sysMac, [Parser.args.peerIf])
        if not Intf.initHandShakeDone:
            if not Parser.isPeerStaticClagStateAware():
                # Peer versions less than 1.1.0 are not capable of
                # sending initial sync done message so move to handshake
                # done right away.
                Intf.SetInitHandShakeDone()
        Intf.SetPeerDeathPending(False)
    finally:
        Intf.peerDownLock.release()


def PeerLinkChange():
    '''
    Thread body: poll the state of the link to the peer switch.

    The link is assumed down at start. A down transition hands over to
    PeerIsNotActiveGraceful. An up transition reconfigures learning on the
    peer interfaces and announces the peer link to listeners; for a
    sub-interface the first logical master is announced instead. Between
    polls the thread sleeps on stopEvent for Parser.getPeerLinkPoll() seconds.
    '''
    debugFlag = Log._LOG_DEBUG_PEER_LINK
    Log.log_debug(debugFlag, "Beginning execution of the thread PeerLinkChange")

    linkWasDown = True
    while keep_going:
        linkIsDown = Intf.isPeerLinkDown()

        if linkIsDown and not linkWasDown:
            # The peer link has just gone down
            PeerIsNotActiveGraceful()
        elif linkWasDown and not linkIsDown:
            # The peer link has just come up
            ConfigurePeerLearning(True)
            announcedIf = Parser.args.peerIf
            if Intf.isSubIfName(announcedIf):
                announcedIf = Intf.GetLogicalMastersOfSubIf(announcedIf)[0]
            Log.log_debug(Log._LOG_DEBUG_FSM, "toListeners: add peer-link = %s" % announcedIf)
            Cmd.CmdGetAllOutput("add peer-link = %s" % (announcedIf,))

        linkWasDown = linkIsDown

        waitMsg = "Waiting %d seconds before continuing execution of the thread PeerLinkChange" % (Parser.getPeerLinkPoll(),)
        Log.log_debug(debugFlag, waitMsg)
        stopEvent.wait(Parser.getPeerLinkPoll())
        Log.log_debug(debugFlag, "Executing the thread PeerLinkChange.")

    Log.log_debug(debugFlag, "Finished execution of the thread PeerLinkChange")


#-------------------------------------------------------------------------------
#
#   Thread Support Routines - These functions are called by the above threads.
#
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
#
#   clagdata XML format - The messages sent between peers use XML to encode the
#   data. This provides a convenient and extensible way to transfer data
#   between peers. There are many libraries available which can encode and
#   parse XML-formatted data. The XML format used is shown by this example:
#
#       <clag_data version="0.1.0">
#         <clagid uniqueId="44:38:39:00:11:16" priority="32768" role="Primary" />
#         <update_interval units="seconds">4</update_interval>
#         <mgmt_if ipaddr="10.0.1.5" />
#         <sync_done />
#         <i8023ad system_id="00:00:00:01:01:01">
#           <bond ad_partner_mac="00:02:00:00:00:09" name="bond4" portId="8.003">
#             <mac vlan="101">00:02:00:00:00:09</mac>
#             <multicast vlan="100" dev="br0">234.10.10.10</multicast>
#             <vlan id="101" name="bond4.101" />
#             <vlan id="102" name="bond4.102" />
#             <vlan id="103" name="bond4.103" />
#           </bond>
#           <bond ad_partner_mac="00:02:00:00:00:0b" name="bond5" portId="8.005" />
#           <bond ad_partner_mac="00:02:00:00:00:24" name="bond0" portId="8.009" />
#           <bond ad_partner_mac="00:02:00:00:00:03" name="bond1" portId="8.001">
#             <mac vlan="101">00:02:00:00:00:03</mac>
#             <vlan id="101" name="bond1.101"/>
#           </bond>
#           <bond ad_partner_mac="00:02:00:00:00:05" name="bond2" portId="8.004">
#             <vlan id="101" name="bond2.101" />
#           </bond>
#           <bond ad_partner_mac="00:02:00:00:00:07" name="bond3" portId="None" />
#           <neighbor ipaddr="10.1.0.4" dev="br100" mac="00:02:00:00:00:08" flag="reachable" bond="bond4" vlan="100" />
#           <neighbor ipaddr="10.1.0.12" dev="br10" mac="00:02:00:00:00:28" flag="stale" bond="bond3" vlan="None" />
#         </i8023ad>
#       </clag_data>
#
#   The root element tag is "clag_data" and the required "version" attribute
#   defines the format of the child elements. Different major versions indicate 
#   entirely different formats. New minor versions indicate additions of data
#   while retaining all previous data. New sub-versions indicate cosmetic changes
#   which don't affect the data itself.
#
#   The sender of the XML data indicates how frequently it sends the data with
#   the update_interval element. Receivers can use this to know how long before
#   it should expect to wait before receiving the next data from the sender.
#
#   IEEE 802.3ad (LAG) information is contained as children of the i8023ad 
#   element. The system_id attribute contains the ad_actor_system that the
#   sender has assigned to all of its bonds.
#
#   The "bond" elements contain a "name" attribute which is the ifname of
#   the bond on the sender's system. It also contains the ad_partner_mac
#   which is the MAC address of the partner of the bond (used by the receiver 
#   to determine if the bond is dual connected).
#
#   Bonds which are dual connected may have "mac" sub elements, which are the
#   MAC addresses which have been dynamically learned on the interface. If the
#   bond has vlan sub-interfaces, then a vlan attribute will include the VLAN
#   ID associated with the mac address.
#
#   Bonds can also have "vlan" sub elements, which are the VLANs configured
#   on that bond. 
#
#-------------------------------------------------------------------------------

def PrintElement(node, level):
    '''
    Debug-log an XML element and, recursively, all of its child elements.

    node  - the ElementTree element to print
    level - nesting depth; each level indents the output by three spaces
    '''
    indent = " " * (level*3)
    line = "%s%s: %s: %s" % (indent, node.tag, node.text, str(node.attrib))
    Log.log_debug(Log._LOG_DEBUG_COLLECT_SYS, line)

    childLevel = level + 1
    for subNode in node.findall('*'):
        PrintElement(subNode, childLevel)

def MakeXmlMessage():
    '''
    Build the XML status message that is sent to the peer.

    The message always carries our clag id, the configured LACP poll interval,
    the 802.3ad system id and the VXLAN anycast address. Our peer-facing IP
    address is included once the listening socket exists, "sync_done" once
    the initial sync has been announced, and the data syncs add their
    information only while the peer is alive. The root element carries
    goodbye="yes" when the daemon is shutting down.

    Returns the serialized XML string.
    '''
    rootAttrs = {'version' : Parser._ClagDataVersion}
    if sendGoodbye:
        rootAttrs['goodbye'] = 'yes'
    message = ET.Element("clag_data", rootAttrs)

    idAttrs = {
        "priority" : str(Parser.clagId[0]),
        "uniqueId" : Parser.clagId[1],
        "role"     : Parser.clagRole,
    }
    ET.SubElement(message, "clagid", idAttrs)

    pollInterval = str(Parser.getCfgLacpPoll())
    intervalNode = ET.SubElement(message, "update_interval", {"units" : "seconds"})
    intervalNode.text = pollInterval

    if serverSock is not None:
        if ourPeerIp is not None:
            ET.SubElement(message, "mgmt_if", {"ipaddr" : ourPeerIp})

    if Parser.syncDoneToPeer:
        ET.SubElement(message, "sync_done")

    ET.SubElement(message, "i8023ad", {"system_id" : Parser.args.sysMac})
    ET.SubElement(message, "vxlan_config", {"anycast_ip" : str(Parser.args.vxlanAnycast)})

    if Intf.isPeerAlive():
        for dataSync in DataSyncs:
            dataSync.EncodeXmlInfo(message)

    return ET.tostring(message)


def ParseXmlMessage(xmlStr):
    '''
    Parse one XML message received from the peer and act on its contents.

    xmlStr - the XML text of a single message (without the NUL terminator)

    Processing stops early, without touching the data syncs, when:
      * we are shutting down (sendGoodbye),
      * the text is not well-formed XML or the clagid priority is not a
        number (message ignored),
      * the root element, version, mgmt_if address or i8023ad system id is
        unacceptable (all threads are signalled to stop),
      * the peer's vxlan anycast IP differs from ours (threads are stopped
        only when we are not primary),
      * the peer said goodbye (we take the primary role),
      * the peer link is down.

    Otherwise the peer id/role, update interval and sync_done state are
    recorded and every data sync decodes its part of the message.

    Values supplied by the peer (version, priority, update interval) are
    validated before conversion so that a malformed message cannot raise an
    exception out of the receiving thread.
    '''
    # If we're in the process of quitting, don't do any processing
    if sendGoodbye:
        return

    # Convert the XML string into an ElementTree element
    try:
        root = ET.fromstring(xmlStr)
    except ET.ParseError:
        Log.log_error("The data received from the peer is not in a valid format:\n%s" % (xmlStr,))
        return

    # Is the root element named properly?
    if root is None or root.tag != "clag_data":
        Log.log_error("The data received from the peer is not in a valid format:\n%s" % (xmlStr,))
        Daemon.SignalAllThreadsToStop()
        return

    # Is there a version attribute with the root element?
    verStr = root.get("version")
    if verStr is None:
        Log.log_error("The data received from the peer does not include a version:\n%s" % (xmlStr,))
        Daemon.SignalAllThreadsToStop()
        return

    # Is the version something we understand? It must be of the form
    # "major.minor.sub" with a numeric major part; anything else is handled
    # like the other unusable-version cases.
    try:
        (peerMajVer, peerMinVer, peerSubVer) = verStr.split(".")
        peerMajVer = int(peerMajVer)
    except ValueError:
        Log.log_error("The data received from the peer includes a malformed version: %s" % (verStr,))
        Daemon.SignalAllThreadsToStop()
        return
    myMajVer = int(Parser._ClagDataVersion.split(".")[0])
    if peerMajVer > myMajVer:
        Log.log_error("The peer is running a version I do not understand. My version: %s, Peer version: %s" % (Parser._ClagDataVersion, verStr))
        Daemon.SignalAllThreadsToStop()
        return

    if Parser.peerDataVersion != verStr:
        Parser.peerDataVersion = verStr

    # Is there a goodbye attribute with the root element?
    byeStr = root.get("goodbye")
    if byeStr == 'yes':
        Intf.SetClagRole(Parser._ROLE_PRIMARY, "peer sent goodbye")
        return

    # Ignore the rest of the message if the peer link is down (enqueued prior to
    # link going down).
    if Intf.isPeerLinkDown():
        return

    clagIdNode = root.find('clagid')
    if clagIdNode is not None:
        priorityStr = clagIdNode.get('priority')
        try:
            peerPriority = int(priorityStr)
        except (TypeError, ValueError):
            # Missing or non-numeric priority: the election below cannot be
            # performed, so ignore this message.
            Log.log_error("The clagid received from the peer has an invalid priority: %s" % (priorityStr,))
            return
        Parser.peerId[0] = peerPriority
        Parser.peerId[1] = clagIdNode.get('uniqueId')
        Parser.peerRole = clagIdNode.get('role')
        PeerAliveProc()
        # The lower (priority, uniqueId) pair wins the primary role
        if Parser.clagId[0] < Parser.peerId[0] or (Parser.clagId[0] == Parser.peerId[0] and Parser.clagId[1] < Parser.peerId[1]):
            Intf.SetClagRole(Parser._ROLE_PRIMARY, "elected")
        else:
            Intf.SetClagRole(Parser._ROLE_SECONDARY, "elected")

    # Was a transmit interval supplied?
    interval = root.find('update_interval')
    if interval is not None:
        # If a units attribute was supplied it must be "seconds"
        units = interval.get('units')
        if units is not None and units != 'seconds':
            Log.log_warn("The units for the update_interval: %s, are not recognized. Skipping the interval" % (units,))
        else:
            # Get the peer's send rate; an empty or non-numeric value is
            # skipped just like an unrecognized unit.
            try:
                peerRate = int(interval.text)
            except (TypeError, ValueError):
                Log.log_warn("The update_interval: %s, is not a valid number. Skipping the interval" % (interval.text,))
            else:
                Parser.setPeerRate(peerRate)

    # Was a mgmt IP address supplied?
    mgmt_if = root.find('mgmt_if')
    if mgmt_if is not None:
        # Get the IP address
        peerIpAddr = mgmt_if.get('ipaddr')
        if peerIpAddr is not None and peerIpAddr != Parser.args.peerIp:
            Log.log_error("The peer's IP address supplied when starting clagd (%s) is not the IP address the peer is using (%s)" % (Parser.args.peerIp, peerIpAddr))
            Log.log_error("Restart clagd with the correct peerIp address (%s)" % (peerIpAddr,))
            Daemon.SignalAllThreadsToStop()
            return

    # Peer indicated that he has synced all the initial data
    sync_done = root.find('sync_done')
    if sync_done is not None:
        if not Parser.syncDoneFromPeer:
            Parser.SetSyncDoneFromPeer(True)
            Log.log("Initial data sync from peer done.")
            Intf.SetInitHandShakeDone()
            # Check if any of the dormant bonds can be moved to up without
            # involving the peer
            Intf.UpdatePendingDormantBonds()

    # Go through all of the i8023ad elements
    i8023ad = root.find('i8023ad')
    if i8023ad is not None:
        # Is the peer's sysMac the same as ours?
        sysId = i8023ad.get('system_id')
        if sysId is None or sysId != Parser.args.sysMac:
            Log.log_error("The system ID of the peer does not match our system ID. Peer = %s, Our = %s" % ("None" if not sysId else sysId, Parser.args.sysMac))
            Daemon.SignalAllThreadsToStop()
            return

    # Go through all of the vxlan_config elements
    vxlan_config = root.find('vxlan_config')
    if vxlan_config is not None:
        peerAnycast = vxlan_config.get('anycast_ip')
        # The sender serializes an unset address as the string "None"
        if peerAnycast == "None":
            peerAnycast = None
        # Is the peer's vxlan anycast IP the same as ours?
        if peerAnycast != Parser.args.vxlanAnycast:
            Log.log_error("The vxlanAnycast IP of the peer does not match our vxlanAnycast IP. Peer = %s, Our = %s" % (str(peerAnycast), Parser.args.vxlanAnycast))
            if Parser.clagRole != Parser._ROLE_PRIMARY:
                Daemon.SignalAllThreadsToStop()
            return

    # Let each of the data syncs process their portion of the XML
    for sync in DataSyncs:
        sync.DecodeXmlInfo(root)

    return


def ConfigurePeerLearning(enable):
    '''
    Set the bridge "learning" and "peerlink" attributes on the peer interfaces.

    enable - when true, learning is switched off and peerlink on; when false,
             learning is switched on and peerlink off.

    The attributes are applied, in a single /sbin/bridge batch, to the logical
    masters of Parser.args.peerIf (or to the peer interface itself when it has
    none) and to every VLAN interface listed for those masters in the
    master/VLAN map.

    Intf.bridgeAttrLock is held for the duration and released in a finally
    clause so that a failure in any step cannot leave it held.
    '''
    Intf.bridgeAttrLock.acquire()
    try:
        masterVlanMap = Intf.GetMasterIfVlanMap()
        peerMasters = Intf.GetLogicalMastersOfSubIf(Parser.args.peerIf)
        if not peerMasters:
            peerMasters = [Parser.args.peerIf]
        allPeerIfs = list(peerMasters)
        for peerMaster in peerMasters:
            allPeerIfs += masterVlanMap.get(peerMaster, {}).values()

        learning = "off" if enable else "on"
        peerlink = "on" if enable else "off"
        params = [(peerIfName, learning, peerlink) for peerIfName in allPeerIfs]
        Intf.ExecuteBatch(["/sbin/bridge", "-force", "-batch"], "link set dev %s learning %s peerlink %s", params)
    finally:
        Intf.bridgeAttrLock.release()


def PeerIsNotActive(peerSwitchDead):
    '''
    Tear down all peer state and fall back to standalone operation.

    peerSwitchDead - accepted for the callers' benefit; not used by this
                     function.

    Under Intf.peerDownLock this closes the client socket, shuts down and
    closes the listening socket (resetting serverSock to None), clears the
    peer information held by every data sync, resets the peer rate and the
    sync-done flags, marks the peer as not alive, restores the per-bond system
    MAC unless we are primary, notifies listeners ("add clag-state = down")
    and re-evaluates dormant bonds.

    The lock is released in a finally clause, and the listening socket is
    closed even when shutdown() fails, so neither can be leaked by an error.
    '''
    global serverSock

    Intf.peerDownLock.acquire()
    try:
        Log.log("The peer switch is no longer active")
        if Parser:
            Parser.CloseClientSock()

        if serverSock:
            # Drop the global reference first so other code stops using the
            # socket while it is being closed.
            myServerSock = serverSock
            serverSock = None
            try:
                myServerSock.shutdown(socket.SHUT_RDWR)
            except socket.error:
                # Best effort: the socket may already be unusable
                pass
            try:
                myServerSock.close()
            except socket.error:
                pass

        for sync in DataSyncs:
            sync.ClearPeerInfo()

        Parser.setPeerRate(None)
        Parser.syncDoneToPeer = False
        Parser.SetSyncDoneFromPeer(False)
        Intf.SetPeerAlive(False)

        if Parser.clagRole != Parser._ROLE_PRIMARY:
            Intf.SetAllClagBondsSysMac("00:00:00:00:00:00", [Parser.args.peerIf])
        Log.log_debug(Log._LOG_DEBUG_FSM, "toListeners: add clag-state = down")
        Cmd.CmdGetAllOutput("add clag-state = down")

        # Now that the peer is dead bonds stuck in a dormant state can be
        # moved to up
        Intf.UpdatePendingDormantBonds()
    finally:
        Intf.peerDownLock.release()

def PeerIsNotActiveGraceful():
    '''
    Handle loss of the peer, giving the backup link a chance to vouch for it.

    The peer is declared inactive right away (PeerIsNotActive(True)) when any
    of these holds: clagd is stopping, there is no HealthCheck object, the
    backup link is not active, Intf reports a backup role of primary, or the
    peer is already marked as not alive.

    Otherwise the peer death is only marked as pending and the health check
    rapid hello detection is started; that is done once per pending period.
    '''
    backupCanDecide = (keep_going
                       and HealthCheck
                       and HealthCheck.GetBackupActive()
                       and Intf.GetBackupRole() != Parser._ROLE_PRIMARY
                       and Intf.isPeerAlive())
    if not backupCanDecide:
        PeerIsNotActive(True)
        return

    # we need to check if peer switch is still accessible via the backup link
    if Intf.GetPeerDeathPending():
        return
    Intf.SetPeerDeathPending(True)
    HealthCheck.HelloRapidDetectionSetup()

def signal_handler(signum, frame):
    '''
    Process level signal handler.

    SIGHUP reopens the log file and SIGTERM runs do_exit(0). The real-time
    signals SIGRTMIN, SIGRTMIN+1 and SIGRTMIN+2 store the matching switchd
    state constant in Parser.switchdState (only when Parser exists). Every
    other signal is ignored. The frame argument is not used.
    '''
    switchdStates = {}
    switchdStates[signal.SIGRTMIN] = ClagParser._SWITCHD_STARTING
    switchdStates[signal.SIGRTMIN + 1] = ClagParser._SWITCHD_READY
    switchdStates[signal.SIGRTMIN + 2] = ClagParser._SWITCHD_DEAD

    if signum == signal.SIGHUP:
        reopen()
        return
    if signum == signal.SIGTERM:
        do_exit(0)
        return
    if signum in switchdStates and Parser:
        Parser.switchdState = switchdStates[signum]

def reopen():
    '''
    Reopen the log file in append mode (used on SIGHUP).

    Nothing happens when there is no logger yet or when the logger writes to
    "syslog" or "stdout". If the file cannot be opened, logging is switched
    off by installing the do-nothing handler.
    '''
    if not Log:
        return

    logPath = Log.getLogfile()
    if logPath in ("syslog", "stdout"):
        return

    try:
        Log.handle = open(logPath, 'a')
        Log.log("Continuing logging after SIGHUP (possibly due to log rotation)")
    except IOError:
        Log.handle = None
        Log.log_handler = Log.log_handler_nothing


def do_exit(status = 0):
    '''
    Clean up and exit.

    status - value handed to sys.exit(). 0 is logged as "clean exit", any
             other value is logged as an error.

    Sequence:
      1. If the peer is alive: drain peerSendQueue, switch our role to
         secondary (notifying the command listeners), queue a goodbye
         message and wait half a second for it to be sent.
      2. Put the interfaces into their exit state (proto-down on exit,
         peer learning restored, dormant mode cleared).
      3. Signal every thread to stop and join them.
      4. Remove the pid file, release its lock and exit.

    This function does not return; it always ends in sys.exit().
    '''
    global Cmd
    global sendGoodbye
    global HealthCheck
    global ClagNetLink

    # If the peer is alive, send a goodbye message
    if Intf and Intf.isPeerAlive():
        root = ET.Element("clag_data", {'version' : Parser._ClagDataVersion, 'goodbye' : 'yes'})
        xmlStr = ET.tostring(root)
        # Flush the send queue
        queueEmpty = False
        while not queueEmpty:
            try:
                peerSendQueue.get_nowait()
            except Queue.Empty:
                queueEmpty = True
        # NOTE(review): sendGoodbye is consumed outside this function;
        # presumably by the peer send thread - confirm.
        sendGoodbye = True
        # Since the peer will now be primary, change our role to secondary
        if Parser and Parser.clagRole != Parser._ROLE_SECONDARY:
            Parser.clagRole = Parser._ROLE_SECONDARY
            if Log:
                Log.log("Role is now secondary; sent goodbye to peer")
            if Cmd:
                Log.log_debug(Log._LOG_DEBUG_FSM, "toListeners: add clag-role = %s" % (str(Parser.clagRole)))
                Cmd.CmdGetAllOutput("add clag-role = %s" % (str(Parser.clagRole),))
        # Send the goodbye message and wait for a short time so it can go out
        peerSendQueue.put(xmlStr)
        util.queueSetMaxLen('peerSendQueue')
        time.sleep(0.5)

    # Interface state is only touched when both the interface manager and
    # the parser were created.
    if Intf and Parser:
        Intf.SetProtoDownOnExit()
        ConfigurePeerLearning(False)
        Intf.ClearDormantMode()

    # Wake up all threads, then wait for the ones listed in Threads that
    # were actually created (slot 1 is the thread object).
    if Daemon:
        Daemon.SignalAllThreadsToStop()
    for thread in Threads:
        if Threads[thread][1]:
            Threads[thread][1].join()

    for sync in DataSyncs:
        sync.JoinThreads()
        # only unbinds the loop variable; DataSyncs still references the object
        del sync

    if Cmd:
        Cmd.JoinThreads()
    del Cmd

    if HealthCheck:
        HealthCheck.JoinThreads()
        del HealthCheck

    if ClagNetLink:
        ClagNetLink.JoinThreads()
        del ClagNetLink

    # The pid file is only removed when this instance got as far as start().
    if Daemon:
        if Daemon.started and Daemon.pid_file:
            os.unlink(Daemon.pid_file)
            Daemon.lock.release()
    if Log:
        if status == 0:
            Log.log("clean exit")
        else:
            Log.log_error("exit with status %d" % status)
    sys.exit(status)



#-------------------------------------------------------------------------------
#
#   This dictionary contains the threads which we run. The key is the name of
#   the thread and the value is an array. The first element is the address of
#   the function which implements the thread, and the second is the thread
#   object for this thread (filled in when the thread is created).
#
#-------------------------------------------------------------------------------

def CollectSysInfoT():
    '''
    Thread entry point for the "CollectSysInfo" thread: runs CollectSysInfo()
    and, if anything at all is raised, hands it to Daemon.DumpTraceback().
    '''
    try:
        CollectSysInfo()
    except:
        Daemon.DumpTraceback()

def PeerSendT():
    '''
    Thread entry point for the "PeerSendThread" thread: runs PeerSend() and,
    if anything at all is raised, hands it to Daemon.DumpTraceback().
    '''
    try:
        PeerSend()
    except:
        Daemon.DumpTraceback()

def PeerRecvT():
    '''
    Thread entry point for the "PeerRecvThread" thread: runs PeerRecv() and,
    if anything at all is raised, hands it to Daemon.DumpTraceback().
    '''
    try:
        PeerRecv()
    except:
        Daemon.DumpTraceback()

def PeerLinkChangeT():
    '''
    Thread entry point for the "PeerLinkChange" thread: runs PeerLinkChange()
    and, if anything at all is raised, hands it to Daemon.DumpTraceback().
    '''
    try:
        PeerLinkChange()
    except:
        Daemon.DumpTraceback()


# Thread table: thread name -> [entry point function, thread object].
# The thread object slot starts out as None; do_exit() joins every entry
# whose slot has been filled in.
Threads = {
    "CollectSysInfo"       : [CollectSysInfoT,      None],
    "PeerSendThread"       : [PeerSendT,            None],
    "PeerRecvThread"       : [PeerRecvT,            None],
    "PeerLinkChange"       : [PeerLinkChangeT,      None],
}


#-------------------------------------------------------------------------------
#
#   Error handling
#
#-------------------------------------------------------------------------------

class ClagdError(RuntimeError):
    '''
    Runtime failure raised by clagd code.

    Constructed with a single argument: the human readable error string.
    '''

class ArgumentParsingError(Exception):
    '''
    Raised in place of the usual argparse error handling; carries the
    argparse error message.
    '''


#-------------------------------------------------------------------------------
#
#   Daemon handling code
#
#-------------------------------------------------------------------------------

class ClagDaemon:
    '''
    Process management for a daemon.
    An instance should be created as early as possible.

    Covers the pid file, the systemd watchdog / alive file keep-alive,
    last-resort exception reporting for threads and the "everybody stop"
    broadcast.
    '''

    def __init__(self, name = None):
        '''
        name is the program name (defaulting to basename(argv[0]))

        The pid file path is /var/run/<name>.pid and a lockfile.FileLock
        object is created for that path.
        '''
        if name is None:
            name = os.path.basename(sys.argv[0])
        self.name = name
        self.tick = 0
        self.pid_file = os.path.abspath("/var/run/%s.pid" % self.name)
        self.lock = lockfile.FileLock(self.pid_file)
        self.started = False

    def start(self):
        '''
        Start running.
        Create the pid file and read the watchdog interval.
        This should be called after early initialization (like
        argument parsing), before any serious work.

        Raises ClagdError when the pid file cannot be written.
        '''
        global watchintv
        try:
            self.write_pidfile()
        except Exception as e:
            # Report the failure to the caller, who decides how to exit.
            raise ClagdError("Unable to write to PID file - %s." % (str(e),))
        self.started = True

        # the clag period for calling the watchdog is 2 seconds
        # (default_lacp_poll), and is rarely changed, and never
        # to large values, so no need to verify that the watchdog
        # interval is sufficiently longer than the period.
        # WATCHDOG_USEC is in microseconds; keep whole seconds only.
        watchintv = int(os.getenv('WATCHDOG_USEC', '0')) // 1000000

    def isClagAlreadyRunning(self):
        '''
        Check if another instance of clagd is already running. Check the PID file
        and if the PID file contents point to a valid running clagd instance.

        Returns True only when the pid file can be read and the second
        NUL-separated field of /proc/<pid>/cmdline is "/usr/sbin/clagd".
        Any read or parse problem counts as "not running".
        '''
        running = False
        try:
            with open(self.pid_file) as f:
                clag_pid = f.readline().strip()
            with open("/proc/" + clag_pid + "/cmdline") as f:
                # field 0 is skipped; presumably it is the interpreter
                cmdline = f.readline().strip().split('\x00')[1]
                if cmdline == "/usr/sbin/clagd":
                    running = True
        except (IOError, ValueError, IndexError):
            pass

        return running

    def write_pidfile(self):
        '''
        Write our pid, followed by a newline, to the pid file. The open file
        object is kept in self.pidfd.

        Raises ClagdError when the file cannot be opened.
        '''
        pid = str(os.getpid())
        try:
            f = open(self.pid_file, 'w')
        except:
            # NOTE(review): assumes the caller acquired self.lock before
            # calling start(); the acquire is not visible here - confirm.
            self.lock.release()
            raise ClagdError("Cannot open pid file %s" % self.pid_file)
        f.write("{0}\n".format(pid))
        f.flush()
        self.pidfd = f

    def DumpTraceback(self):
        '''
        Report the exception currently being handled (to the log when there
        is one, otherwise to stdout) and tell all threads to stop.
        Meant to be called from inside an except clause.
        '''
        (exc_type, exc_value, exc_traceback) = sys.exc_info()
        err = "".join(traceback.format_exception(exc_type, exc_value,
                                                 exc_traceback))
        if Log:
            Log.log_error("unhandled exception:")
            for line in err.split('\n'):
                Log.log_error("    %s" % (line,))
        else:
            print("unhandled exception:")
            for line in err.split('\n'):
                print("    %s" % (line,))
        self.SignalAllThreadsToStop()

    def TouchKeepAlive(self):
        '''
        Signal liveness: notify the systemd watchdog when a watchdog interval
        is configured, and refresh the timestamp of /var/run/<name>.alive.
        Failures touching the alive file are ignored.
        '''
        global watchintv
        if watchintv:
            cumulus.sdnotify.sd_notify(0, "WATCHDOG=1")
        try:
            # The file alive code should be removed as soon as
            # we are sure that no test or other code requires it.
            aliveFile = "/var/run/%s.alive" % (self.name,)
            with open(aliveFile, 'a'):
                os.utime(aliveFile, None)
        except:
            pass

    def SignalAllThreadsToStop(self):
        '''
        This function wakes up all threads which are waiting on a signal so that
        they can realize that they should stop execution.

        It clears keep_going, shuts down the server socket, signals the data
        syncs, command server, health check and netlink helpers, pushes a
        None entry onto peerSendQueue and sets stopEvent.
        '''
        global keep_going
        global serverSock
        keep_going = False
        if serverSock is not None:
            myServerSock = serverSock
            serverSock = None
            try:
                myServerSock.shutdown(socket.SHUT_RDWR)
            except socket.error:
                # Best effort only: a socket that cannot be shut down must
                # not prevent the remaining threads from being signalled.
                pass
        for sync in DataSyncs:
            sync.ClearPeerInfo()
            sync.SignalThreads()
        if Cmd is not None:
            Cmd.SignalThreads()
        if HealthCheck:
            HealthCheck.SignalThreads()
        if ClagNetLink:
            ClagNetLink.SignalThreads()

        try:
            # None is queued as a wake-up for whoever reads peerSendQueue
            peerSendQueue.put_nowait(None)
            util.queueSetMaxLen('peerSendQueue')
        except Queue.Full:
            pass
        stopEvent.set()


#-------------------------------------------------------------------------------
#
#   Log handling code
#
#-------------------------------------------------------------------------------

class Logger:
    '''
    Log sink for clagd.

    Messages are sent to syslog, written to stdout or appended to a file,
    depending on the destination selected with setLogfile(). The quiet,
    verbose and debug settings decide which of the log_* calls emit anything.
    '''

    # Debug categories, one bit each. log_debug() emits a message only when
    # its category bit is present in the mask given to setDebug().
    # Bit 8 is not assigned.
    _LOG_DEBUG_HOST_EVENTS  = 1 << 0
    _LOG_DEBUG_DAEMON       = 1 << 1
    _LOG_DEBUG_COLLECT_SYS  = 1 << 2
    _LOG_DEBUG_INFORM_PEER  = 1 << 3
    _LOG_DEBUG_RECV_PEER    = 1 << 4
    _LOG_DEBUG_PEER_TIMEOUT = 1 << 5
    _LOG_DEBUG_LACP_SYNC    = 1 << 6
    _LOG_DEBUG_PARSER       = 1 << 7
    _LOG_DEBUG_DUAL_HOME    = 1 << 9
    _LOG_DEBUG_FDB_SYNC     = 1 << 10
    _LOG_DEBUG_CMD_SERVER   = 1 << 11
    _LOG_DEBUG_PEER_LINK    = 1 << 12
    _LOG_DEBUG_MDB_SYNC     = 1 << 13
    _LOG_DEBUG_HEALTH_CHECK = 1 << 14
    _LOG_DEBUG_FSM          = 1 << 15
    _LOG_DEBUG_NETLINK      = 1 << 16
    _LOG_DEBUG_VXLAN_SYNC   = 1 << 17

    #
    #   Default logging values
    #
    default_quiet           = False
    default_debug           = 0
    default_verbose         = False
    default_log             = "syslog"

    def __init__(self):
        '''
        Start with the class defaults: not quiet, no debug bits, not verbose,
        logging to syslog.
        '''
        # priority name -> syslog level
        self.syslog_priority_map = dict(crit=syslog.LOG_CRIT,
                                        error=syslog.LOG_ERR,
                                        info=syslog.LOG_INFO,
                                        warn=syslog.LOG_WARNING,
                                        debug=syslog.LOG_DEBUG)
        # priority name -> label printed in file/stdout output
        self.stdout_priority_map = dict(crit="error",
                                        error="error",
                                        info=" ",
                                        warn="warning",
                                        debug="debug")
        self.setQuiet(Logger.default_quiet)
        self.setDebug(Logger.default_debug)
        self.setVerbose(Logger.default_verbose)
        self.handle = None
        self.logfile = None
        self.log_handler = self.log_handler_nothing
        self.setLogfile(Logger.default_log)

    def setDebug(self, debug):
        '''Install the bit mask of debug categories to emit.'''
        self.debug = debug

    def setQuiet(self, quiet):
        '''Turn suppression of informational messages on or off.'''
        self.quiet = quiet

    def setVerbose(self, verbose):
        '''Turn verbose messages on or off.'''
        self.verbose = verbose

    def setLogfile(self, logfile):
        '''
        Select the log destination: "syslog", "stdout" or a file path that is
        opened for appending. Asking for the current destination again does
        nothing. If the destination cannot be set up, messages are dropped.
        '''
        if self.logfile == logfile:
            return
        self.logfile = logfile

        # Retire the previous file/stdout destination (never close stdout).
        if self.handle:
            if self.handle != sys.stdout:
                self.handle.close()
            self.handle = None
            self.log_handler = self.log_handler_nothing

        if logfile == "syslog":
            try:
                syslog.openlog("clagd", syslog.LOG_CONS | syslog.LOG_PID,
                               syslog.LOG_DAEMON)
            except Exception:
                self.log_handler = self.log_handler_nothing
            else:
                self.log_handler = self.log_handler_syslog
            return

        if logfile == "stdout":
            self.handle = sys.stdout
            self.log_handler = self.log_handler_file
            return

        try:
            self.handle = open(logfile, 'a')
        except IOError:
            self.handle = None
            self.log_handler = self.log_handler_nothing
        else:
            self.log_handler = self.log_handler_file

    def getDebug(self):
        '''Return the current debug bit mask.'''
        return self.debug

    def getQuiet(self):
        '''Return the quiet setting.'''
        return self.quiet

    def getVerbose(self):
        '''Return the verbose setting.'''
        return self.verbose

    def getLogfile(self):
        '''Return the current destination ("syslog", "stdout" or a path).'''
        return self.logfile

    def log_handler_nothing(self, priority, buf):
        '''Handler used when there is no usable destination: drop buf.'''
        pass

    def log_handler_file(self, priority, buf):
        '''
        Write one line to self.handle: timestamp, thread name (debug
        messages only), priority label, pid and the message. Write errors
        (IOError) are ignored.
        '''
        label = self.stdout_priority_map.get(priority, " ")
        if label == " ":
            prefix = label
        else:
            prefix = ' %s: ' % label
        threadTag = ""
        if priority == "debug":
            threadTag = " [%s] " % threading.current_thread().name
        pidTag = " [%s] " % str(os.getpid())
        try:
            self.handle.write(time.ctime() + threadTag + prefix + pidTag + buf + "\n")
            self.handle.flush()
        except IOError:
            pass

    def log_handler_syslog(self, priority, buf):
        '''
        Send buf to syslog at the level mapped from priority (LOG_INFO for
        unknown priorities); debug messages are prefixed with the thread
        name. Any exception is ignored.
        '''
        threadTag = ("[%s] " % threading.current_thread().name) if priority == "debug" else ""
        try:
            syslog.syslog(self.syslog_priority_map.get(priority, syslog.LOG_INFO), threadTag + buf)
        except Exception:
            pass

    def _emit(self, priority, parts):
        # Concatenate the message fragments and pass them to the active handler.
        self.log_handler(priority, ''.join(parts))

    def log_msg_begin(self, *args, **kwargs):
        '''Emit the joined args without a priority, unless quiet.'''
        if self.quiet == False:
            self._emit('', args)

    def log_msg_end(self, ret):
        '''Emit "done." when ret is 0, else "failed.", unless quiet.'''
        if self.quiet == False:
            self._emit('', ('done.' if ret == 0 else 'failed.',))

    def log(self, *args, **kwargs):
        '''Emit the joined args at "info" priority, unless quiet.'''
        if self.quiet == False:
            self._emit("info", args)

    def log_debug(self, debugBit, *args, **kwargs):
        '''Emit the joined args at "debug" priority if debugBit is enabled.'''
        if self.debug & debugBit:
            self._emit("debug", args)

    def log_verbose(self, *args, **kargs):
        '''Emit the joined args without a priority, only when verbose.'''
        if self.verbose == True:
            self._emit("", args)

    def log_error(self, *args, **kwargs):
        '''Emit the joined args at "error" priority (never suppressed).'''
        self._emit("error", args)

    def log_warn(self, *args, **kwargs):
        '''Emit the joined args at "warn" priority (never suppressed).'''
        self._emit("warn", args)

    def log_crit(self, *args, **kwargs):
        '''Emit the joined args at "crit" priority (never suppressed).'''
        self._emit("crit", args)

    def log_cmderr(self, cmd, cmdout):
        '''
        Log, as an error, that cmd failed together with its output cmdout
        (trailing newlines removed).
        '''
        failedCmd = 'error: cmd \'%s\'' % cmd
        self.log_error(failedCmd,
                       ' failed with the following error:\n',
                       '(', cmdout.rstrip('\n'), ')')

#-------------------------------------------------------------------------------
#       
#   CLAG interface manager
#           
#-------------------------------------------------------------------------------
class ClagIntf:
    '''
    utility class to represent a single CLAG intf/bond

    Besides the name, type and clag-id it tracks two bit masks: the
    proto-down reasons (protoDownFlags) and the detected conflicts
    (conflicts). It relies on the module level Parser, Intf, Log, LacpSync
    and HealthCheck objects.
    '''
    # when clagd comes up all clag bonds are placed in a protoDown state. This
    # is cleared when the initial handshake completes or once the reload timer
    # fires
    _PROTO_DOWNF_INIT  = (1 << 0)
    _PROTO_DOWNF_PEER_LINK_DOWN_BACKUP_ACTIVE  = (1 << 1)
    # as part of clagd exit handling all clag bonds are placed in a protoDown
    # state (this flag can only be set; never cleared)
    _PROTO_DOWNF_SHUTDOWN  = (1 << 2)
    _PROTO_DOWNF_SINGLE_VXLAN = (1 << 3)
    _PROTO_DOWNF_NO_ANYCAST   = (1 << 4)

    # some of the proto_down reason codes are ignored on the primary as
    # we want to keep the primary bonds up and running most of the time
    _PROTO_DOWNF_USED_ON_PRIMARY = (_PROTO_DOWNF_INIT | _PROTO_DOWNF_SHUTDOWN | \
        _PROTO_DOWNF_SINGLE_VXLAN | _PROTO_DOWNF_NO_ANYCAST)

    # (flag, reason name) pairs in the order GetProtoDownReasons reports them
    _PROTO_DOWN_REASONS = (
        (_PROTO_DOWNF_INIT, 'init'),
        (_PROTO_DOWNF_PEER_LINK_DOWN_BACKUP_ACTIVE, 'isl-down'),
        (_PROTO_DOWNF_SHUTDOWN, 'shut'),
        (_PROTO_DOWNF_SINGLE_VXLAN, 'vxlan-single'),
        (_PROTO_DOWNF_NO_ANYCAST, 'no-anycast-ip'),
    )

    def __init__(self, intfName, intfType, clagId = 0):
        '''
        intfName - name of the interface
        intfType - one of the Parser._CLAG_INTF_TYPE_* values
        clagId   - clag-id of the interface (0 means none)

        Construction has side effects: the clag-id is registered in
        Parser.bondClagIdDB, bonds are put in dormant mode (and get the clag
        system MAC when the peer is alive), and the kernel proto-down state
        of the interface is brought in line with the initial flags.
        '''
        self.intfName = intfName
        self.intfType = intfType
        self.clagId = clagId
        self.conflicts = Parser._CLAG_INTF_CONFLICT_NONE
        self.protoDownFlags = 0 if Intf.initHandShakeDone else self._PROTO_DOWNF_INIT
        if self.intfType == Parser._CLAG_INTF_TYPE_VXLAN:
            self.protoDownFlags |= self._PROTO_DOWNF_SINGLE_VXLAN
        # last proto-down value pushed by UpdateProtoDownFlags (None = never)
        self.kernProtoDown = None

        # add clag bond to an id dict
        if self.clagId:
            Parser.bondClagIdDB[self.clagId] = self.intfName

        if self.intfType == Parser._CLAG_INTF_TYPE_BOND:
            self.SetClagBondCompatProtoDown()
            if Intf.isPeerAlive():
                Intf.SetBondSysMac(Parser.args.sysMac, self.intfName)
            self.SetClagIntfModeDormant(dormant=True)
        self.UpdateIntfProtoDown()

        # initHandShakeDone is set once the peer has sent his
        # peer db over or after reload timer has fired. Once
        # this attr is set we can start conflict evaluation
        # on clag bond add itself
        if Intf.ReadyToRunClagConflictChecks() and LacpSync and self.intfType == Parser._CLAG_INTF_TYPE_BOND:
            setConflicts = LacpSync.EvaluateClagIntfConflicts(self.intfName, self.clagId)
            self.UpdateConflicts(setConflicts, 0)

    def CleanUp(self):
        '''
        Undo what clagd did to a bond: drop its clag-id registration, leave
        dormant mode, reset the system MAC and clear kernel proto-down on its
        members. Interfaces that are not bonds are left untouched.
        '''
        if self.intfType == Parser._CLAG_INTF_TYPE_BOND:
            if self.clagId in Parser.bondClagIdDB:
                del Parser.bondClagIdDB[self.clagId]
            self.SetClagIntfModeDormant(dormant=False)

            Intf.SetBondSysMac('00:00:00:00:00:00', self.intfName)
            self.SetIntfKernProtoDown(protoDown=0)

    def SetNewClagId(self, clagId):
        '''
        Change the clag-id of this interface. No-op when the id is unchanged.
        The id registry is updated, the interface is put (back) in dormant
        mode, proto-down is refreshed and, when conflict checks are ready,
        the LACP conflicts of a bond are re-evaluated from scratch.
        '''
        if self.clagId == clagId:
            return

        if self.clagId in Parser.bondClagIdDB:
            del Parser.bondClagIdDB[self.clagId]

        self.clagId = clagId

        # add clag bond to an id dict
        if self.clagId:
            Parser.bondClagIdDB[self.clagId] = self.intfName

        self.SetClagIntfModeDormant(dormant=True)
        self.UpdateIntfProtoDown()

        # Reset the conflicts
        if Intf.ReadyToRunClagConflictChecks() and LacpSync and self.intfType == Parser._CLAG_INTF_TYPE_BOND:
            setConflicts = LacpSync.EvaluateClagIntfConflicts(self.intfName, self.clagId)
            self.UpdateConflicts(setConflicts, Parser._CLAG_INTF_CONFLICT_LACP_ALL & ~setConflicts)

    def GetProtoDownRole(self):
        '''
        Return the role used for proto-down decisions: the health check
        backup role when the peer is not alive and a backup role is
        available, otherwise the current clag role.
        '''
        if not Intf.isPeerAlive() and HealthCheck and\
                     HealthCheck.IsBackupRoleAvailable():
            role = HealthCheck.GetBackupRole()
        else:
            role = Parser.clagRole

        return role

    def _GetEffectiveProtoDownFlags(self):
        # Proto-down flags that count for the current role: on the primary
        # only the _PROTO_DOWNF_USED_ON_PRIMARY subset is honoured.
        if self.GetProtoDownRole() == Parser._ROLE_PRIMARY:
            return self.protoDownFlags & self._PROTO_DOWNF_USED_ON_PRIMARY
        return self.protoDownFlags

    def GetProtoDown(self):
        '''
        Return 1 when at least one proto-down reason applies for the current
        role, else 0.
        '''
        return 1 if self._GetEffectiveProtoDownFlags() else 0

    def GetProtoDownReasons(self):
        '''
        Return the list of reason names ('init', 'isl-down', 'shut',
        'vxlan-single', 'no-anycast-ip') that apply for the current role.
        '''
        stateFlags = self._GetEffectiveProtoDownFlags()
        return [reason for (flag, reason) in self._PROTO_DOWN_REASONS
                if stateFlags & flag]

    def UpdateProtoDownFlags(self, addProtoDownFlags, delProtoDownFlags, batch=False):
        '''
        Set addProtoDownFlags, then clear delProtoDownFlags, and push the
        resulting proto-down state to the kernel if it differs from the last
        value pushed by this method.

        Returns the batch entries produced by SetIntfKernProtoDown (an empty
        list when nothing changed or when batch is false).
        '''
        retVal = []
        self.protoDownFlags |= addProtoDownFlags
        self.protoDownFlags &= ~delProtoDownFlags
        newProtoDown = self.GetProtoDown()
        if self.kernProtoDown != newProtoDown:
            Log.log_debug(Log._LOG_DEBUG_FSM, "%s: protoDownFlags set to 0x%x" % (self.intfName, self.protoDownFlags))
            retVal = self.SetIntfKernProtoDown(newProtoDown, batch=batch)
            self.kernProtoDown = newProtoDown
        return retVal

    def DoSetIntfKernProtoDown(self, intfName, newProtoDown, batch=False):
        '''
        Bring the kernel proto-down state of one link to newProtoDown if it
        is currently different.

        With batch=False the change is applied immediately and [] is
        returned. With batch=True nothing is applied; the change is returned
        as [(intfName, "on" | "off")] for the caller to submit later.
        '''
        retVal = []
        oldProtoDown = Intf.GetLinkProtoDownFlags(intfName)
        if oldProtoDown != newProtoDown:
            Log.log_debug(Log._LOG_DEBUG_FSM, "Interface %s mbr %s proto_down change; from %d to %d" % (self.intfName, intfName, oldProtoDown, newProtoDown))
            if batch:
                retVal = [(intfName, "on" if newProtoDown else "off")]
            else:
                Intf.SetLinkProtoDownFlags(intfName, newProtoDown)
        return retVal

    def SetClagBondCompatProtoDown(self):
        '''Clear proto-down on the bond device itself (bonds only).'''
        # In <=CL2.5.3 CL bonding driver used to place the CLAG bond
        # in proto-down. Check and proto-up the bond if needed.
        if self.intfType == Parser._CLAG_INTF_TYPE_BOND:
            self.DoSetIntfKernProtoDown(self.intfName, 0)

    def SetIntfKernProtoDown(self, protoDown, batch=False):
        '''
        Apply protoDown (any truthy value means "on") to every base
        interface of a bond, or to the interface itself for other types.
        Returns the concatenated batch entries (see DoSetIntfKernProtoDown).
        '''
        retVal = []
        newProtoDown = 1 if protoDown else 0
        if self.intfType == Parser._CLAG_INTF_TYPE_BOND:
            for intfMbr in Intf.GetBaseInterfaces(self.intfName):
                retVal += self.DoSetIntfKernProtoDown(intfMbr, newProtoDown, batch=batch)
        else:
            retVal += self.DoSetIntfKernProtoDown(self.intfName, newProtoDown, batch=batch)
        return retVal

    def UpdateIntfProtoDown(self, batch=False):
        '''
        Push the proto-down state computed from the current flags and role
        to the kernel. Returns the batch entries, if any.
        '''
        protoDown = self.GetProtoDown()
        #Log.log_debug(Log._LOG_DEBUG_FSM, "Interface %s protoDown %d" % (intf, protoDown))
        return self.SetIntfKernProtoDown(protoDown, batch=batch)

    def SetClagIntfModeDormant(self, dormant, retainProtoDown=False):
        '''
        Switch the link mode of the interface between dormant and default.

        Entering dormant mode is skipped when it is disabled by
        configuration or while clagd is shutting down. When the mode really
        changes, the members are put proto-down first.

        NOTE(review): despite its name, retainProtoDown=True is the case
        that re-applies the computed proto-down state afterwards; with the
        default the members stay proto-down until the caller updates them.
        Confirm this is the intended meaning.
        '''
        # Check if dormant mode setting has been disabled
        if dormant:
            if Parser.args.dormantDisable:
                Log.log_debug(Log._LOG_DEBUG_FSM, "Interface %s dormant mode setting skipped" % (self.intfName))
                return
            if (self.protoDownFlags & self._PROTO_DOWNF_SHUTDOWN):
                # clagd is in the process of exiting; don't allow dormant
                # setting from the UpdateLacpConfig thread at this point
                Log.log_debug(Log._LOG_DEBUG_FSM,
                    "%s dormant mode skipped; clag exit in prog" % (self.intfName))
                return

        # check if the mode already matches
        oldMode = Intf.GetLinkMode(self.intfName)
        if oldMode == dormant:
            return

        # shut down the slaves first
        self.SetIntfKernProtoDown(protoDown=1)

        # set the mode
        modeStr = "dormant" if dormant else "default"
        oldModeStr = "dormant" if oldMode else "default"
        Log.log_debug(Log._LOG_DEBUG_FSM, "Interface %s mode changed from %s to %s" % (self.intfName, oldModeStr, modeStr))
        Intf.SetLinkMode(self.intfName, dormant)

        # change the mode to match local state
        if retainProtoDown:
            self.UpdateIntfProtoDown()

    def LogConflictSetFlags(self, flags):
        '''Log one "Conflict (...)" line for every conflict bit in flags.'''
        if (flags & Parser._CLAG_INTF_CONFLICT_NO_PEER):
            Log.log("Conflict (%s): matching clag-id (%d) not configured on peer" % (self.intfName, self.clagId))

        if (flags & Parser._CLAG_INTF_CONFLICT_LACP_PARTNER_MAC_MISMATCH):
            Log.log("Conflict (%s): LACP partner MAC mismatch" % (self.intfName))

        if (flags & Parser._CLAG_INTF_CONFLICT_LACP_MAC_EQ_SYS_MAC):
            Log.log("Conflict (%s): LACP partner MAC is same as our CLAG sysmac" % (self.intfName))

        if (flags & Parser._CLAG_INTF_CONFLICT_LACP_MAC_DUP):
            Log.log("Conflict (%s): duplicate LACP partner MAC" % (self.intfName))

    def LogConflictClearFlags(self, flags):
        '''
        Log one "Conflict cleared (...)" line for every conflict bit in
        flags. All four messages share that prefix so cleared conflicts can
        be told apart from newly raised ones.
        '''
        if (flags & Parser._CLAG_INTF_CONFLICT_NO_PEER):
            Log.log("Conflict cleared (%s): matching clag-id (%d) detected on peer" % (self.intfName, self.clagId))

        if (flags & Parser._CLAG_INTF_CONFLICT_LACP_PARTNER_MAC_MISMATCH):
            Log.log("Conflict cleared (%s): LACP partner MAC is no longer mismatched" % (self.intfName))

        if (flags & Parser._CLAG_INTF_CONFLICT_LACP_MAC_EQ_SYS_MAC):
            Log.log("Conflict cleared (%s): LACP partner MAC is no longer same as our CLAG sysmac" % (self.intfName))

        if (flags & Parser._CLAG_INTF_CONFLICT_LACP_MAC_DUP):
            Log.log("Conflict cleared (%s): LACP partner MAC is no longer duplicate" % (self.intfName))

    def UpdateConflicts(self, set, clear, logConflict=True):
        '''
        Update the conflict bit mask.

        set   - conflict bits to raise; bits already raised are ignored
        clear - conflict bits to drop; bits not currently raised are ignored
        logConflict - when true, log each bit that actually changed

        Clears are applied before sets. Both masks are first reduced against
        the conflicts present on entry, so a bit passed in both ends up
        cleared if it was raised and raised if it was not.
        '''
        # keep only the bits that represent a real transition
        set &= ~self.conflicts
        clear &= self.conflicts
        if clear:
            self.conflicts &= ~clear
            if logConflict:
                self.LogConflictClearFlags(clear)

        if set:
            self.conflicts |= set
            if logConflict:
                self.LogConflictSetFlags(set)


#-------------------------------------------------------------------------------
#
#   Command line parsing code
#
#-------------------------------------------------------------------------------

class ThrowingArgumentParser(argparse.ArgumentParser):
    '''
    ArgumentParser whose error() raises ArgumentParsingError with the
    argparse message, replacing the stock behaviour of printing usage and
    exiting the process.
    '''
    def error(self, message):
        raise ArgumentParsingError(message)

class ClagParser:
    '''
    Command line parser and shared configuration/state holder for clagd.

    The class level attributes below hold the protocol version strings, the
    default values of the command line parameters, the CLAG interface
    database with its lock, and the conflict/interface/switchd constants.
    '''

    #
    #   CLAG Version information
    #
    _ClagVersion            = "1.3.0"
    _ClagDataVersion        = "1.3.0"
    _ClagCmdVersion         = "1.1.0"
    _ClagHelloVersion       = "1.1.0"

    _ROLE_SECONDARY         = "secondary"
    _ROLE_PRIMARY           = "primary"

    # Largest clag-id accepted by isValidClagId()
    _CLAG_ID_MAX            = 65535
    #
    #   Default command line parameter values
    #
    default_lacp_poll       = 2
    default_peer_connect    = 1
    default_send_timeout    = 30
    default_send_bufsize    = 65536
    default_cmd_connect     = 1
    default_link_poll       = 1
    default_daemon          = False
    default_peer_port       = 5342
    default_backup_port     = 5342
    default_peer_timeout    = None
    default_role            = _ROLE_SECONDARY
    default_priority        = 32768
    default_switchd_timeout = 120
    default_periodic_run    = 4
    default_force_dynamic   = False
    default_dormant_disable = False
    default_vxlan_anycast   = None

    # bond to clagIntf association
    # {
    #     "bond1" : <clagIntf1>,
    #     "bond2" : <clagIntf2>
    # }
    # key=intfName : val=class ClagIntf
    clagIntfDB              = {}
    # lock for clagIntfDB and bondClagIdDB access
    clagIntfDBLock          = threading.Lock()

    # clag-id to bond association
    # {
    #     100 : "bond1",
    #     2 : "bond2"
    # }
    bondClagIdDB            = {}

    # Data version reported by the peer; "0.0.0" until it is learned.
    peerDataVersion         = "0.0.0"
    # set once all the initial data has been replayed to peer. cleared when peer
    # connectivity is lost
    syncDoneToPeer          = False
    # set once the initial replay of data from peer is done (peer will send a
    # sync_done notification). cleared when peer connectivity is lost.
    syncDoneFromPeer        = False

    # We wait for all the config to be replayed from ifupdown2 before starting
    # communication with the peer
    configReloadDone        = False

    # Consistency checks are run between the peer switches for all clag
    # interfaces/bonds. If there is a config/state inconsistency it is reported
    # as a conflict on the interface via one of these reason codes.
    _CLAG_INTF_CONFLICT_NONE = 0
    _CLAG_INTF_CONFLICT_NO_PEER = (1 << 0)
    _CLAG_INTF_CONFLICT_LACP_PARTNER_MAC_MISMATCH = (1 << 1)
    _CLAG_INTF_CONFLICT_LACP_MAC_EQ_SYS_MAC = (1 << 2)
    _CLAG_INTF_CONFLICT_LACP_MAC_DUP = (1 << 3)
    _CLAG_INTF_CONFLICT_LACP_MAX = (1 << 4)

    # Mask covering every conflict bit below _CLAG_INTF_CONFLICT_LACP_MAX
    _CLAG_INTF_CONFLICT_LACP_ALL = (_CLAG_INTF_CONFLICT_LACP_MAX -1)

    # Human readable description of each conflict bit
    _clag_intf_conflict_map = \
            {_CLAG_INTF_CONFLICT_NO_PEER : 'matching clagid not configured on peer',\
             _CLAG_INTF_CONFLICT_LACP_PARTNER_MAC_MISMATCH : 'lacp partner mac mismatch',
             _CLAG_INTF_CONFLICT_LACP_MAC_EQ_SYS_MAC : 'lacp partner mac is same as our clag sysmac',
             _CLAG_INTF_CONFLICT_LACP_MAC_DUP : 'duplicate lacp partner mac'}


    # Kinds of interface stored in clagIntfDB
    _CLAG_INTF_TYPE_BOND  = 0
    _CLAG_INTF_TYPE_VXLAN = 1

    # switchd states; systemd unit states are mapped onto these in __init__
    _SWITCHD_STARTING   = 1
    _SWITCHD_READY      = 2
    _SWITCHD_DEAD       = 3

    def __init__(self):
        '''
        Create the command line parser and initialize the per-instance state.

        Two external programs are probed here: /usr/sbin/virt-what, to pick
        the default of the --vm/--novm option, and "systemctl is-active
        switchd.service", to seed self.switchdState. A failure of either
        probe is not fatal.
        '''

        # Determine if we are running on a VM
        default_vm = False
        try:
            default_vm = (subprocess.check_output("/usr/sbin/virt-what") != "")
        except (subprocess.CalledProcessError, OSError):
            pass

        self.parser = ThrowingArgumentParser(description="CLAG daemon, version %s" % (ClagParser._ClagVersion,))
        self.parser.add_argument("--lacpPoll", "-o", type=int, default=ClagParser.default_lacp_poll,
                                 metavar="SECONDS",
                                 help="Seconds between obtaining local LACP information")
        self.parser.add_argument("--peerConnect", "-r", type=int, default=ClagParser.default_peer_connect,
                                 metavar="SECONDS",
                                 help="Seconds between trying to connect to peer.")
        self.parser.add_argument("--sendTimeout", type=int, default=ClagParser.default_send_timeout,
                                 metavar="SECONDS",
                                 help="Seconds until sending socket will timeout.")
        self.parser.add_argument("--sendBufSize", type=int, default=ClagParser.default_send_bufsize,
                                 metavar="BYTES",
                                 help="Bytes in the send socket buffer.")
        self.parser.add_argument("--cmdConnect", "-c", type=int, default=ClagParser.default_cmd_connect,
                                 metavar="SECONDS",
                                 help="Seconds between waiting for command connections.")
        self.parser.add_argument("--peerLinkPoll", type=int, default=ClagParser.default_link_poll,
                                 metavar="SECONDS",
                                 help="Seconds between checking if peer link is up.")
        self.parser.add_argument("--switchdReadyTimeout", type=int, default=ClagParser.default_switchd_timeout,
                                 metavar="SECONDS",
                                 help="Seconds clagd waits for switchd to be ready.")
        self.parser.add_argument("--priority", "-i", type=int, default=ClagParser.default_priority,
                                 help="The priority of this instance compared to the peer.")
        self.parser.add_argument("--periodicRun", type=int, default=ClagParser.default_periodic_run,
                                 metavar="SECONDS",
                                 help="Seconds between running periodic processes.")
        self.parser.add_argument("--forceDynamic", "-f", action="store_true",
                                 default=ClagParser.default_force_dynamic,
                                 help=argparse.SUPPRESS)
        self.parser.add_argument("--dormantDisable", "-a", action="store_true",
                                 default=ClagParser.default_dormant_disable,
                                 help=argparse.SUPPRESS)
        self.parser.add_argument("--vxlanAnycast", default=ClagParser.default_vxlan_anycast,
                                 metavar="IPADDR",
                                 help="Anycast local IP address for dual connected VxLANs")
        self.parser.add_argument("--daemon", "-d", action="store_true", default=ClagParser.default_daemon,
                                 help="Run as a daemon.")
        self.parser.add_argument("--quiet", "-q", action="store_true", default=Logger.default_quiet,
                                 help="Don't log anything.")
        self.parser.add_argument("--debug", type=self.intBase, default=Logger.default_debug,
                                 metavar="DEBUG_CODE",
                                 help="Debugging output mask.")
        self.parser.add_argument("--verbose", "-v", action="store_true", default=Logger.default_verbose,
                                 help="Log with verbosity.")
        self.parser.add_argument("--log", "-l", "--logfile", type=self.logDest, default=Logger.default_log,
                                 help="Log output to stdout, syslog, or a file")
        # --vm and --novm share one destination; its default comes from the
        # virt-what probe above.
        self.parser.add_argument("--vm", "-m", action="store_true", dest='vm',
                                 help="Enable debugging on a virtual machine")
        self.parser.add_argument("--novm", action="store_false", dest='vm',
                                 help="Disable debugging on a virtual machine")
        self.parser.set_defaults(vm=default_vm)
        self.parser.add_argument("--backupPort", "-b", type=int, default=ClagParser.default_backup_port,
                                 help="UDP port used by peer to respond to backup health check requests.")
        self.parser.add_argument("--backupIp", "-s", default="",
                                 help="Backup IP address of peer")
        self.parser.add_argument("--backupVrf", default="",
                                 help="VRF for the backupIp address")
        self.parser.add_argument("--peerPort", "-p", type=int, default=ClagParser.default_peer_port,
                                 help="TCP/IP port used by peer to communicate topology info.")
        self.parser.add_argument("--peerTimeout", "-t", type=int, default=ClagParser.default_peer_timeout,
                                 help="Seconds before peer information is discarded.")
        self.parser.add_argument("peerIp",
                                 help="Unique IP address or DNS name of peer")
        self.parser.add_argument("peerIf", type=self.isIfNameWithIp,
                                 help="Name of the interface connected to peer")
        self.parser.add_argument("sysMac", type=self.isMacAddr,
                                 help="The system ID shared with the peer (xx:xx:xx:xx:xx:xx)")
        #
        #   LACP Rate - The rate at which the peer will send information to us. Before
        #   we receive the first message from the peer, we don't know his rate and so
        #   this value will be None. After each message is received from the peer the
        #   included rate, in seconds, is saved here. If we lose communication to the
        #   peer, the value goes back to None.
        #
        self.peerLacpRateLock   = threading.Lock()
        self.peerLacpRate       = None

        self.currLacpPoll       = ClagParser.default_lacp_poll
        #                                 [0] Priority,             [1] UniqueId,
        self.clagId             = [ ClagParser.default_priority, "00:00:00:00:00:00" ]
        self.peerId             = [ ClagParser.default_priority, "00:00:00:00:00:00" ]
        self.clagRole           = ClagParser.default_role
        self.peerRole           = None
        # Translation of the "systemctl is-active" output to a switchd state
        self.systemdStateMap    = {
            "inactive"      : ClagParser._SWITCHD_DEAD,
            "activating"    : ClagParser._SWITCHD_STARTING,
            "active"        : ClagParser._SWITCHD_READY,
            "deactivating"  : ClagParser._SWITCHD_DEAD,
            "failed"        : ClagParser._SWITCHD_DEAD,
        }
        try:
            stateStr = subprocess.check_output(["/bin/systemctl", "is-active", "switchd.service"]).strip()
        except (subprocess.CalledProcessError, OSError):
            # check_output raises CalledProcessError on a non-zero exit status
            # and OSError when systemctl itself cannot be executed. Both are
            # treated as "failed", the same way the virt-what probe above
            # tolerates a missing binary.
            stateStr = "failed"
        # Any state string that is not in the map is treated as dead
        self.switchdState = self.systemdStateMap.get(stateStr, ClagParser._SWITCHD_DEAD)

    def intBase(self, intStr):
        '''
        Allow an integer to be specified with standard base representations:
        37 = decimal, 0x25 = hex, 045 = octal, 0b100101 = binary

        Used for argparse type checking. Returns the integer value, or raises
        argparse.ArgumentTypeError when intStr cannot be converted.
        '''
        try:
            # Base 0 makes int() pick the base from the prefix of the string
            intVal = int(intStr, 0)
        except (ValueError, TypeError):
            # Only conversion failures are reported; a bare except here would
            # also swallow KeyboardInterrupt and SystemExit.
            msg = "%s is not a valid integer" % (intStr,)
            raise argparse.ArgumentTypeError(msg)
        return intVal

    def logDest(self, logDest):
        '''
        Validate a log destination. Used for argparse type checking.

        The constants 'syslog' and 'stdout' are accepted as is. Anything else
        is taken as a file name: an existing file must be writable, otherwise
        the directory that would hold the file must be writable. Returns
        logDest unchanged or raises argparse.ArgumentTypeError.
        '''
        if logDest in ('syslog', 'stdout'):
            return logDest

        if os.path.isfile(logDest):
            writable = os.access(logDest, os.W_OK)
            msg = "%s is not a file that can be opened for writing" % (logDest,)
        else:
            # The file does not exist yet, so check the directory it would
            # be created in.
            parentDir = os.path.dirname(os.path.realpath(logDest))
            writable = os.access(parentDir, os.W_OK)
            msg = "%s is not a file/directory that can be opened for writing" % (logDest,)

        if not writable:
            raise argparse.ArgumentTypeError(msg)
        return logDest

    def getDataVer(self, verStr):
        '''
        Split a "major.minor.sub" version string and return the tuple
        (major, minor) as integers. The sub version is not returned.
        '''
        majVer, minVer, _ = verStr.split(".")
        return (int(majVer), int(minVer))

    def getPeerDataVer(self):
        '''
        Return the (major, minor) data version recorded for the peer.
        '''
        peerVerStr = Parser.peerDataVersion
        return self.getDataVer(peerVerStr)

    def getMyDataVer(self):
        '''
        Return the (major, minor) data version of this clagd.
        '''
        myVerStr = Parser._ClagDataVersion
        return self.getDataVer(myVerStr)

    def isPeerStaticClagStateAware(self):
        # dormant and protoDown states were introduced in in 1.1.0
        # This checks if the version is greater than or equal to 1.1.0
        (majVer, minVer) = self.getPeerDataVer()
        if (majVer > 1):
            return True
        if majVer == 1 and minVer > 0:
            return True
        return False

    def isPeerVlanSyncOptimized(self):
        # An optimized version of the vlan sync was introduced in in 1.3.0
        # This checks if the version is greater than or equal to 1.3.0
        (majVer, minVer) = self.getPeerDataVer()
        if (majVer > 1):
            return True
        if majVer == 1 and minVer > 2:
            return True
        return False

    def isPeerVxLanStateAware(self):
        # VxLAN Active-Active was introduced in in 1.2.0
        # This checks if the version is greater than or equal to 1.2.0
        (majVer, minVer) = self.getPeerDataVer()
        if (majVer > 1):
            return True
        if majVer == 1 and minVer > 1:
            return True
        return False

    def isIfNameWithIp(self, ifName):
        '''
        Check that the argument is the name of an interface which has an IP
        address. Used for argparse type checking.

        Returns ifName unchanged, or raises argparse.ArgumentTypeError.
        '''
        if not Intf.isIfName(ifName):
            raise argparse.ArgumentTypeError("%s is not the name of an interface" % (ifName,))

        if not Intf.GetIpAddress(ifName):
            raise argparse.ArgumentTypeError("%s does not have an IP address assigned to it." % (ifName,))

        return ifName

    def isClagBond(self, bond):
        return self.GetBondClagId(bond)

    def isValidClagId(self, clagId):
        '''
        Return True when clagId is between 0 and _CLAG_ID_MAX, inclusive.
        '''
        return clagId >= 0 and clagId <= Parser._CLAG_ID_MAX
        
    def GetBondClagId(self, bond):
        clagId = 0
        self.clagIntfDBLock.acquire()
        if bond in self.clagIntfDB:
            clagIntf = self.clagIntfDB[bond]
            clagId = clagIntf.clagId
        self.clagIntfDBLock.release()
        return clagId

    def GetBondFromClagId(self, clagId):
        bond = None
        self.clagIntfDBLock.acquire()
        if clagId in self.bondClagIdDB:
            bond = self.bondClagIdDB[clagId]
        self.clagIntfDBLock.release()
        return bond

    def GetVxLanInfo(self, vxlan):
        '''
        Return what VxLanSync.GetOurVxLanInfo() reports for the VxLAN, or
        None when VxLanSync is not set.
        '''
        if not VxLanSync:
            return None
        return VxLanSync.GetOurVxLanInfo(vxlan)

    def GetClagIntfDB(self):
        # Return a list of current CLAG interfaces
        self.clagIntfDBLock.acquire()
        clagIntfs = copy.copy(self.clagIntfDB)
        self.clagIntfDBLock.release()
        return clagIntfs

    def GetClagBondDB(self):
        '''
        Return a dictionary, keyed by interface name, of the CLAG interfaces
        whose type is bond.
        '''
        clagBonds = {}
        for intfName, clagIntf in self.GetClagIntfDB().items():
            if clagIntf.intfType == Parser._CLAG_INTF_TYPE_BOND:
                clagBonds[intfName] = clagIntf
        return clagBonds

    def delClagBond(self, bond):
        self.clagIntfDBLock.acquire()
        clagIntf = self.clagIntfDB.get(bond)
        if clagIntf:
            clagIntf.CleanUp()
            del self.clagIntfDB[bond]
        self.clagIntfDBLock.release()

    def addClagBond(self, bond, clagId):
        self.clagIntfDBLock.acquire()
        if bond in self.clagIntfDB:
            clagIntf = self.clagIntfDB[bond]
            clagIntf.SetNewClagId(clagId)
        else:
            self.clagIntfDB[bond] = ClagIntf(bond, intfType=Parser._CLAG_INTF_TYPE_BOND, clagId=clagId)
        self.clagIntfDBLock.release()

    def delClagVxLan(self, vxlan):
        self.clagIntfDBLock.acquire()
        clagIntf = self.clagIntfDB.get(vxlan)
        if clagIntf:
            clagIntf.CleanUp()
            del self.clagIntfDB[vxlan]
        self.clagIntfDBLock.release()

    def addClagVxLan(self, vxlan):
        self.clagIntfDBLock.acquire()
        if vxlan not in self.clagIntfDB:
            self.clagIntfDB[vxlan] = ClagIntf(vxlan, intfType=Parser._CLAG_INTF_TYPE_VXLAN)
        self.clagIntfDBLock.release()

    def UpdateClagConflicts(self, bond, set, clear):
        self.clagIntfDBLock.acquire()
        if bond in self.clagIntfDB:
            self.clagIntfDB[bond].UpdateConflicts(set, clear)
        self.clagIntfDBLock.release()

    def ResetClagConflicts(self):
        self.clagIntfDBLock.acquire()
        resetDone = False
        for intfName in self.clagIntfDB:
            if self.clagIntfDB[intfName].conflicts:
                resetDone = True
                self.clagIntfDB[intfName].UpdateConflicts(0, self._CLAG_INTF_CONFLICT_LACP_ALL, logConflict=False)
        if resetDone:
            Log.log("Conflict reset (all-interfaces): peer connectivity lost")
        self.clagIntfDBLock.release()
        
    def SetSyncDoneFromPeer(self, done):
        self.syncDoneFromPeer = done
        if self.syncDoneFromPeer:
            if LacpSync:
                LacpSync.EvaluateAndUpdateConflicts() 
        else:
            self.ResetClagConflicts() 

    def GetClagIntfConflicts(self, intfName):
        conflicts = 0
        self.clagIntfDBLock.acquire()
        conflicts = self.clagIntfDB[intfName].conflicts if intfName in self.clagIntfDB else 0
        self.clagIntfDBLock.release()
        return conflicts
        
    # XXX - should really sit in a util module
    def GetNonZeroBits(self, bits):
        '''
        Generator which yields the value of each bit set in bits, from the
        least significant one up: 0b1010 yields 2 and then 8.
        '''
        remaining = bits
        while remaining:
            # In two's complement x & -x isolates the lowest set bit
            lowestBit = remaining & -remaining
            yield lowestBit
            # drop the bit that was just reported
            remaining &= ~lowestBit

    def GetClagIntfConflictStr(self, intfName):
        conflictList = []
        conflicts = self.GetClagIntfConflicts(intfName)
        bits = self.GetNonZeroBits(conflicts)
        for bit in bits:
            if bit in Parser._clag_intf_conflict_map:
                bitStr = Parser._clag_intf_conflict_map[bit]
            else:
                bitStr = "0x%x" % bit
            conflictList.append(bitStr)
        return conflictList

    def setReloadDone(self):
        '''
        Note that the initial config has been loaded. Returns an empty
        string.
        '''
        Log.log("Initial config loaded")
        # XXX - need to use this as trigger to start communicating with the
        # peer
        self.configReloadDone = True
        msg = ""
        return msg

    def setBondClagId(self, bond, clagId):
        '''
        Set, change or remove the clag-id of a bond.

        A clag-id of 0 removes the bond from the CLAG interface database.
        Returns an empty string on success, or a string starting with "ERROR"
        when the clag-id is out of range or already used by another bond.
        '''
        Log.log_debug(Log._LOG_DEBUG_PARSER, "Set %s clag_id %d" % (bond, clagId))

        # zero means the bond is no longer a clag bond
        if clagId == 0:
            self.delClagBond(bond)
            return ""

        if not self.isValidClagId(clagId):
            return "ERROR %s is not in the clag id range of <0-%d>." % (clagId, Parser._CLAG_ID_MAX)

        # nothing to do if the bond already has this clag-id
        if clagId == self.GetBondClagId(bond):
            return ""

        # refuse a clag-id which belongs to a different bond
        inUseBy = self.GetBondFromClagId(clagId)
        if inUseBy:
            return "ERROR %d is already in use for bond %s" % (clagId, inUseBy)

        self.addClagBond(bond, clagId)
        return ""

    def setBackupIp(self, address):
        '''
        Store a new backup IP address in the parsed arguments and reset the
        backup hello of the health check. Returns an empty string.
        '''
        self.args.backupIp = address
        HealthCheck.HelloBackupReset()
        return ""

    def CloseClientSock(self):
        '''
        Close the global client socket, if there is one, and set it to None.
        '''
        global clientSock
        if not clientSock:
            return
        clientSock.close()
        clientSock = None

    def isMacAddr(self, macAddr):
        '''
        Determine if the argument passed in is a valid mac address, of the
        form xx:xx:xx:xx:xx:xx, where x is a hexadecimal digit. Used for
        argparse type checking.

        Returns the address in lower case, or raises
        argparse.ArgumentTypeError.
        '''
        lowered = macAddr.lower()
        if re.match("([0-9a-f]{2}:){5}[0-9a-f]{2}$", lowered) is None:
            raise argparse.ArgumentTypeError("%s is not a valid mac address (xx:xx:xx:xx:xx:xx)" % (lowered,))
        return lowered

    def ParseCmdLine(self):
        '''
        Parse the command line parameters into self.args.

        A parsing error is logged and ends the process with status -1.
        Arguments the parser does not know are logged as warnings and
        otherwise ignored.
        '''
        try:
            parsed = self.parser.parse_known_args()
        except ArgumentParsingError as e:
            for line in ("There was an error in the command line parameters.",
                         str(e),
                         "exit with status -1"):
                Log.log_error(line)
            sys.exit(-1)

        self.args, unknown = parsed
        for arg in unknown:
            Log.log_warn("The command line argument '%s' is unrecognized and will be ignored." % (arg,))

    def getCfgLacpPoll(self):
        return self.args.lacpPoll

    def getCurrLacpPoll(self):
        return self.currLacpPoll

    def setPeerRate(self, rate):
        self.peerLacpRateLock.acquire()
        self.peerLacpRate = rate
        self.currLacpPoll = max(self.peerLacpRate, self.args.lacpPoll)
        self.peerLacpRateLock.release()

    def getPeerTimeout(self):
        return self.args.peerTimeout

    def setPeerTimeout(self, timeout):
        self.args.peerTimeout = timeout

    def getPeerLinkPoll(self):
        return self.args.peerLinkPoll

    def setPeerLinkPoll(self, timeout):
        self.args.peerLinkPoll = timeout

    def getPeriodicRun(self):
        return self.args.periodicRun

    def setPeriodicRun(self, timeout):
        self.args.periodicRun = timeout

    def DumpParameters(self):
        '''
        Print out all of the command line parameter values to the debug log,
        one "name = value" line per parameter.
        '''
        # (parameter name, conversion used to print its value), in the order
        # the lines are logged
        paramFormats = (
            ("lacpPoll",            "%d"),
            ("peerConnect",         "%d"),
            ("sendTimeout",         "%d"),
            ("sendBufSize",         "%d"),
            ("cmdConnect",          "%d"),
            ("peerLinkPoll",        "%d"),
            ("switchdReadyTimeout", "%d"),
            ("periodicRun",         "%d"),
            ("priority",            "%d"),
            ("quiet",               "%s"),
            ("debug",               "0x%X"),
            ("verbose",             "%s"),
            ("log",                 "%s"),
            ("vm",                  "%s"),
            ("peerPort",            "%d"),
            ("peerTimeout",         "%s"),
            ("peerIp",              "%s"),
            ("peerIf",              "%s"),
            ("backupIp",            "%s"),
            ("backupVrf",           "%s"),
            ("backupPort",          "%d"),
            ("sysMac",              "%s"),
            ("forceDynamic",        "%s"),
            ("vxlanAnycast",        "%s"),
            ("dormantDisable",      "%s"),
        )

        Log.log_debug(Log._LOG_DEBUG_PARSER, "The parameters are:")
        for paramName, valueFmt in paramFormats:
            lineFmt = "    " + paramName + " = " + valueFmt
            Log.log_debug(Log._LOG_DEBUG_PARSER, lineFmt % (getattr(self.args, paramName),))


#-------------------------------------------------------------------------------
#
#   Linux interface utility routines
#
#-------------------------------------------------------------------------------

class IntfSupport:
    def __init__(self):
        #
        #   Dual Connected Interfaces - This is a dictionary which contains our  
        #   interface names of the current dual connected bonds as the keys and the
        #   peer switch's corresponding interface names as the values:
        #   {
        #       "bond1" : "bond4",
        #       "bond3" : "bond3"
        #   }
        #
        self.dualIfsLock = threading.Lock()
        self.dualIfs     = {}
        self.peerAlive   = False
        self.initHandShakeDone = False
        # multiple threads can trigger peer connectivity shutdown
        self.peerDownLock = threading.Lock()
        self.peerDeathPending = False
        self.protoDownLock = threading.Lock()
        self.bridgeAttrLock = threading.Lock()

    def GetLinkAdminState(self, ifName):
        '''
        Return the admin state of the interface: 1 when IFF_UP is set in its
        sysfs "flags" file, otherwise 0. Also returns 0 when ifName is not
        an interface or the file cannot be read.
        '''
        flags = 0
        if self.isIfName(ifName):
            flagsPath = _InterfacePath + str(ifName) + "/flags"
            if os.path.isfile(flagsPath):
                try:
                    # "with" closes the file as soon as it has been read
                    with open(flagsPath) as flagsFile:
                        for line in flagsFile:
                            # base 0 accepts the 0x prefixed form
                            flags = int(line.strip(), 0)
                except IOError:
                    # the interface may have gone away since the checks above
                    pass
        return 1 if (flags & clag.iff.IFF_UP) else 0

    def SetLinkAdminState(self, ifName, state, nullDev):
        '''
        Run "ip link set <ifName> up|down". A true state means up. The
        command's stdout and stderr are both sent to nullDev.
        '''
        cmd = [ "/bin/ip", "link", "set", ifName, "up" if state else "down" ]
        Log.log_debug(Log._LOG_DEBUG_FSM, "Executing: " + str(cmd))
        subprocess.call(cmd, stdout=nullDev, stderr=nullDev)

    def GetLinkProtoDownFlags(self, ifName):
        '''
        Return the value in the interface's sysfs "proto_down" file as an
        integer. Returns 0 when ifName is not an interface or the file
        cannot be read.
        '''
        flags = 0
        if self.isIfName(ifName):
            protoDownPath = _InterfacePath + str(ifName) + "/proto_down"
            if os.path.isfile(protoDownPath):
                try:
                    # "with" closes the file as soon as it has been read
                    with open(protoDownPath) as protoDownFile:
                        for line in protoDownFile:
                            flags = int(line.strip(), 0)
                except IOError:
                    # the interface may have gone away since the checks above
                    pass
        return flags

    def SetLinkProtoDownFlags(self, ifName, protoDown):
        '''
        Run "ip link set <ifName> protodown on|off". A true protoDown means
        on. The output of the command is discarded.
        '''
        stateStr = "on" if protoDown else "off"
        cmd = [ "/bin/ip", "link", "set", ifName, "protodown", stateStr ]
        Log.log_debug(Log._LOG_DEBUG_FSM, "Executing: " + str(cmd))
        # "with" closes the null device once the command has finished
        with open(os.devnull, 'wb') as devNull:
            subprocess.call(cmd, stdout=devNull, stderr=devNull)

    def GetLinkMode(self, ifName):
        '''
        Return the value in the interface's sysfs "link_mode" file as an
        integer. Returns 0 when ifName is not an interface or the file
        cannot be read.
        '''
        mode = 0
        if self.isIfName(ifName):
            linkModePath = _InterfacePath + str(ifName) + "/link_mode"
            if os.path.isfile(linkModePath):
                try:
                    # "with" closes the file as soon as it has been read
                    with open(linkModePath) as linkModeFile:
                        for line in linkModeFile:
                            mode = int(line.strip(), 0)
                except IOError:
                    # the interface may have gone away since the checks above
                    pass
        return mode

    def SetLinkMode(self, ifName, dormant):
        '''
        Run "ip link set <ifName> mode dormant|default". A true dormant
        selects dormant. The output of the command is discarded.
        '''
        modeStr = "dormant" if dormant else "default"
        cmd = [ "/bin/ip", "link", "set", ifName, "mode", modeStr ]
        Log.log_debug(Log._LOG_DEBUG_FSM, "Executing: " + str(cmd))
        # "with" closes the null device once the command has finished
        with open(os.devnull, 'wb') as devNull:
            subprocess.call(cmd, stdout=devNull, stderr=devNull)

    def CheckAndUpdateIntfModeDormant(self):
        '''
        The state in the interface driver could have fallen out of sync with
        clagd, so dormant mode is requested again on every CLAG bond, with
        the proto-down state retained.

        Does nothing when dormant mode is disabled (--dormantDisable).
        '''
        if Parser.args.dormantDisable:
            return

        clagIntfs = Parser.GetClagBondDB().values()
        # "with" releases the lock even if updating one of the bonds raises
        with self.protoDownLock:
            for intf in clagIntfs:
                intf.SetClagIntfModeDormant(dormant=True, retainProtoDown=True)

    def SetProtoDownOnExit(self):
        '''
        Flag every CLAG interface as shut down and turn protodown on with a
        single batch of ip commands. For a bond the command is issued for
        each of its base interfaces, for any other interface for the
        interface itself.
        '''
        params = []
        for intf in Parser.GetClagIntfDB().values():
            intf.protoDownFlags |= intf._PROTO_DOWNF_SHUTDOWN
            if intf.intfType != Parser._CLAG_INTF_TYPE_BOND:
                params.append((intf.intfName,))
                continue
            for intfMbr in self.GetBaseInterfaces(intf.intfName):
                params.append((intfMbr,))
        self.ExecuteBatch(["/sbin/ip", "-force", "-batch"], "link set dev %s protodown on", params)

    def ClearDormantMode(self):
        '''
        Put every CLAG bond back into the default link mode with a single
        batch of ip commands.
        '''
        params = [(intf.intfName,) for intf in Parser.GetClagBondDB().values()]
        self.ExecuteBatch(["/sbin/ip", "-force", "-batch"], "link set dev %s mode default", params)

    def GetBackupRole(self):
        '''
        Return the role reported by the health check when it has a backup
        role available, otherwise our current CLAG role.
        '''
        if not (HealthCheck and HealthCheck.IsBackupRoleAvailable()):
            return Parser.clagRole
        return HealthCheck.GetBackupRole()

    def SetClagRole(self, newRole, reasonStr, doBackupElection=True):
        '''
        Change our CLAG role, log the change and notify the listeners.

        Any newRole other than primary is stored as secondary. Nothing is
        done when newRole equals the current role. reasonStr is appended to
        the log message. Unless doBackupElection is False, a backup election
        is run when the health check has the backup link active.
        '''
        if Parser.clagRole == newRole:
            return

        if newRole == Parser._ROLE_PRIMARY:
            Parser.clagRole = newRole
            roleMsg = "Role is now primary; %s"
        else:
            Parser.clagRole = Parser._ROLE_SECONDARY
            roleMsg = "Role is now secondary; %s"

        Log.log(roleMsg % reasonStr)
        Log.log_debug(Log._LOG_DEBUG_FSM, "toListeners: add clag-role = %s" % (str(Parser.clagRole)))
        Cmd.CmdGetAllOutput("add clag-role = %s" % (str(Parser.clagRole),))

        # role over ISL can influence the role over backup
        if doBackupElection and HealthCheck and HealthCheck.GetBackupActive():
            HealthCheck.RunBackupElection()

    def GetKCacheLinkCopy(self):
        '''
        Return what ClagNetLink.GetKCacheLinkCopy() returns, or None when
        ClagNetLink is not set.
        '''
        if not ClagNetLink:
            return None
        return ClagNetLink.GetKCacheLinkCopy()

    def SetDormantBondOperState(self, bond):
        '''
        Hand the bond to LacpSync.SetDormantBondOperState(). Does nothing
        when LacpSync is not set.
        '''
        if not LacpSync:
            return
        LacpSync.SetDormantBondOperState(bond)
        
    def UpdatePendingDormantBonds(self):
        '''
        Hand every CLAG bond whose cached oper state is "dormant" to
        LacpSync.SetDormantBondOperState(). Does nothing when LacpSync is
        not set.
        '''
        if LacpSync:
            for bond in Parser.GetClagBondDB():
                if self.GetOperStateFromKCache(bond) == "dormant":
                    LacpSync.SetDormantBondOperState(bond)

    def SetInitHandShakeDone(self):
        '''
        Mark the initial handshake as done. The first time this happens the
        _PROTO_DOWNF_INIT flag is cleared on every CLAG interface which has
        it, and the resulting protodown changes are applied with a single
        batch of ip commands. Later calls do nothing.
        '''
        if self.initHandShakeDone:
            return

        Log.log("Initial handshake done.")
        self.initHandShakeDone = True
        params = []
        # "with" releases the lock even if a flag update or the batch raises
        with self.protoDownLock:
            for intf in Parser.GetClagIntfDB().values():
                if (intf.protoDownFlags & intf._PROTO_DOWNF_INIT):
                    params += intf.UpdateProtoDownFlags(0, intf._PROTO_DOWNF_INIT, batch=True)
            self.ExecuteBatch(["/sbin/ip", "-force", "-batch"], "link set dev %s protodown %s", params)


    def SetProtoPeerOrBackupState(self):
        '''
        Bring the _PROTO_DOWNF_PEER_LINK_DOWN_BACKUP_ACTIVE flag of every
        CLAG interface in line with the current state: it is set while the
        peer is not alive and the backup link is active, and cleared
        otherwise. Only interfaces whose flag has to change are updated; the
        changes are applied with a single batch of ip commands.
        '''
        wantFlag = not self.isPeerAlive() and HealthCheck and HealthCheck.GetBackupActive()

        params = []
        for intf in Parser.GetClagIntfDB().values():
            backupFlag = intf._PROTO_DOWNF_PEER_LINK_DOWN_BACKUP_ACTIVE
            hasFlag = intf.protoDownFlags & backupFlag
            if wantFlag and not hasFlag:
                params += intf.UpdateProtoDownFlags(backupFlag, 0, batch=True)
            elif hasFlag and not wantFlag:
                params += intf.UpdateProtoDownFlags(0, backupFlag, batch=True)
        self.ExecuteBatch(["/sbin/ip", "-force", "-batch"], "link set dev %s protodown %s", params)

    def isPeerLinkDown(self):
        '''
        Return True when any interface on the path to the peer is not
        operationally "up". The interfaces checked are the peer interface,
        its master when the peer interface is a sub-interface, and its
        logical masters. This typically means the peerlink SVI and the
        peerlink bond.
        '''
        peerIf = Parser.args.peerIf

        peerIfDown = False
        if self.GetOperStateFromSysFs(peerIf) != "up":
            peerIfDown = True

        if self.isSubIfName(peerIf):
            if self.GetOperStateFromSysFs(self.GetMasterOfSubIf(peerIf)) != "up":
                peerIfDown = True

        # Every interface is checked, even once one is known to be down
        for masterIf in self.GetLogicalMastersOfSubIf(peerIf):
            if self.GetOperStateFromSysFs(masterIf) != "up":
                peerIfDown = True
        return peerIfDown


    #
    #   Bridge interface routines
    #
    def isBridgeName(self, bridgeName):
        '''
        Determine if the argument passed in is the name of a bridge, that is
        whether the interface has a "bridge" directory in sysfs.
        '''
        bridgeDir = "%s%s/bridge" % (_InterfacePath, str(bridgeName))
        return os.path.isdir(bridgeDir)

    def isBridgeMember(self, bridgeMemberName):
        '''
        Determine if the specified interface is a member of a bridge, that
        is whether the interface has a "brport" directory in sysfs.
        '''
        brportDir = "%s%s/brport" % (_InterfacePath, str(bridgeMemberName))
        return os.path.isdir(brportDir)

    def GetBridgeOfMember(self, bridgeMemberName):
        '''
        Returns the name of the bridge of which the specified interface is a
        member, or None if the interface's "brport/bridge" link cannot be
        resolved (for example because the interface is not a bridge member).
        '''
        try:
            bridgeLink = os.readlink(_InterfacePath + str(bridgeMemberName) + "/brport/bridge")
        except Exception:
            # Best effort lookup: any failure means "no bridge". Unlike a bare
            # except this lets SystemExit and KeyboardInterrupt propagate.
            return None
        return os.path.basename(bridgeLink)

    def GetMembersOfBridge(self, bridgeName):
        '''
        Returns a list of the interfaces which are members of the specified
        bridge (the entries of its "brif" directory), or [] if that directory
        cannot be listed.
        '''
        brifDir = _InterfacePath + str(bridgeName) + "/brif"
        try:
            return os.listdir(brifDir)
        except OSError:
            return []

    def GetAllBridges(self):
        '''
        Returns a list of the names of all bridges in the system, found by
        looking for interfaces which have a "bridge" entry in sysfs.
        '''
        # The interface name is the path component just before "bridge"
        return [path.split("/")[-2] for path in glob.glob(_InterfacePath + "*/bridge")]

    def isVlanFilteringBridge(self, bridgeName):
        '''
        Return True if bridge is VLAN filtering or False otherwise. The
        decision is based on the first line of the bridge's "vlan_filtering"
        sysfs file; a missing, unreadable or empty file yields False.
        '''
        vfilt_file = _InterfacePath + str(bridgeName) + "/bridge/vlan_filtering"
        try:
            # Use a context manager so the file is closed deterministically
            with open(vfilt_file) as f:
                return f.readline().strip() == '1'
        except IOError:
            return False

    def GetBridgePeerVlanMap(self):
        '''
        Returns a dictionary whose keys are bridge names and whose data is the
        VLAN ID of the peerlink in that bridge. Will only include VLAN unaware
        bridges. A peerlink interface which is not a known VLAN sub-interface
        is recorded with VLAN ID 0.
        '''
        subIfInfo = self.GetSubIfsVlanMap()
        peerMaster = self.GetMasterOfSubIf(Parser.args.peerIf)
        # The peerlink master plus every VLAN sub-interface built on it
        peerSideIfs = set(self.GetSubIfsOfMaster(peerMaster)) | set([peerMaster])
        bridgeToVlan = {}
        for bridgeName in self.GetAllBridges():
            for memberIf in set(self.GetMembersOfBridge(bridgeName)) & peerSideIfs:
                (baseIf, vlanId) = subIfInfo.get(memberIf, (None, 0))
                bridgeToVlan[bridgeName] = vlanId
        return bridgeToVlan

    def GetPeerBridgeVlanMap(self):
        '''
        Returns a dictionary whose keys are vlan IDs of the sub interfaces of
        the peerlink which are members of bridges. The data is a dictionary.
        The keys of the dictionary are the "base" interface names of members
        of the bridge, and the data is the vlan IDs of the base interface in
        the bridge. Bridge members which are not VLAN sub-interfaces are
        recorded under the key None with VLAN ID 0.
        '''
        vlanMap = {}
        vlanIdMap = self.GetSubIfsVlanMap()
        peerMIf = self.GetMasterOfSubIf(Parser.args.peerIf)
        peerIfs = set(self.GetSubIfsOfMaster(peerMIf)) | set([peerMIf])
        for peerIf in peerIfs:
            bridgeName = self.GetBridgeOfMember(peerIf)
            if bridgeName is None:
                # Not in any bridge; do not look up a bridge named "None"
                continue
            peerVlanId = vlanIdMap.get(peerIf, (None, 0))[1]
            for member in self.GetMembersOfBridge(bridgeName):
                (intfBase, intfVlanId) = vlanIdMap.get(member, (None, 0))
                vlanMap.setdefault(peerVlanId, {})[intfBase] = intfVlanId
        return vlanMap

    def GetBridgeAgeingTime(self, bridgeName):
        '''
        Returns the number of seconds after which MAC addresses will be aged,
        computed as the value in the bridge's "ageing_time" sysfs file divided
        by 100. Returns None if no bridge name is given or the file is
        missing, unreadable or does not hold an integer.
        '''
        age_file = _InterfacePath + str(bridgeName) + "/bridge/ageing_time"
        if bridgeName is None or not os.path.isfile(age_file):
            return None
        try:
            with open(age_file) as f:
                return int(f.readline().strip()) / 100
        except (IOError, ValueError):
            # ValueError: the file was empty or did not contain a number
            return None

    def GetBridgeMemInterval(self, bridgeName):
        '''
        Returns the multicast group membership interval for a bridge, computed
        as the value in its "multicast_membership_interval" sysfs file divided
        by 100. Returns None if no bridge name is given or the file is
        missing, unreadable or does not hold an integer.
        '''
        age_file = _InterfacePath + str(bridgeName) + "/bridge/multicast_membership_interval"
        if bridgeName is None or not os.path.isfile(age_file):
            return None
        try:
            with open(age_file) as f:
                return int(f.readline().strip()) / 100
        except (IOError, ValueError):
            # ValueError: the file was empty or did not contain a number
            return None

    def GetBridgeQuerierInterval(self, bridgeName):
        '''
        Returns the multicast querier interval for a bridge, computed as the
        value in its "multicast_querier_interval" sysfs file divided by 100.
        Returns None if no bridge name is given or the file is missing,
        unreadable or does not hold an integer.
        '''
        age_file = _InterfacePath + str(bridgeName) + "/bridge/multicast_querier_interval"
        if bridgeName is None or not os.path.isfile(age_file):
            return None
        try:
            with open(age_file) as f:
                return int(f.readline().strip()) / 100
        except (IOError, ValueError):
            # ValueError: the file was empty or did not contain a number
            return None

    def GetCistPortId(self, intf):
        '''
        Returns the single instance spanning tree portId for the interface
        supplied. If single instance spanning tree is not enabled, or if the
        interface is not included in the spanning tree, None is returned.

        When /proc/sys/net/bridge/bridge-stp-user-space-single exists, single
        instance mode is considered enabled only if that file can be read and
        its last line does not contain "0"; when the file does not exist it is
        considered enabled. The bridge name is taken from the first match in
        the output of "mstpctl showbridge", and the port queried is the
        supplied interface, one of its VLAN sub-interfaces, or its master,
        whichever is a member of that bridge.
        '''
        singleFile = "/proc/sys/net/bridge/bridge-stp-user-space-single"
        cistEnabled = True
        bridgeRe = r'^(\S+) CIST info'
        if os.path.isfile(singleFile):
            cistEnabled = False
            bridgeRe = r'BRIDGE: (\S+), '
            try:
                with open(singleFile) as f:
                    for line in f:
                        cistEnabled = "0" not in line
            except IOError:
                cistEnabled = False
        if not cistEnabled:
            return None

        try:
            showBridgeStr = subprocess.check_output(["/sbin/mstpctl", "showbridge"])
        except subprocess.CalledProcessError as e:
            # Still try to parse whatever the command printed before failing
            showBridgeStr = e.output
        match = re.search(bridgeRe, showBridgeStr)
        if not match:
            return None

        bridgeName = match.group(1)
        brMemSet = set(self.GetMembersOfBridge(bridgeName))
        intfSet = set([intf])
        intfSet = intfSet | set(self.GetSubIfsOfMaster(intf))
        if self.isSubIfName(intf):
            intfSet = intfSet | set([self.GetMasterOfSubIf(intf)])
        brIntfSet = brMemSet & intfSet
        if not brIntfSet:
            return None

        brIntf = brIntfSet.pop()
        try:
            return subprocess.check_output(["/sbin/mstpctl", "showportdetail", bridgeName, brIntf, "port-id"]).strip()
        except subprocess.CalledProcessError:
            return None

    def GetPvid(self, intf):
        '''
        Returns the PVID of a bridge port, or None if there is none. The VLAN
        list is obtained from "/sbin/bridge --json-cl vlan show dev <intf>";
        None is also returned when that command fails or its output is not
        valid JSON.
        '''
        bridgeCmd = ["/sbin/bridge", "--json-cl", "vlan", "show", "dev", intf]
        try:
            rawJson = subprocess.check_output(bridgeCmd)
        except subprocess.CalledProcessError as err:
            Log.log_debug(Log._LOG_DEBUG_FSM, "Error executing: /sbin/bridge --json-cl vlan show dev " + intf + " Returned error: " + str(err))
            return None

        try:
            parsed = json.loads(rawJson)
        except ValueError as err:
            Log.log_debug(Log._LOG_DEBUG_FSM, "Loading json failed: " + rawJson + " Error: " + str(err))
            return None

        # The first entry flagged as PVID wins
        pvidEntry = next((entry for entry in parsed.get(intf, {})
                          if "PVID" in entry.get("flags", "")), None)
        if pvidEntry is None:
            return None
        return pvidEntry.get("vlan", None)

    #
    #   VLAN Subinterface utility routines
    #
    def isSubIfName(self, subInterfaceName):
        '''
        Determine if the argument passed in is the name of a VLAN
        subinterface, i.e. it has an entry under the kernel's VLAN proc
        directory.
        '''
        vlanProcFile = _VLANPath + str(subInterfaceName)
        return os.path.isfile(vlanProcFile)

    def GetMasterOfSubIf(self, subInterfaceName):
        '''
        Returns the name of the interface which is the "master" of the given
        VLAN subinterface, taken from the "Device:" line of the
        subinterface's VLAN proc file. Returns None if the name is not a VLAN
        subinterface or the file cannot be read.
        '''
        if not self.isSubIfName(subInterfaceName):
            return None
        try:
            # str() mirrors isSubIfName; the with block closes the file even
            # though we return from the middle of the loop
            with open(_VLANPath + str(subInterfaceName)) as f:
                for line in f:
                    lineParts = line.split()
                    if len(lineParts) > 1 and lineParts[0] == "Device:":
                        return lineParts[1]
        except IOError:
            pass
        return None

    def GetLogicalMastersOfSubIf(self, subInterfaceName):
        '''
        Returns a list holding the interface which is the "master" of the
        given VLAN subinterface. If the VLAN subinterface is built on a
        bridge, the list instead holds the members of that bridge which are
        also members of the subinterface's VLAN. An empty list is returned
        when the interface has no master.
        '''
        parentIf = self.GetMasterOfSubIf(subInterfaceName)
        if not parentIf:
            return []
        if not self.isBridgeName(parentIf):
            return [parentIf]
        vlanId = self.GetVlanIdOfSubIf(subInterfaceName)
        inBridge = set(self.GetMembersOfBridge(parentIf))
        inVlan = set(self.GetMembersOfVlan(vlanId))
        return list(inBridge & inVlan)

    def GetVlanIdOfSubIf(self, subInterfaceName):
        '''
        Returns the VLAN ID of the VLAN subinterface as an integer, taken
        from the first line of its VLAN proc file whose second field is
        "VID:". Returns None if the name is not a VLAN subinterface, the file
        cannot be read, or no such line is found.
        '''
        if not self.isSubIfName(subInterfaceName):
            return None
        try:
            # str() mirrors isSubIfName; the with block closes the file even
            # though we return from the middle of the loop
            with open(_VLANPath + str(subInterfaceName)) as f:
                for line in f:
                    lineParts = line.split()
                    if len(lineParts) > 2 and lineParts[1] == "VID:":
                        return int(lineParts[2])
        except IOError:
            pass
        return None

    def GetSubIfsOfMaster(self, masterIf):
        '''
        Returns a list of VLAN sub-interfaces of the supplied interface, as
        listed in the VLAN "config" proc file. Each "|" separated line of
        that file is taken as sub-interface name, VLAN ID, master interface.
        Returns [] if the file cannot be read.
        '''
        subIfs = []
        try:
            with open(_VLANPath + "config") as f:
                for line in f:
                    lineParts = [part.strip() for part in line.split("|")]
                    if len(lineParts) > 2 and lineParts[2] == masterIf:
                        subIfs.append(lineParts[0])
        except IOError:
            # Discard any partial result
            subIfs = []
        return subIfs

    def GetSubIfsVlanMap(self):
        '''
        Returns a dictionary which allows mapping of VLAN sub-interface name
        (the key) to the raw interface name and VLAN ID (the data, a tuple).
        The data comes from the three-field lines of the VLAN "config" proc
        file; an empty dictionary is returned if the file cannot be read.
        '''
        vlanMap = {}
        try:
            with open(_VLANPath + "config") as f:
                for line in f:
                    lineParts = [part.strip() for part in line.split("|")]
                    if len(lineParts) == 3:
                        (subIf, vlanId, masterIf) = lineParts
                        vlanMap[subIf] = (masterIf, int(vlanId))
        except IOError:
            # Discard any partial result
            vlanMap = {}
        return vlanMap

    def GetMasterIfVlanMap(self):
        '''
        Returns a dictionary which allows mapping of a master interface name
        (the key) to a dictionary of VLAN sub-interfaces on that interface (the
        data). That dictionary contains VLAN IDs (the key) and VLAN sub-interface
        names (the data). The data comes from the three-field lines of the
        VLAN "config" proc file; an empty dictionary is returned if the file
        cannot be read.
        '''
        vlanMap = {}
        try:
            with open(_VLANPath + "config") as f:
                for line in f:
                    lineParts = [part.strip() for part in line.split("|")]
                    if len(lineParts) == 3:
                        (subIf, vlanId, masterIf) = lineParts
                        vlanMap.setdefault(masterIf, {})[int(vlanId)] = subIf
        except IOError:
            # Discard any partial result
            vlanMap = {}
        return vlanMap

    def GetIfVlanMap(self, intf):
        '''
        Returns a list of 128 32-bit integers which are bitmaps of the VLANs
        enabled on an interface, or the empty list [] if the new bridge driver
        is not being used. VLAN n is represented by bit (n % 32) of word
        (n // 32). The empty list is also returned when the bridge command
        fails, prints nothing, or prints something which is not valid JSON.
        '''
        bitmaps = []

        try:
            rawJson = subprocess.check_output(["/sbin/bridge", "--json-cl", "vlan", "show", "dev", intf])
        except subprocess.CalledProcessError as err:
            Log.log_debug(Log._LOG_DEBUG_FSM, "Error executing: /sbin/bridge --json-cl vlan show dev " + intf + " Returned error: " + str(err))
            return bitmaps

        if not rawJson:
            return bitmaps

        try:
            parsed = json.loads(rawJson)
        except ValueError as err:
            Log.log_debug(Log._LOG_DEBUG_FSM, "Loading json failed: " + rawJson + " Error: " + str(err))
            return bitmaps

        for entry in parsed.get(intf, {}):
            vid = entry.get("vlan", 0)
            if not vid:
                continue
            if not bitmaps:
                # Allocate the bitmap lazily, on the first VLAN found
                bitmaps = [0x00000000] * 128
            (word, bit) = divmod(vid, 32)
            bitmaps[word] |= 1 << bit

        return bitmaps

    def GetMembersOfVlan(self, vlanId):
        '''
        Returns a list of interface names which are members of the specified
        VLAN ID, according to "/sbin/bridge --json-cl vlan show". An empty
        list is returned when the command fails, prints nothing, or prints
        something which is not valid JSON.
        '''
        try:
            rawJson = subprocess.check_output(["/sbin/bridge", "--json-cl", "vlan", "show"])
        except subprocess.CalledProcessError as err:
            Log.log_debug(Log._LOG_DEBUG_FSM, "Error executing: /sbin/bridge --json-cl vlan show - Returned error: " + str(err))
            return []

        if not rawJson:
            return []

        try:
            parsed = json.loads(rawJson)
        except ValueError as err:
            Log.log_debug(Log._LOG_DEBUG_FSM, "Loading json failed: " + rawJson + " Error: " + str(err))
            return []

        # An interface is listed once, as soon as one of its entries matches
        return [ifName for ifName in parsed
                if any(entry.get("vlan", -1) == vlanId for entry in parsed[ifName])]


    #
    #   Bond utility routines
    #
    def isBondName(self, bondName):
        '''
        Determine if the argument passed in is the name of a bond, i.e. the
        interface has a "bonding" directory in sysfs.
        '''
        bondingDir = _InterfacePath + str(bondName) + "/bonding"
        return os.path.isdir(bondingDir)

    def isBondMember(self, bondMemberName):
        '''
        Determine if the specified interface is a member of a bond. The test
        is whether the interface has a "master" symbolic link in sysfs.
        '''
        masterLink = _InterfacePath + str(bondMemberName) + "/master"
        return os.path.islink(masterLink)

    def GetBondOfMember(self, bondMemberName):
        '''
        Returns the name of the bond of which the specified interface is a
        member, or None if the interface's "master" link cannot be resolved.
        '''
        try:
            masterLink = os.readlink(_InterfacePath + str(bondMemberName) + "/master")
        except Exception:
            # Best effort lookup: any failure means "no bond". Unlike a bare
            # except this lets SystemExit and KeyboardInterrupt propagate.
            return None
        return os.path.basename(masterLink)

    def GetMembersOfBond(self, bondName):
        '''
        Returns a list of the interfaces which are members of the specified
        bond, read from the bond's "bonding/slaves" sysfs file. Returns [] if
        the name is not a bond or the file cannot be read.
        '''
        bondMembers = []
        if self.isBondName(bondName):
            try:
                with open(_InterfacePath + str(bondName) + "/bonding/slaves") as f:
                    for line in f:
                        bondMembers.extend(line.split())
            except IOError:
                # Discard any partial result
                bondMembers = []
        return bondMembers

    def GetBondMode(self, bondName):
        '''
        Returns a tuple which is the mode of a bond. The first member is the
        string representation of the mode and the second member is integer
        mode value. Both are read from the bond's "bonding/mode" sysfs file.
        ("", -1) is returned if the name is not a bond, or the file cannot be
        read or is not in the expected "<name> <number>" form.
        '''
        bondMode = ("", -1)
        if self.isBondName(bondName):
            try:
                with open(_InterfacePath + str(bondName) + "/bonding/mode") as f:
                    for line in f:
                        (modeStr, modeInt) = line.split()
                        bondMode = (modeStr, int(modeInt))
            except (IOError, ValueError):
                # ValueError: wrong number of fields or non-numeric mode
                bondMode = ("", -1)
        return bondMode

    def GetIntfBondNames(self, intfName):
        '''
        Given the name of an interface, returns a list of the bond(s) related
        to that interface, or [] if there is none:
           1. A bond yields its own name.
           2. A bond member yields the interface's "master".
           3. A VLAN sub-interface of a bond yields the master of the
              sub-interface.
           4. A VLAN sub-interface of a bridge yields all bonds in the bridge
              which are part of the same VLAN.
        '''
        if self.isBondName(intfName):
            return [intfName]
        if self.isBondMember(intfName):
            return [self.GetBondOfMember(intfName)]
        if not self.isSubIfName(intfName):
            return []

        masterIf = self.GetMasterOfSubIf(intfName)
        if self.isBondName(masterIf):
            return [masterIf]
        if self.isBridgeName(masterIf):
            vlanId = self.GetVlanIdOfSubIf(intfName)
            inBridge = set(self.GetMembersOfBridge(masterIf))
            inVlan = set(self.GetMembersOfVlan(vlanId))
            return [member for member in inBridge & inVlan if self.isBondName(member)]
        return []

    def GetAllBonds(self, excludeIfs=[]):
        '''
        Returns a list of the names of all bonds in the system, read from the
        "bonding_masters" sysfs file ([] if it cannot be read). Any bond
        related to one of the interfaces in excludeIfs, as determined by
        GetIntfBondNames, is left out of the result. excludeIfs is only read,
        never modified.
        '''
        bonds = []
        try:
            with open(_InterfacePath + "bonding_masters") as f:
                for line in f:
                    bonds.extend(line.split())
        except IOError:
            # Discard any partial result
            bonds = []

        for excludeIf in excludeIfs:
            for excludeBond in self.GetIntfBondNames(excludeIf):
                if excludeBond in bonds:
                    bonds.remove(excludeBond)
        return bonds

    def GetAllClagBonds(self, excludeIfs=[]):
        '''
        Returns the bonds reported by GetAllBonds(excludeIfs) which have a
        CLAG ID configured, or all of them when the forceDynamic option is
        set.
        '''
        clagBonds = []
        for bondName in self.GetAllBonds(excludeIfs):
            if Parser.GetBondClagId(bondName) or Parser.args.forceDynamic:
                clagBonds.append(bondName)
        return clagBonds

    def SetBondSysMac(self, sys_mac, bond):
        '''
        Writes sys_mac to the bond's "ad_actor_system" sysfs file. Nothing is
        done unless that file exists, is writable, and the bond's integer
        mode is 4. An all-zero MAC, whether requested or currently set, is
        treated as the bond's own interface MAC, and the file is only written
        when the resulting value differs from the current one.

        If the bond is administratively up it is taken down for the duration
        of the write and brought back up afterwards, even if the write fails.
        '''
        sys_mac_file = _InterfacePath + str(bond) + "/bonding/ad_actor_system"
        if not (os.path.isfile(sys_mac_file) and os.access(sys_mac_file, os.W_OK) and self.GetBondMode(bond)[1] == 4):
            return
        if sys_mac == "00:00:00:00:00:00":
            sys_mac = self.GetIfMac(bond)
        curr_sys_mac = self.GetBondSysMac(bond)
        if curr_sys_mac == "00:00:00:00:00:00":
            curr_sys_mac = self.GetIfMac(bond)
        if curr_sys_mac == sys_mac:
            return

        initState = self.GetLinkAdminState(bond)
        # The with block closes the /dev/null handle when we are done
        with open(os.devnull, 'wb') as devnull:
            if initState:
                self.SetLinkAdminState(bond, 0, devnull)
            try:
                with open(sys_mac_file, "w") as f:
                    f.write(sys_mac)
            except IOError:
                # Best effort: a failed write leaves the old system MAC
                pass
            finally:
                # Never leave the bond admin down, whatever went wrong above
                if initState:
                    self.SetLinkAdminState(bond, 1, devnull)

    def SetBondsSysMac(self, sys_mac, bonds, excludeIfs=[]):
        '''
        Applies SetBondSysMac(sys_mac, ...) to each of the supplied bonds, in
        order, skipping any bond which is listed in excludeIfs.
        '''
        for bondName in (b for b in bonds if b not in excludeIfs):
            self.SetBondSysMac(sys_mac, bondName)

    def SetAllClagBondsSysMac(self, sys_mac, excludeIfs=[]):
        '''
        Sets the system MAC of every CLAG bond, as returned by
        GetAllClagBonds(excludeIfs), to sys_mac.
        '''
        self.SetBondsSysMac(sys_mac, self.GetAllClagBonds(excludeIfs), excludeIfs)

    def GetBondSysMac(self, bond):
        '''
        Returns the first line of the bond's "ad_actor_system" sysfs file,
        stripped of surrounding whitespace, or None if the file does not
        exist or cannot be read.
        '''
        sysMacFile = _InterfacePath + str(bond) + "/bonding/ad_actor_system"
        if not os.path.isfile(sysMacFile):
            return None
        try:
            with open(sysMacFile) as f:
                return f.readline().strip()
        except IOError:
            return None

    def GetBondPartnerMac(self, bond):
        '''
        Returns the first line of the bond's "ad_partner_mac" sysfs file,
        stripped of surrounding whitespace, or None if the file does not
        exist or cannot be read.
        '''
        partnerMacFile = _InterfacePath + str(bond) + "/bonding/ad_partner_mac"
        if not os.path.isfile(partnerMacFile):
            return None
        try:
            with open(partnerMacFile) as f:
                return f.readline().strip()
        except IOError:
            return None

    #
    #   VxLAN Interface Routines
    #
    def AddVxLanInterface(self, ifname, vni, localip, operstate, brportst):
        '''
        Passes a new VxLAN interface on to the VxLAN sync object. Does
        nothing if that object has not been created yet.
        '''
        if not VxLanSync:
            return
        VxLanSync.AddVxLanInterface(ifname, vni, localip, operstate, brportst)

    def DelVxLanInterface(self, ifname):
        '''
        Passes the removal of a VxLAN interface on to the VxLAN sync object.
        Does nothing if that object has not been created yet.
        '''
        if not VxLanSync:
            return
        VxLanSync.DelVxLanInterface(ifname)

    #
    #   Dual Connected Interfaces - Sort of specific to CLAG
    #
    def GetDualIfs(self):
        '''
        Returns a deep copy of the dual connected interface table, taken
        while holding dualIfsLock. The caller may freely modify the copy.
        '''
        self.dualIfsLock.acquire()
        try:
            return copy.deepcopy(self.dualIfs)
        finally:
            # Release even if the copy fails, so other threads cannot hang
            self.dualIfsLock.release()

    def SetDualIfs(self, dualIfs):
        '''
        Replaces the dual connected interface table with a deep copy of the
        supplied one, while holding dualIfsLock.
        '''
        self.dualIfsLock.acquire()
        try:
            self.dualIfs = copy.deepcopy(dualIfs)
        finally:
            # Release even if the copy fails, so other threads cannot hang
            self.dualIfsLock.release()

    def AddDualIfs(self, dualIfs):
        '''
        Merges the supplied entries into the dual connected interface table
        (existing keys are overwritten), while holding dualIfsLock.
        '''
        self.dualIfsLock.acquire()
        try:
            self.dualIfs.update(dualIfs)
        finally:
            # Release even if the update fails, so other threads cannot hang
            self.dualIfsLock.release()

    def DelDualIfs(self, dualIfs):
        '''
        Removes each of the supplied interface names from the dual connected
        interface table, while holding dualIfsLock. Names which are not in
        the table are ignored.
        '''
        self.dualIfsLock.acquire()
        try:
            for dualIf in dualIfs:
                self.dualIfs.pop(dualIf, None)
        finally:
            # Release even if the removal fails, so other threads cannot hang
            self.dualIfsLock.release()

    def isDualIf(self, intf):
        '''
        Returns True if the interface is present in the dual connected
        interface table. The lookup is done while holding dualIfsLock.
        '''
        self.dualIfsLock.acquire()
        try:
            return intf in self.dualIfs
        finally:
            # Release even if the lookup fails, so other threads cannot hang
            self.dualIfsLock.release()

    def SetPeerAlive(self, alive):
        '''
        Records whether the peer is alive and, while holding protoDownLock,
        updates the protodown state of the CLAG interfaces accordingly.
        Afterwards the LACP conflicts are re-evaluated if the peer is alive
        (and the LACP sync object exists); otherwise the CLAG conflicts are
        reset.
        '''
        self.protoDownLock.acquire()
        try:
            self.peerAlive = alive
            self.SetProtoPeerOrBackupState()
        finally:
            # A failure while updating protodown must not leave the lock held
            self.protoDownLock.release()
        if self.peerAlive:
            if LacpSync:
                LacpSync.EvaluateAndUpdateConflicts()
        else:
            Parser.ResetClagConflicts()

    def SetPeerDeathPending(self, pending):
        '''
        Records whether a check of the peer switch's liveness is pending. A
        log message is written when the value being set equals True.
        '''
        startingCheck = (pending == True)
        if startingCheck:
            Log.log("Peer link is down; checking if the peer switch is alive.")
        self.peerDeathPending = pending

    def GetPeerDeathPending(self):
        '''
        Returns the value last stored by SetPeerDeathPending or
        ClearPeerDeathPending.
        '''
        pending = self.peerDeathPending
        return pending

    def ClearPeerDeathPending(self):
        '''
        Resolves a pending peer death check; does nothing if none is pending.
        The peer switch is considered dead unless the health check reports
        the backup as active. A missing health check object counts as "backup
        not active". If the peer switch is dead this switch takes over as
        primary. In either case PeerIsNotActive is told the outcome and the
        pending flag is cleared.
        '''
        if not self.peerDeathPending:
            return
        # Guard against HealthCheck not having been created yet, in the same
        # way SetProtoPeerOrBackupState does
        peerSwitchDead = not (HealthCheck and HealthCheck.GetBackupActive())
        if peerSwitchDead:
            # Take over as primary
            Intf.SetClagRole(Parser._ROLE_PRIMARY, "backup became inactive", doBackupElection=False)
        PeerIsNotActive(peerSwitchDead)
        self.peerDeathPending = False
            
    def isPeerAlive(self):
        '''
        Returns the peer liveness value last recorded by SetPeerAlive.
        '''
        alive = self.peerAlive
        return alive

    #
    #   Other interface utility routines
    #
    def isPortName(self, portName):
        '''
        Determine if the argument passed in is the name of a "base" interface:
        one which exists but is neither a bond, a VLAN subinterface, nor a
        bridge.
        '''
        if not os.path.isdir(_InterfacePath + str(portName)):
            return False
        return not (self.isBondName(portName) or
                    self.isSubIfName(portName) or
                    self.isBridgeName(portName))

    def isIfName(self, ifName):
        '''
        Determine if the argument passed in is the name of an interface, i.e.
        it has a directory in sysfs.
        '''
        ifDir = _InterfacePath + str(ifName)
        return os.path.isdir(ifDir)

    def GetIpAddress(self, ifName):
        '''
        Returns the IPv4 address of the interface as a dotted quad string, or
        None if the ioctl used to query it fails. The address is obtained
        with ioctl request 0x8915 (SIOCGIFADDR on Linux) on a throw-away UDP
        socket.
        '''
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            ifreq = struct.pack('16sH14s', ifName, socket.AF_INET, '\x00'*14)
            try:
                res = fcntl.ioctl(s.fileno(), 0x8915, ifreq)
            except (IOError, OSError):
                return None
        finally:
            # The socket is only needed as a handle for the ioctl
            s.close()
        ip = struct.unpack('16sH2x4s8x', res)[2]
        return socket.inet_ntoa(ip)

    def GetIpAddressInSameNet(self, ifName, ipAddr):
        '''
        Returns the first IPv4 address configured on the interface, as listed
        by "ip addr show", whose network (per its own prefix length) also
        contains ipAddr. Returns None if there is no such address.
        '''
        targetInt = struct.unpack("!I", socket.inet_aton(ipAddr))[0]
        try:
            ipOutput = subprocess.check_output([ "/bin/ip", "addr", "show", ifName ])
        except subprocess.CalledProcessError as err:
            ipOutput = err.output
        ipAddrRe = '\s+inet\s+(\S+)/(\d+)\s+'
        for found in re.finditer(ipAddrRe, ipOutput):
            ifIpStr = found.group(1)
            prefixLen = int(found.group(2))
            ifIpInt = struct.unpack("!I", socket.inet_aton(ifIpStr))[0]
            # All ones in the network part, zeros in the host part
            netMask = ~((1 << (32 - prefixLen)) - 1)
            if (targetInt & netMask) == (ifIpInt & netMask):
                return ifIpStr
        return None

    def GetBaseInterfaces(self, ifName):
        '''
        Given an interface, returns an unordered set of "base" interfaces which
        are controlled by that interface. A "base" interface is one which does
        not control other (lower) interfaces. Specifically:
           1. For bridges this means the "base" interface(s) of all bridge
              member interface(s).
           2. For bonds, this means the "base" interface(s) of all enslaved
              interface(s).
           3. For VLAN sub-interfaces, this means the "base" interface(s) of
              the interface from which the VLAN was created.
           4. All other interfaces are base interfaces.
        '''
        # Work out which interfaces sit directly below this one
        if self.isBridgeName(ifName):
            lowerIfs = self.GetMembersOfBridge(ifName)
        elif self.isBondName(ifName):
            lowerIfs = self.GetMembersOfBond(ifName)
        elif self.isSubIfName(ifName):
            lowerIfs = self.GetLogicalMastersOfSubIf(ifName)
        else:
            return set([ifName])

        # ... and resolve each of them in turn
        baseIfs = set()
        for lowerIf in lowerIfs:
            baseIfs.update(self.GetBaseInterfaces(lowerIf))
        return baseIfs

    def GetIfMac(self, ifName):
        '''
        Given an interface name, return the MAC address of that interface,
        read from the first line of its "address" sysfs file. Returns None if
        the interface does not exist or the file is missing, unreadable or
        empty.
        '''
        if not self.isIfName(ifName):
            return None
        addrFile = _InterfacePath + str(ifName) + "/address"
        if not os.path.isfile(addrFile):
            return None
        try:
            with open(addrFile) as f:
                line = f.readline()
        except IOError:
            return None
        # readline() returns "" only at end of file, i.e. for an empty file
        return line.strip() if line else None

    def GetOperState(self, ifName):
        '''
        Returns the operational state of an interface. The netlink object is
        asked when it exists; otherwise the state is read from sysfs.
        '''
        if not ClagNetLink:
            return self.GetOperStateFromSysFs(ifName)
        return ClagNetLink.GetOperState(ifName)

    def GetOperStateFromKCache(self, ifName):
        '''
        Returns the operational state of an interface as reported by the
        netlink object, or "unknown" when that object does not exist.
        '''
        if not ClagNetLink:
            return "unknown"
        return ClagNetLink.GetOperState(ifName)

    def GetOperStateFromSysFs(self, ifName):
        '''
        Given an interface name, return the operational state, read from the
        first line of the interface's "operstate" sysfs file. Returns
        "unknown" if the interface does not exist or the file is missing,
        unreadable or empty.
        '''
        if not self.isIfName(ifName):
            return "unknown"
        stateFile = _InterfacePath + str(ifName) + "/operstate"
        if not os.path.isfile(stateFile):
            return "unknown"
        try:
            with open(stateFile) as f:
                line = f.readline()
        except IOError:
            return "unknown"
        # readline() returns "" only at end of file, i.e. for an empty file
        return line.strip() if line else "unknown"

    def SetOperStateUp(self, ifName):
        '''
        Sets the operational state of an interface to "up" using
        "ip link set <ifName> state up". This is only done when the current
        state, as read from sysfs, is "dormant"; otherwise nothing happens.
        The command's output is discarded.
        '''
        # we only change the oper state if it is dormant
        if self.GetOperStateFromSysFs(ifName) != 'dormant':
            return
        cmd = [ "/bin/ip", "link", "set", ifName, "state", "up" ]
        Log.log_debug(Log._LOG_DEBUG_FSM, "Executing: " + str(cmd))
        # Close the null device once the command is done rather than
        # leaving the handle to the garbage collector
        with open(os.devnull, 'wb') as devnull:
            subprocess.call(cmd, stdout=devnull, stderr=devnull)

    def ReadyToRunClagConflictChecks(self):
        '''
        Conflict checks may run once the peer is alive and the parser's
        syncDoneFromPeer attribute is set. Returns a false value if either
        condition is not met.
        '''
        peerAlive = self.isPeerAlive()
        if not peerAlive:
            return peerAlive
        return Parser.syncDoneFromPeer

    def NetlinkFilter(self, enable, disable):
        '''
        Passes the enable and disable arguments on to the netlink object's
        SetClearFilter. Does nothing if that object has not been created yet.
        '''
        if not ClagNetLink:
            return
        ClagNetLink.SetClearFilter(enable, disable)

    def ExecuteSingle(self, cmd, cmdline, params):
        '''
        This function executes the supplied commands one at a time. This is
        intended to be called by ExecuteBatch when the command list cannot or
        should not be executed as a batch.

        cmd is the batch style command list, of which only the executable
        (the first element) is used. cmdline is a format string which is
        filled in with each entry of params in turn and split on whitespace
        to form the arguments. The output and exit status of the commands are
        discarded.
        '''
        # The with block closes the null device once all commands have run
        with open(os.devnull, 'wb') as devnull:
            for param in params:
                oneCmd = cmdline % param
                execCmd = [cmd[0]] + oneCmd.split()
                Log.log_debug(Log._LOG_DEBUG_FSM, "ExecuteSingle: " + str(execCmd))
                subprocess.call(execCmd, stdout=devnull, stderr=devnull)

    def ExecuteBatch(self, cmd, cmdline, params):
        '''
        This function executes the supplied commands using the batch mechanism. A
        temporary file is created which contains up to 2000 commands at a time and
        is run. If for some reason the temporary file has an issue (out of space)
        the commands are executed individually.

        cmd is the command list to which the name of the batch file is
        appended. cmdline is a format string which is filled in with each
        entry of params in turn to form one line of the batch file. Fewer
        than three commands are always executed individually. The output and
        exit status of the commands are discarded.
        '''
        # If only a few commands, run them individually
        if len(params) < 3:
            self.ExecuteSingle(cmd, cmdline, params)
            return

        # The with block closes the null device once all commands have run
        with open(os.devnull, 'wb') as devnull:
            try:
                with tempfile.NamedTemporaryFile('w+', prefix='clagd_tmp') as tmpf:
                    numCmds = 0
                    for param in params:
                        oneCmd = cmdline % param
                        tmpf.write(oneCmd + "\n")
                        Log.log_debug(Log._LOG_DEBUG_FSM, "ExecuteBatch: " + oneCmd)
                        numCmds += 1
                        if numCmds >= 2000:
                            # Run this chunk, then reuse the file for the next
                            tmpf.flush()
                            subprocess.call(cmd + [tmpf.name], stdout=devnull, stderr=devnull)
                            tmpf.seek(0)
                            tmpf.truncate(0)
                            numCmds = 0
                    if numCmds:
                        tmpf.flush()
                        subprocess.call(cmd + [tmpf.name], stdout=devnull, stderr=devnull)
            except Exception as e:
                # Unlike a bare except this lets SystemExit and
                # KeyboardInterrupt propagate. All commands are re-run,
                # including any which were already part of a completed chunk.
                Log.log_debug(Log._LOG_DEBUG_FSM, "ExecuteBatch: batch execution failed (" + str(e) + "); executing commands individually")
                self.ExecuteSingle(cmd, cmdline, params)


#-------------------------------------------------------------------------------
#
#   The init code
#
#-------------------------------------------------------------------------------

def ClagInit():
    '''
    Creates the global Log, Daemon, Intf and Parser objects, parses the
    command line, and exits the process if it was not started by root.
    '''
    global Log, Daemon, Intf, Parser

    # Create a log for our output, and record how we were started
    Log = Logger()
    Log.log("Beginning execution of clagd version %s" % (ClagParser._ClagVersion,))
    Log.log("Invoked with: " + " ".join(sys.argv))
    util.queueAddNameMap(peerSendQueue, 'peerSendQueue')

    # The daemon, followed by the interface utility routines
    Daemon = ClagDaemon()
    Intf = IntfSupport()

    # Parse the command line parameters
    Parser = ClagParser()
    Parser.ParseCmdLine()
    Parser.DumpParameters()

    # Make sure the script is invoked with root privileges
    if os.getuid() != 0:
        rootMsg = "You must be root to run this command."
        Log.log_error(rootMsg)
        print(rootMsg)
        sys.exit(-1)

    if not Parser.args.backupIp:
        Log.log("Backup IP has not been configured")


def ClagReloadConfig():
    '''
    Hands the CLAG ID of every bond in the reloadable configuration to the
    parser, then tells the parser that the reload is done.
    '''
    clagIdCfg = reloadconfig.getClagIdConfig()
    for bondName in clagIdCfg:
        Parser.setBondClagId(bondName, clagIdCfg[bondName].get('clagId'))
    Parser.setReloadDone()


#-------------------------------------------------------------------------------
#
#   The main runtime code
#
#-------------------------------------------------------------------------------
def ClagRun():
    #
    #   Start the daemon, apply the command line settings, build the sync and
    #   service objects, launch the worker threads, report readiness through
    #   sd_notify and then block until stopEvent is set.
    #
    global Cmd, LacpSync, VxLanSync, HealthCheck, ClagNetLink
    global DataSyncs
    global Threads

    Daemon.start()

    # Perform operations based on the command line parameter settings
    Log.log("Role is now %s" % (Parser.clagRole,))
    Log.setQuiet(Parser.args.quiet)
    Log.setDebug(Parser.args.debug)
    Log.setVerbose(Parser.args.verbose)
    Log.setLogfile(Parser.args.log)
    Parser.clagId[0] = Parser.args.priority
    Parser.clagId[1] = Intf.GetIfMac(Parser.args.peerIf)
    Parser.currLacpPoll = Parser.getCfgLacpPoll()
    ConfigurePeerLearning(True)

    # The FDB, MDB and VLAN syncs stay local to this function; the remaining
    # objects are published as module-level globals.
    fdb = clag.fdbsync.fdbsync(Daemon, Intf, Parser, Log, peerSendQueue)
    mdb = clag.mdbsync.mdbsync(Daemon, Intf, Parser, Log)
    vlan = clag.vlansync.vlansync(Intf, Parser, Log, peerSendQueue)
    LacpSync = clag.lacpsync.lacpsync(Daemon, Intf, Parser, Log, peerSendQueue)
    VxLanSync = clag.vxlansync.vxlansync(Daemon, Intf, Parser, Log, peerSendQueue)
    HealthCheck = clag.healthcheck.HealthCheck(Daemon, Parser, Log, Intf)
    ClagNetLink = clag.clagnetlink.ClagNl(Daemon, Parser, Log, Intf)
    Cmd = clag.cmdsrv.cmdsrv(Daemon, Intf, Parser, Log, LacpSync, VxLanSync,
                             fdb, mdb, vlan, HealthCheck)

    # Both the LACP and VxLAN syncs get a reference to the command server
    for sync in (LacpSync, VxLanSync):
        sync.Cmd = Cmd

    ClagReloadConfig()

    # ... and, once the reload has been applied, to the full list of syncs
    DataSyncs = [LacpSync, VxLanSync, fdb, mdb, vlan]
    for sync in (LacpSync, VxLanSync):
        sync.DataSyncs = DataSyncs

    # Create the threads which do the work. Each Threads entry holds the
    # target callable in slot 0 and receives its Thread object in slot 1.
    for threadName in Threads:
        entry = Threads[threadName]
        worker = threading.Thread(group=None, target=entry[0], name=threadName)
        worker.daemon = True
        entry[1] = worker

    # Start the threads only after every one of them has been created
    for threadName in Threads:
        Threads[threadName][1].start()

    # Tell systemd that we are initialized and ready
    cumulus.sdnotify.sd_notify(0, "READY=1")

    # Take the day off: wait in 24 hour slices until stopEvent.wait() reports
    # a true value
    oneDay = 60*60*24
    while True:
        if stopEvent.wait(oneDay):
            break


#-------------------------------------------------------------------------------
#
#   Main program entry point
#
#-------------------------------------------------------------------------------

def main():
    #
    #   Run ClagRun() and pass a status describing how it ended to do_exit():
    #       0 - KeyboardInterrupt, or an exception whose formatted traceback
    #           contains "release unlocked lock"
    #       1 - ClagRun() returned without raising
    #       2 - RuntimeError (its message is reported)
    #       3 - any other Exception (the full traceback is reported)
    #   Reports go to Log when it is set, otherwise to stdout.
    #
    status = 0
    try:
        ClagRun()
    except RuntimeError as e:
        if not Log:
            print(str(e))
        else:
            Log.log_error(str(e))
        status = 2
    except KeyboardInterrupt:
        pass
    except Exception:
        err = traceback.format_exc()
        # Tracebacks mentioning "release unlocked lock" are deliberately
        # neither reported nor reflected in the status
        if "release unlocked lock" not in err:
            report = "unhandled exception: %s" % (err,)
            if not Log:
                print(report)
            else:
                Log.log_error(report)
            status = 3
    else:
        status = 1
    do_exit(status)


#-------------------------------------------------------------------------------
#
#   Are we being executed or imported?
#
#-------------------------------------------------------------------------------

if __name__ == '__main__':

    ClagInit()

    # Signals routed to signal_handler, whether daemonized or not
    _handledSignals = (signal.SIGHUP,
                       signal.SIGTERM,
                       signal.SIGRTMIN,
                       signal.SIGRTMIN+1,
                       signal.SIGRTMIN+2)

    if not Parser.args.daemon:
        # Foreground mode: install the handlers directly and run
        for _sig in _handledSignals:
            signal.signal(_sig, signal_handler)
        main()
    else:
        # Daemon mode: the daemon context installs the handlers and uses the
        # daemon's lock as its pidfile
        context = daemon.DaemonContext()
        context.signal_map = {_sig: signal_handler for _sig in _handledSignals}
        context.pidfile = Daemon.lock
        if Daemon.isClagAlreadyRunning():
            Log.log_error("Unable to start clagd - clagd is already running.")
            sys.exit(-1)
        # No other clagd is running at this point, so the lock is broken
        # before the context is opened
        Daemon.lock.break_lock()
        context.open()
        try:
            with context:
                main()
        except lockfile.NotLocked:
            # Raised when the lock is no longer held; ignored here
            pass

