#! /bin/bash
# Copyright 2017,2018,2019,2020 Cumulus Networks,  Inc.  All rights reserved.
#
# Record switchd and related state; used by cl-support
#
# Note that cl-support expects the below three flags to be commented
#TIMEOUT=300
#DEFAULT
#ONCORE=switchd sx_sdk

# Name of this module: the basename of the script as invoked.
module="${0##*/}"

# Sub-modules run by default, and those run only when named explicitly.
declare -a funcs=(bcm mlx debug stack fuse)
declare -a nondef_funcs=(verbose)

# Extra arguments for JSON mode; both stay empty unless -j is given.
json=''
jexec=''

# Scratch directory: $TMPDIR when set and non-empty, otherwise /tmp.
tmpdir="${TMPDIR:-/tmp}"

# Cached verdict of switchd_ok():
#   1 = not probed yet (initial value)
#   0 = a probe found switchd unresponsive
#   2 = a probe found switchd responsive
declare -i _switchd_ok=1

# switchd_ok
#   Check that switchd is running and that a read from its FUSE tree
#   (/cumulus/switchd/config/logging) produces a line.
# Globals:  _switchd_ok (read/written), tmpdir, module (read)
# Outputs:  a "<module>: switchd ..." message on stdout when the check fails
# Returns:  0 when switchd answered (or a previous call cached success),
#           1 when it is not active, did not answer, or a previous call
#           cached failure.
# Note: the "not running" path returns 1 without updating _switchd_ok, so the
# next call probes again.
switchd_ok()
{
    local cpid z

    # Fast path: reuse a cached verdict (0 -> fail, 2 -> ok); fall through
    # only while the state is still "unknown" (1).
    { [ $_switchd_ok -eq 0 ] &&  return 1 ; } ||
        { [ $_switchd_ok -eq 2 ] && return 0 ; }
    # Scratch files for the probe, named after this shell's PID.
    # NOTE(review): these names are predictable; mktemp would be safer if
    # $tmpdir can be world-writable -- confirm how cl-support sets TMPDIR.
    local -r flagf=$tmpdir/flagf$$ pidf=$tmpdir/pid$$

    touch $flagf
    # Clean up on function return. The trap text is double-quoted, so the two
    # file names are expanded now, when the trap is installed.
    trap "export -n flagf pidf; rm -f $flagf $pidf" RETURN
    systemctl -q is-active switchd || { echo $module: switchd is not running;
        return 1 ; }
    export flagf pidf
    # Background probe: record the subshell's PID in $pidf, try to read one
    # line from the FUSE node, then write what was read into $flagf.
    # NOTE(review): TMOUT is meant as the read timeout here; bash documents
    # read timeouts as having no effect on regular files -- confirm it applies
    # to this FUSE node.
    ( (echo $BASHPID > $pidf ; TMOUT=5 read x < /cumulus/switchd/config/logging \
        > $flagf ; echo $x > $flagf) 2>/dev/null &)
    # Give the probe time to finish before looking at its result.
    sleep 1.5
    read cpid < $pidf
    # If the probe has not written a line yet, this read hits EOF on the
    # still-empty $flagf and fails.
    TMOUT=8 read z < $flagf
    # On failure: report, kill the (possibly stuck) probe, and cache the
    # negative verdict.
    [ $? -ne 0 ] && { echo $module: switchd not responding
        kill -KILL $cpid 2>/dev/null; _switchd_ok=0 ; return 1; }
    _switchd_ok=2
    return 0
}

# What enable_debug() found/did, consulted later by restore_debug():
#   2 = nothing recorded (initial value)
#   1 = the debug nodes were already present
#   0 = enable_debug() wrote to the control node to turn them on
declare -i _debug_state_orig=2

# switchd FUSE paths: the debug control node and the debug directory.
readonly switchd_dbg_ctrl=/cumulus/switchd/ctrl/debug
readonly switchd_dbg_dir=/cumulus/switchd/debug

# enable_debug
#   Make sure the switchd fuse debug nodes are available, remembering in
#   _debug_state_orig whether they already were (1) or had to be switched
#   on here (0). The state is left untouched when neither the debug nodes
#   nor the control node exist.
# Returns: switchd_ok's status when switchd is not usable, 1 when the write
#          to the control node fails, 0 otherwise.
enable_debug()
{
	switchd_ok || return

	# Debug nodes already present: nothing to switch on.
	if [ -e "$switchd_dbg_dir/bond" ]; then
		_debug_state_orig=1
		return 0
	fi

	# No control node either: nothing we can do, and nothing to restore.
	[ -e "$switchd_dbg_ctrl" ] || return 0

	if ! echo 1 > "$switchd_dbg_ctrl"; then
		echo Unable to enable switchd fuse debug
		return 1
	fi
	_debug_state_orig=0
	return 0
}

# restore_debug
#   Undo enable_debug(): acts only when _debug_state_orig is 0, i.e. when
#   enable_debug() itself wrote to the control node.
# Returns: the failed test's status (non-zero) when there is nothing to
#          restore, otherwise the status of the write.
# NOTE(review): this writes 1, the same value enable_debug() writes to turn
# debug on. That is only right if the control node acts as a toggle/trigger;
# if it takes a value, this should presumably write 0 -- confirm with the
# switchd FUSE semantics.
restore_debug()
{
	[ $_debug_state_orig -eq 0 ] || return
	echo 1 > $switchd_dbg_ctrl
}

# func_debug
#   Record the switchd fuse debug nodes. Each node under $switchd_dbg_dir is
#   read with cat through exec_cmd, using an output name of the form
#   switchd.debug.<node path with "/" replaced by ".">.
# Returns: switchd_ok's status when switchd is unusable, otherwise the status
#          of the last exec_cmd.
func_debug()
{
	switchd_ok || return

	local _node
	# Order is: ACL, bond, bridge, vlan, port (misc), VXLAN, knet, mroute.
	for _node in \
		acl \
		bond/info bond/sw_bonds \
		bridge/info \
		vlan/info \
		misc/ports \
		vxlan/info \
		knet/info \
		mroute/info
	do
		exec_cmd "switchd.debug.${_node//\//.}" cat "${switchd_dbg_dir}/${_node}"
	done
}

# func_stack
#   Take three samples, 0.1s apart, of /proc/<tid>/stack for every thread
#   (LWP) that ps reports for the command name "switchd". Sample N is
#   recorded through exec_cmd under the name switchd.stack.N.
func_stack()
{
	local _sample
	for _sample in 1 2 3; do
		# Pause between samples, but not before the first one.
		[ $_sample -eq 1 ] || sleep 0.1
		/bin/ps -C switchd -L -o lwp --no-headers |
			while read p; do
				exec_cmd switchd.stack.$_sample /bin/cat /proc/$p/stack
			done
	done
}

# getstacks CMD
#   Print /proc/<tid>/stack for every thread (LWP) of the processes whose
#   command name is CMD, each line prefixed with the TID and the thread name
#   read from /proc/<tid>/comm.
# Needs to be a function because exec_cmd() can't take a pipeline.
# Arguments: $1 - command name passed to "ps -C"
# Outputs:   the annotated stack lines on stdout
#
# xargs substitutes each TID for "##" everywhere in the awk arguments,
# including inside the awk program text. Only -I is given: it already runs
# one command per input line, and GNU xargs treats -l and -I as mutually
# exclusive (the earlier one is ignored, with a warning on newer versions).
getstacks()
{
	/bin/ps -C "$1" -L -o lwp --no-headers |
	   xargs -I'##' awk 'BEGIN {getline comm < "/proc/##/comm"}
		{ printf "%9d %14s: %s\n", ##, comm, $0}' /proc/##/stack
}

# getstatus CMD
#   Print /proc/<tid>/status for every thread (LWP) of the processes whose
#   command name is CMD, each line prefixed with the TID and the thread name
#   read from /proc/<tid>/comm.
# Needs to be a function because exec_cmd() can't take a pipeline.
# Arguments: $1 - command name passed to "ps -C"
# Outputs:   the annotated status lines on stdout
#
# xargs substitutes each TID for "##" everywhere in the awk arguments,
# including inside the awk program text. Only -I is given: it already runs
# one command per input line, and GNU xargs treats -l and -I as mutually
# exclusive (the earlier one is ignored, with a warning on newer versions).
getstatus()
{
	/bin/ps -C "$1" -L -o lwp --no-headers |
	   xargs -I'##' awk 'BEGIN {getline comm < "/proc/##/comm"}
		{ printf "%9d %14s: %s\n", ##, comm, $0}' /proc/##/status
}

# getstat CMD
#   Print /proc/<tid>/stat for every thread (LWP) of the processes whose
#   command name is CMD.
# Needs to be a function because exec_cmd() can't take a pipeline.
# This doesn't need the IDs prepended because they are in the stat file
# and it's a one-line file.
# Arguments: $1 - command name passed to "ps -C"
# Outputs:   one stat line per thread on stdout
#
# Only -I is given to xargs: it already runs one command per input line, and
# GNU xargs treats -l and -I as mutually exclusive (the earlier one is
# ignored, with a warning on newer versions).
getstat()
{
	/bin/ps -C "$1" -L -o lwp --no-headers |
	   xargs -I'##' cat /proc/##/stat
}

# func_verbose
#   Record per-thread stacks, status and stat for switchd: for each kind K
#   in (stacks, status, stat) run helper getK for "switchd" through exec_cmd
#   under the output name switchd.K.
func_verbose() {
	local _kind
	for _kind in stacks status stat; do
		exec_cmd "switchd.${_kind}" "get${_kind}" switchd
	done
}

# get_bcm_family [SYSTEM_INFO_FILE]
#   Report the Broadcom ASIC family found in the system-info file.
#
# By default this reads the switchd FUSE filesystem, so it should be
# called only after a successful switchd_ok call.
#
# BCM has XGS and DNX families of ASICs.
# - XGS ASICs = BCM56xxx
# - DNX ASICs = BCM88xxx
#
# Arguments: $1 - optional path of the system-info file
#                 (default /cumulus/switchd/config/system_info)
# Outputs:   "DNX" when the file contains "BCM88", otherwise "XGS"
# Returns:   non-zero, with no output, when the file does not exist
get_bcm_family () {
    local _systeminfo=${1:-/cumulus/switchd/config/system_info}
    local _chip_dnx="BCM88"
    [ -e "$_systeminfo" ] || return

    # grep -q: only the match status matters, not the matching text.
    if grep -q -- "$_chip_dnx" "$_systeminfo"; then
        echo 'DNX'
    else
        echo 'XGS'
    fi
}

# get_bcm_xgs_chip_name [SYSTEM_INFO_FILE]
#   Report which XGS chip the system-info file describes.
#
# By default this reads the switchd FUSE filesystem, so it should be
# called only after a successful switchd_ok call.
#
# Arguments: $1 - optional path of the system-info file
#                 (default /cumulus/switchd/config/system_info)
# Outputs:   "trident3"  when the file contains "BCM5687" (BCM5687* series),
#            "tomahawk3" when it contains "BCM56980",
#            "trident2"  for anything else
# Returns:   non-zero, with no output, when the file does not exist
get_bcm_xgs_chip_name () {
    local _systeminfo=${1:-/cumulus/switchd/config/system_info}
    local _chip_td3="BCM5687"
    local _chip_th3="BCM56980"
    # Fallback name when neither pattern below matches.
    local chip_name='trident2'
    [ -e "$_systeminfo" ] || return

    # grep -q: only the match status matters, not the matching text.
    if grep -q -- "$_chip_td3" "$_systeminfo"; then
        chip_name='trident3'
    elif grep -q -- "$_chip_th3" "$_systeminfo"; then
        chip_name='tomahawk3'
    fi
    echo "$chip_name"
}

# get_bcm_dnx_chip_name [SYSTEM_INFO_FILE]
#   Report which DNX chip the system-info file describes.
#
# By default this reads the switchd FUSE filesystem, so it should be
# called only after a successful switchd_ok call.
#
# Qumran-MX = BCM88370/BCM88375
#
# Arguments: $1 - optional path of the system-info file
#                 (default /cumulus/switchd/config/system_info)
# Outputs:   "Qumran-MX" when the file contains "BCM8837",
#            otherwise "Unsupported"
# Returns:   non-zero, with no output, when the file does not exist
get_bcm_dnx_chip_name () {
    local _systeminfo=${1:-/cumulus/switchd/config/system_info}
    local _chip_qumran_mx="BCM8837"
    [ -e "$_systeminfo" ] || return

    # grep -q: only the match status matters, not the matching text.
    if grep -q -- "$_chip_qumran_mx" "$_systeminfo"; then
        echo 'Qumran-MX'
    else
        echo 'Unsupported'
    fi
}

# Path of the bcmcmd helper program.
_bcmcmd=/usr/lib/cumulus/bcmcmd

# bcmcmd FILE CMD...
#   Run "$_bcmcmd *:CMD..." through exec_cmd, with FILE passed as exec_cmd's
#   first argument. Always returns 0, whatever exec_cmd returns.
#
# The CMD words are deliberately left unquoted: a single argument such as
# "l2 show" is split, so $_bcmcmd receives "*:l2" and "show" as separate
# arguments. The "*:" prefix itself is quoted and is never globbed.
# (Presumably "*:" selects all units -- confirm against bcmcmd.)
# FILE is stored in the global variable "file".
bcmcmd()
{
   file=$1
   exec_cmd $file $_bcmcmd "*:"${@:2}
   return 0
}

# func_bcm_dnx
#   Record state for a Broadcom DNX ASIC. Does nothing unless $_bcmcmd exists
#   and switchd_ok succeeds. Only the "Qumran-MX" chip (as reported by
#   get_bcm_dnx_chip_name) is handled; for anything else it prints
#   "Unsupported DNX ASIC" and returns.
#   Every table entry below is "output-name|command" and is run, in order,
#   as: bcmcmd <output-name> "<command>". The cl-*-check tools follow.
func_bcm_dnx()
{
	[ -e $_bcmcmd ] || return
	switchd_ok || return

	local chip_name=$(get_bcm_dnx_chip_name)

	# Only Qumran-MX is supported for now
	[[ $chip_name == "Qumran-MX" ]] || { echo "Unsupported DNX ASIC"; return; }

	local -a _dnx_dumps=(
		# Generic commands
		'l2.show|l2 show'
		'vlan.show|vlan show'
		'counters.show|show counters'
		'counters.show|show counters full'
		'portstat.show|portstat'
		'diag.counters.show|diag counters'
		'diag.alloc.all.show|diag alloc all'
		'diag.portmap.show|diag port_db'
		'diag.nif.show|diag nif'
		'config.show|config show'

		# ACL commands
		'diag.field.resget.show|diag field RESource_get'
		'diag.field.action.show|diag field ACtion_info'
		'diag.field.db.action.show|diag field DB_action_info'
		'diag.field.ranges.show|diag field Ranges'
		'diag.alloc.fieldid.show|diag alloc FieldEntryID'
		'diag.alloc.direxfieldid.show|diag alloc FieldDirExEntID'

		# CoS commands
		'diag.ingress.cong.show|diag ing_congestion'
		'diag.egress.cong.show|diag egr_congestion'
		'diag.res.alloc.show|diag resource_alloc'
		'diag.cos.voq.show|diag cosq voq'
		'diag.cosq.nonemptyqs.show|diag cosq non_empty_queues'
		'diag.localtosys.show|diag cosq local_to_sys'

		# DB dumps - LEM=0, TCAM=1, KBP=2, SEM A=3, SEM B=4, ESEM=6,
		# OAM1=7, OAM2=8, RMEP=9, GLEM=10.
		'diag.dbal.ti.show|diag dbal ti'
		'diag.dbal.prefix.info.show|diag dbal Prefix_Info'
		'diag.dbal.dbd.lem.show|diag dbal dbd 0'
		'diag.dbal.dbd.tcam.show|diag dbal dbd 1'
		'diag.dbal.dbd.kbp.show|diag dbal dbd 2'
		'diag.dbal.dbd.sema.show|diag dbal dbd 3'
		'diag.dbal.dbd.semb.show|diag dbal dbd 4'
		'diag.dbal.dbd.esem.show|diag dbal dbd 6'
		'diag.dbal.dbd.oam1.show|diag dbal dbd 7'
		'diag.dbal.dbd.oam2.show|diag dbal dbd 8'
		'diag.dbal.dbd.rmep.show|diag dbal dbd 9'
		'diag.dbal.dbd.glem.show|diag dbal dbd 10'
		'diag.alloc.fec.hw.show|diag alloc FEC direct=1'
		'diag.alloc.fec.sw.show|diag alloc FEC info=1'
		'diag.pp.fec.show|diag pp fec all'
		'diag.ivec.show|dump chg EPNI_IVEC_TABLE'
		'diag.evec.show|dump chg EPNI_EVEC_TABLE'
		'diag.alloc.invlan.show|diag alloc invlan info=1'
		'diag.alloc.outvlan.show|diag alloc outvlan info=1'
		'diag.alloc.vlan.show|diag alloc vlan'
		'diag.alloc.com.inlif.show|diag alloc Common_Inlif'
		'diag.alloc.wide.inlif.show|diag alloc Wide_Inlif'
		'diag.alloc.local.outlif.show|diag alloc Local_Outlif'
		'diag.pp.outlif.show|diag pp OUTLIF_Info'
		'diag.pp.outlif.ll.show|diag pp OUTLIF_Info_LL'
		'diag.pp.outlif.vsi.show|diag pp OUTLIF_Info_VSI'

		# KBP dumps
		'diag.kbp.version.show|kbp sdk_ver'
		'diag.kbp.kaps.show|kbp kaps show'
		'diag.kbp.kaps.stats.show|kbp kaps_db_stats'

		# Last packet related info
		'diag.lp.show|diag last_packet'
		'diag.field.lp.show|diag field LAST_packet_get'
		'diag.dbal.lp.show|diag dbal lp'
		'diag.signal.dump.show|diag dump_signals'

		# TODO: Add signal based commands once BCM approves
	)

	local _dnx_entry
	for _dnx_entry in "${_dnx_dumps[@]}"; do
		# Text before the first "|" is the output name, the rest the command.
		bcmcmd "${_dnx_entry%%|*}" "${_dnx_entry#*|}"
	done

	# cl-*-checks
	exec_cmd cl-route-check.show /usr/cumulus/bin/cl-route-check -V
	exec_cmd cl-mroute-check.show /usr/cumulus/bin/cl-mroute-check -V
}

# func_bcm
#   Record state for a Broadcom ASIC. Does nothing unless $_bcmcmd exists and
#   switchd_ok succeeds. When get_bcm_family reports "DNX" the work is handed
#   to func_bcm_dnx; otherwise the XGS dumps below are taken, with a few
#   chip-specific tables chosen from get_bcm_xgs_chip_name
#   (tomahawk3 / trident3 / anything else).
#   Each "bcmcmd NAME CMD" records the output of CMD under NAME.
# Globals: _bcmcmd, jexec, json (read)
func_bcm()
{
	[ -e "$_bcmcmd" ] || return
	switchd_ok || return

	# Check BCM ASIC family
	local chip_family
	chip_family=$(get_bcm_family)

	# Handle DNX family separately
	if [[ $chip_family == "DNX" ]] ; then
		func_bcm_dnx
		return
	fi

	# Handle XGS family of ASICs here
	local bcm_chip
	bcm_chip=$(get_bcm_xgs_chip_name)

	bcmcmd l3.defip.show "l3 defip show"
	bcmcmd l3.l3table.show "l3 l3table show"
	bcmcmd l3.ip6route.show "l3 ip6route show"
	bcmcmd l3.ip6host.show "l3 ip6host show"
	bcmcmd l3.egress.show "l3 egress show"
	bcmcmd l3.multipath.show "l3 multipath show"
	bcmcmd l3.intf.show "l3 intf show"
	bcmcmd portstat "portstat"
	bcmcmd port.all "port all"
	bcmcmd soc "soc"
	bcmcmd show.counters "show counters"
	bcmcmd show.statistics "show statistics lite"
	bcmcmd show.errors "show errors"
	bcmcmd show.interrupts "show interrupts"
	bcmcmd show.params "show params"
	# NOTE(review): "show errors" is taken a second time here, under the
	# same name as above; kept as-is -- confirm whether that is intentional.
	bcmcmd show.errors "show errors"
	bcmcmd l2.show "l2 show"
	bcmcmd vlan.show "vlan show"
	bcmcmd egr_vlan.show "dump chg egr_vlan"

	if [[ $bcm_chip == "tomahawk3" ]] ; then
		bcmcmd l2.learn_cache "dump chg l2_learn_cache"
		bcmcmd l2.l2_dump "dump sw l2"
		bcmcmd l3.l3_dump "dump sw l3"
		bcmcmd alpm_bkt.show "alpm bkt show"
		# "alpm fmt show" gets its own name; it used to be recorded under
		# alpm.pvtlen.show, the name of the next command.
		bcmcmd alpm.fmt.show "alpm fmt show"
		bcmcmd alpm.pvtlen.show "alpm pvtlen show"
		bcmcmd alpm.stat.show "alpm stat show"
		bcmcmd alpm.tcam.show "alpm tcam show"
		bcmcmd alpm.config.show "alpm config show"
		bcmcmd alpm.pvt.show "alpm pvt show"
	elif [[ $bcm_chip == "trident3" ]] ; then
		bcmcmd vlan_xlate.show "dump chg vlan_xlate_1_double"
		bcmcmd vlan_xlate_egr.show "dump chg egr_vlan_xlate_1_double"
	else
		bcmcmd vlan_xlate.show "dump chg vlan_xlate"
		bcmcmd vlan_xlate_egr.show "dump chg egr_vlan_xlate"
	fi

	bcmcmd stg.show "stg show"
	bcmcmd trunk.show "trunk show"
	bcmcmd l2_cache.show "l2 cache show"
	bcmcmd fp.show "fp show"
	bcmcmd mcast.show "multicast show"
	bcmcmd ipmc.table.show "ipmc table show"
	bcmcmd clag.egress_mask.show "dump chg egress_mask"
	bcmcmd egr.l3.next.hop "dump raw egr_l3_next_hop"
	bcmcmd egr.port.to.nhi "dump chg egr_dgpp_to_nhi"
	bcmcmd my_station_tcam.dump "dump my_station_tcam"
	bcmcmd pcie_fw.show "pciephy fw version"
	bcmcmd phy.info "phy info"
	bcmcmd phy.diag.ce.dsc "phy diag ce dsc"
	bcmcmd phy.diag.ge.dsc "phy diag ge dsc"
	bcmcmd phy.diag.xe.dsc "phy diag xe dsc"

	if [[ $bcm_chip == "trident3" ]] ; then
		bcmcmd port.show "dump chg port"
		bcmcmd lport.show "dump chg lport"
		bcmcmd egr_vlan_control_1.show "dump chg egr_vlan_control_1"
		bcmcmd egr_inner_tpid.show "getreg chg EGR_INNER_TPID"
		bcmcmd egr_outer_tpid.show "getreg chg EGR_OUTER_TPID"
		bcmcmd my_station_tcam_2.dump "dump my_station_tcam_2"
		bcmcmd l3.nat_ingress.show "l3 nat_ingress show"
		bcmcmd l3.nat_egress.show "l3 nat_egress show"
	else
		bcmcmd port.show "dump chg port"
		bcmcmd egr_vlan_control_1.show "getreg chg egr_vlan_control_1"
		# The register read here is EGR_OUTER_TPID, so record it under the
		# outer name (it used to be labelled egr_inner_tpid.show).
		bcmcmd egr_outer_tpid.show "getreg chg EGR_OUTER_TPID"
	fi

	# --json output too terse to support it.
	exec_cmd cl-route-check.show /usr/cumulus/bin/cl-route-check -V
	exec_cmd cl-mroute-check.show /usr/cumulus/bin/cl-mroute-check -V
	# $jexec / $json are intentionally unquoted: they vanish when empty.
	exec_cmd $jexec portmap /usr/lib/cumulus/portmap -v $json
}

# Path of the mlxcmd helper program.
_mlxcmd=/usr/lib/cumulus/mlxcmd

# Cached result of the switchd check made by mlxcmd():
#   1 = not checked yet, 0 = switchd_ok failed, 2 = switchd_ok succeeded.
declare -i mlxcmd_ok=1

# mlxcmd FILE ARG...
#   Run "$_mlxcmd [--json] ARG..." through exec_cmd, with FILE as the output
#   name (preceded by -j when JSON mode is on).
# Globals:  mlxcmd_ok (read/written), jexec, json, _mlxcmd (read);
#           FILE is stored in the global variable "file".
# Returns:  1, without running anything, when switchd_ok fails now or failed
#           on an earlier call; otherwise 0 regardless of exec_cmd's status.
mlxcmd()
{
   [ $mlxcmd_ok -eq 0 ] && { return 1 ; }
   if [ $mlxcmd_ok -eq 1 ]; then
      # The check must run in THIS shell: inside a ( ... ) subshell the
      # assignment to mlxcmd_ok (and switchd_ok's own cache) would be lost,
      # and every later call would repeat the slow probe.
      if switchd_ok; then
         mlxcmd_ok=2
      else
         mlxcmd_ok=0
         return 1
      fi
   fi
   file=$1
   shift
   # $jexec / $json are intentionally unquoted so they vanish when empty.
   # "$@" is quoted so that arguments such as a literal swp* reach mlxcmd
   # unchanged instead of being globbed against the current directory.
   exec_cmd $jexec $file $_mlxcmd $json "$@"
   return 0
}

# func_mlx
#   Record state on Mellanox platforms. The steps, in order:
#     1. Skip everything (return 0, leaving a note in
#        /var/log/sdk_debug_dump.log) when sx_sdk.service is not active or
#        /run/switchd.ready is missing.
#     2. Return if /usr/bin/sx_api_dbg_generate_dump.py is not executable
#        (treated as "not a Mellanox system"); otherwise run it.
#     3. Run the optional flex-ACL dump, the mlxcmd table dumps, and the
#        firmware-version / route-check / resource queries.
#     4. Run mstdump, mlxdump, per-port mlxlink, the hw-management dump and
#        the cable dump, each only when its tool is executable (the cable
#        dump is unconditional).
#   Sets the globals rc, cmd and j as a side effect.
func_mlx()
{
    #
    # if the SDK is not ready, do not do mlxdump commands
    #
    systemctl --quiet is-active sx_sdk.service
    rc=$?
    if [ $rc -ne 0 ]; then
	echo "# skipped sx_api_dbg_generate_dump due to sdk not ready" > /var/log/sdk_debug_dump.log
        return 0
    fi

    #
    # if switchd is not ready, do not do mlxdump commands
    #
    if [ ! -f "/run/switchd.ready" ]; then
	echo "# skipped sx_api_dbg_generate_dump due to switchd not ready" > /var/log/sdk_debug_dump.log
	return 0
    fi

    [ -x /usr/bin/sx_api_dbg_generate_dump.py ] || return # not mlx
    /usr/bin/sx_api_dbg_generate_dump.py /var/log/sdk_debug_dump.log
    [ -x /usr/bin/sx_api_flex_acl_dump.py ] && { exec_cmd sx_api_flex_acl_dump.txt /usr/bin/sx_api_flex_acl_dump.py; }
    # Everything up to the matching close brace runs only when $_mlxcmd is
    # executable.
    [ -x $_mlxcmd ] && {
        # The first mlxcmd call gates the remaining table dumps: mlxcmd
        # returns non-zero when its switchd check fails.
        mlxcmd l2.show l2 fdb show table && {
        mlxcmd l2mc.show l2 fdb mcshow table
        mlxcmd l3.route.show l3 route
        mlxcmd l3.route6.show l3 route6
        mlxcmd l3.neigh.show l3 neighbor
        mlxcmd l3.neigh6.show l3 neighbor6
        mlxcmd l3.ecmp.show l3 ecmp_table
        mlxcmd l3.interface.show l3 interface 
        #mlxcmd vlan.show vlan ids
        mlxcmd port.info.show ports info
        mlxcmd port.ids.show ports show ids
        mlxcmd port.isolate.show ports show isolate
        # swp\* passes a literal "swp*" to mlxcmd.
        for cmd in counters pktdist discards sflow sflow_statistics; do
            mlxcmd port.${cmd}.show ports show $cmd swp\*
        done
        }

    # These four are inside the "[ -x $_mlxcmd ]" group but outside the
    # group gated by the first mlxcmd call, so they run even when that
    # call fails.
    exec_cmd mlxfwversion /usr/lib/cumulus/mlxfwupgrade.sx --fwversion
    exec_cmd cl-route-check.show /usr/cumulus/bin/cl-route-check -V
    exec_cmd cl-mroute-check.show /usr/cumulus/bin/cl-mroute-check -V
    exec_cmd $jexec resources cl-resource-query $json
	# else switchd not running or stuck
    }

    # Register dump; the loop bound of 1 means a single pass (j is only 0).
    # The dump file is written relative to the current directory and is
    # compressed in the background.
    [ -x /usr/bin/mstdump ] && {
	 j=0
         while [ $j -lt 1 ]; do
             /usr/bin/mstdump -full mlnxsw-255 > mstdump_$j.dmp
             xz -2 mstdump_$j.dmp &
             # sleep 1
             (( j++ ))
         done
    }

    # Full mlxdump snapshot; same single-pass loop and background xz.
    [ -x /usr/bin/mlxdump ] && {
         j=0
         while [ $j -lt 1 ]; do
            /usr/bin/mlxdump -d mlnxsw-255 snapshot -m full -o mlxdump_$j.udmp
            xz -2 mlxdump_$j.udmp &
            # sleep 1
            (( j++ ))
         done
    }

    # Per-port mlxlink output for every interface whose name matches the
    # swp pattern below; the mlxlink runs are started in the background and
    # collected by the "wait" after the loop.
    [ -x /usr/bin/mlxlink ] && {
        local p d
        # d: first column of the "mst status" line(s) mentioning pciconf.
        d=$(mst status | grep pciconf | awk '{print $1}' )
        for p in $(ls -v1 /sys/class/net); do
            # NOTE(review): $p is a bare interface name here, so these two
            # directory tests are resolved relative to the current directory,
            # not /sys/class/net -- looks unintended; confirm.
            [ -d $p/bridge -o -d $p/bonding ] && continue;
            p=${p##*/}
            [[ $p =~ ^(sw[0-9]*p[0-9]+(s[0-7])?$) ]] && {
                # Turn the interface name into the -p argument:
                # strip a leading "swp", drop a trailing "s0", and rewrite
                # "sN" (N = 1..7) as "/N+1"; e.g. swp1s1 -> 1/2.
                p=${p#"swp"}
                p=${p%"s0"}
                p=${p/s1/\/2}
                p=${p/s2/\/3}
                p=${p/s3/\/4}
                p=${p/s4/\/5}
                p=${p/s5/\/6}
                p=${p/s6/\/7}
                p=${p/s7/\/8}
                # tr strips NUL bytes from the mlxlink output.
                exec_cmd mlxlink sh -c "mlxlink -d ${d} -c -m -e --show_fec -p ${p} | tr -d '\0'" & 
            }
        done
        wait
    }

    # The hw-management dump is moved from /tmp into /var/log.
    [ -x /usr/bin/hw-management-generate-dump.sh ] && {
        /usr/bin/hw-management-generate-dump.sh
        mv /tmp/hw-mgmt-dump.tar.gz /var/log/
    }

    exec_cmd mlxcables.show /usr/lib/cumulus/dump_mlxcables
}

# func_fuse
#   Copy the switchd fuse filesystem (/cumulus/switchd) into
#   $SUP_TOPDIR/cumulus/switchd, after printing a timestamped progress line
#   (repeated on stderr when SUP_VERBOSE is 1).
# Globals:  module, func, SUP_TOPDIR, SUP_VERBOSE (read)
# Outputs:  the progress line, plus any cp diagnostics other than those
#           containing "Invalid argument"
# Returns:  switchd_ok's status when switchd is unusable; otherwise the
#           status of the final grep filter, i.e. non-zero when cp printed
#           nothing that survived the filter.
func_fuse()
{
    switchd_ok || return

    local _fmt="%F_%T.%N: ${module}.${func} Copying switchd fuse filesystem"
    date +"$_fmt"
    # Default to 0 so an unset SUP_VERBOSE does not make the test error out.
    [ "${SUP_VERBOSE:-0}" -eq 1 ] && date +"$_fmt" 1>&2

    mkdir -p "$SUP_TOPDIR/cumulus"
    # |& sends cp's stderr through the filter as well; grep -E replaces the
    # obsolescent egrep.
    cp -a /cumulus/switchd "$SUP_TOPDIR/cumulus/switchd" |& grep -E -v 'Invalid argument'
}

# Command-line options:
#   -c ARG  invoked for core dumps; only reported when SUP_VERBOSE is 1
#           (the default sub-modules still run)
#   -j      JSON mode: sets jexec=-j and json=--json
#   -l      list "<default sub-modules> ; <non-default sub-modules>" and exit
# Any other option is ignored here.
while getopts "c:jl" Option; do
    case $Option in
    c)
        if [ $SUP_VERBOSE -eq 1 ]; then
            echo ${module}: Invoked for "$OPTARG" core dumps 1>&2
        fi
        ;;
    j)
        if [ $SUP_VERBOSE -eq 1 ]; then
            echo JSON output not yet implemented 1>&2
        fi
        jexec=-j
        json=--json
        ;;
    l)
        echo ${funcs[@]} ';' ${nondef_funcs[@]}
        exit 0
        ;;
    *)
        ;;
    esac
done
# Drop the parsed options; what remains are sub-module names.
shift $((OPTIND - 1))

# main [SUBMODULE...]
#   Run the selected sub-modules (func_<name>) in order, printing a
#   "### <module>.<name> Started at ..." line before each one and a
#   "### <module>.<name> Completed in ..." line after it.
#   With arguments, only the named sub-modules run (they replace the global
#   funcs list); with none, the default funcs list is used.
# Globals: funcs (read/written), module, stderr, SUP_VERBOSE (read);
#          func and secs are set as a side effect.
main()
{
    # TIMEFORMAT is made local so the per-function "time" below reports just
    # "<n> seconds"; tfile receives that report.
    # NOTE(review): tfile has a predictable name under /run -- presumably
    # acceptable because the module runs as root; confirm.
    local -r TIMEFORMAT='%2R seconds' tfile=/run/${module}_funcstime$$
    if [ $# -ne 0 ]; then # only run specified sub-modules
       [ $SUP_VERBOSE -eq 1 ] && echo ${module}: run only submodules: $@ 1>&$stderr
       funcs=($@)
    fi

    # The fuse debug nodes are switched on once up front when any selected
    # name contains "debug" or "fuse" (the list is matched as one string).
    case "${funcs[@]}" in
        *debug*|*fuse*) enable_debug ;;
    esac

    for func in ${funcs[@]}; do
        [ $SUP_VERBOSE -eq 1 ] && echo "$module.$func" 1>&2
        date +"### $module.$func Started at %F-%T.%N"
        # The function's own stderr goes to the descriptor numbered $stderr;
        # only the "time" report of the group lands in $tfile.
        { time func_$func 2>&$stderr ; } 2>$tfile
        read secs < $tfile
        echo "### $module.$func Completed in $secs"
    done
    rm $tfile
    # Undo whatever enable_debug changed.
    restore_debug
}

# Format of the overall timing line printed by the "time main" below.
TIMEFORMAT="Module $module Completed in %2R seconds"
# Keep a copy of the original stderr on descriptor 42; main and the
# sub-modules send their diagnostics there via $stderr.
exec 42>&2
stderr=42

# main's own stderr goes to the saved descriptor, while the "time" report of
# the group is redirected to stdout.
{ time main $@ 2>&$stderr ; } 2>&1

# Always report success, whatever the sub-modules returned.
exit 0
