diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 90056a3ca89d45c0efbc7c457e35c6064baad0ef..8a74c409b5019a4820ff61b83afe80d428bb1a9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -107,14 +107,73 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag); +static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports, + u8 *ports, int *num_disabled) +{ + int i; + + *num_disabled = 0; + for (i = 0; i < num_ports; i++) { + if (!tracker->netdev_state[i].tx_enabled || + !tracker->netdev_state[i].link_up) + ports[(*num_disabled)++] = i; + } +} + +static void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, + u8 *ports, int *num_enabled) +{ + int i; + + *num_enabled = 0; + for (i = 0; i < num_ports; i++) { + if (tracker->netdev_state[i].tx_enabled && + tracker->netdev_state[i].link_up) + ports[(*num_enabled)++] = i; + } + + if (*num_enabled == 0) + mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled); +} + static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev, - struct mlx5_lag *ldev) + struct mlx5_lag *ldev, + struct lag_tracker *tracker, + u8 flags) { + char buf[MLX5_MAX_PORTS * 10 + 1] = {}; + u8 enabled_ports[MLX5_MAX_PORTS] = {}; + int written = 0; + int num_enabled; + int idx; + int err; int i; + int j; - mlx5_core_info(dev, "lag map:\n"); - for (i = 0; i < ldev->ports; i++) - mlx5_core_info(dev, "\tport %d:%d\n", i + 1, ldev->v2p_map[i]); + if (flags & MLX5_LAG_FLAG_HASH_BASED) { + mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports, + &num_enabled); + for (i = 0; i < num_enabled; i++) { + err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1); + if (err != 3) + return; + written += err; + } + buf[written - 2] = 0; + mlx5_core_info(dev, "lag map active ports: %s\n", buf); + } else { + for (i = 0; i < ldev->ports; 
i++) { + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; + err = scnprintf(buf + written, 10, + " port %d:%d", i + 1, ldev->v2p_map[idx]); + if (err != 9) + return; + written += err; + } + } + mlx5_core_info(dev, "lag map:%s\n", buf); + } } static int mlx5_lag_netdev_event(struct notifier_block *this, @@ -174,6 +233,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) mlx5_core_err(dev, "Failed to init multipath lag err=%d\n", err); ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports); + ldev->buckets = 1; return ldev; } @@ -200,28 +260,25 @@ static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev) return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV); } -static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports, - u8 *ports, int *num_disabled) -{ - int i; - - *num_disabled = 0; - for (i = 0; i < num_ports; i++) { - if (!tracker->netdev_state[i].tx_enabled || - !tracker->netdev_state[i].link_up) - ports[(*num_disabled)++] = i; - } -} - +/* Create a mapping between steering slots and active ports. + * As we have ldev->buckets slots per port first assume the native + * mapping should be used. + * If there are ports that are disabled fill the relevant slots + * with mapping that points to active ports. + */ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, - u8 num_ports, u8 *ports) + u8 num_ports, + u8 buckets, + u8 *ports) { int disabled[MLX5_MAX_PORTS] = {}; int enabled[MLX5_MAX_PORTS] = {}; int disabled_ports_num = 0; int enabled_ports_num = 0; + int idx; u32 rand; int i; + int j; for (i = 0; i < num_ports; i++) { if (tracker->netdev_state[i].tx_enabled && @@ -231,9 +288,14 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, disabled[disabled_ports_num++] = i; } - /* Use native mapping by default */ + /* Use native mapping by default where each port's buckets + * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 
3 etc + */ for (i = 0; i < num_ports; i++) - ports[i] = MLX5_LAG_EGRESS_PORT_1 + i; + for (j = 0; j < buckets; j++) { + idx = i * buckets + j; + ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i; + } /* If all ports are disabled/enabled keep native mapping */ if (enabled_ports_num == num_ports || @@ -242,9 +304,10 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, /* Go over the disabled ports and for each assign a random active port */ for (i = 0; i < disabled_ports_num; i++) { - get_random_bytes(&rand, 4); - - ports[disabled[i]] = enabled[rand % enabled_ports_num] + 1; + for (j = 0; j < buckets; j++) { + get_random_bytes(&rand, 4); + ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1; + } } } @@ -317,28 +380,33 @@ static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports) void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker) { + u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {}; struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - u8 ports[MLX5_MAX_PORTS] = {}; + int idx; int err; int i; + int j; - mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ports); + mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports); for (i = 0; i < ldev->ports; i++) { - if (ports[i] == ldev->v2p_map[i]) - continue; - err = _mlx5_modify_lag(ldev, ports); - if (err) { - mlx5_core_err(dev0, - "Failed to modify LAG (%d)\n", - err); - return; - } - memcpy(ldev->v2p_map, ports, sizeof(ports[0]) * - ldev->ports); + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; + if (ports[idx] == ldev->v2p_map[idx]) + continue; + err = _mlx5_modify_lag(ldev, ports); + if (err) { + mlx5_core_err(dev0, + "Failed to modify LAG (%d)\n", + err); + return; + } + memcpy(ldev->v2p_map, ports, sizeof(ports)); - mlx5_lag_print_mapping(dev0, ldev); - break; + mlx5_lag_print_mapping(dev0, ldev, tracker, + ldev->flags); + break; + } } if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && @@ -357,6 
+425,8 @@ static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev, if (!MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table)) return -EINVAL; *flags |= MLX5_LAG_FLAG_HASH_BASED; + if (ldev->ports > 2) + ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS; } return 0; @@ -370,6 +440,7 @@ static int mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev, if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) && tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH) *flags |= MLX5_LAG_FLAG_HASH_BASED; + return 0; } @@ -399,7 +470,7 @@ static int mlx5_create_lag(struct mlx5_lag *ldev, u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; int err; - mlx5_lag_print_mapping(dev0, ldev); + mlx5_lag_print_mapping(dev0, ldev, tracker, flags); mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n", shared_fdb, get_str_port_sel_mode(flags)); @@ -439,11 +510,12 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; int err; - mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->v2p_map); err = mlx5_lag_set_port_sel_mode(ldev, tracker, &flags); if (err) return err; + mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map); + if (flags & MLX5_LAG_FLAG_HASH_BASED) { err = mlx5_lag_port_sel_create(ldev, tracker->hash_type, ldev->v2p_map); @@ -1265,7 +1337,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, } } - port = ldev->v2p_map[port]; + port = ldev->v2p_map[port * ldev->buckets]; unlock: spin_unlock(&lag_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index 1c8fb3fada0c0affdcac1ab79e6f93e275e739c8..0c90d0ed03bea2d067d74bcd9658515bf5add33c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -4,6 +4,7 @@ #ifndef __MLX5_LAG_H__ #define __MLX5_LAG_H__ +#define MLX5_LAG_MAX_HASH_BUCKETS 16 #include "mlx5_core.h" #include "mp.h" #include "port_sel.h" @@ -46,9 +47,10 
@@ struct lag_tracker { struct mlx5_lag { u8 flags; u8 ports; + u8 buckets; int mode_changes_in_progress; bool shared_fdb; - u8 v2p_map[MLX5_MAX_PORTS]; + u8 v2p_map[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS]; struct kref ref; struct lag_func pf[MLX5_MAX_PORTS]; struct lag_tracker tracker; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index 478b4ef723f8a03eea65eed5d0a39dbb8126449c..d3a3fe4ce670260f30436a7274cc9ebbeb4ac87c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -13,7 +13,7 @@ enum { static struct mlx5_flow_group * mlx5_create_hash_flow_group(struct mlx5_flow_table *ft, struct mlx5_flow_definer *definer, - u8 ports) + u8 rules) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *fg; @@ -26,7 +26,7 @@ mlx5_create_hash_flow_group(struct mlx5_flow_table *ft, MLX5_SET(create_flow_group_in, in, match_definer_id, mlx5_get_match_definer_id(definer)); MLX5_SET(create_flow_group_in, in, start_flow_index, 0); - MLX5_SET(create_flow_group_in, in, end_flow_index, ports - 1); + MLX5_SET(create_flow_group_in, in, end_flow_index, rules - 1); MLX5_SET(create_flow_group_in, in, group_type, MLX5_CREATE_FLOW_GROUP_IN_GROUP_TYPE_HASH_SPLIT); @@ -45,8 +45,10 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_namespace *ns; int err, i; + int idx; + int j; - ft_attr.max_fte = ldev->ports; + ft_attr.max_fte = ldev->ports * ldev->buckets; ft_attr.level = MLX5_LAG_FT_LEVEL_DEFINER; ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_PORT_SEL); @@ -63,7 +65,7 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, lag_definer->fg = mlx5_create_hash_flow_group(lag_definer->ft, lag_definer->definer, - ldev->ports); + ft_attr.max_fte); if (IS_ERR(lag_definer->fg)) { err = PTR_ERR(lag_definer->fg); goto destroy_ft; @@ 
-73,18 +75,24 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; flow_act.flags |= FLOW_ACT_NO_APPEND; for (i = 0; i < ldev->ports; i++) { - u8 affinity = ports[i]; - - dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev, - vhca_id); - lag_definer->rules[i] = mlx5_add_flow_rules(lag_definer->ft, - NULL, &flow_act, - &dest, 1); - if (IS_ERR(lag_definer->rules[i])) { - err = PTR_ERR(lag_definer->rules[i]); - while (i--) - mlx5_del_flow_rules(lag_definer->rules[i]); - goto destroy_fg; + for (j = 0; j < ldev->buckets; j++) { + u8 affinity; + + idx = i * ldev->buckets + j; + affinity = ports[idx]; + + dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev, + vhca_id); + lag_definer->rules[idx] = mlx5_add_flow_rules(lag_definer->ft, + NULL, &flow_act, + &dest, 1); + if (IS_ERR(lag_definer->rules[idx])) { + err = PTR_ERR(lag_definer->rules[idx]); + /* rules[0..idx-1] were created in index order; unwind them all */ + while (idx--) + mlx5_del_flow_rules(lag_definer->rules[idx]); + goto destroy_fg; + } } } @@ -330,10 +338,16 @@ static void mlx5_lag_destroy_definer(struct mlx5_lag *ldev, struct mlx5_lag_definer *lag_definer) { struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + int idx; int i; + int j; - for (i = 0; i < ldev->ports; i++) - mlx5_del_flow_rules(lag_definer->rules[i]); + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; + mlx5_del_flow_rules(lag_definer->rules[idx]); + } + } mlx5_destroy_flow_group(lag_definer->fg); mlx5_destroy_flow_table(lag_definer->ft); mlx5_destroy_match_definer(dev, lag_definer->definer); @@ -544,31 +558,28 @@ int mlx5_lag_port_sel_create(struct mlx5_lag *ldev, return err; } -static int -mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, - struct mlx5_lag_definer **definers, - u8 *ports) +static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, + struct mlx5_lag_definer *def, + u8 *ports) { - struct mlx5_lag_port_sel *port_sel 
= &ldev->port_sel; struct mlx5_flow_destination dest = {}; + int idx; int err; - int tt; int i; + int j; dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; - for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { - struct mlx5_flow_handle **rules = definers[tt]->rules; - - for (i = 0; i < ldev->ports; i++) { + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; if (ldev->v2p_map[idx] == ports[idx]) continue; - dest.vport.vhca_id = - MLX5_CAP_GEN(ldev->pf[ports[i] - 1].dev, - vhca_id); - err = mlx5_modify_rule_destination(rules[i], - &dest, NULL); + + dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev, + vhca_id); + err = mlx5_modify_rule_destination(def->rules[idx], &dest, NULL); if (err) return err; } @@ -577,6 +588,24 @@ mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, return 0; } +static int +mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, + struct mlx5_lag_definer **definers, + u8 *ports) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + int err; + int tt; + + for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { + err = __mlx5_lag_modify_definers_destinations(ldev, definers[tt], ports); + if (err) + return err; + } + + return 0; +} + int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports) { struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h index 79852ac41dbca1a6b47d71fb75616340861fbb7a..5ec3af2a3ecd9b34fb6b4b8fa4fa40dc8d0e767b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h @@ -10,7 +10,10 @@ struct mlx5_lag_definer { struct mlx5_flow_definer *definer; struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; - struct mlx5_flow_handle *rules[MLX5_MAX_PORTS]; + /* Each port has ldev->buckets number of rules 
and they are arranged in + * [port * buckets .. port * buckets + buckets) locations + */ + struct mlx5_flow_handle *rules[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS]; }; struct mlx5_lag_ttc {