| /* |
| * Copyright (c) 2016 Cisco and/or its affiliates. |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at: |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* |
| * WARNING! |
| * This driver is not intended for production use and it is unsupported. |
| * It is provided for educational use only. |
| * Please use supported DPDK driver instead. |
| */ |
| |
| #if __x86_64__ || __i386__ |
| #include <vppinfra/vector.h> |
| |
| #ifndef CLIB_HAVE_VEC128 |
| #warning HACK: ixge driver wont really work, missing u32x4 |
| typedef unsigned long long u32x4; |
| #endif |
| |
| #include <vlib/vlib.h> |
| #include <vlib/unix/unix.h> |
| #include <vlib/pci/pci.h> |
| #include <vnet/vnet.h> |
| #include <ixge/ixge.h> |
| #include <vnet/ethernet/ethernet.h> |
| #include <vnet/plugin/plugin.h> |
| #include <vpp/app/version.h> |
| |
| #define IXGE_ALWAYS_POLL 0 |
| |
| #define EVENT_SET_FLAGS 0 |
| #define IXGE_HWBP_RACE_ELOG 0 |
| |
| #define PCI_VENDOR_ID_INTEL 0x8086 |
| |
| /* 10 GIG E (XGE) PHY IEEE 802.3 clause 45 definitions. */ |
| #define XGE_PHY_DEV_TYPE_PMA_PMD 1 |
| #define XGE_PHY_DEV_TYPE_PHY_XS 4 |
| #define XGE_PHY_ID1 0x2 |
| #define XGE_PHY_ID2 0x3 |
| #define XGE_PHY_CONTROL 0x0 |
| #define XGE_PHY_CONTROL_RESET (1 << 15) |
| |
| ixge_main_t ixge_main; |
| static vlib_node_registration_t ixge_input_node; |
| static vlib_node_registration_t ixge_process_node; |
| |
| static void |
| ixge_semaphore_get (ixge_device_t * xd) |
| { |
| ixge_main_t *xm = &ixge_main; |
| vlib_main_t *vm = xm->vlib_main; |
| ixge_regs_t *r = xd->regs; |
| u32 i; |
| |
| i = 0; |
| while (!(r->software_semaphore & (1 << 0))) |
| { |
| if (i > 0) |
| vlib_process_suspend (vm, 100e-6); |
| i++; |
| } |
| do |
| { |
| r->software_semaphore |= 1 << 1; |
| } |
| while (!(r->software_semaphore & (1 << 1))); |
| } |
| |
| static void |
| ixge_semaphore_release (ixge_device_t * xd) |
| { |
| ixge_regs_t *r = xd->regs; |
| r->software_semaphore &= ~3; |
| } |
| |
| static void |
| ixge_software_firmware_sync (ixge_device_t * xd, u32 sw_mask) |
| { |
| ixge_main_t *xm = &ixge_main; |
| vlib_main_t *vm = xm->vlib_main; |
| ixge_regs_t *r = xd->regs; |
| u32 fw_mask = sw_mask << 5; |
| u32 m, done = 0; |
| |
| while (!done) |
| { |
| ixge_semaphore_get (xd); |
| m = r->software_firmware_sync; |
| done = (m & fw_mask) == 0; |
| if (done) |
| r->software_firmware_sync = m | sw_mask; |
| ixge_semaphore_release (xd); |
| if (!done) |
| vlib_process_suspend (vm, 10e-3); |
| } |
| } |
| |
| static void |
| ixge_software_firmware_sync_release (ixge_device_t * xd, u32 sw_mask) |
| { |
| ixge_regs_t *r = xd->regs; |
| ixge_semaphore_get (xd); |
| r->software_firmware_sync &= ~sw_mask; |
| ixge_semaphore_release (xd); |
| } |
| |
| u32 |
| ixge_read_write_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index, |
| u32 v, u32 is_read) |
| { |
| ixge_regs_t *r = xd->regs; |
| const u32 busy_bit = 1 << 30; |
| u32 x; |
| |
| ASSERT (xd->phy_index < 2); |
| ixge_software_firmware_sync (xd, 1 << (1 + xd->phy_index)); |
| |
| ASSERT (reg_index < (1 << 16)); |
| ASSERT (dev_type < (1 << 5)); |
| if (!is_read) |
| r->xge_mac.phy_data = v; |
| |
| /* Address cycle. */ |
| x = |
| reg_index | (dev_type << 16) | (xd-> |
| phys[xd->phy_index].mdio_address << 21); |
| r->xge_mac.phy_command = x | busy_bit; |
| /* Busy wait timed to take 28e-6 secs. No suspend. */ |
| while (r->xge_mac.phy_command & busy_bit) |
| ; |
| |
| r->xge_mac.phy_command = x | ((is_read ? 2 : 1) << 26) | busy_bit; |
| while (r->xge_mac.phy_command & busy_bit) |
| ; |
| |
| if (is_read) |
| v = r->xge_mac.phy_data >> 16; |
| |
| ixge_software_firmware_sync_release (xd, 1 << (1 + xd->phy_index)); |
| |
| return v; |
| } |
| |
| static u32 |
| ixge_read_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index) |
| { |
| return ixge_read_write_phy_reg (xd, dev_type, reg_index, 0, /* is_read */ |
| 1); |
| } |
| |
| static void |
| ixge_write_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index, u32 v) |
| { |
| (void) ixge_read_write_phy_reg (xd, dev_type, reg_index, v, /* is_read */ |
| 0); |
| } |
| |
| static void |
| ixge_i2c_put_bits (i2c_bus_t * b, int scl, int sda) |
| { |
| ixge_main_t *xm = &ixge_main; |
| ixge_device_t *xd = vec_elt_at_index (xm->devices, b->private_data); |
| u32 v; |
| |
| v = 0; |
| v |= (sda != 0) << 3; |
| v |= (scl != 0) << 1; |
| xd->regs->i2c_control = v; |
| } |
| |
| static void |
| ixge_i2c_get_bits (i2c_bus_t * b, int *scl, int *sda) |
| { |
| ixge_main_t *xm = &ixge_main; |
| ixge_device_t *xd = vec_elt_at_index (xm->devices, b->private_data); |
| u32 v; |
| |
| v = xd->regs->i2c_control; |
| *sda = (v & (1 << 2)) != 0; |
| *scl = (v & (1 << 0)) != 0; |
| } |
| |
| static u16 |
| ixge_read_eeprom (ixge_device_t * xd, u32 address) |
| { |
| ixge_regs_t *r = xd->regs; |
| u32 v; |
| r->eeprom_read = (( /* start bit */ (1 << 0)) | (address << 2)); |
| /* Wait for done bit. */ |
| while (!((v = r->eeprom_read) & (1 << 1))) |
| ; |
| return v >> 16; |
| } |
| |
| static void |
| ixge_sfp_enable_disable_laser (ixge_device_t * xd, uword enable) |
| { |
| u32 tx_disable_bit = 1 << 3; |
| if (enable) |
| xd->regs->sdp_control &= ~tx_disable_bit; |
| else |
| xd->regs->sdp_control |= tx_disable_bit; |
| } |
| |
| static void |
| ixge_sfp_enable_disable_10g (ixge_device_t * xd, uword enable) |
| { |
| u32 is_10g_bit = 1 << 5; |
| if (enable) |
| xd->regs->sdp_control |= is_10g_bit; |
| else |
| xd->regs->sdp_control &= ~is_10g_bit; |
| } |
| |
| static clib_error_t * |
| ixge_sfp_phy_init_from_eeprom (ixge_device_t * xd, u16 sfp_type) |
| { |
| u16 a, id, reg_values_addr = 0; |
| |
| a = ixge_read_eeprom (xd, 0x2b); |
| if (a == 0 || a == 0xffff) |
| return clib_error_create ("no init sequence in eeprom"); |
| |
| while (1) |
| { |
| id = ixge_read_eeprom (xd, ++a); |
| if (id == 0xffff) |
| break; |
| reg_values_addr = ixge_read_eeprom (xd, ++a); |
| if (id == sfp_type) |
| break; |
| } |
| if (id != sfp_type) |
| return clib_error_create ("failed to find id 0x%x", sfp_type); |
| |
| ixge_software_firmware_sync (xd, 1 << 3); |
| while (1) |
| { |
| u16 v = ixge_read_eeprom (xd, ++reg_values_addr); |
| if (v == 0xffff) |
| break; |
| xd->regs->core_analog_config = v; |
| } |
| ixge_software_firmware_sync_release (xd, 1 << 3); |
| |
| /* Make sure laser is off. We'll turn on the laser when |
| the interface is brought up. */ |
| ixge_sfp_enable_disable_laser (xd, /* enable */ 0); |
| ixge_sfp_enable_disable_10g (xd, /* is_10g */ 1); |
| |
| return 0; |
| } |
| |
| static void |
| ixge_sfp_device_up_down (ixge_device_t * xd, uword is_up) |
| { |
| u32 v; |
| |
| if (is_up) |
| { |
| /* pma/pmd 10g serial SFI. */ |
| xd->regs->xge_mac.auto_negotiation_control2 &= ~(3 << 16); |
| xd->regs->xge_mac.auto_negotiation_control2 |= 2 << 16; |
| |
| v = xd->regs->xge_mac.auto_negotiation_control; |
| v &= ~(7 << 13); |
| v |= (0 << 13); |
| /* Restart autoneg. */ |
| v |= (1 << 12); |
| xd->regs->xge_mac.auto_negotiation_control = v; |
| |
| while (!(xd->regs->xge_mac.link_partner_ability[0] & 0xf0000)) |
| ; |
| |
| v = xd->regs->xge_mac.auto_negotiation_control; |
| |
| /* link mode 10g sfi serdes */ |
| v &= ~(7 << 13); |
| v |= (3 << 13); |
| |
| /* Restart autoneg. */ |
| v |= (1 << 12); |
| xd->regs->xge_mac.auto_negotiation_control = v; |
| |
| xd->regs->xge_mac.link_status; |
| } |
| |
| ixge_sfp_enable_disable_laser (xd, /* enable */ is_up); |
| |
| /* Give time for link partner to notice that we're up. */ |
| if (is_up && vlib_in_process_context (vlib_get_main ())) |
| { |
| vlib_process_suspend (vlib_get_main (), 300e-3); |
| } |
| } |
| |
| always_inline ixge_dma_regs_t * |
| get_dma_regs (ixge_device_t * xd, vlib_rx_or_tx_t rt, u32 qi) |
| { |
| ixge_regs_t *r = xd->regs; |
| ASSERT (qi < 128); |
| if (rt == VLIB_RX) |
| return qi < 64 ? &r->rx_dma0[qi] : &r->rx_dma1[qi - 64]; |
| else |
| return &r->tx_dma[qi]; |
| } |
| |
| static clib_error_t * |
| ixge_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) |
| { |
| vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); |
| uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; |
| ixge_main_t *xm = &ixge_main; |
| ixge_device_t *xd = vec_elt_at_index (xm->devices, hif->dev_instance); |
| ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, 0); |
| |
| if (is_up) |
| { |
| xd->regs->rx_enable |= 1; |
| xd->regs->tx_dma_control |= 1; |
| dr->control |= 1 << 25; |
| while (!(dr->control & (1 << 25))) |
| ; |
| } |
| else |
| { |
| xd->regs->rx_enable &= ~1; |
| xd->regs->tx_dma_control &= ~1; |
| } |
| |
| ixge_sfp_device_up_down (xd, is_up); |
| |
| return /* no error */ 0; |
| } |
| |
| static void |
| ixge_sfp_phy_init (ixge_device_t * xd) |
| { |
| ixge_phy_t *phy = xd->phys + xd->phy_index; |
| i2c_bus_t *ib = &xd->i2c_bus; |
| |
| ib->private_data = xd->device_index; |
| ib->put_bits = ixge_i2c_put_bits; |
| ib->get_bits = ixge_i2c_get_bits; |
| vlib_i2c_init (ib); |
| |
| vlib_i2c_read_eeprom (ib, 0x50, 0, 128, (u8 *) & xd->sfp_eeprom); |
| |
| if (vlib_i2c_bus_timed_out (ib) || !sfp_eeprom_is_valid (&xd->sfp_eeprom)) |
| xd->sfp_eeprom.id = SFP_ID_unknown; |
| else |
| { |
| /* FIXME 5 => SR/LR eeprom ID. */ |
| clib_error_t *e = |
| ixge_sfp_phy_init_from_eeprom (xd, 5 + xd->pci_function); |
| if (e) |
| clib_error_report (e); |
| } |
| |
| phy->mdio_address = ~0; |
| } |
| |
| static void |
| ixge_phy_init (ixge_device_t * xd) |
| { |
| ixge_main_t *xm = &ixge_main; |
| vlib_main_t *vm = xm->vlib_main; |
| ixge_phy_t *phy = xd->phys + xd->phy_index; |
| |
| switch (xd->device_id) |
| { |
| case IXGE_82599_sfp: |
| case IXGE_82599_sfp_em: |
| case IXGE_82599_sfp_fcoe: |
| /* others? */ |
| return ixge_sfp_phy_init (xd); |
| |
| default: |
| break; |
| } |
| |
| /* Probe address of phy. */ |
| { |
| u32 i, v; |
| |
| phy->mdio_address = ~0; |
| for (i = 0; i < 32; i++) |
| { |
| phy->mdio_address = i; |
| v = ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID1); |
| if (v != 0xffff && v != 0) |
| break; |
| } |
| |
| /* No PHY found? */ |
| if (i >= 32) |
| return; |
| } |
| |
| phy->id = |
| ((ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID1) << 16) | |
| ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID2)); |
| |
| { |
| ELOG_TYPE_DECLARE (e) = |
| { |
| .function = (char *) __FUNCTION__,.format = |
| "ixge %d, phy id 0x%d mdio address %d",.format_args = "i4i4i4",}; |
| struct |
| { |
| u32 instance, id, address; |
| } *ed; |
| ed = ELOG_DATA (&vm->elog_main, e); |
| ed->instance = xd->device_index; |
| ed->id = phy->id; |
| ed->address = phy->mdio_address; |
| } |
| |
| /* Reset phy. */ |
| ixge_write_phy_reg (xd, XGE_PHY_DEV_TYPE_PHY_XS, XGE_PHY_CONTROL, |
| XGE_PHY_CONTROL_RESET); |
| |
| /* Wait for self-clearning reset bit to clear. */ |
| do |
| { |
| vlib_process_suspend (vm, 1e-3); |
| } |
| while (ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PHY_XS, XGE_PHY_CONTROL) & |
| XGE_PHY_CONTROL_RESET); |
| } |
| |
| static u8 * |
| format_ixge_rx_from_hw_descriptor (u8 * s, va_list * va) |
| { |
| ixge_rx_from_hw_descriptor_t *d = |
| va_arg (*va, ixge_rx_from_hw_descriptor_t *); |
| u32 s0 = d->status[0], s2 = d->status[2]; |
| u32 is_ip4, is_ip6, is_ip, is_tcp, is_udp; |
| uword indent = format_get_indent (s); |
| |
| s = format (s, "%s-owned", |
| (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE) ? "sw" : |
| "hw"); |
| s = |
| format (s, ", length this descriptor %d, l3 offset %d", |
| d->n_packet_bytes_this_descriptor, |
| IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s0)); |
| if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) |
| s = format (s, ", end-of-packet"); |
| |
| s = format (s, "\n%U", format_white_space, indent); |
| |
| if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_ETHERNET_ERROR) |
| s = format (s, "layer2 error"); |
| |
| if (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_LAYER2) |
| { |
| s = format (s, "layer 2 type %d", (s0 & 0x1f)); |
| return s; |
| } |
| |
| if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_VLAN) |
| s = format (s, "vlan header 0x%x\n%U", d->vlan_tag, |
| format_white_space, indent); |
| |
| if ((is_ip4 = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4))) |
| { |
| s = format (s, "ip4%s", |
| (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4_EXT) ? " options" : |
| ""); |
| if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED) |
| s = format (s, " checksum %s", |
| (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) ? |
| "bad" : "ok"); |
| } |
| if ((is_ip6 = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6))) |
| s = format (s, "ip6%s", |
| (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6_EXT) ? " extended" : |
| ""); |
| is_tcp = is_udp = 0; |
| if ((is_ip = (is_ip4 | is_ip6))) |
| { |
| is_tcp = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_TCP) != 0; |
| is_udp = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_UDP) != 0; |
| if (is_tcp) |
| s = format (s, ", tcp"); |
| if (is_udp) |
| s = format (s, ", udp"); |
| } |
| |
| if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED) |
| s = format (s, ", tcp checksum %s", |
| (s2 & IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR) ? "bad" : |
| "ok"); |
| if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED) |
| s = format (s, ", udp checksum %s", |
| (s2 & IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR) ? "bad" : |
| "ok"); |
| |
| return s; |
| } |
| |
| static u8 * |
| format_ixge_tx_descriptor (u8 * s, va_list * va) |
| { |
| ixge_tx_descriptor_t *d = va_arg (*va, ixge_tx_descriptor_t *); |
| u32 s0 = d->status0, s1 = d->status1; |
| uword indent = format_get_indent (s); |
| u32 v; |
| |
| s = format (s, "buffer 0x%Lx, %d packet bytes, %d bytes this buffer", |
| d->buffer_address, s1 >> 14, d->n_bytes_this_buffer); |
| |
| s = format (s, "\n%U", format_white_space, indent); |
| |
| if ((v = (s0 >> 0) & 3)) |
| s = format (s, "reserved 0x%x, ", v); |
| |
| if ((v = (s0 >> 2) & 3)) |
| s = format (s, "mac 0x%x, ", v); |
| |
| if ((v = (s0 >> 4) & 0xf) != 3) |
| s = format (s, "type 0x%x, ", v); |
| |
| s = format (s, "%s%s%s%s%s%s%s%s", |
| (s0 & (1 << 8)) ? "eop, " : "", |
| (s0 & (1 << 9)) ? "insert-fcs, " : "", |
| (s0 & (1 << 10)) ? "reserved26, " : "", |
| (s0 & (1 << 11)) ? "report-status, " : "", |
| (s0 & (1 << 12)) ? "reserved28, " : "", |
| (s0 & (1 << 13)) ? "is-advanced, " : "", |
| (s0 & (1 << 14)) ? "vlan-enable, " : "", |
| (s0 & (1 << 15)) ? "tx-segmentation, " : ""); |
| |
| if ((v = s1 & 0xf) != 0) |
| s = format (s, "status 0x%x, ", v); |
| |
| if ((v = (s1 >> 4) & 0xf)) |
| s = format (s, "context 0x%x, ", v); |
| |
| if ((v = (s1 >> 8) & 0x3f)) |
| s = format (s, "options 0x%x, ", v); |
| |
| return s; |
| } |
| |
| typedef struct |
| { |
| ixge_descriptor_t before, after; |
| |
| u32 buffer_index; |
| |
| u16 device_index; |
| |
| u8 queue_index; |
| |
| u8 is_start_of_packet; |
| |
| /* Copy of VLIB buffer; packet data stored in pre_data. */ |
| vlib_buffer_t buffer; |
| } ixge_rx_dma_trace_t; |
| |
| static u8 * |
| format_ixge_rx_dma_trace (u8 * s, va_list * va) |
| { |
| CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); |
| vlib_node_t *node = va_arg (*va, vlib_node_t *); |
| vnet_main_t *vnm = vnet_get_main (); |
| ixge_rx_dma_trace_t *t = va_arg (*va, ixge_rx_dma_trace_t *); |
| ixge_main_t *xm = &ixge_main; |
| ixge_device_t *xd = vec_elt_at_index (xm->devices, t->device_index); |
| format_function_t *f; |
| uword indent = format_get_indent (s); |
| |
| { |
| vnet_sw_interface_t *sw = |
| vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); |
| s = |
| format (s, "%U rx queue %d", format_vnet_sw_interface_name, vnm, sw, |
| t->queue_index); |
| } |
| |
| s = format (s, "\n%Ubefore: %U", |
| format_white_space, indent, |
| format_ixge_rx_from_hw_descriptor, &t->before); |
| s = format (s, "\n%Uafter : head/tail address 0x%Lx/0x%Lx", |
| format_white_space, indent, |
| t->after.rx_to_hw.head_address, t->after.rx_to_hw.tail_address); |
| |
| s = format (s, "\n%Ubuffer 0x%x: %U", |
| format_white_space, indent, |
| t->buffer_index, format_vlib_buffer, &t->buffer); |
| |
| s = format (s, "\n%U", format_white_space, indent); |
| |
| f = node->format_buffer; |
| if (!f || !t->is_start_of_packet) |
| f = format_hex_bytes; |
| s = format (s, "%U", f, t->buffer.pre_data, sizeof (t->buffer.pre_data)); |
| |
| return s; |
| } |
| |
| #define foreach_ixge_error \ |
| _ (none, "no error") \ |
| _ (tx_full_drops, "tx ring full drops") \ |
| _ (ip4_checksum_error, "ip4 checksum errors") \ |
| _ (rx_alloc_fail, "rx buf alloc from free list failed") \ |
| _ (rx_alloc_no_physmem, "rx buf alloc failed no physmem") |
| |
| typedef enum |
| { |
| #define _(f,s) IXGE_ERROR_##f, |
| foreach_ixge_error |
| #undef _ |
| IXGE_N_ERROR, |
| } ixge_error_t; |
| |
| always_inline void |
| ixge_rx_next_and_error_from_status_x1 (ixge_device_t * xd, |
| u32 s00, u32 s02, |
| u8 * next0, u8 * error0, u32 * flags0) |
| { |
| u8 is0_ip4, is0_ip6, n0, e0; |
| u32 f0; |
| |
| e0 = IXGE_ERROR_none; |
| n0 = IXGE_RX_NEXT_ETHERNET_INPUT; |
| |
| is0_ip4 = s02 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED; |
| n0 = is0_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n0; |
| |
| e0 = (is0_ip4 && (s02 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) |
| ? IXGE_ERROR_ip4_checksum_error : e0); |
| |
| is0_ip6 = s00 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6; |
| n0 = is0_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n0; |
| |
| n0 = (xd->per_interface_next_index != ~0) ? |
| xd->per_interface_next_index : n0; |
| |
| /* Check for error. */ |
| n0 = e0 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n0; |
| |
| f0 = ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED |
| | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)) |
| ? IP_BUFFER_L4_CHECKSUM_COMPUTED : 0); |
| |
| f0 |= ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR |
| | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR)) |
| ? 0 : IP_BUFFER_L4_CHECKSUM_CORRECT); |
| |
| *error0 = e0; |
| *next0 = n0; |
| *flags0 = f0; |
| } |
| |
| always_inline void |
| ixge_rx_next_and_error_from_status_x2 (ixge_device_t * xd, |
| u32 s00, u32 s02, |
| u32 s10, u32 s12, |
| u8 * next0, u8 * error0, u32 * flags0, |
| u8 * next1, u8 * error1, u32 * flags1) |
| { |
| u8 is0_ip4, is0_ip6, n0, e0; |
| u8 is1_ip4, is1_ip6, n1, e1; |
| u32 f0, f1; |
| |
| e0 = e1 = IXGE_ERROR_none; |
| n0 = n1 = IXGE_RX_NEXT_IP4_INPUT; |
| |
| is0_ip4 = s02 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED; |
| is1_ip4 = s12 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED; |
| |
| n0 = is0_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n0; |
| n1 = is1_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n1; |
| |
| e0 = (is0_ip4 && (s02 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) |
| ? IXGE_ERROR_ip4_checksum_error : e0); |
| e1 = (is1_ip4 && (s12 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) |
| ? IXGE_ERROR_ip4_checksum_error : e1); |
| |
| is0_ip6 = s00 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6; |
| is1_ip6 = s10 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6; |
| |
| n0 = is0_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n0; |
| n1 = is1_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n1; |
| |
| n0 = (xd->per_interface_next_index != ~0) ? |
| xd->per_interface_next_index : n0; |
| n1 = (xd->per_interface_next_index != ~0) ? |
| xd->per_interface_next_index : n1; |
| |
| /* Check for error. */ |
| n0 = e0 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n0; |
| n1 = e1 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n1; |
| |
| *error0 = e0; |
| *error1 = e1; |
| |
| *next0 = n0; |
| *next1 = n1; |
| |
| f0 = ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED |
| | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)) |
| ? IP_BUFFER_L4_CHECKSUM_COMPUTED : 0); |
| f1 = ((s12 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED |
| | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)) |
| ? IP_BUFFER_L4_CHECKSUM_COMPUTED : 0); |
| |
| f0 |= ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR |
| | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR)) |
| ? 0 : IP_BUFFER_L4_CHECKSUM_CORRECT); |
| f1 |= ((s12 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR |
| | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR)) |
| ? 0 : IP_BUFFER_L4_CHECKSUM_CORRECT); |
| |
| *flags0 = f0; |
| *flags1 = f1; |
| } |
| |
| static void |
| ixge_rx_trace (ixge_main_t * xm, |
| ixge_device_t * xd, |
| ixge_dma_queue_t * dq, |
| ixge_descriptor_t * before_descriptors, |
| u32 * before_buffers, |
| ixge_descriptor_t * after_descriptors, uword n_descriptors) |
| { |
| vlib_main_t *vm = xm->vlib_main; |
| vlib_node_runtime_t *node = dq->rx.node; |
| ixge_rx_from_hw_descriptor_t *bd; |
| ixge_rx_to_hw_descriptor_t *ad; |
| u32 *b, n_left, is_sop, next_index_sop; |
| |
| n_left = n_descriptors; |
| b = before_buffers; |
| bd = &before_descriptors->rx_from_hw; |
| ad = &after_descriptors->rx_to_hw; |
| is_sop = dq->rx.is_start_of_packet; |
| next_index_sop = dq->rx.saved_start_of_packet_next_index; |
| |
| while (n_left >= 2) |
| { |
| u32 bi0, bi1, flags0, flags1; |
| vlib_buffer_t *b0, *b1; |
| ixge_rx_dma_trace_t *t0, *t1; |
| u8 next0, error0, next1, error1; |
| |
| bi0 = b[0]; |
| bi1 = b[1]; |
| n_left -= 2; |
| |
| b0 = vlib_get_buffer (vm, bi0); |
| b1 = vlib_get_buffer (vm, bi1); |
| |
| ixge_rx_next_and_error_from_status_x2 (xd, |
| bd[0].status[0], bd[0].status[2], |
| bd[1].status[0], bd[1].status[2], |
| &next0, &error0, &flags0, |
| &next1, &error1, &flags1); |
| |
| next_index_sop = is_sop ? next0 : next_index_sop; |
| vlib_trace_buffer (vm, node, next_index_sop, b0, /* follow_chain */ 0); |
| t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); |
| t0->is_start_of_packet = is_sop; |
| is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; |
| |
| next_index_sop = is_sop ? next1 : next_index_sop; |
| vlib_trace_buffer (vm, node, next_index_sop, b1, /* follow_chain */ 0); |
| t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); |
| t1->is_start_of_packet = is_sop; |
| is_sop = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; |
| |
| t0->queue_index = dq->queue_index; |
| t1->queue_index = dq->queue_index; |
| t0->device_index = xd->device_index; |
| t1->device_index = xd->device_index; |
| t0->before.rx_from_hw = bd[0]; |
| t1->before.rx_from_hw = bd[1]; |
| t0->after.rx_to_hw = ad[0]; |
| t1->after.rx_to_hw = ad[1]; |
| t0->buffer_index = bi0; |
| t1->buffer_index = bi1; |
| memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); |
| memcpy (&t1->buffer, b1, sizeof (b1[0]) - sizeof (b0->pre_data)); |
| memcpy (t0->buffer.pre_data, b0->data + b0->current_data, |
| sizeof (t0->buffer.pre_data)); |
| memcpy (t1->buffer.pre_data, b1->data + b1->current_data, |
| sizeof (t1->buffer.pre_data)); |
| |
| b += 2; |
| bd += 2; |
| ad += 2; |
| } |
| |
| while (n_left >= 1) |
| { |
| u32 bi0, flags0; |
| vlib_buffer_t *b0; |
| ixge_rx_dma_trace_t *t0; |
| u8 next0, error0; |
| |
| bi0 = b[0]; |
| n_left -= 1; |
| |
| b0 = vlib_get_buffer (vm, bi0); |
| |
| ixge_rx_next_and_error_from_status_x1 (xd, |
| bd[0].status[0], bd[0].status[2], |
| &next0, &error0, &flags0); |
| |
| next_index_sop = is_sop ? next0 : next_index_sop; |
| vlib_trace_buffer (vm, node, next_index_sop, b0, /* follow_chain */ 0); |
| t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); |
| t0->is_start_of_packet = is_sop; |
| is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; |
| |
| t0->queue_index = dq->queue_index; |
| t0->device_index = xd->device_index; |
| t0->before.rx_from_hw = bd[0]; |
| t0->after.rx_to_hw = ad[0]; |
| t0->buffer_index = bi0; |
| memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); |
| memcpy (t0->buffer.pre_data, b0->data + b0->current_data, |
| sizeof (t0->buffer.pre_data)); |
| |
| b += 1; |
| bd += 1; |
| ad += 1; |
| } |
| } |
| |
| typedef struct |
| { |
| ixge_tx_descriptor_t descriptor; |
| |
| u32 buffer_index; |
| |
| u16 device_index; |
| |
| u8 queue_index; |
| |
| u8 is_start_of_packet; |
| |
| /* Copy of VLIB buffer; packet data stored in pre_data. */ |
| vlib_buffer_t buffer; |
| } ixge_tx_dma_trace_t; |
| |
| static u8 * |
| format_ixge_tx_dma_trace (u8 * s, va_list * va) |
| { |
| CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); |
| CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); |
| ixge_tx_dma_trace_t *t = va_arg (*va, ixge_tx_dma_trace_t *); |
| vnet_main_t *vnm = vnet_get_main (); |
| ixge_main_t *xm = &ixge_main; |
| ixge_device_t *xd = vec_elt_at_index (xm->devices, t->device_index); |
| format_function_t *f; |
| uword indent = format_get_indent (s); |
| |
| { |
| vnet_sw_interface_t *sw = |
| vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); |
| s = |
| format (s, "%U tx queue %d", format_vnet_sw_interface_name, vnm, sw, |
| t->queue_index); |
| } |
| |
| s = format (s, "\n%Udescriptor: %U", |
| format_white_space, indent, |
| format_ixge_tx_descriptor, &t->descriptor); |
| |
| s = format (s, "\n%Ubuffer 0x%x: %U", |
| format_white_space, indent, |
| t->buffer_index, format_vlib_buffer, &t->buffer); |
| |
| s = format (s, "\n%U", format_white_space, indent); |
| |
| f = format_ethernet_header_with_length; |
| if (!f || !t->is_start_of_packet) |
| f = format_hex_bytes; |
| s = format (s, "%U", f, t->buffer.pre_data, sizeof (t->buffer.pre_data)); |
| |
| return s; |
| } |
| |
| typedef struct |
| { |
| vlib_node_runtime_t *node; |
| |
| u32 is_start_of_packet; |
| |
| u32 n_bytes_in_packet; |
| |
| ixge_tx_descriptor_t *start_of_packet_descriptor; |
| } ixge_tx_state_t; |
| |
| static void |
| ixge_tx_trace (ixge_main_t * xm, |
| ixge_device_t * xd, |
| ixge_dma_queue_t * dq, |
| ixge_tx_state_t * tx_state, |
| ixge_tx_descriptor_t * descriptors, |
| u32 * buffers, uword n_descriptors) |
| { |
| vlib_main_t *vm = xm->vlib_main; |
| vlib_node_runtime_t *node = tx_state->node; |
| ixge_tx_descriptor_t *d; |
| u32 *b, n_left, is_sop; |
| |
| n_left = n_descriptors; |
| b = buffers; |
| d = descriptors; |
| is_sop = tx_state->is_start_of_packet; |
| |
| while (n_left >= 2) |
| { |
| u32 bi0, bi1; |
| vlib_buffer_t *b0, *b1; |
| ixge_tx_dma_trace_t *t0, *t1; |
| |
| bi0 = b[0]; |
| bi1 = b[1]; |
| n_left -= 2; |
| |
| b0 = vlib_get_buffer (vm, bi0); |
| b1 = vlib_get_buffer (vm, bi1); |
| |
| t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); |
| t0->is_start_of_packet = is_sop; |
| is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; |
| |
| t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); |
| t1->is_start_of_packet = is_sop; |
| is_sop = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; |
| |
| t0->queue_index = dq->queue_index; |
| t1->queue_index = dq->queue_index; |
| t0->device_index = xd->device_index; |
| t1->device_index = xd->device_index; |
| t0->descriptor = d[0]; |
| t1->descriptor = d[1]; |
| t0->buffer_index = bi0; |
| t1->buffer_index = bi1; |
| memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); |
| memcpy (&t1->buffer, b1, sizeof (b1[0]) - sizeof (b0->pre_data)); |
| memcpy (t0->buffer.pre_data, b0->data + b0->current_data, |
| sizeof (t0->buffer.pre_data)); |
| memcpy (t1->buffer.pre_data, b1->data + b1->current_data, |
| sizeof (t1->buffer.pre_data)); |
| |
| b += 2; |
| d += 2; |
| } |
| |
| while (n_left >= 1) |
| { |
| u32 bi0; |
| vlib_buffer_t *b0; |
| ixge_tx_dma_trace_t *t0; |
| |
| bi0 = b[0]; |
| n_left -= 1; |
| |
| b0 = vlib_get_buffer (vm, bi0); |
| |
| t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); |
| t0->is_start_of_packet = is_sop; |
| is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; |
| |
| t0->queue_index = dq->queue_index; |
| t0->device_index = xd->device_index; |
| t0->descriptor = d[0]; |
| t0->buffer_index = bi0; |
| memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); |
| memcpy (t0->buffer.pre_data, b0->data + b0->current_data, |
| sizeof (t0->buffer.pre_data)); |
| |
| b += 1; |
| d += 1; |
| } |
| } |
| |
| always_inline uword |
| ixge_ring_sub (ixge_dma_queue_t * q, u32 i0, u32 i1) |
| { |
| i32 d = i1 - i0; |
| ASSERT (i0 < q->n_descriptors); |
| ASSERT (i1 < q->n_descriptors); |
| return d < 0 ? q->n_descriptors + d : d; |
| } |
| |
| always_inline uword |
| ixge_ring_add (ixge_dma_queue_t * q, u32 i0, u32 i1) |
| { |
| u32 d = i0 + i1; |
| ASSERT (i0 < q->n_descriptors); |
| ASSERT (i1 < q->n_descriptors); |
| d -= d >= q->n_descriptors ? q->n_descriptors : 0; |
| return d; |
| } |
| |
| always_inline uword |
| ixge_tx_descriptor_matches_template (ixge_main_t * xm, |
| ixge_tx_descriptor_t * d) |
| { |
| u32 cmp; |
| |
| cmp = ((d->status0 & xm->tx_descriptor_template_mask.status0) |
| ^ xm->tx_descriptor_template.status0); |
| if (cmp) |
| return 0; |
| cmp = ((d->status1 & xm->tx_descriptor_template_mask.status1) |
| ^ xm->tx_descriptor_template.status1); |
| if (cmp) |
| return 0; |
| |
| return 1; |
| } |
| |
| static uword |
| ixge_tx_no_wrap (ixge_main_t * xm, |
| ixge_device_t * xd, |
| ixge_dma_queue_t * dq, |
| u32 * buffers, |
| u32 start_descriptor_index, |
| u32 n_descriptors, ixge_tx_state_t * tx_state) |
| { |
| vlib_main_t *vm = xm->vlib_main; |
| ixge_tx_descriptor_t *d, *d_sop; |
| u32 n_left = n_descriptors; |
| u32 *to_free = vec_end (xm->tx_buffers_pending_free); |
| u32 *to_tx = |
| vec_elt_at_index (dq->descriptor_buffer_indices, start_descriptor_index); |
| u32 is_sop = tx_state->is_start_of_packet; |
| u32 len_sop = tx_state->n_bytes_in_packet; |
| u16 template_status = xm->tx_descriptor_template.status0; |
| u32 descriptor_prefetch_rotor = 0; |
| |
| ASSERT (start_descriptor_index + n_descriptors <= dq->n_descriptors); |
| d = &dq->descriptors[start_descriptor_index].tx; |
| d_sop = is_sop ? d : tx_state->start_of_packet_descriptor; |
| |
| while (n_left >= 4) |
| { |
| vlib_buffer_t *b0, *b1; |
| u32 bi0, fi0, len0; |
| u32 bi1, fi1, len1; |
| u8 is_eop0, is_eop1; |
| |
| /* Prefetch next iteration. */ |
| vlib_prefetch_buffer_with_index (vm, buffers[2], LOAD); |
| vlib_prefetch_buffer_with_index (vm, buffers[3], LOAD); |
| |
| if ((descriptor_prefetch_rotor & 0x3) == 0) |
| CLIB_PREFETCH (d + 4, CLIB_CACHE_LINE_BYTES, STORE); |
| |
| descriptor_prefetch_rotor += 2; |
| |
| bi0 = buffers[0]; |
| bi1 = buffers[1]; |
| |
| to_free[0] = fi0 = to_tx[0]; |
| to_tx[0] = bi0; |
| to_free += fi0 != 0; |
| |
| to_free[0] = fi1 = to_tx[1]; |
| to_tx[1] = bi1; |
| to_free += fi1 != 0; |
| |
| buffers += 2; |
| n_left -= 2; |
| to_tx += 2; |
| |
| b0 = vlib_get_buffer (vm, bi0); |
| b1 = vlib_get_buffer (vm, bi1); |
| |
| is_eop0 = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; |
| is_eop1 = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; |
| |
| len0 = b0->current_length; |
| len1 = b1->current_length; |
| |
| ASSERT (ixge_tx_descriptor_matches_template (xm, d + 0)); |
| ASSERT (ixge_tx_descriptor_matches_template (xm, d + 1)); |
| |
| d[0].buffer_address = |
| vlib_get_buffer_data_physical_address (vm, bi0) + b0->current_data; |
| d[1].buffer_address = |
| vlib_get_buffer_data_physical_address (vm, bi1) + b1->current_data; |
| |
| d[0].n_bytes_this_buffer = len0; |
| d[1].n_bytes_this_buffer = len1; |
| |
| d[0].status0 = |
| template_status | (is_eop0 << |
| IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET); |
| d[1].status0 = |
| template_status | (is_eop1 << |
| IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET); |
| |
| len_sop = (is_sop ? 0 : len_sop) + len0; |
| d_sop[0].status1 = |
| IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop); |
| d += 1; |
| d_sop = is_eop0 ? d : d_sop; |
| |
| is_sop = is_eop0; |
| |
| len_sop = (is_sop ? 0 : len_sop) + len1; |
| d_sop[0].status1 = |
| IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop); |
| d += 1; |
| d_sop = is_eop1 ? d : d_sop; |
| |
| is_sop = is_eop1; |
| } |
| |
| while (n_left > 0) |
| { |
| vlib_buffer_t *b0; |
| u32 bi0, fi0, len0; |
| u8 is_eop0; |
| |
| bi0 = buffers[0]; |
| |
| to_free[0] = fi0 = to_tx[0]; |
| to_tx[0] = bi0; |
| to_free += fi0 != 0; |
| |
| buffers += 1; |
| n_left -= 1; |
| to_tx += 1; |
| |
| b0 = vlib_get_buffer (vm, bi0); |
| |
| is_eop0 = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; |
| |
| len0 = b0->current_length; |
| |
| ASSERT (ixge_tx_descriptor_matches_template (xm, d + 0)); |
| |
| d[0].buffer_address = |
| vlib_get_buffer_data_physical_address (vm, bi0) + b0->current_data; |
| |
| d[0].n_bytes_this_buffer = len0; |
| |
| d[0].status0 = |
| template_status | (is_eop0 << |
| IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET); |
| |
| len_sop = (is_sop ? 0 : len_sop) + len0; |
| d_sop[0].status1 = |
| IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop); |
| d += 1; |
| d_sop = is_eop0 ? d : d_sop; |
| |
| is_sop = is_eop0; |
| } |
| |
| if (tx_state->node->flags & VLIB_NODE_FLAG_TRACE) |
| { |
| to_tx = |
| vec_elt_at_index (dq->descriptor_buffer_indices, |
| start_descriptor_index); |
| ixge_tx_trace (xm, xd, dq, tx_state, |
| &dq->descriptors[start_descriptor_index].tx, to_tx, |
| n_descriptors); |
| } |
| |
| _vec_len (xm->tx_buffers_pending_free) = |
| to_free - xm->tx_buffers_pending_free; |
| |
| /* When we are done d_sop can point to end of ring. Wrap it if so. */ |
| { |
| ixge_tx_descriptor_t *d_start = &dq->descriptors[0].tx; |
| |
| ASSERT (d_sop - d_start <= dq->n_descriptors); |
| d_sop = d_sop - d_start == dq->n_descriptors ? d_start : d_sop; |
| } |
| |
| tx_state->is_start_of_packet = is_sop; |
| tx_state->start_of_packet_descriptor = d_sop; |
| tx_state->n_bytes_in_packet = len_sop; |
| |
| return n_descriptors; |
| } |
| |
| static uword |
| ixge_interface_tx (vlib_main_t * vm, |
| vlib_node_runtime_t * node, vlib_frame_t * f) |
| { |
| ixge_main_t *xm = &ixge_main; |
| vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; |
| ixge_device_t *xd = vec_elt_at_index (xm->devices, rd->dev_instance); |
| ixge_dma_queue_t *dq; |
| u32 *from, n_left_tx, n_descriptors_to_tx, n_tail_drop; |
| u32 queue_index = 0; /* fixme parameter */ |
| ixge_tx_state_t tx_state; |
| |
| tx_state.node = node; |
| tx_state.is_start_of_packet = 1; |
| tx_state.start_of_packet_descriptor = 0; |
| tx_state.n_bytes_in_packet = 0; |
| |
| from = vlib_frame_vector_args (f); |
| |
| dq = vec_elt_at_index (xd->dma_queues[VLIB_TX], queue_index); |
| |
| dq->head_index = dq->tx.head_index_write_back[0]; |
| |
| /* Since head == tail means ring is empty we can send up to dq->n_descriptors - 1. */ |
| n_left_tx = dq->n_descriptors - 1; |
| n_left_tx -= ixge_ring_sub (dq, dq->head_index, dq->tail_index); |
| |
| _vec_len (xm->tx_buffers_pending_free) = 0; |
| |
| n_descriptors_to_tx = f->n_vectors; |
| n_tail_drop = 0; |
| if (PREDICT_FALSE (n_descriptors_to_tx > n_left_tx)) |
| { |
| i32 i, n_ok, i_eop, i_sop; |
| |
| i_sop = i_eop = ~0; |
| for (i = n_left_tx - 1; i >= 0; i--) |
| { |
| vlib_buffer_t *b = vlib_get_buffer (vm, from[i]); |
| if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)) |
| { |
| if (i_sop != ~0 && i_eop != ~0) |
| break; |
| i_eop = i; |
| i_sop = i + 1; |
| } |
| } |
| if (i == 0) |
| n_ok = 0; |
| else |
| n_ok = i_eop + 1; |
| |
| { |
| ELOG_TYPE_DECLARE (e) = |
| { |
| .function = (char *) __FUNCTION__,.format = |
| "ixge %d, ring full to tx %d head %d tail %d",.format_args = |
| "i2i2i2i2",}; |
| struct |
| { |
| u16 instance, to_tx, head, tail; |
| } *ed; |
| ed = ELOG_DATA (&vm->elog_main, e); |
| ed->instance = xd->device_index; |
| ed->to_tx = n_descriptors_to_tx; |
| ed->head = dq->head_index; |
| ed->tail = dq->tail_index; |
| } |
| |
| if (n_ok < n_descriptors_to_tx) |
| { |
| n_tail_drop = n_descriptors_to_tx - n_ok; |
| vec_add (xm->tx_buffers_pending_free, from + n_ok, n_tail_drop); |
| vlib_error_count (vm, ixge_input_node.index, |
| IXGE_ERROR_tx_full_drops, n_tail_drop); |
| } |
| |
| n_descriptors_to_tx = n_ok; |
| } |
| |
| dq->tx.n_buffers_on_ring += n_descriptors_to_tx; |
| |
| /* Process from tail to end of descriptor ring. */ |
| if (n_descriptors_to_tx > 0 && dq->tail_index < dq->n_descriptors) |
| { |
| u32 n = |
| clib_min (dq->n_descriptors - dq->tail_index, n_descriptors_to_tx); |
| n = ixge_tx_no_wrap (xm, xd, dq, from, dq->tail_index, n, &tx_state); |
| from += n; |
| n_descriptors_to_tx -= n; |
| dq->tail_index += n; |
| ASSERT (dq->tail_index <= dq->n_descriptors); |
| if (dq->tail_index == dq->n_descriptors) |
| dq->tail_index = 0; |
| } |
| |
| if (n_descriptors_to_tx > 0) |
| { |
| u32 n = |
| ixge_tx_no_wrap (xm, xd, dq, from, 0, n_descriptors_to_tx, &tx_state); |
| from += n; |
| ASSERT (n == n_descriptors_to_tx); |
| dq->tail_index += n; |
| ASSERT (dq->tail_index <= dq->n_descriptors); |
| if (dq->tail_index == dq->n_descriptors) |
| dq->tail_index = 0; |
| } |
| |
| /* We should only get full packets. */ |
| ASSERT (tx_state.is_start_of_packet); |
| |
| /* Report status when last descriptor is done. */ |
| { |
| u32 i = dq->tail_index == 0 ? dq->n_descriptors - 1 : dq->tail_index - 1; |
| ixge_tx_descriptor_t *d = &dq->descriptors[i].tx; |
| d->status0 |= IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS; |
| } |
| |
| /* Give new descriptors to hardware. */ |
| { |
| ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_TX, queue_index); |
| |
| CLIB_MEMORY_BARRIER (); |
| |
| dr->tail_index = dq->tail_index; |
| } |
| |
| /* Free any buffers that are done. */ |
| { |
| u32 n = _vec_len (xm->tx_buffers_pending_free); |
| if (n > 0) |
| { |
| vlib_buffer_free_no_next (vm, xm->tx_buffers_pending_free, n); |
| _vec_len (xm->tx_buffers_pending_free) = 0; |
| ASSERT (dq->tx.n_buffers_on_ring >= n); |
| dq->tx.n_buffers_on_ring -= (n - n_tail_drop); |
| } |
| } |
| |
| return f->n_vectors; |
| } |
| |
| static uword |
| ixge_rx_queue_no_wrap (ixge_main_t * xm, |
| ixge_device_t * xd, |
| ixge_dma_queue_t * dq, |
| u32 start_descriptor_index, u32 n_descriptors) |
| { |
| vlib_main_t *vm = xm->vlib_main; |
| vlib_node_runtime_t *node = dq->rx.node; |
| ixge_descriptor_t *d; |
| static ixge_descriptor_t *d_trace_save; |
| static u32 *d_trace_buffers; |
| u32 n_descriptors_left = n_descriptors; |
| u32 *to_rx = |
| vec_elt_at_index (dq->descriptor_buffer_indices, start_descriptor_index); |
| u32 *to_add; |
| u32 bi_sop = dq->rx.saved_start_of_packet_buffer_index; |
| u32 bi_last = dq->rx.saved_last_buffer_index; |
| u32 next_index_sop = dq->rx.saved_start_of_packet_next_index; |
| u32 is_sop = dq->rx.is_start_of_packet; |
| u32 next_index, n_left_to_next, *to_next; |
| u32 n_packets = 0; |
| u32 n_bytes = 0; |
| u32 n_trace = vlib_get_trace_count (vm, node); |
| vlib_buffer_t *b_last, b_dummy; |
| |
| ASSERT (start_descriptor_index + n_descriptors <= dq->n_descriptors); |
| d = &dq->descriptors[start_descriptor_index]; |
| |
| b_last = bi_last != ~0 ? vlib_get_buffer (vm, bi_last) : &b_dummy; |
| next_index = dq->rx.next_index; |
| |
| if (n_trace > 0) |
| { |
| u32 n = clib_min (n_trace, n_descriptors); |
| if (d_trace_save) |
| { |
| _vec_len (d_trace_save) = 0; |
| _vec_len (d_trace_buffers) = 0; |
| } |
| vec_add (d_trace_save, (ixge_descriptor_t *) d, n); |
| vec_add (d_trace_buffers, to_rx, n); |
| } |
| |
| { |
| uword l = vec_len (xm->rx_buffers_to_add); |
| |
| if (l < n_descriptors_left) |
| { |
| u32 n_to_alloc = 2 * dq->n_descriptors - l; |
| u32 n_allocated; |
| |
| vec_resize (xm->rx_buffers_to_add, n_to_alloc); |
| |
| _vec_len (xm->rx_buffers_to_add) = l; |
| n_allocated = vlib_buffer_alloc_from_free_list |
| (vm, xm->rx_buffers_to_add + l, n_to_alloc, |
| xm->vlib_buffer_free_list_index); |
| _vec_len (xm->rx_buffers_to_add) += n_allocated; |
| |
| /* Handle transient allocation failure */ |
| if (PREDICT_FALSE (l + n_allocated <= n_descriptors_left)) |
| { |
| if (n_allocated == 0) |
| vlib_error_count (vm, ixge_input_node.index, |
| IXGE_ERROR_rx_alloc_no_physmem, 1); |
| else |
| vlib_error_count (vm, ixge_input_node.index, |
| IXGE_ERROR_rx_alloc_fail, 1); |
| |
| n_descriptors_left = l + n_allocated; |
| } |
| n_descriptors = n_descriptors_left; |
| } |
| |
| /* Add buffers from end of vector going backwards. */ |
| to_add = vec_end (xm->rx_buffers_to_add) - 1; |
| } |
| |
| while (n_descriptors_left > 0) |
| { |
| vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); |
| |
| while (n_descriptors_left >= 4 && n_left_to_next >= 2) |
| { |
| vlib_buffer_t *b0, *b1; |
| u32 bi0, fi0, len0, l3_offset0, s20, s00, flags0; |
| u32 bi1, fi1, len1, l3_offset1, s21, s01, flags1; |
| u8 is_eop0, error0, next0; |
| u8 is_eop1, error1, next1; |
| ixge_descriptor_t d0, d1; |
| |
| vlib_prefetch_buffer_with_index (vm, to_rx[2], STORE); |
| vlib_prefetch_buffer_with_index (vm, to_rx[3], STORE); |
| |
| CLIB_PREFETCH (d + 2, 32, STORE); |
| |
| d0.as_u32x4 = d[0].as_u32x4; |
| d1.as_u32x4 = d[1].as_u32x4; |
| |
| s20 = d0.rx_from_hw.status[2]; |
| s21 = d1.rx_from_hw.status[2]; |
| |
| s00 = d0.rx_from_hw.status[0]; |
| s01 = d1.rx_from_hw.status[0]; |
| |
| if (! |
| ((s20 & s21) & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE)) |
| goto found_hw_owned_descriptor_x2; |
| |
| bi0 = to_rx[0]; |
| bi1 = to_rx[1]; |
| |
| ASSERT (to_add - 1 >= xm->rx_buffers_to_add); |
| fi0 = to_add[0]; |
| fi1 = to_add[-1]; |
| |
| to_rx[0] = fi0; |
| to_rx[1] = fi1; |
| to_rx += 2; |
| to_add -= 2; |
| |
| ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == |
| vlib_buffer_is_known (vm, bi0)); |
| ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == |
| vlib_buffer_is_known (vm, bi1)); |
| ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == |
| vlib_buffer_is_known (vm, fi0)); |
| ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == |
| vlib_buffer_is_known (vm, fi1)); |
| |
| b0 = vlib_get_buffer (vm, bi0); |
| b1 = vlib_get_buffer (vm, bi1); |
| |
| /* |
| * Turn this on if you run into |
| * "bad monkey" contexts, and you want to know exactly |
| * which nodes they've visited... See main.c... |
| */ |
| VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); |
| VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); |
| |
| CLIB_PREFETCH (b0->data, CLIB_CACHE_LINE_BYTES, LOAD); |
| CLIB_PREFETCH (b1->data, CLIB_CACHE_LINE_BYTES, LOAD); |
| |
| is_eop0 = (s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0; |
| is_eop1 = (s21 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0; |
| |
| ixge_rx_next_and_error_from_status_x2 (xd, s00, s20, s01, s21, |
| &next0, &error0, &flags0, |
| &next1, &error1, &flags1); |
| |
| next0 = is_sop ? next0 : next_index_sop; |
| next1 = is_eop0 ? next1 : next0; |
| next_index_sop = next1; |
| |
| b0->flags |= flags0 | (!is_eop0 << VLIB_BUFFER_LOG2_NEXT_PRESENT); |
| b1->flags |= flags1 | (!is_eop1 << VLIB_BUFFER_LOG2_NEXT_PRESENT); |
| |
| vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; |
| vnet_buffer (b1)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; |
| vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; |
| vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; |
| |
| b0->error = node->errors[error0]; |
| b1->error = node->errors[error1]; |
| |
| len0 = d0.rx_from_hw.n_packet_bytes_this_descriptor; |
| len1 = d1.rx_from_hw.n_packet_bytes_this_descriptor; |
| n_bytes += len0 + len1; |
| n_packets += is_eop0 + is_eop1; |
| |
| /* Give new buffers to hardware. */ |
| d0.rx_to_hw.tail_address = |
| vlib_get_buffer_data_physical_address (vm, fi0); |
| d1.rx_to_hw.tail_address = |
| vlib_get_buffer_data_physical_address (vm, fi1); |
| d0.rx_to_hw.head_address = d[0].rx_to_hw.tail_address; |
| d1.rx_to_hw.head_address = d[1].rx_to_hw.tail_address; |
| d[0].as_u32x4 = d0.as_u32x4; |
| d[1].as_u32x4 = d1.as_u32x4; |
| |
| d += 2; |
| n_descriptors_left -= 2; |
| |
| /* Point to either l2 or l3 header depending on next. */ |
| l3_offset0 = (is_sop && (next0 != IXGE_RX_NEXT_ETHERNET_INPUT)) |
| ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s00) : 0; |
| l3_offset1 = (is_eop0 && (next1 != IXGE_RX_NEXT_ETHERNET_INPUT)) |
| ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s01) : 0; |
| |
| b0->current_length = len0 - l3_offset0; |
| b1->current_length = len1 - l3_offset1; |
| b0->current_data = l3_offset0; |
| b1->current_data = l3_offset1; |
| |
| b_last->next_buffer = is_sop ? ~0 : bi0; |
| b0->next_buffer = is_eop0 ? ~0 : bi1; |
| bi_last = bi1; |
| b_last = b1; |
| |
| if (CLIB_DEBUG > 0) |
| { |
| u32 bi_sop0 = is_sop ? bi0 : bi_sop; |
| u32 bi_sop1 = is_eop0 ? bi1 : bi_sop0; |
| |
| if (is_eop0) |
| { |
| u8 *msg = vlib_validate_buffer (vm, bi_sop0, |
| /* follow_buffer_next */ 1); |
| ASSERT (!msg); |
| } |
| if (is_eop1) |
| { |
| u8 *msg = vlib_validate_buffer (vm, bi_sop1, |
| /* follow_buffer_next */ 1); |
| ASSERT (!msg); |
| } |
| } |
| if (0) /* "Dave" version */ |
| { |
| u32 bi_sop0 = is_sop ? bi0 : bi_sop; |
| u32 bi_sop1 = is_eop0 ? bi1 : bi_sop0; |
| |
| if (is_eop0) |
| { |
| to_next[0] = bi_sop0; |
| to_next++; |
| n_left_to_next--; |
| |
| vlib_validate_buffer_enqueue_x1 (vm, node, next_index, |
| to_next, n_left_to_next, |
| bi_sop0, next0); |
| } |
| if (is_eop1) |
| { |
| to_next[0] = bi_sop1; |
| to_next++; |
| n_left_to_next--; |
| |
| vlib_validate_buffer_enqueue_x1 (vm, node, next_index, |
| to_next, n_left_to_next, |
| bi_sop1, next1); |
| } |
| is_sop = is_eop1; |
| bi_sop = bi_sop1; |
| } |
| if (1) /* "Eliot" version */ |
| { |
| /* Speculatively enqueue to cached next. */ |
| u8 saved_is_sop = is_sop; |
| u32 bi_sop_save = bi_sop; |
| |
| bi_sop = saved_is_sop ? bi0 : bi_sop; |
| to_next[0] = bi_sop; |
| to_next += is_eop0; |
| n_left_to_next -= is_eop0; |
| |
| bi_sop = is_eop0 ? bi1 : bi_sop; |
| to_next[0] = bi_sop; |
| to_next += is_eop1; |
| n_left_to_next -= is_eop1; |
| |
| is_sop = is_eop1; |
| |
| if (PREDICT_FALSE |
| (!(next0 == next_index && next1 == next_index))) |
| { |
| /* Undo speculation. */ |
| to_next -= is_eop0 + is_eop1; |
| n_left_to_next += is_eop0 + is_eop1; |
| |
| /* Re-do both descriptors being careful about where we enqueue. */ |
| bi_sop = saved_is_sop ? bi0 : bi_sop_save; |
| if (is_eop0) |
| { |
| if (next0 != next_index) |
| vlib_set_next_frame_buffer (vm, node, next0, bi_sop); |
| else |
| { |
| to_next[0] = bi_sop; |
| to_next += 1; |
| n_left_to_next -= 1; |
| } |
| } |
| |
| bi_sop = is_eop0 ? bi1 : bi_sop; |
| if (is_eop1) |
| { |
| if (next1 != next_index) |
| vlib_set_next_frame_buffer (vm, node, next1, bi_sop); |
| else |
| { |
| to_next[0] = bi_sop; |
| to_next += 1; |
| n_left_to_next -= 1; |
| } |
| } |
| |
| /* Switch cached next index when next for both packets is the same. */ |
| if (is_eop0 && is_eop1 && next0 == next1) |
| { |
| vlib_put_next_frame (vm, node, next_index, |
| n_left_to_next); |
| next_index = next0; |
| vlib_get_next_frame (vm, node, next_index, |
| to_next, n_left_to_next); |
| } |
| } |
| } |
| } |
| |
| /* Bail out of dual loop and proceed with single loop. */ |
| found_hw_owned_descriptor_x2: |
| |
| while (n_descriptors_left > 0 && n_left_to_next > 0) |
| { |
| vlib_buffer_t *b0; |
| u32 bi0, fi0, len0, l3_offset0, s20, s00, flags0; |
| u8 is_eop0, error0, next0; |
| ixge_descriptor_t d0; |
| |
| d0.as_u32x4 = d[0].as_u32x4; |
| |
| s20 = d0.rx_from_hw.status[2]; |
| s00 = d0.rx_from_hw.status[0]; |
| |
| if (!(s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE)) |
| goto found_hw_owned_descriptor_x1; |
| |
| bi0 = to_rx[0]; |
| ASSERT (to_add >= xm->rx_buffers_to_add); |
| fi0 = to_add[0]; |
| |
| to_rx[0] = fi0; |
| to_rx += 1; |
| to_add -= 1; |
| |
| ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == |
| vlib_buffer_is_known (vm, bi0)); |
| ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == |
| vlib_buffer_is_known (vm, fi0)); |
| |
| b0 = vlib_get_buffer (vm, bi0); |
| |
| /* |
| * Turn this on if you run into |
| * "bad monkey" contexts, and you want to know exactly |
| * which nodes they've visited... |
| */ |
| VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); |
| |
| is_eop0 = (s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0; |
| ixge_rx_next_and_error_from_status_x1 |
| (xd, s00, s20, &next0, &error0, &flags0); |
| |
| next0 = is_sop ? next0 : next_index_sop; |
| next_index_sop = next0; |
| |
| b0->flags |= flags0 | (!is_eop0 << VLIB_BUFFER_LOG2_NEXT_PRESENT); |
| |
| vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; |
| vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; |
| |
| b0->error = node->errors[error0]; |
| |
| len0 = d0.rx_from_hw.n_packet_bytes_this_descriptor; |
| n_bytes += len0; |
| n_packets += is_eop0; |
| |
| /* Give new buffer to hardware. */ |
| d0.rx_to_hw.tail_address = |
| vlib_get_buffer_data_physical_address (vm, fi0); |
| d0.rx_to_hw.head_address = d0.rx_to_hw.tail_address; |
| d[0].as_u32x4 = d0.as_u32x4; |
| |
| d += 1; |
| n_descriptors_left -= 1; |
| |
| /* Point to either l2 or l3 header depending on next. */ |
| l3_offset0 = (is_sop && (next0 != IXGE_RX_NEXT_ETHERNET_INPUT)) |
| ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s00) : 0; |
| b0->current_length = len0 - l3_offset0; |
| b0->current_data = l3_offset0; |
| |
| b_last->next_buffer = is_sop ? ~0 : bi0; |
| bi_last = bi0; |
| b_last = b0; |
| |
| bi_sop = is_sop ? bi0 : bi_sop; |
| |
| if (CLIB_DEBUG > 0 && is_eop0) |
| { |
| u8 *msg = |
| vlib_validate_buffer (vm, bi_sop, /* follow_buffer_next */ 1); |
| ASSERT (!msg); |
| } |
| |
| if (0) /* "Dave" version */ |
| { |
| if (is_eop0) |
| { |
| to_next[0] = bi_sop; |
| to_next++; |
| n_left_to_next--; |
| |
| vlib_validate_buffer_enqueue_x1 (vm, node, next_index, |
| to_next, n_left_to_next, |
| bi_sop, next0); |
| } |
| } |
| if (1) /* "Eliot" version */ |
| { |
| if (PREDICT_TRUE (next0 == next_index)) |
| { |
| to_next[0] = bi_sop; |
| to_next += is_eop0; |
| n_left_to_next -= is_eop0; |
| } |
| else |
| { |
| if (next0 != next_index && is_eop0) |
| vlib_set_next_frame_buffer (vm, node, next0, bi_sop); |
| |
| vlib_put_next_frame (vm, node, next_index, n_left_to_next); |
| next_index = next0; |
| vlib_get_next_frame (vm, node, next_index, |
| to_next, n_left_to_next); |
| } |
| } |
| is_sop = is_eop0; |
| } |
| vlib_put_next_frame (vm, node, next_index, n_left_to_next); |
| } |
| |
| found_hw_owned_descriptor_x1: |
| if (n_descriptors_left > 0) |
| vlib_put_next_frame (vm, node, next_index, n_left_to_next); |
| |
| _vec_len (xm->rx_buffers_to_add) = (to_add + 1) - xm->rx_buffers_to_add; |
| |
| { |
| u32 n_done = n_descriptors - n_descriptors_left; |
| |
| if (n_trace > 0 && n_done > 0) |
| { |
| u32 n = clib_min (n_trace, n_done); |
| ixge_rx_trace (xm, xd, dq, |
| d_trace_save, |
| d_trace_buffers, |
| &dq->descriptors[start_descriptor_index], n); |
| vlib_set_trace_count (vm, node, n_trace - n); |
| } |
| if (d_trace_save) |
| { |
| _vec_len (d_trace_save) = 0; |
| _vec_len (d_trace_buffers) = 0; |
| } |
| |
| /* Don't keep a reference to b_last if we don't have to. |
| Otherwise we can over-write a next_buffer pointer after already haven |
| enqueued a packet. */ |
| if (is_sop) |
| { |
| b_last->next_buffer = ~0; |
| bi_last = ~0; |
| } |
| |
| dq->rx.n_descriptors_done_this_call = n_done; |
| dq->rx.n_descriptors_done_total += n_done; |
| dq->rx.is_start_of_packet = is_sop; |
| dq->rx.saved_start_of_packet_buffer_index = bi_sop; |
| dq->rx.saved_last_buffer_index = bi_last; |
| dq->rx.saved_start_of_packet_next_index = next_index_sop; |
| dq->rx.next_index = next_index; |
| dq->rx.n_bytes += n_bytes; |
| |
| return n_packets; |
| } |
| } |
| |
| static uword |
| ixge_rx_queue (ixge_main_t * xm, |
| ixge_device_t * xd, |
| vlib_node_runtime_t * node, u32 queue_index) |
| { |
| ixge_dma_queue_t *dq = |
| vec_elt_at_index (xd->dma_queues[VLIB_RX], queue_index); |
| ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, dq->queue_index); |
| uword n_packets = 0; |
| u32 hw_head_index, sw_head_index; |
| |
| /* One time initialization. */ |
| if (!dq->rx.node) |
| { |
| dq->rx.node = node; |
| dq->rx.is_start_of_packet = 1; |
| dq->rx.saved_start_of_packet_buffer_index = ~0; |
| dq->rx.saved_last_buffer_index = ~0; |
| } |
| |
| dq->rx.next_index = node->cached_next_index; |
| |
| dq->rx.n_descriptors_done_total = 0; |
| dq->rx.n_descriptors_done_this_call = 0; |
| dq->rx.n_bytes = 0; |
| |
| /* Fetch head from hardware and compare to where we think we are. */ |
| hw_head_index = dr->head_index; |
| sw_head_index = dq->head_index; |
| |
| if (hw_head_index == sw_head_index) |
| goto done; |
| |
| if (hw_head_index < sw_head_index) |
| { |
| u32 n_tried = dq->n_descriptors - sw_head_index; |
| n_packets += ixge_rx_queue_no_wrap (xm, xd, dq, sw_head_index, n_tried); |
| sw_head_index = |
| ixge_ring_add (dq, sw_head_index, |
| dq->rx.n_descriptors_done_this_call); |
| |
| if (dq->rx.n_descriptors_done_this_call != n_tried) |
| goto done; |
| } |
| if (hw_head_index >= sw_head_index) |
| { |
| u32 n_tried = hw_head_index - sw_head_index; |
| n_packets += ixge_rx_queue_no_wrap (xm, xd, dq, sw_head_index, n_tried); |
| sw_head_index = |
| ixge_ring_add (dq, sw_head_index, |
| dq->rx.n_descriptors_done_this_call); |
| } |
| |
| done: |
| dq->head_index = sw_head_index; |
| dq->tail_index = |
| ixge_ring_add (dq, dq->tail_index, dq->rx.n_descriptors_done_total); |
| |
| /* Give tail back to hardware. */ |
| CLIB_MEMORY_BARRIER (); |
| |
| dr->tail_index = dq->tail_index; |
| |
| vlib_increment_combined_counter (vnet_main. |
| interface_main.combined_sw_if_counters + |
| VNET_INTERFACE_COUNTER_RX, |
| 0 /* thread_index */ , |
| xd->vlib_sw_if_index, n_packets, |
| dq->rx.n_bytes); |
| |
| return n_packets; |
| } |
| |
| static void |
| ixge_interrupt (ixge_main_t * xm, ixge_device_t * xd, u32 i) |
| { |
| vlib_main_t *vm = xm->vlib_main; |
| ixge_regs_t *r = xd->regs; |
| |
| if (i != 20) |
| { |
| ELOG_TYPE_DECLARE (e) = |
| { |
| .function = (char *) __FUNCTION__,.format = |
| "ixge %d, %s",.format_args = "i1t1",.n_enum_strings = |
| 16,.enum_strings = |
| { |
| "flow director", |
| "rx miss", |
| "pci exception", |
| "mailbox", |
| "link status change", |
| "linksec key exchange", |
| "manageability event", |
| "reserved23", |
| "sdp0", |
| "sdp1", |
| "sdp2", |
| "sdp3", |
| "ecc", "descriptor handler error", "tcp timer", "other",},}; |
| struct |
| { |
| u8 instance; |
| u8 index; |
| } *ed; |
| ed = ELOG_DATA (&vm->elog_main, e); |
| ed->instance = xd->device_index; |
| ed->index = i - 16; |
| } |
| else |
| { |
| u32 v = r->xge_mac.link_status; |
| uword is_up = (v & (1 << 30)) != 0; |
| |
| ELOG_TYPE_DECLARE (e) = |
| { |
| .function = (char *) __FUNCTION__,.format = |
| "ixge %d, link status change 0x%x",.format_args = "i4i4",}; |
| struct |
| { |
| u32 instance, link_status; |
| } *ed; |
| ed = ELOG_DATA (&vm->elog_main, e); |
| ed->instance = xd->device_index; |
| ed->link_status = v; |
| xd->link_status_at_last_link_change = v; |
| |
| vlib_process_signal_event (vm, ixge_process_node.index, |
| EVENT_SET_FLAGS, |
| ((is_up << 31) | xd->vlib_hw_if_index)); |
| } |
| } |
| |
| always_inline u32 |
| clean_block (u32 * b, u32 * t, u32 n_left) |
| { |
| u32 *t0 = t; |
| |
| while (n_left >= 4) |
| { |
| u32 bi0, bi1, bi2, bi3; |
| |
| t[0] = bi0 = b[0]; |
| b[0] = 0; |
| t += bi0 != 0; |
| |
| t[0] = bi1 = b[1]; |
| b[1] = 0; |
| t += bi1 != 0; |
| |
| t[0] = bi2 = b[2]; |
| b[2] = 0; |
| t += bi2 != 0; |
| |
| t[0] = bi3 = b[3]; |
| b[3] = 0; |
| t += bi3 != 0; |
| |
| b += 4; |
| n_left -= 4; |
| } |
| |
| while (n_left > 0) |
| { |
| u32 bi0; |
| |
| t[0] = bi0 = b[0]; |
| b[0] = 0; |
| t += bi0 != 0; |
| b += 1; |
| n_left -= 1; |
| } |
| |
| return t - t0; |
| } |
| |
| static void |
| ixge_tx_queue (ixge_main_t * xm, ixge_device_t * xd, u32 queue_index) |
| { |
| vlib_main_t *vm = xm->vlib_main; |
| ixge_dma_queue_t *dq = |
| vec_elt_at_index (xd->dma_queues[VLIB_TX], queue_index); |
| u32 n_clean, *b, *t, *t0; |
| i32 n_hw_owned_descriptors; |
| i32 first_to_clean, last_to_clean; |
| u64 hwbp_race = 0; |
| |
| /* Handle case where head write back pointer update |
| * arrives after the interrupt during high PCI bus loads. |
| */ |
| while ((dq->head_index == dq->tx.head_index_write_back[0]) && |
| dq->tx.n_buffers_on_ring && (dq->head_index != dq->tail_index)) |
| { |
| hwbp_race++; |
| if (IXGE_HWBP_RACE_ELOG && (hwbp_race == 1)) |
| { |
| ELOG_TYPE_DECLARE (e) = |
| { |
| .function = (char *) __FUNCTION__,.format = |
| "ixge %d tx head index race: head %4d, tail %4d, buffs %4d",.format_args |
| = "i4i4i4i4",}; |
| struct |
| { |
| u32 instance, head_index, tail_index, n_buffers_on_ring; |
| } *ed; |
| ed = ELOG_DATA (&vm->elog_main, e); |
| ed->instance = xd->device_index; |
| ed->head_index = dq->head_index; |
| ed->tail_index = dq->tail_index; |
| ed->n_buffers_on_ring = dq->tx.n_buffers_on_ring; |
| } |
| } |
| |
| dq->head_index = dq->tx.head_index_write_back[0]; |
| n_hw_owned_descriptors = ixge_ring_sub (dq, dq->head_index, dq->tail_index); |
| ASSERT (dq->tx.n_buffers_on_ring >= n_hw_owned_descriptors); |
| n_clean = dq->tx.n_buffers_on_ring - n_hw_owned_descriptors; |
| |
| if (IXGE_HWBP_RACE_ELOG && hwbp_race) |
| { |
| ELOG_TYPE_DECLARE (e) = |
| { |
| .function = (char *) __FUNCTION__,.format = |
| "ixge %d tx head index race: head %4d, hw_owned %4d, n_clean %4d, retries %d",.format_args |
| = "i4i4i4i4i4",}; |
| struct |
| { |
| u32 instance, head_index, n_hw_owned_descriptors, n_clean, retries; |
| } *ed; |
| ed = ELOG_DATA (&vm->elog_main, e); |
| ed->instance = xd->device_index; |
| ed->head_index = dq->head_index; |
| ed->n_hw_owned_descriptors = n_hw_owned_descriptors; |
| ed->n_clean = n_clean; |
| ed->retries = hwbp_race; |
| } |
| |
| /* |
| * This function used to wait until hardware owned zero descriptors. |
| * At high PPS rates, that doesn't happen until the TX ring is |
| * completely full of descriptors which need to be cleaned up. |
| * That, in turn, causes TX ring-full drops and/or long RX service |
| * interruptions. |
| */ |
| if (n_clean == 0) |
| return; |
| |
| /* Clean the n_clean descriptors prior to the reported hardware head */ |
| last_to_clean = dq->head_index - 1; |
| last_to_clean = (last_to_clean < 0) ? last_to_clean + dq->n_descriptors : |
| last_to_clean; |
| |
| first_to_clean = (last_to_clean) - (n_clean - 1); |
| first_to_clean = (first_to_clean < 0) ? first_to_clean + dq->n_descriptors : |
| first_to_clean; |
| |
| vec_resize (xm->tx_buffers_pending_free, dq->n_descriptors - 1); |
| t0 = t = xm->tx_buffers_pending_free; |
| b = dq->descriptor_buffer_indices + first_to_clean; |
| |
| /* Wrap case: clean from first to end, then start to last */ |
| if (first_to_clean > last_to_clean) |
| { |
| t += clean_block (b, t, (dq->n_descriptors - 1) - first_to_clean); |
| first_to_clean = 0; |
| b = dq->descriptor_buffer_indices; |
| } |
| |
| /* Typical case: clean from first to last */ |
| if (first_to_clean <= last_to_clean) |
| t += clean_block (b, t, (last_to_clean - first_to_clean) + 1); |
| |
| if (t > t0) |
| { |
| u32 n = t - t0; |
| vlib_buffer_free_no_next (vm, t0, n); |
| ASSERT (dq->tx.n_buffers_on_ring >= n); |
| dq->tx.n_buffers_on_ring -= n; |
| _vec_len (xm->tx_buffers_pending_free) = 0; |
| } |
| } |
| |
| /* RX queue interrupts 0 thru 7; TX 8 thru 15. */ |
| always_inline uword |
| ixge_interrupt_is_rx_queue (uword i) |
| { |
| return i < 8; |
| } |
| |
| always_inline uword |
| ixge_interrupt_is_tx_queue (uword i) |
| { |
| return i >= 8 && i < 16; |
| } |
| |
| always_inline uword |
| ixge_tx_queue_to_interrupt (uword i) |
| { |
| return 8 + i; |
| } |
| |
| always_inline uword |
| ixge_rx_queue_to_interrupt (uword i) |
| { |
| return 0 + i; |
| } |
| |
| always_inline uword |
| ixge_interrupt_rx_queue (uword i) |
| { |
| ASSERT (ixge_interrupt_is_rx_queue (i)); |
| return i - 0; |
| } |
| |
| always_inline uword |
| ixge_interrupt_tx_queue (uword i) |
| { |
| ASSERT (ixge_interrupt_is_tx_queue (i)); |
| return i - 8; |
| } |
| |
| static uword |
| ixge_device_input (ixge_main_t * xm, |
| ixge_device_t * xd, vlib_node_runtime_t * node) |
| { |
| ixge_regs_t *r = xd->regs; |
| u32 i, s; |
| uword n_rx_packets = 0; |
| |
| s = r->interrupt.status_write_1_to_set; |
| if (s) |
| r->interrupt.status_write_1_to_clear = s; |
| |
| /* *INDENT-OFF* */ |
| foreach_set_bit (i, s, ({ |
| if (ixge_interrupt_is_rx_queue (i)) |
| n_rx_packets += ixge_rx_queue (xm, xd, node, ixge_interrupt_rx_queue (i)); |
| |
| else if (ixge_interrupt_is_tx_queue (i)) |
| ixge_tx_queue (xm, xd, ixge_interrupt_tx_queue (i)); |
| |
| else |
| ixge_interrupt (xm, xd, i); |
| })); |
| /* *INDENT-ON* */ |
| |
| return n_rx_packets; |
| } |
| |
| static uword |
| ixge_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) |
| { |
| ixge_main_t *xm = &ixge_main; |
| ixge_device_t *xd; |
| uword n_rx_packets = 0; |
| |
| if (node->state == VLIB_NODE_STATE_INTERRUPT) |
| { |
| uword i; |
| |
| /* Loop over devices with interrupts. */ |
| /* *INDENT-OFF* */ |
| foreach_set_bit (i, node->runtime_data[0], ({ |
| xd = vec_elt_at_index (xm->devices, i); |
| n_rx_packets += ixge_device_input (xm, xd, node); |
| |
| /* Re-enable interrupts since we're going to stay in interrupt mode. */ |
| if (! (node->flags & VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)) |
| xd->regs->interrupt.enable_write_1_to_set = ~0; |
| })); |
| /* *INDENT-ON* */ |
| |
| /* Clear mask of devices with pending interrupts. */ |
| node->runtime_data[0] = 0; |
| } |
| else |
| { |
| /* Poll all devices for input/interrupts. */ |
| vec_foreach (xd, xm->devices) |
| { |
| n_rx_packets += ixge_device_input (xm, xd, node); |
| |
| /* Re-enable interrupts when switching out of polling mode. */ |
| if (node->flags & |
| VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) |
| xd->regs->interrupt.enable_write_1_to_set = ~0; |
| } |
| } |
| |
| return n_rx_packets; |
| } |
| |
| static char *ixge_error_strings[] = { |
| #define _(n,s) s, |
| foreach_ixge_error |
| #undef _ |
| }; |
| |
| /* *INDENT-OFF* */ |
| VLIB_REGISTER_NODE (ixge_input_node, static) = { |
| .function = ixge_input, |
| .type = VLIB_NODE_TYPE_INPUT, |
| .name = "ixge-input", |
| |
| /* Will be enabled if/when hardware is detected. */ |
| .state = VLIB_NODE_STATE_DISABLED, |
| |
| .format_buffer = format_ethernet_header_with_length, |
| .format_trace = format_ixge_rx_dma_trace, |
| |
| .n_errors = IXGE_N_ERROR, |
| .error_strings = ixge_error_strings, |
| |
| .n_next_nodes = IXGE_RX_N_NEXT, |
| .next_nodes = { |
| [IXGE_RX_NEXT_DROP] = "error-drop", |
| [IXGE_RX_NEXT_ETHERNET_INPUT] = "ethernet-input", |
| [IXGE_RX_NEXT_IP4_INPUT] = "ip4-input", |
| [IXGE_RX_NEXT_IP6_INPUT] = "ip6-input", |
| }, |
| }; |
| |
| VLIB_NODE_FUNCTION_MULTIARCH_CLONE (ixge_input) |
| CLIB_MULTIARCH_SELECT_FN (ixge_input) |
| /* *INDENT-ON* */ |
| |
| static u8 * |
| format_ixge_device_name (u8 * s, va_list * args) |
| { |
| u32 i = va_arg (*args, u32); |
| ixge_main_t *xm = &ixge_main; |
| ixge_device_t *xd = vec_elt_at_index (xm->devices, i); |
| return format (s, "TenGigabitEthernet%U", |
| format_vlib_pci_handle, &xd->pci_device.bus_address); |
| } |
| |
| #define IXGE_COUNTER_IS_64_BIT (1 << 0) |
| #define IXGE_COUNTER_NOT_CLEAR_ON_READ (1 << 1) |
| |
| static u8 ixge_counter_flags[] = { |
| #define _(a,f) 0, |
| #define _64(a,f) IXGE_COUNTER_IS_64_BIT, |
| foreach_ixge_counter |
| #undef _ |
| #undef _64 |
| }; |
| |
| static void |
| ixge_update_counters (ixge_device_t * xd) |
| { |
| /* Byte offset for counter registers. */ |
| static u32 reg_offsets[] = { |
| #define _(a,f) (a) / sizeof (u32), |
| #define _64(a,f) _(a,f) |
| foreach_ixge_counter |
| #undef _ |
| #undef _64 |
| }; |
| volatile u32 *r = (volatile u32 *) xd->regs; |
| int i; |
| |
| for (i = 0; i < ARRAY_LEN (xd->counters); i++) |
| { |
| u32 o = reg_offsets[i]; |
| xd->counters[i] += r[o]; |
| if (ixge_counter_flags[i] & IXGE_COUNTER_NOT_CLEAR_ON_READ) |
| r[o] = 0; |
| if (ixge_counter_flags[i] & IXGE_COUNTER_IS_64_BIT) |
| xd->counters[i] += (u64) r[o + 1] << (u64) 32; |
| } |
| } |
| |
| static u8 * |
| format_ixge_device_id (u8 * s, va_list * args) |
| { |
| u32 device_id = va_arg (*args, u32); |
| char *t = 0; |
| switch (device_id) |
| { |
| #define _(f,n) case n: t = #f; break; |
| foreach_ixge_pci_device_id; |
| #undef _ |
| default: |
| t = 0; |
| break; |
| } |
| if (t == 0) |
| s = format (s, "unknown 0x%x", device_id); |
| else |
| s = format (s, "%s", t); |
| return s; |
| } |
| |
| static u8 * |
| format_ixge_link_status (u8 * s, va_list * args) |
| { |
| ixge_device_t *xd = va_arg (*args, ixge_device_t *); |
| u32 v = xd->link_status_at_last_link_change; |
| |
| s = format (s, "%s", (v & (1 << 30)) ? "up" : "down"); |
| |
| { |
| char *modes[] = { |
| "1g", "10g parallel", "10g serial", "autoneg", |
| }; |
| char *speeds[] = { |
| "unknown", "100m", "1g", "10g", |
| }; |
| s = format (s, ", mode %s, speed %s", |
| modes[(v >> 26) & 3], speeds[(v >> 28) & 3]); |
| } |
| |
| return s; |
| } |
| |
| static u8 * |
| format_ixge_device (u8 * s, va_list * args) |
| { |
| u32 dev_instance = va_arg (*args, u32); |
| CLIB_UNUSED (int verbose) = va_arg (*args, int); |
| ixge_main_t *xm = &ixge_main; |
| ixge_device_t *xd = vec_elt_at_index (xm->devices, dev_instance); |
| ixge_phy_t *phy = xd->phys + xd->phy_index; |
| uword indent = format_get_indent (s); |
| |
| ixge_update_counters (xd); |
| xd->link_status_at_last_link_change = xd->regs->xge_mac.link_status; |
| |
| s = format (s, "Intel 8259X: id %U\n%Ulink %U", |
| format_ixge_device_id, xd->device_id, |
| format_white_space, indent + 2, format_ixge_link_status, xd); |
| |
| { |
| |
| s = format (s, "\n%UPCIe %U", format_white_space, indent + 2, |
| format_vlib_pci_link_speed, &xd->pci_device); |
| } |
| |
| s = format (s, "\n%U", format_white_space, indent + 2); |
| if (phy->mdio_address != ~0) |
| s = format (s, "PHY address %d, id 0x%x", phy->mdio_address, phy->id); |
| else if (xd->sfp_eeprom.id == SFP_ID_sfp) |
| s = format (s, "SFP %U", format_sfp_eeprom, &xd->sfp_eeprom); |
| else |
| s = format (s, "PHY not found"); |
| |
| /* FIXME */ |
| { |
| ixge_dma_queue_t *dq = vec_elt_at_index (xd->dma_queues[VLIB_RX], 0); |
| ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, 0); |
| u32 hw_head_index = dr->head_index; |
| u32 sw_head_index = dq->head_index; |
| u32 nitems; |
| |
| nitems = ixge_ring_sub (dq, hw_head_index, sw_head_index); |
| s = format (s, "\n%U%d unprocessed, %d total buffers on rx queue 0 ring", |
| format_white_space, indent + 2, nitems, dq->n_descriptors); |
| |
| s = format (s, "\n%U%d buffers in driver rx cache", |
| format_white_space, indent + 2, |
| vec_len (xm->rx_buffers_to_add)); |
| |
| s = format (s, "\n%U%d buffers on tx queue 0 ring", |
| format_white_space, indent + 2, |
| xd->dma_queues[VLIB_TX][0].tx.n_buffers_on_ring); |
| } |
| { |
| u32 i; |
| u64 v; |
| static char *names[] = { |
| #define _(a,f) #f, |
| #define _64(a,f) _(a,f) |
| foreach_ixge_counter |
| #undef _ |
| #undef _64 |
| }; |
| |
| for (i = 0; i < ARRAY_LEN (names); i++) |
| { |
| v = xd->counters[i] - xd->counters_last_clear[i]; |
| if (v != 0) |
| s = format (s, "\n%U%-40U%16Ld", |
| format_white_space, indent + 2, |
| format_c_identifier, names[i], v); |
| } |
| } |
| |
| return s; |
| } |
| |
| static void |
| ixge_clear_hw_interface_counters (u32 instance) |
| { |
| ixge_main_t *xm = &ixge_main; |
| ixge_device_t *xd = vec_elt_at_index (xm->devices, instance); |
| ixge_update_counters (xd); |
| memcpy (xd->counters_last_clear, xd->counters, sizeof (xd->counters)); |
| } |
| |
| /* |
| * Dynamically redirect all pkts from a specific interface |
| * to the specified node |
| */ |
| static void |
| ixge_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, |
| u32 node_index) |
| { |
| ixge_main_t *xm = &ixge_main; |
| vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); |
| ixge_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); |
| |
| /* Shut off redirection */ |
| if (node_index == ~0) |
| { |
| xd->per_interface_next_index = node_index; |
| return; |
| } |
| |
| xd->per_interface_next_index = |
| vlib_node_add_next (xm->vlib_main, ixge_input_node.index, node_index); |
| } |
| |
| |
| /* *INDENT-OFF* */ |
| VNET_DEVICE_CLASS (ixge_device_class) = { |
| .name = "ixge", |
| .tx_function = ixge_interface_tx, |
| .format_device_name = format_ixge_device_name, |
| .format_device = format_ixge_device, |
| .format_tx_trace = format_ixge_tx_dma_trace, |
| .clear_counters = ixge_clear_hw_interface_counters, |
| .admin_up_down_function = ixge_interface_admin_up_down, |
| .rx_redirect_to_node = ixge_set_interface_next_node, |
| }; |
| /* *INDENT-ON* */ |
| |
| #define IXGE_N_BYTES_IN_RX_BUFFER (2048) // DAW-HACK: Set Rx buffer size so all packets < ETH_MTU_SIZE fit in the buffer (i.e. sop & eop for all descriptors). |
| |
| static clib_error_t * |
| ixge_dma_init (ixge_device_t * xd, vlib_rx_or_tx_t rt, u32 queue_index) |
| { |
| ixge_main_t *xm = &ixge_main; |
| vlib_main_t *vm = xm->vlib_main; |
| ixge_dma_queue_t *dq; |
| clib_error_t *error = 0; |
| |
| vec_validate (xd->dma_queues[rt], queue_index); |
| dq = vec_elt_at_index (xd->dma_queues[rt], queue_index); |
| |
| if (!xm->n_descriptors_per_cache_line) |
| xm->n_descriptors_per_cache_line = |
| CLIB_CACHE_LINE_BYTES / sizeof (dq->descriptors[0]); |
| |
| if (!xm->n_bytes_in_rx_buffer) |
| xm->n_bytes_in_rx_buffer = IXGE_N_BYTES_IN_RX_BUFFER; |
| xm->n_bytes_in_rx_buffer = round_pow2 (xm->n_bytes_in_rx_buffer, 1024); |
| if (!xm->vlib_buffer_free_list_index) |
| { |
| xm->vlib_buffer_free_list_index = |
| vlib_buffer_get_or_create_free_list (vm, xm->n_bytes_in_rx_buffer, |
| "ixge rx"); |
| ASSERT (xm->vlib_buffer_free_list_index != 0); |
| } |
| |
| if (!xm->n_descriptors[rt]) |
| xm->n_descriptors[rt] = 4 * VLIB_FRAME_SIZE; |
| |
| dq->queue_index = queue_index; |
| dq->n_descriptors = |
| round_pow2 (xm->n_descriptors[rt], xm->n_descriptors_per_cache_line); |
| dq->head_index = dq->tail_index = 0; |
| |
| dq->descriptors = vlib_physmem_alloc_aligned (vm, &error, |
| dq->n_descriptors * |
| sizeof (dq->descriptors[0]), |
| 128 /* per chip spec */ ); |
| if (error) |
| return error; |
| |
| memset (dq->descriptors, 0, |
| dq->n_descriptors * sizeof (dq->descriptors[0])); |
| vec_resize (dq->descriptor_buffer_indices, dq->n_descriptors); |
| |
| if (rt == VLIB_RX) |
| { |
| u32 n_alloc, i; |
| |
| n_alloc = vlib_buffer_alloc_from_free_list |
| (vm, dq->descriptor_buffer_indices, |
| vec_len (dq->descriptor_buffer_indices), |
| xm->vlib_buffer_free_list_index); |
| ASSERT (n_alloc == vec_len (dq->descriptor_buffer_indices)); |
| for (i = 0; i < n_alloc; i++) |
| { |
| vlib_buffer_t *b = |
| vlib_get_buffer (vm, dq->descriptor_buffer_indices[i]); |
| dq->descriptors[i].rx_to_hw.tail_address = |
| vlib_physmem_virtual_to_physical (vm, b->data); |
| } |
| } |
| else |
| { |
| u32 i; |
| |
| dq->tx.head_index_write_back = |
| vlib_physmem_alloc (vm, &error, CLIB_CACHE_LINE_BYTES); |
| |
| for (i = 0; i < dq->n_descriptors; i++) |
| dq->descriptors[i].tx = xm->tx_descriptor_template; |
| |
| vec_validate (xm->tx_buffers_pending_free, dq->n_descriptors - 1); |
| } |
| |
| { |
| ixge_dma_regs_t *dr = get_dma_regs (xd, rt, queue_index); |
| u64 a; |
| |
| a = vlib_physmem_virtual_to_physical (vm, dq->descriptors); |
| dr->descriptor_address[0] = a & 0xFFFFFFFF; |
| dr->descriptor_address[1] = a >> (u64) 32; |
| dr->n_descriptor_bytes = dq->n_descriptors * sizeof (dq->descriptors[0]); |
| dq->head_index = dq->tail_index = 0; |
| |
| if (rt == VLIB_RX) |
| { |
| ASSERT ((xm->n_bytes_in_rx_buffer / 1024) < 32); |
| dr->rx_split_control = |
| ( /* buffer size */ ((xm->n_bytes_in_rx_buffer / 1024) << 0) |
| | ( /* lo free descriptor threshold (units of 64 descriptors) */ |
| (1 << 22)) | ( /* descriptor type: advanced one buffer */ |
| (1 << 25)) | ( /* drop if no descriptors available */ |
| (1 << 28))); |
| |
| /* Give hardware all but last 16 cache lines' worth of descriptors. */ |
| dq->tail_index = dq->n_descriptors - |
| 16 * xm->n_descriptors_per_cache_line; |
| } |
| else |
| { |
| /* Make sure its initialized before hardware can get to it. */ |
| dq->tx.head_index_write_back[0] = dq->head_index; |
| |
| a = |
| vlib_physmem_virtual_to_physical (vm, dq->tx.head_index_write_back); |
| dr->tx.head_index_write_back_address[0] = /* enable bit */ 1 | a; |
| dr->tx.head_index_write_back_address[1] = (u64) a >> (u64) 32; |
| } |
| |
| /* DMA on 82599 does not work with [13] rx data write relaxed ordering |
| and [12] undocumented set. */ |
| if (rt == VLIB_RX) |
| dr->dca_control &= ~((1 << 13) | (1 << 12)); |
| |
| CLIB_MEMORY_BARRIER (); |
| |
| if (rt == VLIB_TX) |
| { |
| xd->regs->tx_dma_control |= (1 << 0); |
| dr->control |= ((32 << 0) /* prefetch threshold */ |
| | (64 << 8) /* host threshold */ |
| | (0 << 16) /* writeback threshold */ ); |
| } |
| |
| /* Enable this queue and wait for hardware to initialize |
| before adding to tail. */ |
| if (rt == VLIB_TX) |
| { |
| dr->control |= 1 << 25; |
| while (!(dr->control & (1 << 25))) |
| ; |
| } |
| |
| /* Set head/tail indices and enable DMA. */ |
| dr->head_index = dq->head_index; |
| dr->tail_index = dq->tail_index; |
| } |
| |
| return error; |
| } |
| |
| static u32 |
| ixge_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, u32 flags) |
| { |
| ixge_device_t *xd; |
| ixge_regs_t *r; |
| u32 old; |
| ixge_main_t *xm = &ixge_main; |
| |
| xd = vec_elt_at_index (xm->devices, hw->dev_instance); |
| r = xd->regs; |
| |
| old = r->filter_control; |
| |
| if (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) |
| r->filter_control = old | (1 << 9) /* unicast promiscuous */ ; |
| else |
| r->filter_control = old & ~(1 << 9); |
| |
| return old; |
| } |
| |
| static void |
| ixge_device_init (ixge_main_t * xm) |
| { |
| vnet_main_t *vnm = vnet_get_main (); |
| ixge_device_t *xd; |
| |
| /* Reset chip(s). */ |
| vec_foreach (xd, xm->devices) |
| { |
| ixge_regs_t *r = xd->regs; |
| const u32 reset_bit = (1 << 26) | (1 << 3); |
| |
| r->control |= reset_bit; |
| |
| /* No need to suspend. Timed to take ~1e-6 secs */ |
| while (r->control & reset_bit) |
| ; |
| |
| /* Software loaded. */ |
| r->extended_control |= (1 << 28); |
| |
| ixge_phy_init (xd); |
| |
| /* Register ethernet interface. */ |
| { |
| u8 addr8[6]; |
| u32 i, addr32[2]; |
| clib_error_t *error; |
| |
| addr32[0] = r->rx_ethernet_address0[0][0]; |
| addr32[1] = r->rx_ethernet_address0[0][1]; |
| for (i = 0; i < 6; i++) |
| addr8[i] = addr32[i / 4] >> ((i % 4) * 8); |
| |
| error = ethernet_register_interface |
| (vnm, ixge_device_class.index, xd->device_index, |
| /* ethernet address */ addr8, |
| &xd->vlib_hw_if_index, ixge_flag_change); |
| if (error) |
| clib_error_report (error); |
| } |
| |
| { |
| vnet_sw_interface_t *sw = |
| vnet_get_hw_sw_interface (vnm, xd->vlib_hw_if_index); |
| xd->vlib_sw_if_index = sw->sw_if_index; |
| } |
| |
| ixge_dma_init (xd, VLIB_RX, /* queue_index */ 0); |
| |
| xm->n_descriptors[VLIB_TX] = 20 * VLIB_FRAME_SIZE; |
| |
| ixge_dma_init (xd, VLIB_TX, /* queue_index */ 0); |
| |
| /* RX/TX queue 0 gets mapped to interrupt bits 0 & 8. */ |
| r->interrupt.queue_mapping[0] = (( /* valid bit */ (1 << 7) | |
| ixge_rx_queue_to_interrupt (0)) << 0); |
| |
| r->interrupt.queue_mapping[0] |= (( /* valid bit */ (1 << 7) | |
| ixge_tx_queue_to_interrupt (0)) << 8); |
| |
| /* No use in getting too many interrupts. |
| Limit them to one every 3/4 ring size at line rate |
| min sized packets. |
| No need for this since kernel/vlib main loop provides adequate interrupt |
| limiting scheme. */ |
| if (0) |
| { |
| f64 line_rate_max_pps = |
| 10e9 / (8 * (64 + /* interframe padding */ 20)); |
| ixge_throttle_queue_interrupt (r, 0, |
| .75 * xm->n_descriptors[VLIB_RX] / |
| line_rate_max_pps); |
| } |
| |
| /* Accept all multicast and broadcast packets. Should really add them |
| to the dst_ethernet_address register array. */ |
| r->filter_control |= (1 << 10) | (1 << 8); |
| |
| /* Enable frames up to size in mac frame size register. */ |
| r->xge_mac.control |= 1 << 2; |
| r->xge_mac.rx_max_frame_size = (9216 + 14) << 16; |
| |
| /* Enable all interrupts. */ |
| if (!IXGE_ALWAYS_POLL) |
| r->interrupt.enable_write_1_to_set = ~0; |
| } |
| } |
| |
| static uword |
| ixge_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) |
| { |
| vnet_main_t *vnm = vnet_get_main (); |
| ixge_main_t *xm = &ixge_main; |
| ixge_device_t *xd; |
| uword event_type, *event_data = 0; |
| f64 timeout, link_debounce_deadline; |
| |
| ixge_device_init (xm); |
| |
| /* Clear all counters. */ |
| vec_foreach (xd, xm->devices) |
| { |
| ixge_update_counters (xd); |
| memset (xd->counters, 0, sizeof (xd->counters)); |
| } |
| |
| timeout = 30.0; |
| link_debounce_deadline = 1e70; |
| |
| while (1) |
| { |
| /* 36 bit stat counters could overflow in ~50 secs. |
| We poll every 30 secs to be conservative. */ |
| vlib_process_wait_for_event_or_clock (vm, timeout); |
| |
| event_type = vlib_process_get_events (vm, &event_data); |
| |
| switch (event_type) |
| { |
| case EVENT_SET_FLAGS: |
| /* 1 ms */ |
| link_debounce_deadline = vlib_time_now (vm) + 1e-3; |
| timeout = 1e-3; |
| break; |
| |
| case ~0: |
| /* No events found: timer expired. */ |
| if (vlib_time_now (vm) > link_debounce_deadline) |
| { |
| vec_foreach (xd, xm->devices) |
| { |
| ixge_regs_t *r = xd->regs; |
| u32 v = r->xge_mac.link_status; |
| uword is_up = (v & (1 << 30)) != 0; |
| |
| vnet_hw_interface_set_flags |
| (vnm, xd->vlib_hw_if_index, |
| is_up ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0); |
| } |
| link_debounce_deadline = 1e70; |
| timeout = 30.0; |
| } |
| break; |
| |
| default: |
| ASSERT (0); |
| } |
| |
| if (event_data) |
| _vec_len (event_data) = 0; |
| |
| /* Query stats every 30 secs. */ |
| { |
| f64 now = vlib_time_now (vm); |
| if (now - xm->time_last_stats_update > 30) |
| { |
| xm->time_last_stats_update = now; |
| vec_foreach (xd, xm->devices) ixge_update_counters (xd); |
| } |
| } |
| } |
| |
| return 0; |
| } |
| |
| static vlib_node_registration_t ixge_process_node = { |
| .function = ixge_process, |
| .type = VLIB_NODE_TYPE_PROCESS, |
| .name = "ixge-process", |
| }; |
| |
| clib_error_t * |
| ixge_init (vlib_main_t * vm) |
| { |
| ixge_main_t *xm = &ixge_main; |
| clib_error_t *error; |
| |
| xm->vlib_main = vm; |
| memset (&xm->tx_descriptor_template, 0, |
| sizeof (xm->tx_descriptor_template)); |
| memset (&xm->tx_descriptor_template_mask, 0, |
| sizeof (xm->tx_descriptor_template_mask)); |
| xm->tx_descriptor_template.status0 = |
| (IXGE_TX_DESCRIPTOR_STATUS0_ADVANCED | |
| IXGE_TX_DESCRIPTOR_STATUS0_IS_ADVANCED | |
| IXGE_TX_DESCRIPTOR_STATUS0_INSERT_FCS); |
| xm->tx_descriptor_template_mask.status0 = 0xffff; |
| xm->tx_descriptor_template_mask.status1 = 0x00003fff; |
| |
| xm->tx_descriptor_template_mask.status0 &= |
| ~(IXGE_TX_DESCRIPTOR_STATUS0_IS_END_OF_PACKET |
| | IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS); |
| xm->tx_descriptor_template_mask.status1 &= |
| ~(IXGE_TX_DESCRIPTOR_STATUS1_DONE); |
| |
| error = vlib_call_init_function (vm, pci_bus_init); |
| |
| return error; |
| } |
| |
| VLIB_INIT_FUNCTION (ixge_init); |
| |
| |
| static void |
| ixge_pci_intr_handler (vlib_pci_device_t * dev) |
| { |
| ixge_main_t *xm = &ixge_main; |
| vlib_main_t *vm = xm->vlib_main; |
| |
| vlib_node_set_interrupt_pending (vm, ixge_input_node.index); |
| |
| /* Let node know which device is interrupting. */ |
| { |
| vlib_node_runtime_t *rt = |
| vlib_node_get_runtime (vm, ixge_input_node.index); |
| rt->runtime_data[0] |= 1 << dev->private_data; |
| } |
| } |
| |
| static clib_error_t * |
| ixge_pci_init (vlib_main_t * vm, vlib_pci_device_t * dev) |
| { |
| ixge_main_t *xm = &ixge_main; |
| clib_error_t *error; |
| void *r; |
| ixge_device_t *xd; |
| |
| /* Device found: make sure we have dma memory. */ |
| if (unix_physmem_is_fake (vm)) |
| return clib_error_return (0, "no physical memory available"); |
| |
| error = vlib_pci_map_resource (dev, 0, &r); |
| if (error) |
| return error; |
| |
| vec_add2 (xm->devices, xd, 1); |
| |
| if (vec_len (xm->devices) == 1) |
| { |
| ixge_input_node.function = ixge_input_multiarch_select (); |
| } |
| |
| xd->pci_device = dev[0]; |
| xd->device_id = xd->pci_device.config0.header.device_id; |
| xd->regs = r; |
| xd->device_index = xd - xm->devices; |
| xd->pci_function = dev->bus_address.function; |
| xd->per_interface_next_index = ~0; |
| |
| |
| /* Chip found so enable node. */ |
| { |
| vlib_node_set_state (vm, ixge_input_node.index, |
| (IXGE_ALWAYS_POLL |
| ? VLIB_NODE_STATE_POLLING |
| : VLIB_NODE_STATE_INTERRUPT)); |
| |
| dev->private_data = xd->device_index; |
| } |
| |
| if (vec_len (xm->devices) == 1) |
| { |
| vlib_register_node (vm, &ixge_process_node); |
| xm->process_node_index = ixge_process_node.index; |
| } |
| |
| error = vlib_pci_bus_master_enable (dev); |
| |
| if (error) |
| return error; |
| |
| return vlib_pci_intr_enable (dev); |
| } |
| |
| /* *INDENT-OFF* */ |
| PCI_REGISTER_DEVICE (ixge_pci_device_registration,static) = { |
| .init_function = ixge_pci_init, |
| .interrupt_handler = ixge_pci_intr_handler, |
| .supported_devices = { |
| #define _(t,i) { .vendor_id = PCI_VENDOR_ID_INTEL, .device_id = i, }, |
| foreach_ixge_pci_device_id |
| #undef _ |
| { 0 }, |
| }, |
| }; |
| /* *INDENT-ON* */ |
| |
| void |
| ixge_set_next_node (ixge_rx_next_t next, char *name) |
| { |
| vlib_node_registration_t *r = &ixge_input_node; |
| |
| switch (next) |
| { |
| case IXGE_RX_NEXT_IP4_INPUT: |
| case IXGE_RX_NEXT_IP6_INPUT: |
| case IXGE_RX_NEXT_ETHERNET_INPUT: |
| r->next_nodes[next] = name; |
| break; |
| |
| default: |
| clib_warning ("%s: illegal next %d\n", __FUNCTION__, next); |
| break; |
| } |
| } |
| |
| /* *INDENT-OFF* */ |
| VLIB_PLUGIN_REGISTER () = { |
| .version = VPP_BUILD_VER, |
| .default_disabled = 1, |
| .description = "Intel 82599 Family Native Driver (experimental)", |
| }; |
| #endif |
| |
| /* *INDENT-ON* */ |
| |
| /* |
| * fd.io coding-style-patch-verification: ON |
| * |
| * Local Variables: |
| * eval: (c-set-style "gnu") |
| * End: |
| */ |