Merge git://git.kernel.org:/pub/scm/linux/kernel/git/netdev/net

Pull networking fixes from David Miller:

 1) Fix transmissions in dynamic SMPS mode in ath9k, from Felix Fietkau.

 2) TX skb error handling fix in mt76 driver, also from Felix.

 3) Fix BPF_FETCH atomic in x86 JIT, from Brendan Jackman.

 4) Avoid double free of percpu pointers when freeing a cloned bpf prog.
    From Cong Wang.

 5) Use correct printf format for dma_addr_t in ath11k, from Geert
    Uytterhoeven.

 6) Fix resolve_btfids build with older toolchains, from Kun-Chuan
    Hsieh.

 7) Don't report truncated frames to mac80211 in mt76 driver, from
    Lorenzop Bianconi.

 8) Fix watcdog timeout on suspend/resume of stmmac, from Joakim Zhang.

 9) mscc ocelot needs NET_DEVLINK selct in Kconfig, from Arnd Bergmann.

10) Fix sign comparison bug in TCP_ZEROCOPY_RECEIVE getsockopt(), from
    Arjun Roy.

11) Ignore routes with deleted nexthop object in mlxsw, from Ido
    Schimmel.

12) Need to undo tcp early demux lookup sometimes in nf_nat, from
    Florian Westphal.

13) Fix gro aggregation for udp encaps with zero csum, from Daniel
    Borkmann.

14) Make sure to always use imp*_ndo_send when necessaey, from Jason A.
    Donenfeld.

15) Fix TRSCER masks in sh_eth driver from Sergey Shtylyov.

16) prevent overly huge skb allocationsd in qrtr, from Pavel Skripkin.

17) Prevent rx ring copnsumer index loss of sync in enetc, from Vladimir
    Oltean.

18) Make sure textsearch copntrol block is large enough, from Wilem de
    Bruijn.

19) Revert MAC changes to r8152 leading to instability, from Hates Wang.

20) Advance iov in 9p even for empty reads, from Jissheng Zhang.

21) Double hook unregister in nftables, from PabloNeira Ayuso.

22) Fix memleak in ixgbe, fropm Dinghao Liu.

23) Avoid dups in pkt scheduler class dumps, from Maximilian Heyne.

24) Various mptcp fixes from Florian Westphal, Paolo Abeni, and Geliang
    Tang.

25) Fix DOI refcount bugs in cipso, from Paul Moore.

26) One too many irqsave in ibmvnic, from Junlin Yang.

27) Fix infinite loop with MPLS gso segmenting via virtio_net, from
    Balazs Nemeth.

* git://git.kernel.org:/pub/scm/linux/kernel/git/netdev/net: (164 commits)
  s390/qeth: fix notification for pending buffers during teardown
  s390/qeth: schedule TX NAPI on QAOB completion
  s390/qeth: improve completion of pending TX buffers
  s390/qeth: fix memory leak after failed TX Buffer allocation
  net: avoid infinite loop in mpls_gso_segment when mpls_hlen == 0
  net: check if protocol extracted by virtio_net_hdr_set_proto is correct
  net: dsa: xrs700x: check if partner is same as port in hsr join
  net: lapbether: Remove netif_start_queue / netif_stop_queue
  atm: idt77252: fix null-ptr-dereference
  atm: uPD98402: fix incorrect allocation
  atm: fix a typo in the struct description
  net: qrtr: fix error return code of qrtr_sendmsg()
  mptcp: fix length of ADD_ADDR with port sub-option
  net: bonding: fix error return code of bond_neigh_init()
  net: enetc: allow hardware timestamping on TX queues with tc-etf enabled
  net: enetc: set MAC RX FIFO to recommended value
  net: davicom: Use platform_get_irq_optional()
  net: davicom: Fix regulator not turned off on driver removal
  net: davicom: Fix regulator not turned off on failed probe
  net: dsa: fix switchdev objects on bridge master mistakenly being applied on ports
  ...
This commit is contained in:
Linus Torvalds
2021-03-09 17:15:56 -08:00
159 changed files with 1457 additions and 787 deletions

View File

@@ -260,6 +260,11 @@ static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size)
return btf_id__add(root, id, false);
}
/* Older libelf.h and glibc elf.h might not yet define the ELF compression types. */
#ifndef SHF_COMPRESSED
#define SHF_COMPRESSED (1 << 11) /* Section with compressed data. */
#endif
/*
* The data of compressed section should be aligned to 4
* (for 32bit) or 8 (for 64 bit) bytes. The binutils ld

View File

@@ -3850,7 +3850,6 @@ union bpf_attr {
*
* long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags)
* Description
* Check ctx packet size against exceeding MTU of net device (based
* on *ifindex*). This helper will likely be used in combination
* with helpers that adjust/change the packet size.

View File

@@ -610,15 +610,16 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
if (fd < 0)
continue;
memset(&map_info, 0, map_len);
err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
if (err) {
close(fd);
continue;
}
if (!strcmp(map_info.name, "xsks_map")) {
if (!strncmp(map_info.name, "xsks_map", sizeof(map_info.name))) {
ctx->xsks_map_fd = fd;
continue;
break;
}
close(fd);

View File

@@ -16,6 +16,13 @@ bool skip = false;
#define STRSIZE 2048
#define EXPECTED_STRSIZE 256
#if defined(bpf_target_s390)
/* NULL points to a readable struct lowcore on s390, so take the last page */
#define BADPTR ((void *)0xFFFFFFFFFFFFF000ULL)
#else
#define BADPTR 0
#endif
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
@@ -113,11 +120,11 @@ int BPF_PROG(trace_netif_receive_skb, struct sk_buff *skb)
}
/* Check invalid ptr value */
p.ptr = 0;
p.ptr = BADPTR;
__ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0);
if (__ret >= 0) {
bpf_printk("printing NULL should generate error, got (%d)",
__ret);
bpf_printk("printing %llx should generate error, got (%d)",
(unsigned long long)BADPTR, __ret);
ret = -ERANGE;
}

View File

@@ -15,5 +15,5 @@ __noinline int foo(const struct S *s)
SEC("cgroup_skb/ingress")
int test_cls(struct __sk_buff *skb)
{
return foo(skb);
return foo((const void *)skb);
}

View File

@@ -446,10 +446,8 @@ int _geneve_get_tunnel(struct __sk_buff *skb)
}
ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
if (ret < 0) {
ERROR(ret);
return TC_ACT_SHOT;
}
if (ret < 0)
gopt.opt_class = 0;
bpf_trace_printk(fmt, sizeof(fmt),
key.tunnel_id, key.remote_ipv4, gopt.opt_class);

View File

@@ -250,12 +250,13 @@
BPF_MOV64_IMM(BPF_REG_5, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_csum_diff),
BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.fixup_map_array_ro = { 3 },
.result = ACCEPT,
.retval = -29,
.retval = 65507,
},
{
"invalid write map access into a read-only array 1",

View File

@@ -75,3 +75,26 @@
},
.result = ACCEPT,
},
{
"BPF_ATOMIC_AND with fetch - r0 as source reg",
.insns = {
/* val = 0x110; */
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110),
/* old = atomic_fetch_and(&val, 0x011); */
BPF_MOV64_IMM(BPF_REG_0, 0x011),
BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_10, BPF_REG_0, -8),
/* if (old != 0x110) exit(3); */
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x110, 2),
BPF_MOV64_IMM(BPF_REG_0, 3),
BPF_EXIT_INSN(),
/* if (val != 0x010) exit(2); */
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x010, 2),
BPF_MOV64_IMM(BPF_REG_1, 2),
BPF_EXIT_INSN(),
/* exit(0); */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
},

View File

@@ -94,3 +94,28 @@
.result = REJECT,
.errstr = "invalid read from stack",
},
{
"BPF_W cmpxchg should zero top 32 bits",
.insns = {
/* r0 = U64_MAX; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1),
/* u64 val = r0; */
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
/* r0 = (u32)atomic_cmpxchg((u32 *)&val, r0, 1); */
BPF_MOV32_IMM(BPF_REG_1, 1),
BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8),
/* r1 = 0x00000000FFFFFFFFull; */
BPF_MOV64_IMM(BPF_REG_1, 1),
BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
/* if (r0 != r1) exit(1); */
BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_1, 2),
BPF_MOV32_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
/* exit(0); */
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
},

View File

@@ -75,3 +75,28 @@
},
.result = ACCEPT,
},
{
"BPF_W atomic_fetch_or should zero top 32 bits",
.insns = {
/* r1 = U64_MAX; */
BPF_MOV64_IMM(BPF_REG_1, 0),
BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
/* u64 val = r1; */
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
/* r1 = (u32)atomic_fetch_or((u32 *)&val, 2); */
BPF_MOV32_IMM(BPF_REG_1, 2),
BPF_ATOMIC_OP(BPF_W, BPF_OR | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8),
/* r2 = 0x00000000FFFFFFFF; */
BPF_MOV64_IMM(BPF_REG_2, 1),
BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32),
BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 1),
/* if (r2 != r1) exit(1); */
BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_1, 2),
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
/* exit(0); */
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
},

View File

@@ -1524,6 +1524,14 @@ basic()
run_cmd "$IP nexthop replace id 2 blackhole dev veth1"
log_test $? 2 "Blackhole nexthop with other attributes"
# blackhole nexthop should not be affected by the state of the loopback
# device
run_cmd "$IP link set dev lo down"
check_nexthop "id 2" "id 2 blackhole"
log_test $? 0 "Blackhole nexthop with loopback device down"
run_cmd "$IP link set dev lo up"
#
# groups
#

View File

@@ -86,11 +86,20 @@ test_ip6gretap()
test_gretap_stp()
{
# Sometimes after mirror installation, the neighbor's state is not valid.
# The reason is that there is no SW datapath activity related to the
# neighbor for the remote GRE address. Therefore whether the corresponding
# neighbor will be valid is a matter of luck, and the test is thus racy.
# Set the neighbor's state to permanent, so it would be always valid.
ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \
nud permanent dev br2
full_test_span_gre_stp gt4 $swp3.555 "mirror to gretap"
}
test_ip6gretap_stp()
{
ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \
nud permanent dev br2
full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap"
}

View File

@@ -1785,7 +1785,7 @@ static void grand_child_serv(unsigned int nr, int cmd_fd, void *buf,
break;
default:
printk("got unknown msg type %d", msg->type);
};
}
}
static int grand_child_f(unsigned int nr, int cmd_fd, void *buf)

View File

@@ -4,7 +4,7 @@
TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
nft_concat_range.sh nft_conntrack_helper.sh \
nft_queue.sh nft_meta.sh \
nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
ipip-conntrack-mtu.sh
LDLIBS = -lmnl

View File

@@ -0,0 +1,99 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Test NAT source port clash resolution
#
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
ret=0
sfx=$(mktemp -u "XXXXXXXX")
ns1="ns1-$sfx"
ns2="ns2-$sfx"
cleanup()
{
ip netns del $ns1
ip netns del $ns2
}
iperf3 -v > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without iperf3"
exit $ksft_skip
fi
iptables --version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without iptables"
exit $ksft_skip
fi
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without ip tool"
exit $ksft_skip
fi
ip netns add "$ns1"
if [ $? -ne 0 ];then
echo "SKIP: Could not create net namespace $ns1"
exit $ksft_skip
fi
trap cleanup EXIT
ip netns add $ns2
# Connect the namespaces using a veth pair
ip link add name veth2 type veth peer name veth1
ip link set netns $ns1 dev veth1
ip link set netns $ns2 dev veth2
ip netns exec $ns1 ip link set up dev lo
ip netns exec $ns1 ip link set up dev veth1
ip netns exec $ns1 ip addr add 192.168.1.1/24 dev veth1
ip netns exec $ns2 ip link set up dev lo
ip netns exec $ns2 ip link set up dev veth2
ip netns exec $ns2 ip addr add 192.168.1.2/24 dev veth2
# Create a server in one namespace
ip netns exec $ns1 iperf3 -s > /dev/null 2>&1 &
iperfs=$!
# Restrict source port to just one so we don't have to exhaust
# all others.
ip netns exec $ns2 sysctl -q net.ipv4.ip_local_port_range="10000 10000"
# add a virtual IP using DNAT
ip netns exec $ns2 iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201
# ... and route it to the other namespace
ip netns exec $ns2 ip route add 10.96.0.1 via 192.168.1.1
sleep 1
# add a persistent connection from the other namespace
ip netns exec $ns2 nc -q 10 -w 10 192.168.1.1 5201 > /dev/null &
sleep 1
# ip daddr:dport will be rewritten to 192.168.1.1 5201
# NAT must reallocate source port 10000 because
# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use
echo test | ip netns exec $ns2 nc -w 3 -q 3 10.96.0.1 443 >/dev/null
ret=$?
kill $iperfs
# Check nc can connect to 10.96.0.1:443 (aka 192.168.1.1:5201).
if [ $ret -eq 0 ]; then
echo "PASS: nc can connect via NAT'd address"
else
echo "FAIL: nc cannot connect via NAT'd address"
exit 1
fi
exit 0