Merge branch 'net_proc_perf'
Jia He says:
====================
Reduce cache miss for snmp_fold_field
On a PowerPC server with a large number of CPUs (160), besides commit
a3a773726c9f ("net: Optimize snmp stat aggregation by walking all
the percpu data at once"), I noticed several other snmp_fold_field
call sites that cause a high cache-miss rate.
test source code:
================
My simple test case, which reads the given procfs file endlessly:
/***********************************************************/
/*
 * Cache-miss stress test: reads the file named by argv[1] from start to
 * end over and over, rewinding to offset 0 after each pass.
 *
 * Returns -1 when no file name is given, -2 when the file cannot be
 * opened, and 0 once the last pass has finished.
 *
 * LINELEN and the required headers (<stdio.h>, <string.h>, <fcntl.h>,
 * <unistd.h>) are not shown in this excerpt and must be supplied by the
 * surrounding file.
 */
int main(int argc, char **argv)
{
	/*
	 * Unsigned on purpose: the bound 0xffffffff does not fit in a signed
	 * int, so a signed counter would have to overflow to reach it.
	 */
	unsigned int i;
	int fd = -1;
	char buf[LINELEN + 1];

	/* Clear the whole buffer, including the trailing terminator byte. */
	memset(buf, 0, sizeof(buf));

	if (argc <= 1) {
		printf("file name empty\n");
		return -1;
	}

	/* The file is only ever read, so read-only access is sufficient. */
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		printf("open error\n");
		return -2;
	}

	for (i = 0; i < 0xffffffff; i++) {
		/* Drain the file; the data itself is discarded. */
		while (read(fd, buf, LINELEN) > 0) {
			/* nothing here */
		}
		lseek(fd, 0, SEEK_SET);
	}

	close(fd);
	return 0;
}
/**********************************************************/
compile and run:
================
gcc test.c -o test
perf stat -d -e cache-misses ./test /proc/net/snmp
perf stat -d -e cache-misses ./test /proc/net/snmp6
perf stat -d -e cache-misses ./test /proc/net/sctp/snmp
perf stat -d -e cache-misses ./test /proc/net/xfrm_stat
before the patch set:
====================
Performance counter stats for 'system wide':
355911097 cache-misses [40.08%]
2356829300 L1-dcache-loads [60.04%]
355642645 L1-dcache-load-misses # 15.09% of all L1-dcache hits [60.02%]
346544541 LLC-loads [59.97%]
389763 LLC-load-misses # 0.11% of all LL-cache hits [40.02%]
6.245162638 seconds time elapsed
After the patch set:
===================
Performance counter stats for 'system wide':
194992476 cache-misses [40.03%]
6718051877 L1-dcache-loads [60.07%]
194871921 L1-dcache-load-misses # 2.90% of all L1-dcache hits [60.11%]
187632232 LLC-loads [60.04%]
464466 LLC-load-misses # 0.25% of all LL-cache hits [39.89%]
6.868422769 seconds time elapsed
The L1-dcache load-miss rate is reduced from 15.09% to 2.90%.
changelog
=========
v6:
- correct v5
v5:
- order local variables from longest to shortest line
v4:
- move memset into one block of if statement in snmp6_seq_show_item
- remove the changes in netstat_seq_show, considering that the stack usage would be too large
v3:
- introduce generic interface (suggested by Marcelo Ricardo Leitner)
- use max_t instead of self defined macro (suggested by David Miller)
v2:
- fix bug in udplite statistics.
- snmp_seq_show is split into 2 parts
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/net/ip.h b/include/net/ip.h
index 9742b92..bc43c0f 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -219,6 +219,29 @@
}
#endif
+#define snmp_get_cpu_field64_batch(buff64, stats_list, mib_statistic, offset) \
+{ \
+ int i, c; \
+ for_each_possible_cpu(c) { \
+ for (i = 0; stats_list[i].name; i++) \
+ buff64[i] += snmp_get_cpu_field64( \
+ mib_statistic, \
+ c, stats_list[i].entry, \
+ offset); \
+ } \
+}
+
+#define snmp_get_cpu_field_batch(buff, stats_list, mib_statistic) \
+{ \
+ int i, c; \
+ for_each_possible_cpu(c) { \
+ for (i = 0; stats_list[i].name; i++) \
+ buff[i] += snmp_get_cpu_field( \
+ mib_statistic, \
+ c, stats_list[i].entry); \
+ } \
+}
+
void inet_get_local_port_range(struct net *net, int *low, int *high);
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 1ed015e..7143ca1 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -46,6 +46,8 @@
#include <net/sock.h>
#include <net/raw.h>
+#define TCPUDP_MIB_MAX max_t(u32, UDP_MIB_MAX, TCP_MIB_MAX)
+
/*
* Report socket allocation statistics [mea@utu.fi]
*/
@@ -356,22 +358,22 @@
atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs;
seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors");
- for (i = 0; icmpmibmap[i].name != NULL; i++)
+ for (i = 0; icmpmibmap[i].name; i++)
seq_printf(seq, " In%s", icmpmibmap[i].name);
seq_puts(seq, " OutMsgs OutErrors");
- for (i = 0; icmpmibmap[i].name != NULL; i++)
+ for (i = 0; icmpmibmap[i].name; i++)
seq_printf(seq, " Out%s", icmpmibmap[i].name);
seq_printf(seq, "\nIcmp: %lu %lu %lu",
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS),
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS),
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS));
- for (i = 0; icmpmibmap[i].name != NULL; i++)
+ for (i = 0; icmpmibmap[i].name; i++)
seq_printf(seq, " %lu",
atomic_long_read(ptr + icmpmibmap[i].index));
seq_printf(seq, " %lu %lu",
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
- for (i = 0; icmpmibmap[i].name != NULL; i++)
+ for (i = 0; icmpmibmap[i].name; i++)
seq_printf(seq, " %lu",
atomic_long_read(ptr + (icmpmibmap[i].index | 0x100)));
}
@@ -379,14 +381,16 @@
/*
* Called from the PROCfs module. This outputs /proc/net/snmp.
*/
-static int snmp_seq_show(struct seq_file *seq, void *v)
+static int snmp_seq_show_ipstats(struct seq_file *seq, void *v)
{
- int i;
struct net *net = seq->private;
+ u64 buff64[IPSTATS_MIB_MAX];
+ int i;
+
+ memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64));
seq_puts(seq, "Ip: Forwarding DefaultTTL");
-
- for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
+ for (i = 0; snmp4_ipstats_list[i].name; i++)
seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
seq_printf(seq, "\nIp: %d %d",
@@ -394,54 +398,74 @@
net->ipv4.sysctl_ip_default_ttl);
BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
- for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
- seq_printf(seq, " %llu",
- snmp_fold_field64(net->mib.ip_statistics,
- snmp4_ipstats_list[i].entry,
- offsetof(struct ipstats_mib, syncp)));
+ snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list,
+ net->mib.ip_statistics,
+ offsetof(struct ipstats_mib, syncp));
+ for (i = 0; snmp4_ipstats_list[i].name; i++)
+ seq_printf(seq, " %llu", buff64[i]);
+
+ return 0;
+}
+
+static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v)
+{
+ unsigned long buff[TCPUDP_MIB_MAX];
+ struct net *net = seq->private;
+ int i;
+
+ memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
+
+ seq_puts(seq, "\nTcp:");
+ for (i = 0; snmp4_tcp_list[i].name; i++)
+ seq_printf(seq, " %s", snmp4_tcp_list[i].name);
+
+ seq_puts(seq, "\nTcp:");
+ snmp_get_cpu_field_batch(buff, snmp4_tcp_list,
+ net->mib.tcp_statistics);
+ for (i = 0; snmp4_tcp_list[i].name; i++) {
+ /* MaxConn field is signed, RFC 2012 */
+ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
+ seq_printf(seq, " %ld", buff[i]);
+ else
+ seq_printf(seq, " %lu", buff[i]);
+ }
+
+ memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
+
+ snmp_get_cpu_field_batch(buff, snmp4_udp_list,
+ net->mib.udp_statistics);
+ seq_puts(seq, "\nUdp:");
+ for (i = 0; snmp4_udp_list[i].name; i++)
+ seq_printf(seq, " %s", snmp4_udp_list[i].name);
+ seq_puts(seq, "\nUdp:");
+ for (i = 0; snmp4_udp_list[i].name; i++)
+ seq_printf(seq, " %lu", buff[i]);
+
+ memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
+
+ /* the UDP and UDP-Lite MIBs are the same */
+ seq_puts(seq, "\nUdpLite:");
+ snmp_get_cpu_field_batch(buff, snmp4_udp_list,
+ net->mib.udplite_statistics);
+ for (i = 0; snmp4_udp_list[i].name; i++)
+ seq_printf(seq, " %s", snmp4_udp_list[i].name);
+ seq_puts(seq, "\nUdpLite:");
+ for (i = 0; snmp4_udp_list[i].name; i++)
+ seq_printf(seq, " %lu", buff[i]);
+
+ seq_putc(seq, '\n');
+ return 0;
+}
+
+static int snmp_seq_show(struct seq_file *seq, void *v)
+{
+ snmp_seq_show_ipstats(seq, v);
icmp_put(seq); /* RFC 2011 compatibility */
icmpmsg_put(seq);
- seq_puts(seq, "\nTcp:");
- for (i = 0; snmp4_tcp_list[i].name != NULL; i++)
- seq_printf(seq, " %s", snmp4_tcp_list[i].name);
+ snmp_seq_show_tcp_udp(seq, v);
- seq_puts(seq, "\nTcp:");
- for (i = 0; snmp4_tcp_list[i].name != NULL; i++) {
- /* MaxConn field is signed, RFC 2012 */
- if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
- seq_printf(seq, " %ld",
- snmp_fold_field(net->mib.tcp_statistics,
- snmp4_tcp_list[i].entry));
- else
- seq_printf(seq, " %lu",
- snmp_fold_field(net->mib.tcp_statistics,
- snmp4_tcp_list[i].entry));
- }
-
- seq_puts(seq, "\nUdp:");
- for (i = 0; snmp4_udp_list[i].name != NULL; i++)
- seq_printf(seq, " %s", snmp4_udp_list[i].name);
-
- seq_puts(seq, "\nUdp:");
- for (i = 0; snmp4_udp_list[i].name != NULL; i++)
- seq_printf(seq, " %lu",
- snmp_fold_field(net->mib.udp_statistics,
- snmp4_udp_list[i].entry));
-
- /* the UDP and UDP-Lite MIBs are the same */
- seq_puts(seq, "\nUdpLite:");
- for (i = 0; snmp4_udp_list[i].name != NULL; i++)
- seq_printf(seq, " %s", snmp4_udp_list[i].name);
-
- seq_puts(seq, "\nUdpLite:");
- for (i = 0; snmp4_udp_list[i].name != NULL; i++)
- seq_printf(seq, " %lu",
- snmp_fold_field(net->mib.udplite_statistics,
- snmp4_udp_list[i].entry));
-
- seq_putc(seq, '\n');
return 0;
}
@@ -469,21 +493,21 @@
struct net *net = seq->private;
seq_puts(seq, "TcpExt:");
- for (i = 0; snmp4_net_list[i].name != NULL; i++)
+ for (i = 0; snmp4_net_list[i].name; i++)
seq_printf(seq, " %s", snmp4_net_list[i].name);
seq_puts(seq, "\nTcpExt:");
- for (i = 0; snmp4_net_list[i].name != NULL; i++)
+ for (i = 0; snmp4_net_list[i].name; i++)
seq_printf(seq, " %lu",
snmp_fold_field(net->mib.net_statistics,
snmp4_net_list[i].entry));
seq_puts(seq, "\nIpExt:");
- for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
+ for (i = 0; snmp4_ipextstats_list[i].name; i++)
seq_printf(seq, " %s", snmp4_ipextstats_list[i].name);
seq_puts(seq, "\nIpExt:");
- for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
+ for (i = 0; snmp4_ipextstats_list[i].name; i++)
seq_printf(seq, " %llu",
snmp_fold_field64(net->mib.ip_statistics,
snmp4_ipextstats_list[i].entry,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2f1f5d4..35d4baa 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4961,18 +4961,18 @@
}
static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib,
- int items, int bytes)
+ int bytes)
{
int i;
- int pad = bytes - sizeof(u64) * items;
+ int pad = bytes - sizeof(u64) * ICMP6_MIB_MAX;
BUG_ON(pad < 0);
/* Use put_unaligned() because stats may not be aligned for u64. */
- put_unaligned(items, &stats[0]);
- for (i = 1; i < items; i++)
+ put_unaligned(ICMP6_MIB_MAX, &stats[0]);
+ for (i = 1; i < ICMP6_MIB_MAX; i++)
put_unaligned(atomic_long_read(&mib[i]), &stats[i]);
- memset(&stats[items], 0, pad);
+ memset(&stats[ICMP6_MIB_MAX], 0, pad);
}
static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib,
@@ -5005,7 +5005,7 @@
offsetof(struct ipstats_mib, syncp));
break;
case IFLA_INET6_ICMP6STATS:
- __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes);
+ __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, bytes);
break;
}
}
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 679253d0..cc8e3ae 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -30,6 +30,11 @@
#include <net/transp_v6.h>
#include <net/ipv6.h>
+#define MAX4(a, b, c, d) \
+ max_t(u32, max_t(u32, a, b), max_t(u32, c, d))
+#define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \
+ IPSTATS_MIB_MAX, ICMP_MIB_MAX)
+
static int sockstat6_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
@@ -191,25 +196,34 @@
atomic_long_t *smib,
const struct snmp_mib *itemlist)
{
+ unsigned long buff[SNMP_MIB_MAX];
int i;
- unsigned long val;
- for (i = 0; itemlist[i].name; i++) {
- val = pcpumib ?
- snmp_fold_field(pcpumib, itemlist[i].entry) :
- atomic_long_read(smib + itemlist[i].entry);
- seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, val);
+ if (pcpumib) {
+ memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX);
+
+ snmp_get_cpu_field_batch(buff, itemlist, pcpumib);
+ for (i = 0; itemlist[i].name; i++)
+ seq_printf(seq, "%-32s\t%lu\n",
+ itemlist[i].name, buff[i]);
+ } else {
+ for (i = 0; itemlist[i].name; i++)
+ seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name,
+ atomic_long_read(smib + itemlist[i].entry));
}
}
static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib,
const struct snmp_mib *itemlist, size_t syncpoff)
{
+ u64 buff64[SNMP_MIB_MAX];
int i;
+ memset(buff64, 0, sizeof(unsigned long) * SNMP_MIB_MAX);
+
+ snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff);
for (i = 0; itemlist[i].name; i++)
- seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name,
- snmp_fold_field64(mib, itemlist[i].entry, syncpoff));
+ seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, buff64[i]);
}
static int snmp6_seq_show(struct seq_file *seq, void *v)
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index ef8ba77..206377f 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -73,13 +73,17 @@
/* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */
static int sctp_snmp_seq_show(struct seq_file *seq, void *v)
{
+ unsigned long buff[SCTP_MIB_MAX];
struct net *net = seq->private;
int i;
- for (i = 0; sctp_snmp_list[i].name != NULL; i++)
+ memset(buff, 0, sizeof(unsigned long) * SCTP_MIB_MAX);
+
+ snmp_get_cpu_field_batch(buff, sctp_snmp_list,
+ net->sctp.sctp_statistics);
+ for (i = 0; sctp_snmp_list[i].name; i++)
seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name,
- snmp_fold_field(net->sctp.sctp_statistics,
- sctp_snmp_list[i].entry));
+ buff[i]);
return 0;
}
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index 9c4fbd8..ba2b539 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -50,12 +50,18 @@
static int xfrm_statistics_seq_show(struct seq_file *seq, void *v)
{
+ unsigned long buff[LINUX_MIB_XFRMMAX];
struct net *net = seq->private;
int i;
+
+ memset(buff, 0, sizeof(unsigned long) * LINUX_MIB_XFRMMAX);
+
+ snmp_get_cpu_field_batch(buff, xfrm_mib_list,
+ net->mib.xfrm_statistics);
for (i = 0; xfrm_mib_list[i].name; i++)
seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name,
- snmp_fold_field(net->mib.xfrm_statistics,
- xfrm_mib_list[i].entry));
+ buff[i]);
+
return 0;
}