]> git.hungrycats.org Git - linux/commitdiff
net/sched: adjust device watchdog timer to detect stopped queue at right time
authorPraveen Kumar Kannoju <praveen.kannoju@oracle.com>
Wed, 8 May 2024 13:36:17 +0000 (19:06 +0530)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 1 Nov 2024 00:56:04 +0000 (01:56 +0100)
[ Upstream commit 33fb988b67050d9bb512f77f08453fa00088943c ]

Applications are sensitive to long network latency, particularly
heartbeat monitoring ones. Longer the tx timeout recovery higher the
risk with such applications on a production machines. This patch
remedies, yet honoring device set tx timeout.

Modify watchdog next timeout to be shorter than the device specified.
Compute the next timeout be equal to device watchdog timeout less the
how long ago queue stop had been done. At next watchdog timeout tx
timeout handler is called into if still in stopped state. Either called
or not called, restore the watchdog timeout back to device specified.

Signed-off-by: Praveen Kumar Kannoju <praveen.kannoju@oracle.com>
Link: https://lore.kernel.org/r/20240508133617.4424-1-praveen.kannoju@oracle.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Stable-dep-of: 95ecba62e2fd ("net: fix races in netdev_tx_sent_queue()/dev_watchdog()")
Signed-off-by: Sasha Levin <sashal@kernel.org>
net/sched/sch_generic.c

index 6ab9359c1706f1b177574c9fe2146b13ee2f369f..7f0c8df7b63e05750d1c80ef35c7051e0832aa66 100644 (file)
@@ -505,19 +505,22 @@ static void dev_watchdog(struct timer_list *t)
                        unsigned int timedout_ms = 0;
                        unsigned int i;
                        unsigned long trans_start;
+                       unsigned long oldest_start = jiffies;
 
                        for (i = 0; i < dev->num_tx_queues; i++) {
                                struct netdev_queue *txq;
 
                                txq = netdev_get_tx_queue(dev, i);
                                trans_start = READ_ONCE(txq->trans_start);
-                               if (netif_xmit_stopped(txq) &&
-                                   time_after(jiffies, (trans_start +
-                                                        dev->watchdog_timeo))) {
+                               if (!netif_xmit_stopped(txq))
+                                       continue;
+                               if (time_after(jiffies, trans_start + dev->watchdog_timeo)) {
                                        timedout_ms = jiffies_to_msecs(jiffies - trans_start);
                                        atomic_long_inc(&txq->trans_timeout);
                                        break;
                                }
+                               if (time_after(oldest_start, trans_start))
+                                       oldest_start = trans_start;
                        }
 
                        if (unlikely(timedout_ms)) {
@@ -530,7 +533,7 @@ static void dev_watchdog(struct timer_list *t)
                                netif_unfreeze_queues(dev);
                        }
                        if (!mod_timer(&dev->watchdog_timer,
-                                      round_jiffies(jiffies +
+                                      round_jiffies(oldest_start +
                                                     dev->watchdog_timeo)))
                                release = false;
                }