#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
+#include <linux/rcupdate.h>
#include <net/protocol.h>
#include <net/ip.h>
#include <net/route.h>
/* The locking scheme is rather straight forward:
*
- * 1) A BH protected rwlocks protect buckets of the central route hash.
+ * 1) Read-Copy Update protects the buckets of the central route hash.
* 2) Only writers remove entries, and they hold the lock
* as they look at rtable reference counts.
* 3) Only readers acquire references to rtable entries,
struct rt_hash_bucket {
struct rtable *chain;
- rwlock_t lock;
+ spinlock_t lock;
} __attribute__((__aligned__(8)));
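+/*
+ * With RCU the readers walk a bucket's chain without taking the bucket
+ * lock at all: they enter rcu_read_lock(), issue read_barrier_depends()
+ * before chasing rt_next pointers, and grab a dst_hold() reference before
+ * leaving the read-side critical section.  The per-bucket lock below is
+ * therefore a plain spinlock that only serializes updaters; entries they
+ * unlink are handed to call_rcu() (see rt_free/rt_drop) so they are not
+ * freed until every reader that might still see them has finished.
+ */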
static struct rt_hash_bucket *rt_hash_table;
struct rt_cache_iter_state *st = seq->private;
for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
- read_lock_bh(&rt_hash_table[st->bucket].lock);
+ rcu_read_lock();
r = rt_hash_table[st->bucket].chain;
if (r)
break;
- read_unlock_bh(&rt_hash_table[st->bucket].lock);
+ rcu_read_unlock();
}
return r;
}
{
struct rt_cache_iter_state *st = seq->private;
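+ /* r was loaded inside the RCU read-side section; the dependency
+  * barrier below makes sure the entry it points to is seen fully
+  * initialized before r->u.rt_next is dereferenced (only Alpha emits
+  * a real barrier here).
+  */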
+ read_barrier_depends();
r = r->u.rt_next;
while (!r) {
- read_unlock_bh(&rt_hash_table[st->bucket].lock);
+ rcu_read_unlock();
if (--st->bucket < 0)
break;
- read_lock_bh(&rt_hash_table[st->bucket].lock);
+ rcu_read_lock();
r = rt_hash_table[st->bucket].chain;
}
return r;
if (v && v != (void *)1) {
struct rt_cache_iter_state *st = seq->private;
- read_unlock_bh(&rt_hash_table[st->bucket].lock);
+ rcu_read_unlock();
}
}
static __inline__ void rt_free(struct rtable *rt)
{
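+ /* Unlinked entries may still be referenced by readers inside an RCU
+  * read-side section, so the actual dst_free() is deferred with
+  * call_rcu() until a grace period has elapsed.  rt_drop() below gets
+  * the same treatment.
+  */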
- dst_free(&rt->u.dst);
+ call_rcu(&rt->u.dst.rcu_head, (void (*)(void *))dst_free, &rt->u.dst);
}
static __inline__ void rt_drop(struct rtable *rt)
{
ip_rt_put(rt);
- dst_free(&rt->u.dst);
+ call_rcu(&rt->u.dst.rcu_head, (void (*)(void *))dst_free, &rt->u.dst);
}
static __inline__ int rt_fast_clean(struct rtable *rth)
i = (i + 1) & rt_hash_mask;
rthp = &rt_hash_table[i].chain;
- write_lock(&rt_hash_table[i].lock);
+ spin_lock(&rt_hash_table[i].lock);
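+ /* Updaters still serialize on the per-bucket spinlock while they
+  * unlink expired entries; rt_free() defers the actual freeing past
+  * any concurrent RCU readers.
+  */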
while ((rth = *rthp) != NULL) {
if (rth->u.dst.expires) {
/* Entry is expired even if it is in use */
*rthp = rth->u.rt_next;
rt_free(rth);
}
- write_unlock(&rt_hash_table[i].lock);
+ spin_unlock(&rt_hash_table[i].lock);
/* Fallback loop breaker. */
if ((jiffies - now) > 0)
rt_deadline = 0;
for (i = rt_hash_mask; i >= 0; i--) {
- write_lock_bh(&rt_hash_table[i].lock);
+ spin_lock_bh(&rt_hash_table[i].lock);
rth = rt_hash_table[i].chain;
if (rth)
rt_hash_table[i].chain = NULL;
- write_unlock_bh(&rt_hash_table[i].lock);
+ spin_unlock_bh(&rt_hash_table[i].lock);
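+ /* The whole chain was detached under the lock; it can now be walked
+  * and released outside it, each entry being freed only after a grace
+  * period via rt_free().
+  */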
for (; rth; rth = next) {
next = rth->u.rt_next;
k = (k + 1) & rt_hash_mask;
rthp = &rt_hash_table[k].chain;
- write_lock_bh(&rt_hash_table[k].lock);
+ spin_lock_bh(&rt_hash_table[k].lock);
while ((rth = *rthp) != NULL) {
if (!rt_may_expire(rth, tmo, expire)) {
tmo >>= 1;
rt_free(rth);
goal--;
}
- write_unlock_bh(&rt_hash_table[k].lock);
+ spin_unlock_bh(&rt_hash_table[k].lock);
if (goal <= 0)
break;
}
restart:
rthp = &rt_hash_table[hash].chain;
- write_lock_bh(&rt_hash_table[hash].lock);
+ spin_lock_bh(&rt_hash_table[hash].lock);
while ((rth = *rthp) != NULL) {
if (compare_keys(&rth->fl, &rt->fl)) {
/* Put it first */
rth->u.dst.__use++;
dst_hold(&rth->u.dst);
rth->u.dst.lastuse = now;
- write_unlock_bh(&rt_hash_table[hash].lock);
+ spin_unlock_bh(&rt_hash_table[hash].lock);
rt_drop(rt);
*rp = rth;
if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
int err = arp_bind_neighbour(&rt->u.dst);
if (err) {
- write_unlock_bh(&rt_hash_table[hash].lock);
+ spin_unlock_bh(&rt_hash_table[hash].lock);
if (err != -ENOBUFS) {
rt_drop(rt);
}
#endif
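+ /* Publication: without a write barrier here a lock-free reader could
+  * see the new chain head before the entry's fields are initialized.
+  * (A sketch of the fix, assuming the elided context does not already
+  * contain an equivalent smp_wmb().)
+  */
+ smp_wmb();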
rt_hash_table[hash].chain = rt;
- write_unlock_bh(&rt_hash_table[hash].lock);
+ spin_unlock_bh(&rt_hash_table[hash].lock);
*rp = rt;
return 0;
}
{
struct rtable **rthp;
- write_lock_bh(&rt_hash_table[hash].lock);
+ spin_lock_bh(&rt_hash_table[hash].lock);
ip_rt_put(rt);
for (rthp = &rt_hash_table[hash].chain; *rthp;
rthp = &(*rthp)->u.rt_next)
rt_free(rt);
break;
}
- write_unlock_bh(&rt_hash_table[hash].lock);
+ spin_unlock_bh(&rt_hash_table[hash].lock);
}
void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
rthp = &rt_hash_table[hash].chain;
- read_lock(&rt_hash_table[hash].lock);
+ rcu_read_lock();
while ((rth = *rthp) != NULL) {
struct rtable *rt;
+ read_barrier_depends();
if (rth->fl.fl4_dst != daddr ||
rth->fl.fl4_src != skeys[i] ||
rth->fl.fl4_tos != tos ||
break;
dst_clone(&rth->u.dst);
- read_unlock(&rt_hash_table[hash].lock);
+ rcu_read_unlock();
rt = dst_alloc(&ipv4_dst_ops);
if (rt == NULL) {
/* Copy all the information. */
*rt = *rth;
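+ /* The structure copy above duplicated the old entry's rcu_head as
+  * well, so reinitialize it before this copy can ever be handed to
+  * call_rcu().
+  */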
+ INIT_RCU_HEAD(&rt->u.dst.rcu_head);
rt->u.dst.__use = 1;
atomic_set(&rt->u.dst.__refcnt, 1);
if (rt->u.dst.dev)
ip_rt_put(rt);
goto do_next;
}
- read_unlock(&rt_hash_table[hash].lock);
+ rcu_read_unlock();
do_next:
;
}
for (i = 0; i < 2; i++) {
unsigned hash = rt_hash_code(daddr, skeys[i], tos);
- read_lock(&rt_hash_table[hash].lock);
+ rcu_read_lock();
for (rth = rt_hash_table[hash].chain; rth;
rth = rth->u.rt_next) {
+ read_barrier_depends();
if (rth->fl.fl4_dst == daddr &&
rth->fl.fl4_src == skeys[i] &&
rth->rt_dst == daddr &&
}
}
}
- read_unlock(&rt_hash_table[hash].lock);
+ rcu_read_unlock();
}
return est_mtu ? : new_mtu;
}
tos &= IPTOS_RT_MASK;
hash = rt_hash_code(daddr, saddr ^ (iif << 5), tos);
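+ /* Input-path lookup is now lock-free: walk the bucket chain inside an
+  * RCU read-side section and take a dst_hold() reference before
+  * leaving it, so the entry cannot be freed underneath us.
+  */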
- read_lock(&rt_hash_table[hash].lock);
+ rcu_read_lock();
for (rth = rt_hash_table[hash].chain; rth; rth = rth->u.rt_next) {
+ read_barrier_depends();
if (rth->fl.fl4_dst == daddr &&
rth->fl.fl4_src == saddr &&
rth->fl.iif == iif &&
dst_hold(&rth->u.dst);
rth->u.dst.__use++;
rt_cache_stat[smp_processor_id()].in_hit++;
- read_unlock(&rt_hash_table[hash].lock);
+ rcu_read_unlock();
skb->dst = (struct dst_entry*)rth;
return 0;
}
}
- read_unlock(&rt_hash_table[hash].lock);
+ rcu_read_unlock();
/* Multicast recognition logic is moved from route cache to here.
The problem was that too many Ethernet cards have broken/missing
hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5), flp->fl4_tos);
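+ /* Same pattern on the output path: rcu_read_lock() replaces the
+  * BH-disabling read lock, so lookups no longer block updaters.
+  */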
- read_lock_bh(&rt_hash_table[hash].lock);
+ rcu_read_lock();
for (rth = rt_hash_table[hash].chain; rth; rth = rth->u.rt_next) {
+ read_barrier_depends();
if (rth->fl.fl4_dst == flp->fl4_dst &&
rth->fl.fl4_src == flp->fl4_src &&
rth->fl.iif == 0 &&
dst_hold(&rth->u.dst);
rth->u.dst.__use++;
rt_cache_stat[smp_processor_id()].out_hit++;
- read_unlock_bh(&rt_hash_table[hash].lock);
+ rcu_read_unlock();
*rp = rth;
return 0;
}
}
- read_unlock_bh(&rt_hash_table[hash].lock);
+ rcu_read_unlock();
return ip_route_output_slow(rp, flp);
}
if (h < s_h) continue;
if (h > s_h)
s_idx = 0;
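+ /* The netlink cache dump also walks each bucket under RCU, holding
+  * the read-side section only while that bucket's chain is traversed.
+  */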
- read_lock_bh(&rt_hash_table[h].lock);
+ rcu_read_lock();
for (rt = rt_hash_table[h].chain, idx = 0; rt;
rt = rt->u.rt_next, idx++) {
+ read_barrier_depends();
if (idx < s_idx)
continue;
skb->dst = dst_clone(&rt->u.dst);
cb->nlh->nlmsg_seq,
RTM_NEWROUTE, 1) <= 0) {
dst_release(xchg(&skb->dst, NULL));
- read_unlock_bh(&rt_hash_table[h].lock);
+ rcu_read_unlock();
goto done;
}
dst_release(xchg(&skb->dst, NULL));
}
- read_unlock_bh(&rt_hash_table[h].lock);
+ rcu_read_unlock();
}
done:
rt_hash_mask--;
for (i = 0; i <= rt_hash_mask; i++) {
- rt_hash_table[i].lock = RW_LOCK_UNLOCKED;
+ rt_hash_table[i].lock = SPIN_LOCK_UNLOCKED;
rt_hash_table[i].chain = NULL;
}