blob: f44de4bc2100a811f4c2886668e390a9ac74a82a [file] [log] [blame]
/*
* Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
* Copyright © CC Computer Consultants GmbH, 2007 - 2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This is a replacement of the old ipt_recent module, which carried the
* following copyright notice:
*
* Author: Stephen Frost <sfrost@snowman.net>
* Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/init.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/bitops.h>
#include <linux/skbuff.h>
#include <linux/inet.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_recent.h>
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_recent");
MODULE_ALIAS("ip6t_recent");
static unsigned int ip_list_tot __read_mostly = 100;
static unsigned int ip_list_hash_size __read_mostly;
static unsigned int ip_list_perms __read_mostly = 0644;
static unsigned int ip_list_uid __read_mostly;
static unsigned int ip_list_gid __read_mostly;
module_param(ip_list_tot, uint, 0400);
module_param(ip_list_hash_size, uint, 0400);
module_param(ip_list_perms, uint, 0400);
module_param(ip_list_uid, uint, 0644);
module_param(ip_list_gid, uint, 0644);
MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files");
MODULE_PARM_DESC(ip_list_uid, "default owner of /proc/net/xt_recent/* files");
MODULE_PARM_DESC(ip_list_gid, "default owning group of /proc/net/xt_recent/* files");
/* retained for backwards compatibility */
static unsigned int ip_pkt_list_tot __read_mostly;
module_param(ip_pkt_list_tot, uint, 0400);
MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 255)");
#define XT_RECENT_MAX_NSTAMPS 256
struct recent_entry {
struct list_head list;
struct list_head lru_list;
union nf_inet_addr addr;
u_int16_t family;
u_int8_t ttl;
u_int8_t index;
u_int16_t nstamps;
unsigned long stamps[0];
};
struct recent_table {
struct list_head list;
char name[XT_RECENT_NAME_LEN];
union nf_inet_addr mask;
unsigned int refcnt;
unsigned int entries;
u8 nstamps_max_mask;
struct list_head lru_list;
struct list_head iphash[0];
};
struct recent_net {
struct list_head tables;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *xt_recent;
#endif
};
static unsigned int recent_net_id __read_mostly;
static inline struct recent_net *recent_pernet(struct net *net)
{
return net_generic(net, recent_net_id);
}
static DEFINE_SPINLOCK(recent_lock);
static DEFINE_MUTEX(recent_mutex);
#ifdef CONFIG_PROC_FS
static const struct file_operations recent_mt_fops;
#endif
static u_int32_t hash_rnd __read_mostly;
static inline unsigned int recent_entry_hash4(const union nf_inet_addr *addr)
{
return jhash_1word((__force u32)addr->ip, hash_rnd) &
(ip_list_hash_size - 1);
}
static inline unsigned int recent_entry_hash6(const union nf_inet_addr *addr)
{
return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6), hash_rnd) &
(ip_list_hash_size - 1);
}
static struct recent_entry *
recent_entry_lookup(const struct recent_table *table,
const union nf_inet_addr *addrp, u_int16_t family,
u_int8_t ttl)
{
struct recent_entry *e;
unsigned int h;
if (family == NFPROTO_IPV4)
h = recent_entry_hash4(addrp);
else
h = recent_entry_hash6(addrp);
list_for_each_entry(e, &table->iphash[h], list)
if (e->family == family &&
memcmp(&e->addr, addrp, sizeof(e->addr)) == 0 &&
(ttl == e->ttl || ttl == 0 || e->ttl == 0))
return e;
return NULL;
}
static void recent_entry_remove(struct recent_table *t, struct recent_entry *e)
{
list_del(&e->list);
list_del(&e->lru_list);
kfree(e);
t->entries--;
}
/*
* Drop entries with timestamps older then 'time'.
*/
static void recent_entry_reap(struct recent_table *t, unsigned long time)
{
struct recent_entry *e;
/*
* The head of the LRU list is always the oldest entry.
*/
e = list_entry(t->lru_list.next, struct recent_entry, lru_list);
/*
* The last time stamp is the most recent.
*/
if (time_after(time, e->stamps[e->index-1]))
recent_entry_remove(t, e);
}
static struct recent_entry *
recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr,
u_int16_t family, u_int8_t ttl)
{
struct recent_entry *e;
unsigned int nstamps_max = t->nstamps_max_mask;
if (t->entries >= ip_list_tot) {
e = list_entry(t->lru_list.next, struct recent_entry, lru_list);
recent_entry_remove(t, e);
}
nstamps_max += 1;
e = kmalloc(struct_size(e, stamps, nstamps_max), GFP_ATOMIC);
if (e == NULL)
return NULL;
memcpy(&e->addr, addr, sizeof(e->addr));
e->ttl = ttl;
e->stamps[0] = jiffies;
e->nstamps = 1;
e->index = 1;
e->family = family;
if (family == NFPROTO_IPV4)
list_add_tail(&e->list, &t->iphash[recent_entry_hash4(addr)]);
else
list_add_tail(&e->list, &t->iphash[recent_entry_hash6(addr)]);
list_add_tail(&e->lru_list, &t->lru_list);
t->entries++;
return e;
}
static void recent_entry_update(struct recent_table *t, struct recent_entry *e)
{
e->index &= t->nstamps_max_mask;
e->stamps[e->index++] = jiffies;
if (e->index > e->nstamps)
e->nstamps = e->index;
list_move_tail(&e->lru_list, &t->lru_list);
}
static struct recent_table *recent_table_lookup(struct recent_net *recent_net,
const char *name)
{
struct recent_table *t;
list_for_each_entry(t, &recent_net->tables, list)
if (!strcmp(t->name, name))
return t;
return NULL;
}
static void recent_table_flush(struct recent_table *t)
{
struct recent_entry *e, *next;
unsigned int i;
for (i = 0; i < ip_list_hash_size; i++)
list_for_each_entry_safe(e, next, &t->iphash[i], list)
recent_entry_remove(t, e);
}
static bool
recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
struct net *net = xt_net(par);
struct recent_net *recent_net = recent_pernet(net);
const struct xt_recent_mtinfo_v1 *info = par->matchinfo;
struct recent_table *t;
struct recent_entry *e;
union nf_inet_addr addr = {}, addr_mask;
u_int8_t ttl;
bool ret = info->invert;
if (xt_family(par) == NFPROTO_IPV4) {
const struct iphdr *iph = ip_hdr(skb);
if (info->side == XT_RECENT_DEST)
addr.ip = iph->daddr;
else
addr.ip = iph->saddr;
ttl = iph->ttl;
} else {
const struct ipv6hdr *iph = ipv6_hdr(skb);
if (info->side == XT_RECENT_DEST)
memcpy(&addr.in6, &iph->daddr, sizeof(addr.in6));
else
memcpy(&addr.in6, &iph->saddr, sizeof(addr.in6));
ttl = iph->hop_limit;
}
/* use TTL as seen before forwarding */
if (xt_out(par) != NULL &&
(!skb->sk || !net_eq(net, sock_net(skb->sk))))
ttl++;
spin_lock_bh(&recent_lock);
t = recent_table_lookup(recent_net, info->name);
nf_inet_addr_mask(&addr, &addr_mask, &t->mask);
e = recent_entry_lookup(t, &addr_mask, xt_family(par),
(info->check_set & XT_RECENT_TTL) ? ttl : 0);
if (e == NULL) {
if (!(info->check_set & XT_RECENT_SET))
goto out;
e = recent_entry_init(t, &addr_mask, xt_family(par), ttl);
if (e == NULL)
par->hotdrop = true;
ret = !ret;
goto out;
}
if (info->check_set & XT_RECENT_SET)
ret = !ret;
else if (info->check_set & XT_RECENT_REMOVE) {
recent_entry_remove(t, e);
ret = !ret;
} else if (info->check_set & (XT_RECENT_CHECK | XT_RECENT_UPDATE)) {
unsigned long time = jiffies - info->seconds * HZ;
unsigned int i, hits = 0;
for (i = 0; i < e->nstamps; i++) {
if (info->seconds && time_after(time, e->stamps[i]))
continue;
if (!info->hit_count || ++hits >= info->hit_count) {
ret = !ret;
break;
}
}
/* info->seconds must be non-zero */
if (info->check_set & XT_RECENT_REAP)
recent_entry_reap(t, time);
}
if (info->check_set & XT_RECENT_SET ||
(info->check_set & XT_RECENT_UPDATE && ret)) {
recent_entry_update(t, e);
e->ttl = ttl;
}
out:
spin_unlock_bh(&recent_lock);
return ret;
}
static void recent_table_free(void *addr)
{
kvfree(addr);
}
static int recent_mt_check(const struct xt_mtchk_param *par,
const struct xt_recent_mtinfo_v1 *info)
{
struct recent_net *recent_net = recent_pernet(par->net);
struct recent_table *t;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *pde;
kuid_t uid;
kgid_t gid;
#endif
unsigned int nstamp_mask;
unsigned int i;
int ret = -EINVAL;
size_t sz;
net_get_random_once(&hash_rnd, sizeof(hash_rnd));
if (info->check_set & ~XT_RECENT_VALID_FLAGS) {
pr_info_ratelimited("Unsupported userspace flags (%08x)\n",
info->check_set);
return -EINVAL;
}
if (hweight8(info->check_set &
(XT_RECENT_SET | XT_RECENT_REMOVE |
XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1)
return -EINVAL;
if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) &&
(info->seconds || info->hit_count ||
(info->check_set & XT_RECENT_MODIFIERS)))
return -EINVAL;
if ((info->check_set & XT_RECENT_REAP) && !info->seconds)
return -EINVAL;
if (info->hit_count >= XT_RECENT_MAX_NSTAMPS) {
pr_info_ratelimited("hitcount (%u) is larger than allowed maximum (%u)\n",
info->hit_count, XT_RECENT_MAX_NSTAMPS - 1);
return -EINVAL;
}
ret = xt_check_proc_name(info->name, sizeof(info->name));
if (ret)
return ret;
if (ip_pkt_list_tot && info->hit_count < ip_pkt_list_tot)
nstamp_mask = roundup_pow_of_two(ip_pkt_list_tot) - 1;
else if (info->hit_count)
nstamp_mask = roundup_pow_of_two(info->hit_count) - 1;
else
nstamp_mask = 32 - 1;
mutex_lock(&recent_mutex);
t = recent_table_lookup(recent_net, info->name);
if (t != NULL) {
if (nstamp_mask > t->nstamps_max_mask) {
spin_lock_bh(&recent_lock);
recent_table_flush(t);
t->nstamps_max_mask = nstamp_mask;
spin_unlock_bh(&recent_lock);
}
t->refcnt++;
ret = 0;
goto out;
}
sz = sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size;
t = kvzalloc(sz, GFP_KERNEL);
if (t == NULL) {
ret = -ENOMEM;
goto out;
}
t->refcnt = 1;
t->nstamps_max_mask = nstamp_mask;
memcpy(&t->mask, &info->mask, sizeof(t->mask));
strcpy(t->name, info->name);
INIT_LIST_HEAD(&t->lru_list);
for (i = 0; i < ip_list_hash_size; i++)
INIT_LIST_HEAD(&t->iphash[i]);
#ifdef CONFIG_PROC_FS
uid = make_kuid(&init_user_ns, ip_list_uid);
gid = make_kgid(&init_user_ns, ip_list_gid);
if (!uid_valid(uid) || !gid_valid(gid)) {
recent_table_free(t);
ret = -EINVAL;
goto out;
}
pde = proc_create_data(t->name, ip_list_perms, recent_net->xt_recent,
&recent_mt_fops, t);
if (pde == NULL) {
recent_table_free(t);
ret = -ENOMEM;
goto out;
}
proc_set_user(pde, uid, gid);
#endif
spin_lock_bh(&recent_lock);
list_add_tail(&t->list, &recent_net->tables);
spin_unlock_bh(&recent_lock);
ret = 0;
out:
mutex_unlock(&recent_mutex);
return ret;
}
static int recent_mt_check_v0(const struct xt_mtchk_param *par)
{
const struct xt_recent_mtinfo_v0 *info_v0 = par->matchinfo;
struct xt_recent_mtinfo_v1 info_v1;
/* Copy revision 0 structure to revision 1 */
memcpy(&info_v1, info_v0, sizeof(struct xt_recent_mtinfo));
/* Set default mask to ensure backward compatible behaviour */
memset(info_v1.mask.all, 0xFF, sizeof(info_v1.mask.all));
return recent_mt_check(par, &info_v1);
}
static int recent_mt_check_v1(const struct xt_mtchk_param *par)
{
return recent_mt_check(par, par->matchinfo);
}
static void recent_mt_destroy(const struct xt_mtdtor_param *par)
{
struct recent_net *recent_net = recent_pernet(par->net);
const struct xt_recent_mtinfo_v1 *info = par->matchinfo;
struct recent_table *t;
mutex_lock(&recent_mutex);
t = recent_table_lookup(recent_net, info->name);
if (--t->refcnt == 0) {
spin_lock_bh(&recent_lock);
list_del(&t->list);
spin_unlock_bh(&recent_lock);
#ifdef CONFIG_PROC_FS
if (recent_net->xt_recent != NULL)
remove_proc_entry(t->name, recent_net->xt_recent);
#endif
recent_table_flush(t);
recent_table_free(t);
}
mutex_unlock(&recent_mutex);
}
#ifdef CONFIG_PROC_FS
struct recent_iter_state {
const struct recent_table *table;
unsigned int bucket;
};
static void *recent_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(recent_lock)
{
struct recent_iter_state *st = seq->private;
const struct recent_table *t = st->table;
struct recent_entry *e;
loff_t p = *pos;
spin_lock_bh(&recent_lock);
for (st->bucket = 0; st->bucket < ip_list_hash_size; st->bucket++)
list_for_each_entry(e, &t->iphash[st->bucket], list)
if (p-- == 0)
return e;
return NULL;
}
static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct recent_iter_state *st = seq->private;
const struct recent_table *t = st->table;
const struct recent_entry *e = v;
const struct list_head *head = e->list.next;
while (head == &t->iphash[st->bucket]) {
if (++st->bucket >= ip_list_hash_size)
return NULL;
head = t->iphash[st->bucket].next;
}
(*pos)++;
return list_entry(head, struct recent_entry, list);
}
static void recent_seq_stop(struct seq_file *s, void *v)
__releases(recent_lock)
{
spin_unlock_bh(&recent_lock);
}
static int recent_seq_show(struct seq_file *seq, void *v)
{
const struct recent_entry *e = v;
struct recent_iter_state *st = seq->private;
const struct recent_table *t = st->table;
unsigned int i;
i = (e->index - 1) & t->nstamps_max_mask;
if (e->family == NFPROTO_IPV4)
seq_printf(seq, "src=%pI4 ttl: %u last_seen: %lu oldest_pkt: %u",
&e->addr.ip, e->ttl, e->stamps[i], e->index);
else
seq_printf(seq, "src=%pI6 ttl: %u last_seen: %lu oldest_pkt: %u",
&e->addr.in6, e->ttl, e->stamps[i], e->index);
for (i = 0; i < e->nstamps; i++)
seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]);
seq_putc(seq, '\n');
return 0;
}
static const struct seq_operations recent_seq_ops = {
.start = recent_seq_start,
.next = recent_seq_next,
.stop = recent_seq_stop,
.show = recent_seq_show,
};
static int recent_seq_open(struct inode *inode, struct file *file)
{
struct recent_iter_state *st;
st = __seq_open_private(file, &recent_seq_ops, sizeof(*st));
if (st == NULL)
return -ENOMEM;
st->table = PDE_DATA(inode);
return 0;
}
static ssize_t
recent_mt_proc_write(struct file *file, const char __user *input,
size_t size, loff_t *loff)
{
struct recent_table *t = PDE_DATA(file_inode(file));
struct recent_entry *e;
char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")];
const char *c = buf;
union nf_inet_addr addr = {};
u_int16_t family;
bool add, succ;
if (size == 0)
return 0;
if (size > sizeof(buf))
size = sizeof(buf);
if (copy_from_user(buf, input, size) != 0)
return -EFAULT;
/* Strict protocol! */
if (*loff != 0)
return -ESPIPE;
switch (*c) {
case '/': /* flush table */
spin_lock_bh(&recent_lock);
recent_table_flush(t);
spin_unlock_bh(&recent_lock);
return size;
case '-': /* remove address */
add = false;
break;
case '+': /* add address */
add = true;
break;
default:
pr_info_ratelimited("Need \"+ip\", \"-ip\" or \"/\"\n");
return -EINVAL;
}
++c;
--size;
if (strnchr(c, size, ':') != NULL) {
family = NFPROTO_IPV6;
succ = in6_pton(c, size, (void *)&addr, '\n', NULL);
} else {
family = NFPROTO_IPV4;
succ = in4_pton(c, size, (void *)&addr, '\n', NULL);
}
if (!succ)
return -EINVAL;
spin_lock_bh(&recent_lock);
e = recent_entry_lookup(t, &addr, family, 0);
if (e == NULL) {
if (add)
recent_entry_init(t, &addr, family, 0);
} else {
if (add)
recent_entry_update(t, e);
else
recent_entry_remove(t, e);
}
spin_unlock_bh(&recent_lock);
/* Note we removed one above */
*loff += size + 1;
return size + 1;
}
static const struct file_operations recent_mt_fops = {
.open = recent_seq_open,
.read = seq_read,
.write = recent_mt_proc_write,
.release = seq_release_private,
.owner = THIS_MODULE,
.llseek = seq_lseek,
};
static int __net_init recent_proc_net_init(struct net *net)
{
struct recent_net *recent_net = recent_pernet(net);
recent_net->xt_recent = proc_mkdir("xt_recent", net->proc_net);
if (!recent_net->xt_recent)
return -ENOMEM;
return 0;
}
static void __net_exit recent_proc_net_exit(struct net *net)
{
struct recent_net *recent_net = recent_pernet(net);
struct recent_table *t;
/* recent_net_exit() is called before recent_mt_destroy(). Make sure
* that the parent xt_recent proc entry is is empty before trying to
* remove it.
*/
spin_lock_bh(&recent_lock);
list_for_each_entry(t, &recent_net->tables, list)
remove_proc_entry(t->name, recent_net->xt_recent);
recent_net->xt_recent = NULL;
spin_unlock_bh(&recent_lock);
remove_proc_entry("xt_recent", net->proc_net);
}
#else
static inline int recent_proc_net_init(struct net *net)
{
return 0;
}
static inline void recent_proc_net_exit(struct net *net)
{
}
#endif /* CONFIG_PROC_FS */
static int __net_init recent_net_init(struct net *net)
{
struct recent_net *recent_net = recent_pernet(net);
INIT_LIST_HEAD(&recent_net->tables);
return recent_proc_net_init(net);
}
static void __net_exit recent_net_exit(struct net *net)
{
recent_proc_net_exit(net);
}
static struct pernet_operations recent_net_ops = {
.init = recent_net_init,
.exit = recent_net_exit,
.id = &recent_net_id,
.size = sizeof(struct recent_net),
};
static struct xt_match recent_mt_reg[] __read_mostly = {
{
.name = "recent",
.revision = 0,
.family = NFPROTO_IPV4,
.match = recent_mt,
.matchsize = sizeof(struct xt_recent_mtinfo),
.checkentry = recent_mt_check_v0,
.destroy = recent_mt_destroy,
.me = THIS_MODULE,
},
{
.name = "recent",
.revision = 0,
.family = NFPROTO_IPV6,
.match = recent_mt,
.matchsize = sizeof(struct xt_recent_mtinfo),
.checkentry = recent_mt_check_v0,
.destroy = recent_mt_destroy,
.me = THIS_MODULE,
},
{
.name = "recent",
.revision = 1,
.family = NFPROTO_IPV4,
.match = recent_mt,
.matchsize = sizeof(struct xt_recent_mtinfo_v1),
.checkentry = recent_mt_check_v1,
.destroy = recent_mt_destroy,
.me = THIS_MODULE,
},
{
.name = "recent",
.revision = 1,
.family = NFPROTO_IPV6,
.match = recent_mt,
.matchsize = sizeof(struct xt_recent_mtinfo_v1),
.checkentry = recent_mt_check_v1,
.destroy = recent_mt_destroy,
.me = THIS_MODULE,
}
};
static int __init recent_mt_init(void)
{
int err;
BUILD_BUG_ON_NOT_POWER_OF_2(XT_RECENT_MAX_NSTAMPS);
if (!ip_list_tot || ip_pkt_list_tot >= XT_RECENT_MAX_NSTAMPS)
return -EINVAL;
ip_list_hash_size = 1 << fls(ip_list_tot);
err = register_pernet_subsys(&recent_net_ops);
if (err)
return err;
err = xt_register_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
if (err)
unregister_pernet_subsys(&recent_net_ops);
return err;
}
static void __exit recent_mt_exit(void)
{
xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
unregister_pernet_subsys(&recent_net_ops);
}
module_init(recent_mt_init);
module_exit(recent_mt_exit);