[AUDIT] break large execve argument logging into smaller messages

execve arguments can be quite large.  There is no limit on the number of
arguments and a 4G limit on the size of an argument.

This patch prints those arguments in bite-sized pieces.  A userspace size
limitation of 8k was discovered, so this keeps messages around 7.5k.

Single arguments larger than 7.5k in length are split into multiple records
and can be identified by the aX[Y]= prefix.

Signed-off-by: Eric Paris <eparis@redhat.com>
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index aaaca8a..6e03322 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -78,6 +78,9 @@
 /* Indicates that audit should log the full pathname. */
 #define AUDIT_NAME_FULL -1
 
+/* no execve audit message should be longer than this (userspace limits) */
+#define MAX_EXECVE_AUDIT_LEN 7500
+
 /* number of audit rules */
 int audit_n_rules;
 
@@ -965,39 +968,54 @@
 	return rc;
 }
 
-static void audit_log_execve_info(struct audit_buffer *ab,
-		struct audit_aux_data_execve *axi)
+/*
+ * Log one execve argument (a<arg_num>=...) into *ab, starting a new
+ * AUDIT_EXECVE record whenever the current one would pass roughly
+ * MAX_EXECVE_AUDIT_LEN.  Returns the argument's length including its NUL,
+ * or 0 if a new audit buffer could not be started.
+ *
+ * to_send and len_sent accounting are very loose estimates; we only aim to
+ * stay within about 500 bytes (next page boundary) of the cap.  snprintf
+ * sizes the "aN=\n" overhead exactly: assuming a fixed 16 characters would
+ * waste about 50% of a message filled with minimum size arguments.
+ */
+static int audit_log_single_execve_arg(struct audit_context *context,
+					struct audit_buffer **ab,
+					int arg_num,
+					size_t *len_sent,
+					const char __user *p,
+					char *buf)
 {
-	int i;
-	long len, ret;
-	const char __user *p;
-	char *buf;
+	char arg_num_len_buf[12];
+	const char __user *tmp_p = p;
+	/* how many digits are in arg_num? 3 is the length of a=\n */
+	size_t arg_num_len = snprintf(arg_num_len_buf, 12, "%d", arg_num) + 3;
+	size_t len, len_left, to_send;
+	size_t max_execve_audit_len = MAX_EXECVE_AUDIT_LEN;
+	unsigned int i, has_cntl = 0, too_long = 0;
+	int ret;
 
-	if (axi->mm != current->mm)
-		return; /* execve failed, no additional info */
+	/* strnlen_user includes the null we don't want to send */
+	len_left = len = strnlen_user(p, MAX_ARG_STRLEN) - 1;
 
-	p = (const char __user *)axi->mm->arg_start;
+	/*
+	 * We just created this mm, if we can't find the strings
+	 * we just copied into it something is _very_ wrong. Similar
+	 * for strings that are too long, we should not have created
+	 * any.
+	 */
+	if (unlikely((len == -1) || len > MAX_ARG_STRLEN - 1)) {
+		WARN_ON(1);
+		send_sig(SIGKILL, current, 0);
+	}
 
-	for (i = 0; i < axi->argc; i++, p += len) {
-		len = strnlen_user(p, MAX_ARG_STRLEN);
-		/*
-		 * We just created this mm, if we can't find the strings
-		 * we just copied into it something is _very_ wrong. Similar
-		 * for strings that are too long, we should not have created
-		 * any.
-		 */
-		if (!len || len > MAX_ARG_STRLEN) {
-			WARN_ON(1);
-			send_sig(SIGKILL, current, 0);
-		}
-
-		buf = kmalloc(len, GFP_KERNEL);
-		if (!buf) {
-			audit_panic("out of memory for argv string\n");
-			break;
-		}
-
-		ret = copy_from_user(buf, p, len);
+	/* walk the whole argument looking for non-ascii chars */
+	do {
+		if (len_left > MAX_EXECVE_AUDIT_LEN)
+			to_send = MAX_EXECVE_AUDIT_LEN;
+		else
+			to_send = len_left;
+		ret = copy_from_user(buf, tmp_p, to_send);
 		/*
 		 * There is no reason for this copy to be short. We just
 		 * copied them here, and the mm hasn't been exposed to user-
@@ -1007,13 +1025,130 @@
 			WARN_ON(1);
 			send_sig(SIGKILL, current, 0);
 		}
+		buf[to_send] = '\0';
+		has_cntl = audit_string_contains_control(buf, to_send);
+		if (has_cntl) {
+			/*
+			 * hex messages get logged as 2 bytes, so we can only
+			 * send half as much in each message
+			 */
+			max_execve_audit_len = MAX_EXECVE_AUDIT_LEN / 2;
+			break;
+		}
+		len_left -= to_send;
+		tmp_p += to_send;
+	} while (len_left > 0);
 
-		audit_log_format(ab, "a%d=", i);
-		audit_log_untrustedstring(ab, buf);
-		audit_log_format(ab, "\n");
+	len_left = len;
 
-		kfree(buf);
+	if (len > max_execve_audit_len)
+		too_long = 1;
+
+	/* rewalk the argument actually logging the message */
+	for (i = 0; len_left > 0; i++) {
+		int room_left;
+
+		if (len_left > max_execve_audit_len)
+			to_send = max_execve_audit_len;
+		else
+			to_send = len_left;
+
+		/* do we have space left to send this argument in this ab? */
+		room_left = MAX_EXECVE_AUDIT_LEN - arg_num_len - *len_sent;
+		if (has_cntl)
+			room_left -= (to_send * 2);
+		else
+			room_left -= to_send;
+		if (room_left < 0) {
+			*len_sent = 0;
+			audit_log_end(*ab);
+			*ab = audit_log_start(context, GFP_KERNEL, AUDIT_EXECVE);
+			if (!*ab)
+				return 0;
+		}
+
+		/*
+		 * first record needs to say how long the original string was
+		 * so we can be sure nothing was lost.
+		 */
+		if ((i == 0) && (too_long))
+			audit_log_format(*ab, "a%d_len=%zu ", arg_num,
+					 has_cntl ? 2*len : len);
+
+		/*
+		 * normally arguments are small enough to fit and we already
+		 * filled buf above when we checked for control characters
+		 * so don't bother with another copy_from_user
+		 */
+		if (len >= max_execve_audit_len)
+			ret = copy_from_user(buf, p, to_send);
+		else
+			ret = 0;
+		if (ret) {
+			WARN_ON(1);
+			send_sig(SIGKILL, current, 0);
+		}
+		buf[to_send] = '\0';
+
+		/* actually log it */
+		audit_log_format(*ab, "a%d", arg_num);
+		if (too_long)
+			audit_log_format(*ab, "[%d]", i);
+		audit_log_format(*ab, "=");
+		if (has_cntl)
+			audit_log_hex(*ab, buf, to_send);
+		else
+			audit_log_format(*ab, "\"%s\"", buf);
+		audit_log_format(*ab, "\n");
+
+		p += to_send;
+		len_left -= to_send;
+		*len_sent += arg_num_len;
+		if (has_cntl)
+			*len_sent += to_send * 2;
+		else
+			*len_sent += to_send;
 	}
+	/* include the null we didn't log */
+	return len + 1;
+}
+
+static void audit_log_execve_info(struct audit_context *context,
+				  struct audit_buffer **ab,
+				  struct audit_aux_data_execve *axi)
+{
+	int i;
+	size_t len, len_sent = 0;
+	const char __user *p;
+	char *buf;
+
+	if (axi->mm != current->mm)
+		return; /* execve failed, no additional info */
+
+	p = (const char __user *)axi->mm->arg_start;
+
+	audit_log_format(*ab, "argc=%d ", axi->argc);
+
+	/*
+	 * We need a kernel buffer to hold the userspace args.  Allocate one
+	 * buffer up front and reuse it for every argument, rather than
+	 * allocating one of the right size for each argument inside
+	 * audit_log_single_execve_arg().  It is a <8k allocation, so safe.
+	 */
+	buf = kmalloc(MAX_EXECVE_AUDIT_LEN + 1, GFP_KERNEL);
+	if (!buf) {
+		audit_panic("out of memory for argv string\n");
+		return;
+	}
+
+	for (i = 0; i < axi->argc; i++) {
+		len = audit_log_single_execve_arg(context, ab, i,
+						  &len_sent, p, buf);
+		if (len <= 0)
+			break;
+		p += len;
+	}
+	kfree(buf);
 }
 
 static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
@@ -1157,7 +1292,7 @@
 
 		case AUDIT_EXECVE: {
 			struct audit_aux_data_execve *axi = (void *)aux;
-			audit_log_execve_info(ab, axi);
+			audit_log_execve_info(context, &ab, axi);
 			break; }
 
 		case AUDIT_SOCKETCALL: {
@@ -2094,8 +2229,6 @@
 	return 0;
 }
 
-int audit_argv_kb = 32;
-
 int audit_bprm(struct linux_binprm *bprm)
 {
 	struct audit_aux_data_execve *ax;
@@ -2104,14 +2237,6 @@
 	if (likely(!audit_enabled || !context || context->dummy))
 		return 0;
 
-	/*
-	 * Even though the stack code doesn't limit the arg+env size any more,
-	 * the audit code requires that _all_ arguments be logged in a single
-	 * netlink skb. Hence cap it :-(
-	 */
-	if (bprm->argv_len > (audit_argv_kb << 10))
-		return -E2BIG;
-
 	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
 	if (!ax)
 		return -ENOMEM;