hv_netvsc: Implement partial copy into send buffer

If remaining space in a send buffer slot is too small for the whole message,
we only copy the RNDIS header and PPI data into send buffer, so we can batch
one more packet each time. It reduces the vmbus per-message overhead.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 4487167..a3a9d38 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -277,15 +277,16 @@
 }
 
 static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
-			   struct hv_page_buffer *pb)
+			   struct hv_netvsc_packet *packet)
 {
+	struct hv_page_buffer *pb = packet->page_buf;
 	u32 slots_used = 0;
 	char *data = skb->data;
 	int frags = skb_shinfo(skb)->nr_frags;
 	int i;
 
 	/* The packet is laid out thus:
-	 * 1. hdr
+	 * 1. hdr: RNDIS header and PPI
 	 * 2. skb linear data
 	 * 3. skb fragment data
 	 */
@@ -294,6 +295,9 @@
 					offset_in_page(hdr),
 					len, &pb[slots_used]);
 
+	packet->rmsg_size = len;
+	packet->rmsg_pgcnt = slots_used;
+
 	slots_used += fill_pg_buf(virt_to_page(data),
 				offset_in_page(data),
 				skb_headlen(skb), &pb[slots_used]);
@@ -578,7 +582,7 @@
 	rndis_msg->msg_len += rndis_msg_size;
 	packet->total_data_buflen = rndis_msg->msg_len;
 	packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
-					skb, &page_buf[0]);
+					       skb, packet);
 
 	ret = netvsc_send(net_device_ctx->device_ctx, packet);