[qca-nss-clients] adding Hawkeye profiling support

Allowing user program to change status event counter configuration;
Revised counter Tx process from using stop/start to directly use counter request.

Change-Id: I267ac171b1bb35cbc692425153cabaac51750163
Signed-off-by: Guojun Jin <g.jin@codeaurora.org>
diff --git a/profiler/profile.c b/profiler/profile.c
index 50be1fc..e4038de 100644
--- a/profiler/profile.c
+++ b/profiler/profile.c
@@ -34,6 +34,7 @@
 #include <asm/uaccess.h>
 #include <asm/page.h>
 #include <asm/thread_info.h>
+#include <linux/ctype.h>
 #include <nss_api_if.h>
 
 #include "profilenode.h"
@@ -87,6 +88,9 @@
  * LINUX and Ultra counters must all fit in one packet
  */
 #define PROFILE_LINUX_MAX_COUNTERS 40
+#define PROFILE_STS_EVENT_COUNTERS 8
+#define PROFILE_STS_EVENT_THREAD_BITS 5
+
 static int profile_num_counters = 0;
 static volatile unsigned int *profile_counter[PROFILE_LINUX_MAX_COUNTERS];
 static char profile_name[PROFILE_LINUX_MAX_COUNTERS][PROFILE_COUNTER_NAME_LENGTH];
@@ -183,7 +187,7 @@
 	ph.pph.ddr_freq = pn->pnc.un.ddr_freq;
 	ph.pph.cpu_id = pn->pnc.un.cpu_id;
 	ph.pph.seq_num = htonl(pn->profile_sequence_num);
-	ph.pph.sample_stack_words = htonl(PROFILE_STACK_WORDS);
+	ph.pph.sample_stack_words = PROFILE_STACK_WORDS;
 
 	ns = (blen - sizeof(ph)) / sizeof(struct profile_sample);
 	profileInfo("%X: blen %d ns = %d psc_hd count %d ssets %d phs %d pss %d\n", pn->profile_sequence_num, blen, ns, psc_hd->count, psc_hd->exh.sample_sets, sizeof(ph), sizeof(struct profile_sample));
@@ -262,7 +266,7 @@
 };
 
 /*
- * make a packet full of performance counters
+ * make a packet full of performance counters (software)
  */
 static int profile_make_stats_packet(char *buf, int bytes, struct profile_io *pn)
 {
@@ -344,16 +348,24 @@
 
 	if (!pn->pnc.enabled && nss_get_state(pn->ctx) == NSS_STATE_INITIALIZED) {
 		nss_tx_status_t ret;
+
+		/*
+		 * sw_ksp_ptr is used as event flag. NULL means normal I/O
+		 */
+		pn->sw_ksp_ptr = NULL;
 		pn->pnc.enabled = 1;
 		pn->profile_first_packet = 1;
 		pn->pnc.un.hd_magic = UBI32_PROFILE_HD_MAGIC | NSS_PROFILER_START_MSG;
-		ret = nss_profiler_if_tx_buf(pn->ctx, &pn->pnc.un, sizeof(pn->pnc.un), profiler_handle_reply);
-		profileInfo("%s: %d -- %p: ccl %p sp %p\n", __func__, ret, pn, pn->ccl, pn->pnc.samples);
+		ret = nss_profiler_if_tx_buf(pn->ctx, &pn->pnc.un,
+				sizeof(pn->pnc.un), profiler_handle_reply, pn);
+		profileInfo("%s: %d -- %p: ccl %p sp %p\n", __func__, ret,
+			pn, pn->ccl, pn->pnc.samples);
 		filp->private_data = pn;
 		return 0;
 	}
 
-	profileWarn("profile ena %d nss stat %x\n", pn->pnc.enabled, nss_get_state(pn->ctx));
+	profileWarn("profile ena %d nss stat %x\n", pn->pnc.enabled,
+			nss_get_state(pn->ctx));
 	return -EBUSY;
 }
 
@@ -373,6 +385,14 @@
 	if (!pn->pnc.enabled) {
 		return -EPERM;
 	}
+	if (pn->sw_ksp_ptr) {
+		struct debug_box *db = (struct debug_box *) pn->sw_ksp_ptr;
+		slen = (PROFILE_STS_EVENT_COUNTERS + 1) * sizeof(db->data[0]);
+		if (copy_to_user(buf, db->data, slen))
+			return -EFAULT;
+		return	slen;
+	}
+
 	if (!pn->pnc.samples) {
 		return -ENOMEM;
 	}
@@ -408,8 +428,10 @@
 		nss_tx_status_t ret;
 		pn->pnc.enabled = 1;
 		pn->pnc.un.hd_magic = UBI32_PROFILE_HD_MAGIC | NSS_PROFILER_START_MSG;
-		ret = nss_profiler_if_tx_buf(pn->ctx, &pn->pnc.un, sizeof(pn->pnc.un), profiler_handle_reply);
-		profileWarn("%s: restart %d -- %p: ccl %p sp %p\n", __func__, ret, pn, pn->ccl, pn->pnc.samples);
+		ret = nss_profiler_if_tx_buf(pn->ctx, &pn->pnc.un, sizeof(pn->pnc.un),
+						profiler_handle_reply, pn);
+		profileWarn("%s: restart %d -- %p: ccl %p sp %p\n", __func__,
+				ret, pn, pn->ccl, pn->pnc.samples);
 	}
 
 	return result + slen;
@@ -427,9 +449,11 @@
 
 	if (pn->pnc.enabled) {
 		nss_tx_status_t ret;
+		pn->sw_ksp_ptr = NULL;
 		pn->pnc.enabled = 0;
 		pn->pnc.un.hd_magic = UBI32_PROFILE_HD_MAGIC | NSS_PROFILER_STOP_MSG;
-		ret = nss_profiler_if_tx_buf(pn->ctx, &pn->pnc.un, sizeof(pn->pnc.un), profiler_handle_reply);
+		ret = nss_profiler_if_tx_buf(pn->ctx, &pn->pnc.un,
+				sizeof(pn->pnc.un), profiler_handle_reply, pn);
 		profileInfo("%s: %p %d\n", __func__, pn, ret);
 		return 0;
 	}
@@ -438,8 +462,121 @@
 	return -EBADF;
 }
 
-#define isspace(c)	(c==' ' || c=='\t')
+/*
+ * profiler_handle_stat_event_reply()
+ *	print current FW stat event counter configurations
+ */
+static void profiler_handle_stat_event_reply(struct nss_ctx_instance *nss_ctx,
+						struct nss_cmn_msg *ncm)
+{
+	struct profile_io *pio = (struct profile_io *) ncm->app_data;
+	struct debug_box *pdb = (struct debug_box *) &pio->pnc;
+	struct debug_box *db = (struct debug_box *) &ncm[1];
+	int i, thrds;
 
+	for (i = 0; i < db->dlen; i++)
+		printk("stat counter %d: %x\n", i, db->data[i]);
+
+	thrds = db->data[i];
+	i = (1 << PROFILE_STS_EVENT_THREAD_BITS) - 1;
+	profileInfo("%d: event end mark %x, ThrA %d ThrB %d\n",
+		ncm->len, thrds, (thrds & i) + 1,
+		((thrds >> PROFILE_STS_EVENT_THREAD_BITS) & i) + 1);
+
+	/*
+	 * save data for read()
+	 */
+	memcpy(pdb->data, db->data, (db->dlen + 1) * sizeof(db->data[0]));
+}
+
+/*
+ * parse_sys_stat_event_req()
+ *	process FW stat events request: event#1 index#1 event#2 index#2 ...
+ */
+static int parse_sys_stat_event_req(const char *buf, size_t count,
+				struct debug_box *db, struct profile_io *pio)
+{
+	char *cp;
+	int result;
+
+	printk("%d cmd buf %s\n", count, buf);
+	if (count < 19) /* minimum data for sys_stat_event request */
+		return	-EINVAL;
+
+	if (strncmp(buf, "get-sys-stat-events", 19) == 0) {
+		db->hd_magic = UBI32_PROFILE_HD_MAGIC | NSS_PROFILER_GET_SYS_STAT_EVENT;
+		db->dlen = result;
+		result = nss_profiler_if_tx_buf(pio->ctx, &pio->pnc.un,
+					sizeof(pio->pnc.un),
+					profiler_handle_stat_event_reply, pio);
+		profileInfo("get_sys_stat_events: %d\n", result);
+		return	result == NSS_TX_SUCCESS ? count : -EFAULT;
+	}
+
+	if (strncmp(buf, "set-sys-stat-events", 19)) {
+		printk("unknow event: %s\n", buf);
+		return	-EINVAL;
+	}
+
+	db->dlen = sizeof(pio->pnc.un);
+	memset(db->data, 0, PROFILE_STS_EVENT_COUNTERS * sizeof(db->data[0]));
+
+	cp = strchr(buf, ' ');
+	if (!cp) {
+		printk("no enough paramters %s\n", buf);
+		return	-EINVAL;
+	}
+
+	do {
+		int idx, event;
+
+		while (isspace(*cp))
+			cp++;
+		event = kstrtoul(cp, NULL, 0);
+
+		cp = strchr(cp, ' ');
+		if (!cp) {
+			printk("missing index %s\n", buf);
+			return	-EINVAL;
+		}
+		while (isspace(*cp))
+			cp++;
+		idx = event >> 16;
+		if (idx) {
+			if ((event & 0x1FF) < 50) {
+				printk("thr ID (%d) ignored for event %d\n",
+					idx, event & 0x1FF);
+			} else if (idx > 12) {
+				if ((idx >>= 5) > 12) {
+					printk("tID %d too big [1..12]\n", idx);
+					return	-E2BIG;
+				}
+			}
+		}
+		idx = kstrtoul(cp, NULL, 10);
+		if (idx < 0 || idx > 7) {
+			printk("index %d out of range [0..7]\n", idx);
+			return	-ERANGE;
+		}
+		printk("%p: e %d i %d\n", db, event, idx);
+		db->data[idx] = event;
+		cp = strchr(cp, ' ');
+	} while (cp);
+	db->hd_magic = UBI32_PROFILE_HD_MAGIC | NSS_PROFILER_SET_SYS_STAT_EVENT;
+	result = nss_profiler_if_tx_buf(pio->ctx, &pio->pnc.un, sizeof(pio->pnc.un),
+				profiler_handle_stat_event_reply, pio);
+	profileInfo("%p: %d send cmd %x to FW ret %d\n",
+			db, count, db->hd_magic, result);
+	return	count;
+}
+
+/*
+ * parseDbgData()
+ *	parsing debug requests: base_address [options] cmd length
+ *
+ * cmd is either read or write
+ * option is one of mio, moveio, h [heap security verify], etc.
+ */
 static int parseDbgData(const char *buf, size_t count, struct debug_box *db)
 {
 	char *cp;
@@ -523,7 +660,7 @@
 {
 	int result;
 	struct debug_box *db;
-	struct profile_io *pio = node[0];
+	struct profile_io *pio = (struct profile_io *)filp->private_data;
 
 	if (!pio) {
 		return -ENOENT;
@@ -535,6 +672,19 @@
 
 	db = (struct debug_box *) &pio->pnc;
 	db->dlen = db->opts = 0;
+
+	if (!isdigit(buf[0])) {
+		result = parse_sys_stat_event_req(buf, count, db, pio);
+
+		if ((result > 0) && (filp->f_flags & O_RDWR)) {
+			/*
+			 * set flag so event-counter can read the data from FW
+			 */
+			pio->sw_ksp_ptr = (uint32_t *)db;
+		}
+		return	result;
+	}
+
 	result = parseDbgData(buf, count, db);
 	if (result < 0) {
 		return	result;
@@ -546,7 +696,8 @@
 		db->hd_magic = UBI32_PROFILE_HD_MAGIC | NSS_PROFILER_DEBUG_WR_MSG;
 		db->dlen = result;
 	}
-	result = nss_profiler_if_tx_buf(pio->ctx, &pio->pnc.un, sizeof(pio->pnc.un), profiler_handle_debug_reply);
+	result = nss_profiler_if_tx_buf(pio->ctx, &pio->pnc.un,
+			sizeof(pio->pnc.un), profiler_handle_debug_reply, pio);
 	printk("dbg res %d dlen = %d opt %x\n", result, db->dlen, db->opts);
 	return	count;
 }
@@ -664,7 +815,7 @@
 			pn->pnc.un.cpu_id = ntohl(pTx->cpu_id);
 			pn->pnc.un.cpu_freq = ntohl(pTx->cpu_freq);
 			pn->pnc.un.ddr_freq = ntohl(pTx->ddr_freq);
-			pn->pnc.un.num_counters = ntohl(pTx->num_counters);
+			pn->pnc.un.num_counters = pTx->num_counters;
 		} else {
 			pn->pnc.un = *pTx;
 		}
@@ -680,7 +831,9 @@
 		if (pn->pnc.enabled > 0) {
 			pn->pnc.enabled = -1;
 			pn->pnc.un.hd_magic = UBI32_PROFILE_HD_MAGIC | NSS_PROFILER_STOP_MSG;
-			ret = nss_profiler_if_tx_buf(pn->ctx, &pn->pnc.un, sizeof(pn->pnc.un), profiler_handle_reply);
+			ret = nss_profiler_if_tx_buf(pn->ctx,
+					&pn->pnc.un, sizeof(pn->pnc.un),
+					profiler_handle_reply, pn);
 			profileWarn("%d temp stop sampling engine %d\n", swap, ret);
 		}
 		if (swap < 3) {
@@ -696,19 +849,16 @@
 	memcpy(&nsb->psc_header, buf, buf_len); /* pn->pnc.pn2h->psc_header = *psc_hd; maybe faster, but take more memory */
 
 	nsb->mh.md_type = PINGPONG_FULL;
-	//kxdump((void*)(nsb->samples + 23), sizeof(*nsb->samples) << 1, "1st 2 samples");
+
+	/*
+	 * ask for perf_counters (software counters) update every 32 samples
+	 */
 	if (!wr) {
-		/*
-		 * should be UBI32_PROFILE_HD_MAGIC | NSS_PROFILER_COUNTERS_MSG
-		 * but FW is hard to change due to packge warehouse, so using
-		 * STOP/START instead till PROFILER_COUNTERS_MSG done in FW
-		 */
-		pn->pnc.un.hd_magic = UBI32_PROFILE_HD_MAGIC | NSS_PROFILER_STOP_MSG;
-		ret = nss_profiler_if_tx_buf(pn->ctx, &pn->pnc.un, sizeof(pn->pnc.un), profiler_handle_reply);
+		pn->pnc.un.hd_magic = UBI32_PROFILE_HD_MAGIC | NSS_PROFILER_COUNTERS_MSG;
+		ret = nss_profiler_if_tx_buf(pn->ctx, &pn->pnc.un,
+				sizeof(pn->pnc.un), profiler_handle_reply, pn);
 		if (ret == NSS_TX_FAILURE)
-			printk("STOP Cmd failed %d %d\n", ret, wr);
-		pn->pnc.un.hd_magic = UBI32_PROFILE_HD_MAGIC | NSS_PROFILER_START_MSG;
-		ret = nss_profiler_if_tx_buf(pn->ctx, &pn->pnc.un, sizeof(pn->pnc.un), profiler_handle_reply);
+			printk("req counters Cmd failed %d %d\n", ret, wr);
 	}
 	profileInfo("filled %p %p wr %d\n", nsb, nsb->samples, pn->ccl_write);
 }
@@ -748,9 +898,11 @@
 	}
 
 	/*
-	 * sw_ksp is an array of pointers to struct thread_info, the current task executing for each linux virtual processor
+	 * sw_ksp is an array of pointers to struct thread_info,
+	 * the current task executing for each linux virtual processor
 	node->sw_ksp_ptr = sw_ksp;
 	 */
+	node->sw_ksp_ptr = NULL;
 	node->task_offset = offsetof(struct thread_info, task);
 	node->pid_offset = offsetof(struct task_struct, tgid);
 }
diff --git a/profiler/profilenode.h b/profiler/profilenode.h
index 9ffb181..34a3889 100644
--- a/profiler/profilenode.h
+++ b/profiler/profilenode.h
@@ -86,7 +86,10 @@
 
 struct profile_session {	// use for per session start
 	uint32_t hd_magic;	// common ovarlay in all headers
-	uint32_t num_counters;	// how many registered performance (app) counters -- may change
+	uint8_t	num_counters;	/* # performance (app) counters registered (changeable) */
+	uint8_t	unused1B;
+	uint8_t nc_sts_sel_thrA;
+	uint8_t nc_sts_sel_thrB;
 	uint32_t ocm_size;
 	uint32_t sram_start;
 
diff --git a/profiler/profpkt.h b/profiler/profpkt.h
index 752af06..331afec 100644
--- a/profiler/profpkt.h
+++ b/profiler/profpkt.h
@@ -79,7 +79,10 @@
 	uint16_t magic;			/* magic number and version */
 	uint8_t header_size;		/* number of bytes in profile header */
 	uint8_t sample_count;		/* number of samples in the packet */
-	uint32_t sample_stack_words;	/* number of stack words in the sample */
+	uint8_t nc_sts_tselA;		/* thr 1 statistics requst to FW */
+	uint8_t nc_sts_tselB;		/* thr 2 requst to FW */
+	uint8_t spare1B;
+	uint8_t sample_stack_words;	/* number of stack words in the sample */
 	uint32_t seq_num;		/* to detect dropped profiler packets */
 	uint32_t profile_instructions;	/* instructions executed by profiler mainline */