/*
 Implementation of the interfaces for IMinerTreeModel objects.
*/

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <memory.h>

#include "IMTree.h"


/*
 Build an IMinerTreeModel in pModel.

 The model is a list of IMINER_TREE_DESCRIPTION_NUM fixed members (meta data,
 classification flag, number of output levels, max comparisons per split,
 dependent levels) followed by one double matrix per tree.  Every tree matrix
 has max(nNodes[]) rows and nLevels+nMaxComps+4 columns.

 The flat input arrays are read with a per-tree stride of max(nNodes[]) nodes,
 i.e. entry (tree, node) of pdNodeNums/pdColumns/pdOpTypes/pdScores is taken
 from index tree*maxNodes+node, and pdOpValues/pdProbs use the same stride
 multiplied by nMaxComps/nLevels respectively.

 Returns IMINER_SUCCESS, IMINER_BAD_INPUT_ARGS when pModel or nNodes is NULL or
 nTrees < 1, IMINER_FAIL when the member-name table cannot be allocated, or the
 status of the first failing IMiner* call.
 NOTE(review): on failure a partially built pModel is left as it is (same as
 before this change) - confirm whether callers are expected to destroy it.
*/
long IMCSRC_STDCALL IMinerTreeModel_create(
 IMinerObject* pModel,		    /* out: data object */
 IMinerObject* md,			    /* in:  meta data object */
 long nTrees,					/* in:  number of trees */
 long nLevels,					/* in:  number of levels output */
 long nMaxComps,				/* in:  max number of comparison values for a split */
 long *nNodes,					/* in:  number of nodes in tree of length (nTrees) */
 const double *pdNodeNums,		/* in:  node numbers of length (nTrees*nNodes[tree]) */
 const double *pdColumns,		/* in:  column to make comparison with of length (nTrees*nNodes[tree]) */
 const double *pdOpTypes,		/* in:  type of operation for split of length (nTrees*nNodes[tree]) */
 const double *pdOpValues,		/* in:  value to compare for split of length (nTrees*nNodes[tree]*nMaxComps) */
 const double *pdProbs,			/* in:  output probabilities for node of length (nTrees*nNodes[tree]*nLevels) */
 const double *pdScores,		/* in:  node score of length (nTrees*nNodes[tree]) */
 const char **pcDepLevels		/* in:  dependent levels if categorical (nLevels) */
)
{
	const char *ppszMD="metaData", *ppszC="classification", *ppszTD="treeDescription";
	const char *ppszL="outputLevels", *ppszMC="maxComparisons", *ppszDL="dmDependentLevels";
	const char **ppszMemberNames=NULL;
	long tree,node,level,val,nStatus=IMINER_SUCCESS,bClassification=(nLevels>0)?1:0,nCols=nLevels+nMaxComps+4L;
	long treeVal, probVal, compVal, maxNodes;
	IMinerObject *tmp;

	if(pModel == NULL || nNodes == NULL || nTrees < 1L)
		return IMINER_BAD_INPUT_ARGS;

	/* a TreeModel always have nTrees+IMINER_TREE_DESCRIPTION_NUM elements as list */
	nStatus = IMinerList_create(pModel, IMINER_TREE_DESCRIPTION_NUM+nTrees, IMINER_MODE_MODEL);
	if(nStatus != IMINER_SUCCESS)
		return nStatus;

	/* table of member names; the strings themselves are literals, only the table is heap memory */
	ppszMemberNames = (const char**)malloc((size_t)(IMINER_TREE_DESCRIPTION_NUM+nTrees)*sizeof(char*));
	if(ppszMemberNames == NULL)
		return IMINER_FAIL;

	/* member: meta data */
	ppszMemberNames[IMINER_TREE_META_DATA_NUM] = ppszMD;
	nStatus = IMinerList_clone(IMINER_TREE_META_DATA(pModel), md);
	if(nStatus != IMINER_SUCCESS)
		goto cleanup;

	/* member: classification */
	ppszMemberNames[IMINER_TREE_CLASSIFICATION_NUM] = ppszC;
	nStatus = IMinerVector_create(IMINER_TREE_CLASSIFICATION(pModel), 1, IMINER_MODE_LONG, &bClassification);
	if(nStatus != IMINER_SUCCESS)
		goto cleanup;

	/* member: num output levels */
	ppszMemberNames[IMINER_TREE_LEVELS_NUM] = ppszL;
	nStatus = IMinerVector_create(IMINER_TREE_LEVELS(pModel), 1, IMINER_MODE_LONG, &nLevels);
	if(nStatus != IMINER_SUCCESS)
		goto cleanup;

	/* member: max num comparisons for split */
	ppszMemberNames[IMINER_TREE_MAX_COMPARISON_NUM] = ppszMC;
	nStatus = IMinerVector_create(IMINER_TREE_MAX_COMPARISONS(pModel), 1, IMINER_MODE_LONG, &nMaxComps);
	if(nStatus != IMINER_SUCCESS)
		goto cleanup;

	/* member: dependent levels (empty vector for a regression model) */
	ppszMemberNames[IMINER_TREE_DEP_LEVELS_NUM] = ppszDL;
	nStatus = IMinerVector_create(IMINER_TREE_DEP_LEVELS(pModel), (bClassification) ? nLevels : 0, IMINER_MODE_STRING, pcDepLevels);
	if(nStatus != IMINER_SUCCESS)
		goto cleanup;

	/* get max num nodes: every tree matrix is sized to the largest tree */
	maxNodes = nNodes[0];
	for (tree=1; tree<nTrees; tree++) {
		if (maxNodes<nNodes[tree]) maxNodes=nNodes[tree];
	}

	/* member: tree matrix */
	for (tree=0; tree<nTrees; tree++) {
		ppszMemberNames[IMINER_TREE_DESCRIPTION_NUM+tree] = ppszTD;
		tmp = IMINER_TREE_DESCRIPTION(pModel, tree);
		nStatus = IMinerDoubleMatrix_create(tmp, maxNodes, nCols, NULL, NULL, NULL);
		if(nStatus != IMINER_SUCCESS)
			goto cleanup;

		/* offsets of this tree's first node inside the flat input arrays */
		treeVal = tree*maxNodes;
		probVal = treeVal*nLevels;
		compVal = treeVal*nMaxComps;
		for (node=0; node<maxNodes; node++, treeVal++) {
			IMINER_TREE_NODE_NUM(tmp, node) = pdNodeNums[treeVal];
			IMINER_TREE_NODE_COLUMN(tmp, node) = pdColumns[treeVal];
			IMINER_TREE_NODE_OPERATOR(tmp, node) = pdOpTypes[treeVal];
			for (val=0; val<nMaxComps; val++) {
				IMINER_TREE_NODE_OPVALUE(tmp, node, val) = pdOpValues[compVal+node*nMaxComps+val];
			}
			IMINER_TREE_NODE_SCORE(tmp, node, nMaxComps) = pdScores[treeVal];
			for (level=0; level<nLevels; level++) {
				IMINER_TREE_NODE_PROB(tmp, node, nMaxComps, level) = pdProbs[probVal+node*nLevels+level];
			}
		}
	}

	/* special member: member names */
	nStatus = IMinerList_setNamesFromStrings(pModel, ppszMemberNames);

	/* set the class ID */
	pModel->m_nMode = IMINER_MODE_MODEL;

cleanup:
	/* the name table is released on every path, including the error ones */
	free(ppszMemberNames);

	return nStatus;
}

/*
 Check that pModel has the structure of a tree model.

 Returns 1 when pModel is a valid list whose meta data member is a valid list,
 whose classification / levels / max-comparisons / dependent-levels members are
 valid vectors, whose tree members are all valid double matrices, and whose
 names vector is a string vector of the same length as the list itself.
 Returns 0 otherwise (including pModel == NULL).
*/
long IMCSRC_STDCALL IMinerTreeModel_isValid(const IMinerObject* pModel)
{
	long t, treeCount;
	IMinerObject* pElement;

	if(pModel==NULL  || !IMinerList_isValid(pModel))
		return 0L;

	treeCount = IMINER_TREE_GET_COUNT(pModel);

	if (!IMinerList_isValid(IMINER_TREE_META_DATA(pModel))) return 0L;

	pElement = IMINER_TREE_CLASSIFICATION(pModel);
	if(!IMinerVector_isValid(pElement) )
		return 0L;

	pElement = IMINER_TREE_LEVELS(pModel);
	if(!IMinerVector_isValid(pElement) )
		return 0L;

	pElement = IMINER_TREE_MAX_COMPARISONS(pModel);
	if(!IMinerVector_isValid(pElement) )
		return 0L;

	pElement = IMINER_TREE_DEP_LEVELS(pModel);
	if(!IMinerVector_isValid(pElement) )
		return 0L;

	for (t=0; t<treeCount; t++) {
		pElement = IMINER_TREE_DESCRIPTION(pModel, t);
		if(!IMinerDoubleMatrix_isValid(pElement) )
			return 0L;
	}

	/* special member : member names must also be valid and have the same length as this object.
	   The model is rejected when EITHER condition fails; the length is only read once the
	   vector is known to be a string vector. */
	pElement = IMINER_LIST_NAMES_PTR(pModel);
	if(!IMinerVector_isString(pElement) || pElement->m_nLen != pModel->m_nLen)
		return 0L;

	return 1L;
}

/* Release a tree model by delegating to IMinerObject_destroy; its status is returned unchanged. */
long IMCSRC_STDCALL IMinerTreeModel_destroy(IMinerObject* pModel )
{
	long nDestroyStatus;

	nDestroyStatus = IMinerObject_destroy(pModel);
	return nDestroyStatus;
}


/*
 Write every member of a tree model to stdout: meta data, classification flag,
 number of output levels, dependent levels, max comparisons, then one matrix
 per tree.

 Returns IMINER_SUCCESS, or IMINER_BAD_INPUT_ARGS when pModel is not a valid
 tree model or when printing any member fails.
*/
long IMCSRC_STDCALL IMinerTreeModel_print(const IMinerObject* pModel)
{
	long nStatus, t, treeCount;

	if(!IMinerTreeModel_isValid(pModel))
	{
		IMiner_error("%s(%d) : ", __FILE__, __LINE__);
		IMiner_error("Invalid pModel\n");
		return IMINER_BAD_INPUT_ARGS;
	}

	printf("Meta Data:\n");
	if (IMinerMetaData_print(IMINER_TREE_META_DATA(pModel))!=IMINER_SUCCESS) {
		return IMINER_BAD_INPUT_ARGS;
	}

	printf("Classification:\n");
	nStatus = IMinerVector_print(IMINER_TREE_CLASSIFICATION(pModel));
	if(nStatus != IMINER_SUCCESS)
		return IMINER_BAD_INPUT_ARGS;

	printf("Num Levels Output:\n");
	nStatus = IMinerVector_print(IMINER_TREE_LEVELS(pModel));
	if(nStatus != IMINER_SUCCESS)
		return IMINER_BAD_INPUT_ARGS;

	printf("Dependent Levels:\n");
	nStatus = IMinerVector_print(IMINER_TREE_DEP_LEVELS(pModel));
	if(nStatus != IMINER_SUCCESS)
		return IMINER_BAD_INPUT_ARGS;

	printf("Num Max Comparisons:\n");
	nStatus = IMinerVector_print(IMINER_TREE_MAX_COMPARISONS(pModel));
	if(nStatus != IMINER_SUCCESS)
		return IMINER_BAD_INPUT_ARGS;

	treeCount = IMINER_TREE_GET_COUNT(pModel);
	for (t=0; t<treeCount; t++) {
		/* t is a long, so the conversion must be %ld (a plain %d is a type mismatch) */
		printf("Tree Description (%ld):\n", t);
		nStatus = IMinerDoubleMatrix_print(IMINER_TREE_DESCRIPTION(pModel, t));
		if(nStatus != IMINER_SUCCESS)
			return IMINER_BAD_INPUT_ARGS;
	}

	return IMINER_SUCCESS;
}

/* Scratch buffer holding a node number formatted as a decimal string; it is the
   key used for hash-table lookups by getModelLeafNum and IMinerTreeModel_predict. */
char hash_key[256];

/*
 Walk one tree from node curNode for a single input row and return the number
 of the node where the walk stops.

 Children of node k are numbered 2k (left) and 2k+1 (right).  The walk stops,
 returning the current node number, when the node is not present in mapping,
 when its split column is negative, or when its operator code is not one of
 the TREE_* codes handled below.

 tree             tree description matrix
 mapping          hash table from decimal node number to matrix row
 rowData          input values for the row, indexed by column
 nMaxComparisons  number of comparison-value slots per node
 numLevels        per-column level count; 0 is treated as "single comparison only"
*/
long getModelLeafNum(IMinerObject *tree, IMinerHashTable *mapping, long curNode, double *rowData,
					 long nMaxComparisons, long *numLevels) {
	long nodeNum = curNode;

	for (;;) {
		long rowIdx, splitOp, splitCol, k, goRight = 0;
		double cmp;

		/* locate the matrix row describing this node */
		sprintf(hash_key, "%ld", nodeNum);
		rowIdx = IMinerHashTable_get(mapping, hash_key);
		if (rowIdx < 0) {
			return nodeNum;
		}

		splitOp  = (long)IMINER_TREE_NODE_OPERATOR(tree, rowIdx);
		splitCol = (long)IMINER_TREE_NODE_COLUMN(tree, rowIdx);
		if (splitCol < 0) {
			return nodeNum;
		}

		for (k = 0; k < nMaxComparisons; k++) {
			cmp = IMINER_TREE_NODE_OPVALUE(tree, rowIdx, k);

			/* rowIdx is non-negative here, so only the level count decides whether
			   a -1 comparison value ends the list */
			if (cmp == -1 && numLevels[splitCol] > 0) {
				break;
			}
			/* columns with no levels use the first comparison value only */
			if (k > 0 && numLevels[splitCol] == 0) {
				break;
			}

			/* goRight is the negation of the split condition */
			switch (splitOp) {
				case TREE_E:
					goRight = (rowData[splitCol] != cmp);
					break;
				case TREE_LT:
					goRight = (rowData[splitCol] >= cmp);
					break;
				case TREE_LTE:
					goRight = (rowData[splitCol] > cmp);
					break;
				case TREE_GT:
					goRight = (rowData[splitCol] <= cmp);
					break;
				case TREE_GTE:
					goRight = (rowData[splitCol] < cmp);
					break;
				default:
					return nodeNum;
			}

			/* first comparison that is satisfied sends the row to the left child */
			if (!goRight) {
				break;
			}
		}

		nodeNum = nodeNum * 2;
		if (goRight) {
			nodeNum++;
		}
	}
}

/* predict cluster memberships */
long IMCSRC_STDCALL IMinerTreeModel_predict(
 IMinerObject* pOutput,      /* out: output rectangular data */
 const IMinerObject* input,  /* in: input rectangular data */
 IMinerObject* pDescr,		 /* in: input description (if NULL, description will be
                                    created from input data) */
 const IMinerObject* pModel  /* in: the model */
)
{
	long nStatus=IMINER_SUCCESS, nTrees, bClassification, nLevels, nMaxComparisons, nInputColumns;
	long i, row, nOutputs, output, prob, tree, leafRow, *pnColumnsModes, matrixRow;
	long nInputRows, nOutputColumns, nMode, *numLevels, maxRows;
	IMinerObject pInput, *pmTree, *pvDepLevs, A;
	double  *rowData;

	char *pr_name_foramt="Pr(%s)", *pr_name, **columnNames;

	if(pOutput==NULL || !IMinerDataSet_isValid(input) || !IMinerTreeModel_isValid(pModel))
		return IMINER_BAD_INPUT_ARGS;

	/* convert input if needed*/
	nStatus = IMinerMetaData_InputConvert(&pInput, IMINER_TREE_META_DATA(pModel), input, pDescr);
	if(nStatus != IMINER_SUCCESS) return IMINER_FAIL;

	nTrees				= IMINER_TREE_GET_COUNT(pModel);
	bClassification		= IMINER_LONG_VALUE(IMINER_TREE_CLASSIFICATION(pModel), 0);
	nLevels				= IMINER_LONG_VALUE(IMINER_TREE_LEVELS(pModel), 0);
	nMaxComparisons		= IMINER_LONG_VALUE(IMINER_TREE_MAX_COMPARISONS(pModel), 0);
	pvDepLevs			= IMINER_TREE_DEP_LEVELS(pModel);

	nInputColumns		= IMINER_DATASET_NCOLUMNS(&pInput);
	nInputRows			= IMINER_DATASET_NROWS(&pInput);
	nOutputs			= (bClassification) ? nLevels : 1;

	/* num levels in each dependent column */
	numLevels = (long*)malloc(nInputColumns*sizeof(long));
	for (i=0; i<nInputColumns; i++) {
		numLevels[i] = IMINER_MD_COLUMN_LEVEL_COUNT(IMINER_TREE_META_DATA(pModel), i);
	}

	/* create mapping array*/
	/* first get max num rows */
	for (maxRows=0, tree=0; tree<nTrees; tree++) {
		pmTree = IMINER_TREE_DESCRIPTION(pModel, tree);
		row = (long)IMINER_TREE_NODE_NUM(pmTree, IMINER_MATRIX_NROWS(pmTree)-1);
		if (row > maxRows) maxRows = row;
	}

	/* create a matrix double to store the input as double */
	nStatus = IMinerDoubleMatrix_create(&A, nInputRows, nInputColumns, NULL, NULL, NULL);
	if(nStatus != IMINER_SUCCESS) return IMINER_FAIL;

	/* create the output object of the same size as input + one column per component */
	nOutputColumns = /*nInputColumns + */nOutputs;
	pnColumnsModes = (long*) malloc(nOutputColumns*sizeof(long));
	for(i=0L; i<nOutputColumns; ++i) pnColumnsModes[i] = IMINER_MODE_DOUBLE;
	nStatus = IMinerDataSet_create(pOutput, nInputRows, nOutputColumns, pnColumnsModes);
	if(nStatus != IMINER_SUCCESS) return nStatus;
	if (bClassification) {
		columnNames = (char**)malloc(nLevels*sizeof(char*));
		for (i=0; i<nLevels; i++) {
			pr_name = IMINER_STRING_VALUE(pvDepLevs, i);
			columnNames[i] = (char*)malloc((strlen(pr_name) + 5)*sizeof(char));
			sprintf(columnNames[i], "Pr(%s)", pr_name);
		}
	} else {
		columnNames = (char**)malloc(sizeof(char*));
		columnNames[0] = (char*)malloc(15*sizeof(char));
		sprintf(columnNames[0], "PREDICT.fit");
	}
	nStatus = IMinerDataSet_setColumnNamesFromStrings(pOutput, (const char**)columnNames);
	if(nStatus != IMINER_SUCCESS) return nStatus;

	if (bClassification) {for (i=0; i<nLevels; i++) {free(columnNames[i]);}}
	else free(columnNames[0]);
	free(columnNames);

	/* Set the input data to the matrix
	TODO: apply coding factor expansion of categorical data */
	/* copy content from the input object */
	for(output=nOutputs,i=0; output<nOutputColumns; ++output, ++i)
	{
		nStatus = IMinerDataSet_getColumnMode(&nMode, &pInput, i);
		if(nStatus != IMINER_SUCCESS) return IMINER_BAD_INPUT_ARGS;
		/* if(nMode != IMINER_MODE_DOUBLE) return IMINER_BAD_INPUT_ARGS; */
		/* copy content of pInput into jth column of pOutput */
		nStatus = IMinerDataSet_setColumnAt(pOutput, output, IMINER_DATASET_COLUMN_PTR(&pInput, i));
		if(nStatus != IMINER_SUCCESS) return nStatus;
	}


	rowData = (double*)malloc(nInputColumns*sizeof(double));
	for (tree=0L; tree<nTrees; tree++) {
		IMinerHashTable *pnMapping=0;

		/* evaluate tree for each row */
		pmTree = IMINER_TREE_DESCRIPTION(pModel, tree);

		/* create mapping */
		pnMapping = IMinerHashTable_createHash();
		maxRows = (long)IMINER_MATRIX_NROWS(pmTree);
		for (row=0L; row<maxRows; row++) {
			int asdf;
			sprintf(hash_key, "%ld", (long)IMINER_TREE_NODE_NUM(pmTree, row));
			asdf = IMinerHashTable_get(pnMapping, hash_key);
			IMinerHashTable_put(pnMapping, hash_key, row);
		}

		/* for each row (object), find the component score */
		for (row=0L; row<nInputRows; row++) {
			for (i=0L; i<nInputColumns; i++) {
				if (IMINER_IS_FACTOR(IMINER_DATASET_COLUMN_PTR(&pInput, i))) {
					rowData[i] = IMINER_FACTOR_DATA_VALUE(IMINER_DATASET_COLUMN_PTR(&pInput, i), row);
				} else {
					rowData[i] = IMINER_DOUBLE_VALUE(IMINER_DATASET_COLUMN_PTR(&pInput, i), row);
				}
			}

			/* get tree leaf row num */
			leafRow = getModelLeafNum(pmTree, pnMapping, 1, rowData, nMaxComparisons, numLevels);
			sprintf(hash_key, "%ld", leafRow);
			matrixRow = IMinerHashTable_get(pnMapping, hash_key);

			/* start with 0.0 probability */
			for (prob=0L; !tree && prob<nOutputs; prob++) {
				IMINER_DOUBLE_VALUE(IMINER_DATASET_COLUMN_PTR(pOutput, prob), row) = 0.0;
			}

			if (bClassification) for (prob=0L; prob<nOutputs; prob++) {
				/*double percent = IMINER_TREE_NODE_PROB(pmTree, pnMapping[leafRow], nMaxComparisons, prob);*/
				IMINER_DOUBLE_VALUE(IMINER_DATASET_COLUMN_PTR(pOutput, prob), row) +=
					IMINER_TREE_NODE_PROB(pmTree, matrixRow, nMaxComparisons, prob);
			} else for (prob=0L; prob<nOutputs; prob++) {
				/*double percent = IMINER_TREE_NODE_SCORE(pmTree, pnMapping[leafRow], nMaxComparisons);*/
				IMINER_DOUBLE_VALUE(IMINER_DATASET_COLUMN_PTR(pOutput, prob), row) +=
					IMINER_TREE_NODE_SCORE(pmTree, matrixRow, nMaxComparisons);
			}
		}

		IMinerHashTable_freeHash(pnMapping);
	}

	for (row=0L; row<nInputRows; row++) {
		for (prob=0L; prob<nOutputs; prob++) {
			double tempProb = IMINER_DOUBLE_VALUE(IMINER_DATASET_COLUMN_PTR(pOutput, prob), row);
			IMINER_DOUBLE_VALUE(IMINER_DATASET_COLUMN_PTR(pOutput, prob), row) = (nTrees) ? tempProb/nTrees : 0.0;
		}
	}

	free(pnColumnsModes);
	free(numLevels);
	free(rowData);

	IMinerObject_destroy(&pInput);
	IMinerObject_destroy(&A);

	return IMINER_SUCCESS;
}

