#include <stdio.h>
#include <stdlib.h>

#include <string.h>
#include <ctype.h>
#include <time.h>

#include "common.h"
#include "msa_k.h"
#include "k_group_metis.h"

// Verbosity threshold for the diagnostic printf calls in this file: each call
// site prints only when debug_k exceeds its own level (1, 10 or 101), so the
// default of -101 keeps every one of them silent.
int debug_k = -101;
// Set to 1 by alignSequences_k() before grouping starts and cleared by
// group_k() after its first pass, so the pairwise-score summary is computed once.
int first_k;

// Self-test driver: aligns four hard-coded residue strings through
// align_alignment_k() and then releases every buffer it allocated.
// argc and argv are accepted but never read.
// Returns 0; terminates with exit(1) if any allocation fails.
int main_k(int argc,char** argv)
{
	static const char* const samples[4] = {
		"erlseddpaaqaleyr",
		"navaadnataialkyn",
		".praedgha.h..dyv",
		"apvdeknpqavalgyv"
	};
	int sequenceCount = 4;
	char** sequences;
	char** alignmentResult;
	int idx;

	max_number_in_group_k= 4;

	sequences = (char**) malloc(sequenceCount * sizeof(char*));
	if(sequences == NULL)
	{
		fprintf(stderr,"unable to allocate memory.");
		exit(1);
	}
	alignmentResult = (char**) malloc(sequenceCount * sizeof(char*));
	if(alignmentResult == NULL)
	{
		fprintf(stderr,"unable to allocate memory.");
		exit(1);
	}

	// one input row and one result row per sequence, each MAXLEN_K bytes wide
	for(idx = 0; idx < sequenceCount; idx ++)
	{
		sequences[idx] = (char*) malloc(MAXLEN_K * sizeof(char));
		if(sequences[idx] == NULL)
		{
			fprintf(stderr,"unable to allocate memory.");
			exit(1);
		}
		alignmentResult[idx] = (char*) malloc(MAXLEN_K * sizeof(char));
		if(alignmentResult[idx] == NULL)
		{
			fprintf(stderr,"unable to allocate memory.");
			exit(1);
		}
		strcpy(sequences[idx],samples[idx]);
	}

	// 16 columns of input, at most 2 members per group, dos flag 1, temp-name prefix "test"
	align_alignment_k(sequences, alignmentResult, sequenceCount,16,2,1,"test");

	for(idx = 0; idx < sequenceCount; idx ++)
	{
		free(sequences[idx]);
		free(alignmentResult[idx]);
	}
	free(sequences);
	free(alignmentResult);
	return 0;
}
// Stores the two multipliers that
// get_weight_of_2_groups_combine_expectation_length() applies to its
// expectation term and its length term.
void set_weight_k(int  weight_expectation_value,int weight_length_value)
{
	weight_length_k = weight_length_value;
	weight_expectation_k = weight_expectation_value;
}

// Returns the global pairwise alignment score of two NUL-terminated sequences.
//
// Each cell is the maximum of three moves: a gap in sequence2 ("above"), a gap
// in sequence1 ("proceeding") and a substitution scored by getWeight_K().
// Both gap penalties are read from getWeight_K('a','-') and are therefore
// equal, so a gap of n residues costs n times that value.
//
// Only the final score is returned, so the table is kept as two rolling rows
// and no traceback matrix is built.
// Terminates with exit(1) if memory cannot be allocated.
int alignTwoSequence_K(char sequence1[MAXLEN_K],char sequence2[MAXLEN_K])
{
	int rowLength;
	int colLength;
	int* previousRow;
	int* currentRow;
	int* swapRow;
	int row;
	int col;
	int above;
	int proceeding;
	int diagnal;
	int best;
	int gapOpenPenalty;
	int gapExtensionPenalty;
	int alignmentScore;

	// one extra row/column for the empty prefix
	rowLength = (int)strlen(sequence1) + 1;
	colLength = (int)strlen(sequence2) + 1;

	if((previousRow=(int *) malloc(colLength*sizeof(int)))==NULL)
	{
		fprintf(stderr,"unable to allocate memory.");
		exit(1);
	}
	if((currentRow=(int *) malloc(colLength*sizeof(int)))==NULL)
	{
		fprintf(stderr,"unable to allocate memory.");
		exit(1);
	}

	gapExtensionPenalty = getWeight_K('a','-');
	gapOpenPenalty = getWeight_K('a','-');

	// row 0: the empty prefix of sequence1 against every prefix of sequence2
	previousRow[0] = 0;
	for (col = 1; col < colLength; col ++)
	{
		previousRow[col] = gapOpenPenalty + (col-1) * gapExtensionPenalty;
	}

	for (row = 1; row < rowLength; row ++)
	{
		// column 0: every prefix of sequence1 against the empty prefix
		currentRow[0] = gapOpenPenalty + (row-1) * gapExtensionPenalty;
		for (col = 1; col < colLength; col ++)
		{
			above = previousRow[col] + gapOpenPenalty;
			proceeding = currentRow[col-1] + gapExtensionPenalty;
			diagnal = previousRow[col-1] + getWeight_K(sequence1[row - 1],sequence2[col - 1]);

			best = above;
			if(proceeding > best)
			{
				best = proceeding;
			}
			if(diagnal > best)
			{
				best = diagnal;
			}
			currentRow[col] = best;
		}
		// the row just filled becomes the "previous" row of the next pass
		swapRow = previousRow;
		previousRow = currentRow;
		currentRow = swapRow;
	}

	// after the final swap the last computed row is in previousRow
	alignmentScore = previousRow[colLength-1];

	free(previousRow);
	free(currentRow);
	return alignmentScore;
}
// Single place where the substitution scoring scheme is chosen for this file;
// currently forwards the residue pair to getWeightBLOSUM62().
int getWeight_K(char a,char b)
{
	int weight = getWeightBLOSUM62(a,b);
	return weight;
}
// Allocates a table of `rows` character buffers, each `width` bytes wide.
// Terminates with exit(1) on allocation failure.
static char** allocate_rows_for_align_alignment_k(int rows, size_t width)
{
	char** table;
	int i;

	if((table =(char **) malloc(rows*sizeof(char*)))==NULL)
	{
		printf("unable to allocate memory.");
		exit(1);
	}
	for( i = 0; i < rows; i ++)
	{
		if((table[i]=(char *) malloc(width*sizeof(char)))==NULL)
		{
			printf("unable to allocate memory.");
			exit(1);
		}
	}
	return table;
}

// Releases a table created by allocate_rows_for_align_alignment_k().
static void free_rows_for_align_alignment_k(char** table, int rows)
{
	int i;

	for( i = 0; i < rows; i ++)
	{
		free(table[i]);
	}
	free(table);
}

// Public entry point: re-aligns `sequenceCount` sequences and writes the result
// to `output`.
//
// input               - sequenceCount NUL-terminated rows; only the first
//                       `length` characters of each are considered. Characters
//                       for which isGap_k() is true, and positions at or after
//                       the terminator, are treated as gaps ('*').
// output              - caller-allocated rows that receive the aligned,
//                       NUL-terminated sequences. Rows whose input had no
//                       residues are filled with '*' to the alignment length.
// max_number_in_group - upper bound on group size, clamped to sequenceCount.
// dos_flag            - stored in dos_k_group_metis.
// uid                 - optional prefix for unique_string_k (may be NULL).
//
// Returns the score reported by alignSequences_k(). Returns 0 without touching
// `output` when max_number_in_group <= 1, sequenceCount <= 0, length < 0 or
// sequenceCount > MAX_SEQ_NUM. When no sequence contains a residue, every
// output row becomes the empty string and 0 is returned.
// Terminates with exit(1) if memory cannot be allocated.
int align_alignment_k(char** input, char** output, int sequenceCount,int length, int max_number_in_group,int dos_flag, char* uid)
{
	char ** alignment;
	char ** alignment_without_empty_sequneces;
	char ** input_without_empty_sequneces;
	char ** output_without_empty_sequneces;
	int index;
	int i;
	int j;
	int sp_score;
	int r;
	int input_ended;
	int input_sequence_actual_length;
	int non_empty_count;
	int empty_flag[MAX_SEQ_NUM];
	int ret_alignment_length;

	if(max_number_in_group <= 1)
	{
		return 0;
	}
	if(sequenceCount <= 0 || length < 0)
	{
		return 0;
	}
	if(sequenceCount > MAX_SEQ_NUM)
	{
		// empty_flag holds exactly MAX_SEQ_NUM entries
		fprintf(stderr,"too many sequences: %d (limit %d).\n",sequenceCount,(int)MAX_SEQ_NUM);
		return 0;
	}
	if(max_number_in_group > sequenceCount)
	{
		max_number_in_group = sequenceCount;
	}

	alignment = allocate_rows_for_align_alignment_k(sequenceCount,(size_t)length + 1);

	// Normalise every row to exactly `length` columns with '*' as the gap
	// symbol, and record which rows contain at least one residue.
	group_number_k = 0;
	for( i = 0; i < sequenceCount; i ++)
	{
		input_sequence_actual_length = 0;
		input_ended = 0;
		for( j = 0; j < length; j ++)
		{
			// never read input[i] beyond its terminator
			if(!input_ended && !input[i][j])
			{
				input_ended = 1;
			}
			if(input_ended || isGap_k(input[i][j]))
			{
				alignment[i][j] = '*';
			}
			else
			{
				alignment[i][j] = input[i][j];
				input_sequence_actual_length ++;
			}
		}
		alignment[i][j] = 0;
		if(input_sequence_actual_length > 0)
		{
			input_sequence_actual_lengths_k[group_number_k] = input_sequence_actual_length;
			group_number_k ++;
			empty_flag[i] = 0;
		}
		else
		{
			empty_flag[i] = 1;
		}
	}

	//set initial values
	max_number_in_group_k = max_number_in_group;
	dos_k_group_metis = dos_flag;

	// alignSequences_k() changes group_number_k, so keep the count locally
	non_empty_count = group_number_k;

	if(non_empty_count == 0)
	{
		// nothing to align: the result is an alignment of length zero
		for(i = 0; i < sequenceCount; i ++)
		{
			output[i][0] = 0;
		}
		free_rows_for_align_alignment_k(alignment,sequenceCount);
		return 0;
	}

	alignment_without_empty_sequneces = allocate_rows_for_align_alignment_k(non_empty_count,(size_t)length + 1);
	input_without_empty_sequneces = allocate_rows_for_align_alignment_k(non_empty_count,(size_t)length + 1);
	output_without_empty_sequneces = allocate_rows_for_align_alignment_k(non_empty_count,(size_t)length * non_empty_count + 1);

	index = 0;
	for( i = 0; i < sequenceCount; i ++)
	{
		if(!empty_flag[i])
		{
			// bounded copy: the destination holds only length + 1 bytes
			strncpy(input_without_empty_sequneces[index],input[i],(size_t)length);
			input_without_empty_sequneces[index][length] = 0;
			strcpy(alignment_without_empty_sequneces[index],alignment[i]);
			index ++;
		}
	}

	input_k = input_without_empty_sequneces;
	output_k = output_without_empty_sequneces;
	sequenceCount_k = non_empty_count;
	length_k = length;

	//random setting, this is for metis used temp file name
	//also for methods random
	srand(time(NULL));
	r=rand();
	if(uid)
	{
		sprintf(unique_string_k,"%s%d",uid,r);
	}
	else
	{
		sprintf(unique_string_k,"%d",r);
	}

	sp_score = alignSequences_k(alignment_without_empty_sequneces, output_without_empty_sequneces, non_empty_count);

	// Put the aligned rows back at their original positions; rows that were
	// empty become all-gap rows of the same length.
	ret_alignment_length = (int)strlen(output_without_empty_sequneces[0]);
	index = 0;
	for(i = 0; i < sequenceCount; i ++)
	{
		if(empty_flag[i])
		{
			for( j = 0; j < ret_alignment_length; j ++)
			{
				output[i][j] = '*';
			}
			output[i][j] = 0;
		}
		else
		{
			strcpy(output[i],output_without_empty_sequneces[index]);
			index ++;
		}
	}

	free_rows_for_align_alignment_k(alignment,sequenceCount);
	free_rows_for_align_alignment_k(alignment_without_empty_sequneces,non_empty_count);
	free_rows_for_align_alignment_k(output_without_empty_sequneces,non_empty_count);
	free_rows_for_align_alignment_k(input_without_empty_sequneces,non_empty_count);
	return sp_score;
}
// Progressive alignment driver.
//
// Wraps each input row in a leaf GROUP_NODE, then alternates group_k()
// (partition the current nodes into groups) and align_all_groups() (align each
// group's children) until a single group remains. The rows of that final node
// are written to `output` in original order, with columns that contain no
// letter in any row removed.
//
// input/output - sequenceCount rows each; output rows are caller-allocated.
// Returns the value of getAlignmentSPScore_k() for the final alignment, or 0
// when sequenceCount <= 1 (a single sequence is copied through unchanged).
//
// NOTE: besides its parameters this function reads the globals group_number_k,
// sequenceCount_k and input_k, which align_alignment_k() sets before calling.
// Terminates with exit(1) if memory cannot be allocated.
int alignSequences_k(char** input, char** output, int sequenceCount)
{
	int i;
	int j;
	GROUP_NODE* group_node;
	char ** result;
	int length;
	int alignment_length;
	int non_gap_found;
	int sp_score;

	//1.initial

	if(sequenceCount <= 0)
	{
		return 0;
	}
	if(sequenceCount == 1)
	{
		strcpy(output[0],input[0]);
		return 0;
	}
	for( i = 0; i < MAX_SEQ_NUM; i ++)
	{
		group_nodes_k[i] = 0;
	}
	// one leaf node per sequence, remembering the sequence's original row
	for( i = 0; i < sequenceCount; i ++)
	{
		group_node = get_new_group_node();
		group_node->childrenCount = 0;
		group_node->alignmentLength = strlen(input[i]);
		group_node->alignment_sequences_count = 1;
		if((group_node->alignment[0]=(char *) malloc((group_node->alignmentLength + 1)*sizeof(char)))==NULL)
		{
			printf("unable to allocate memory.");
			exit(1);
		}
		strcpy(group_node->alignment[0],input[i]);
		group_node->orginal_index[0] = i;
		group_nodes_k[i] = group_node;
	}

	//combination factors for the combined expectation/length pair score
	set_weight_k(5,10);

	//lets group_k() report the pair-score summary on its first pass only
	first_k = 1;

	//pairwise tables: upper bound, induced sp and induced gap count for every
	//pair of (global) input sequences, stored symmetrically
	sum_upper_bound_k = 0;
	sum_induced_sp_k = 0;

	for( i = 0; i < sequenceCount_k; i ++)
	{
		for( j = i + 1; j < sequenceCount_k; j ++)
		{
			sequence_pair_upperbound_scores_k[i][j] = getUpperBoundScore(input_k[i],input_k[j]);
			sequence_pair_induced_sp_scores_k[i][j] = getInducedSPScore(input_k[i],input_k[j]);
			sequence_pair_induced_gap_number_k[i][j] = getInducedGapNumber(input_k[i],input_k[j]);

			sequence_pair_upperbound_scores_k[j][i] = sequence_pair_upperbound_scores_k[i][j];
			sequence_pair_induced_sp_scores_k[j][i] = sequence_pair_induced_sp_scores_k[i][j];
			sequence_pair_induced_gap_number_k[j][i] = sequence_pair_induced_gap_number_k[i][j];

			sum_upper_bound_k += sequence_pair_upperbound_scores_k[i][j];
			sum_induced_sp_k += sequence_pair_induced_sp_scores_k[i][j];
		}
	}

	while (group_number_k > 1)
	{
		if(debug_k > 10)
		{
			printf("\nnow totally has %d groups.\n",group_number_k);

		}

		//3. group the group_nodes
		group_k();

		first_k = 0;
		//4. align each group
		align_all_groups();

	}

	length = current_parent_group_node->alignmentLength;

	if((result =(char **) malloc((sequenceCount)*sizeof(char*)))==NULL)
	{
		printf("unable to allocate memory.");
		exit(1);
	}
	for( i = 0; i < sequenceCount; i ++)
	{
		if((result[i]=(char *) malloc((length + 1)*sizeof(char)))==NULL)
		{
			printf("unable to allocate memory.");
			exit(1);
		}
	}
	// undo the reordering introduced by grouping
	for( i = 0; i < sequenceCount; i ++)
	{
		j = current_parent_group_node->orginal_index[i];
		strcpy(result[j],current_parent_group_node->alignment[i]);
	}

	// copy only the columns in which at least one row holds a letter
	alignment_length = 0;
	for( j = 0; j < length; j ++)
	{
		non_gap_found = 0;
		for(i = 0 ; i < sequenceCount; i ++)
		{
			// <ctype.h> functions require a value representable as unsigned char
			if(isalpha((unsigned char)result[i][j]))
			{
				non_gap_found = 1;
			}
		}
		if(!non_gap_found)
		{
			continue;
		}
		for(i = 0 ; i < sequenceCount; i ++)
		{
			output[i][alignment_length] = result[i][j];
		}
		alignment_length ++;
	}
	for(i = 0 ; i < sequenceCount; i ++)
	{
		output[i][alignment_length] = 0;
	}
	sp_score = getAlignmentSPScore_k(output,sequenceCount, alignment_length);
	for(i = 0; i < sequenceCount; i ++)
	{
		free(result[i]);
	}
	free(result);
	recursiveFree(current_parent_group_node);
	return sp_score;
}
// Releases a group node, all of its descendants and every alignment row they
// own. A NULL root is ignored.
//
// The child loop is bounded by the node's own childrenCount. Using the global
// group_number_k (which is 1 once alignment has finished) would visit only
// the first child and leak the remaining subtrees.
void recursiveFree(GROUP_NODE* root)
{
	int i;
	if(!root)
	{
		return;
	}
	for( i = 0; i < root->childrenCount; i ++)
	{
		recursiveFree(root->children_group_nodes[i]);
	}
	for( i = 0; i < root->alignment_sequences_count; i ++)
	{
		free(root->alignment[i]);
	}
	free(root);

}
//runs align_a_group() on every index below group_number_k, in ascending order
void align_all_groups()
{
	int group_index = 0;

	while(group_index < group_number_k)
	{
		align_a_group(group_index);
		group_index ++;
	}
}
//aligns the children of group_nodes_k[group_index];
//a node without children is left untouched
void align_a_group(int group_index)
{
	GROUP_NODE* parent = group_nodes_k[group_index];

	if(parent->childrenCount > 0)
	{
		setOptimalAlignment_k(group_index);
	}
}

//this function partitions the nodes in group_nodes_k into new parent groups
//
//p_group_function fills group_flag[j] with the new group index of node j and
//returns the number of new groups. For every new group a fresh GROUP_NODE is
//created whose children are the nodes flagged with that index; group_nodes_k
//and group_number_k are then replaced by the new level.
//On the first pass (first_k set) the sum reported by get_sum_weight_edges_k()
//is stored in g_sum_sw_score.
void group_k()
{
	int i;
	int j;
	int group_flag[MAX_SEQ_NUM];
	int new_group_number_k;
	GROUP_NODE* new_group_nodes_k[MAX_SEQ_NUM];
	GROUP_NODE* group_node;
	int sum_sw_score;

	//start from "unassigned" so the debug dump below and any entry the
	//grouping function leaves untouched never read indeterminate memory
	for( i = 0; i < MAX_SEQ_NUM; i ++)
	{
		group_flag[i] = -1;
	}

	if(debug_k > 1)
	{
		for( i = 0; i < sequenceCount_k; i ++)
		{
			printf("%d ",group_flag[i]);
		}
		printf("\n");
		fflush(stdout);
	}
	new_group_number_k = p_group_function(group_flag);
	if(debug_k > 1)
	{
		for( i = 0; i < sequenceCount_k; i ++)
		{
			printf("%d ",group_flag[i]);
		}
		printf("\n");
		fflush(stdout);
	}

	//record the pair-score summary once, for the very first grouping
	if( first_k )
	{
		sum_sw_score = get_sum_weight_edges_k(group_flag);
		g_sum_sw_score = sum_sw_score;
		first_k = 0;
		if(debug_k > 1)
		{
			printf("sum of sw: %d\n",sum_sw_score);
		}
	}

	for( i = 0; i < MAX_SEQ_NUM; i ++)
	{
		new_group_nodes_k[i] = 0;
	}
	//build one parent per new group and attach its members as children
	for( i = 0; i < new_group_number_k; i ++)
	{
		group_node = get_new_group_node();
		group_node->childrenCount = 0;
		for( j = 0; j < group_number_k; j ++)
		{
			if(group_flag[j] == i)
			{
				group_node->children_group_nodes[group_node->childrenCount] = group_nodes_k[j];
				group_node->childrenCount ++;
			}
		}
		new_group_nodes_k[i] = group_node;
	}
	//the new level replaces the old one
	for( i = 0; i < MAX_SEQ_NUM; i ++)
	{
		group_nodes_k[i] = new_group_nodes_k[i];
	}
	group_number_k = new_group_number_k;
}


//fills group_flag through the metis-based partitioner
//
//The pair score table is reset and recomputed first (installing
//get_weight_of_2_groups_combine_expectation_length as the scoring function if
//none has been chosen). The table is then handed to
//set_flag_array_by_metis_from_matrix(), and equal_size() post-processes the flags.
//Returns the value reported by set_flag_array_by_metis_from_matrix().
int set_group_flag_by_metis(int* group_flag)
{
	int group_count;

	if(p_pairwise_score_function == NULL)
	{
		p_pairwise_score_function = &get_weight_of_2_groups_combine_expectation_length;
	}

	erase_group_pair_scores_k();
	set_group_pair_scores_k();

	group_count = set_flag_array_by_metis_from_matrix(group_flag,group_pair_scores_k);
	equal_size(group_flag);
	return group_count;
}
//set group flag by simple division: node i is placed in group
//i / max_number_in_group_k, so consecutive nodes share a group and no group
//exceeds max_number_in_group_k members
//
//All MAX_SEQ_NUM entries of group_flag are first reset to -1.
//Returns the number of groups used (0 when group_number_k is not positive).
int set_group_flag_by_div(int* group_flag)
{
	int i;
	int group_size;

	for(i = 0; i < MAX_SEQ_NUM; i ++)
	{
		group_flag[i] = -1;
	}
	if(group_number_k <= 0)
	{
		return 0;
	}

	//guard against a zero or negative limit, which would divide by zero
	group_size = (max_number_in_group_k > 0) ? max_number_in_group_k : 1;

	for(i = 0; i < group_number_k; i ++)
	{
		group_flag[i] = i / group_size;
	}
	//ceil(group_number_k / group_size)
	return (group_number_k + group_size - 1) / group_size;
}
//set group randomly: flags are first laid out in consecutive blocks of
//max_number_in_group_k members and then shuffled by setRandom(), so group
//sizes stay bounded while membership is random
//
//All MAX_SEQ_NUM entries of group_flag are first reset to -1.
//Returns the number of groups used (0 when group_number_k is not positive).
int set_group_flag_by_random(int* group_flag)
{
	int group_count;
	int group_size;
	int i;

	for(i = 0; i < MAX_SEQ_NUM; i ++)
	{
		group_flag[i] = -1;
	}

	//defined result even when there is nothing to group
	group_count = 0;
	//guard against a zero or negative limit, which would divide by zero
	group_size = (max_number_in_group_k > 0) ? max_number_in_group_k : 1;

	for(i = 0; i < group_number_k; i ++)
	{
		group_flag[i] = i / group_size;
		group_count = group_flag[i] + 1;
	}

	setRandom(group_flag);
	return group_count;
}
//this function permutes the first group_number_k entries of group_flag
//
//Every entry is paired with a random key in [0,1) drawn from rand(); the keys
//are sorted in ascending order with a stable sort and the flags are moved
//along with them. Does nothing when group_number_k is not positive.
void setRandom(int group_flag[MAX_SEQ_NUM])
{
	double random_chance[MAX_SEQ_NUM];
	double key_chance;
	int key_flag;
	int slot;
	int scan;

	if (group_number_k <= 0)
	{
		return;
	}

	for (slot = 0; slot < group_number_k; slot++)
	{
		random_chance[slot] = (double)rand() / ((double)RAND_MAX + 1);
	}

	// Stable insertion sort on the keys; an element only moves past keys that
	// are strictly larger, so equal keys keep their relative order.
	for (slot = 1; slot < group_number_k; slot++)
	{
		key_chance = random_chance[slot];
		key_flag = group_flag[slot];
		scan = slot;
		while (scan > 0 && random_chance[scan - 1] > key_chance)
		{
			random_chance[scan] = random_chance[scan - 1];
			group_flag[scan] = group_flag[scan - 1];
			scan--;
		}
		random_chance[scan] = key_chance;
		group_flag[scan] = key_flag;
	}
}
//pair score for two groups of group_nodes_k: over every cross pair of member
//sequences, the total pairwise upper bound minus the total induced sp score
int get_weight_of_2_groups_expectation(int group_index1,int group_index2)
{
	GROUP_NODE* first = group_nodes_k[group_index1];
	GROUP_NODE* second = group_nodes_k[group_index2];
	int upper_bound_total = 0;
	int induced_sp_total = 0;
	int a;
	int b;
	int row;
	int column;

	for( a = 0; a < first->alignment_sequences_count; a ++)
	{
		// the tables are indexed by original sequence number
		row = first->orginal_index[a];
		for( b = 0; b < second->alignment_sequences_count; b ++)
		{
			column = second->orginal_index[b];
			upper_bound_total += sequence_pair_upperbound_scores_k[row][column];
			induced_sp_total += sequence_pair_induced_sp_scores_k[row][column];
		}
	}

	return upper_bound_total-induced_sp_total;
}

//pair score for two groups of group_nodes_k that combines two terms:
//  expectation - (upper bound total - induced sp total) over every cross pair
//                of member sequences, divided by 6 * (length1 + length2)
//  length      - 1 - |length1 - length2| / max(length1, length2)
//where the lengths come from get_group_length_k(). The terms are weighted by
//weight_expectation_k and weight_length_k and the sum is truncated to int.
int get_weight_of_2_groups_combine_expectation_length(int group_index1,int group_index2)
{
	GROUP_NODE* first = group_nodes_k[group_index1];
	GROUP_NODE* second = group_nodes_k[group_index2];
	int upper_bound_total = 0;
	int induced_sp_total = 0;
	int first_length;
	int second_length;
	double expectation_score;
	double length_score;
	int a;
	int b;
	int row;
	int column;

	for( a = 0; a < first->alignment_sequences_count; a ++)
	{
		// the tables are indexed by original sequence number
		row = first->orginal_index[a];
		for( b = 0; b < second->alignment_sequences_count; b ++)
		{
			column = second->orginal_index[b];
			upper_bound_total += sequence_pair_upperbound_scores_k[row][column];
			induced_sp_total += sequence_pair_induced_sp_scores_k[row][column];
		}
	}

	first_length = get_group_length_k(first);
	second_length = get_group_length_k(second);

	expectation_score = ((double)(upper_bound_total-induced_sp_total)/(6 * (first_length + second_length)));
	length_score = 1 - ( ((double)(ABS(first_length - second_length))/(MAX(first_length , second_length))));

	return (int)(expectation_score * weight_expectation_k +
		   length_score * weight_length_k);
}

//fills group_pair_scores_k for every unordered pair of current groups by
//calling p_pairwise_score_function once per pair and mirroring the value;
//diagonal entries are not written
void set_group_pair_scores_k()
{
	int low;
	int high;

	for( high = 1; high < group_number_k; high ++)
	{
		for( low = 0; low < high; low ++)
		{
			group_pair_scores_k[low][high] = p_pairwise_score_function(low,high);
			group_pair_scores_k[high][low] = group_pair_scores_k[low][high];
		}
	}
}
//resets every cell of the MAX_SEQ_NUM x MAX_SEQ_NUM table group_pair_scores_k
//to -MAXINT
void erase_group_pair_scores_k()
{
	int row = 0;
	int column;

	while( row < MAX_SEQ_NUM )
	{
		column = 0;
		while( column < MAX_SEQ_NUM )
		{
			group_pair_scores_k[row][column] = -MAXINT;
			column ++;
		}
		row ++;
	}
}
//allocates a GROUP_NODE on the heap and returns it with its counters at zero
//and every alignment, child and original-index slot cleared;
//terminates with exit(1) if the allocation fails
GROUP_NODE* get_new_group_node()
{
	GROUP_NODE* node = (GROUP_NODE*) malloc(sizeof(GROUP_NODE));
	int slot;

	if(node == NULL)
	{
		printf("unable to allocate memory.");
		exit(1);
	}

	node->alignment_sequences_count = 0;
	node->alignmentLength = 0;
	node->childrenCount = 0;

	for(slot = MAX_SEQ_NUM - 1; slot >= 0; slot --)
	{
		node->orginal_index[slot] = 0;
		node->children_group_nodes[slot] = 0;
		node->alignment[slot] = 0;
	}
	return node;
}

//this function aligns the child nodes of group_nodes_k[parent_group_index]
//and stores the resulting alignment in the parent node
//
//Every child is one dimension of a multi-dimensional dynamic-programming
//lattice stored as a flat array. Each cell keeps the best score found by
//getScoreOfCell_k() over all directions, where a direction is a bit mask
//saying which dimensions advance. The traceback follows the recorded
//directions from the far corner to the origin and rebuilds the parent's rows,
//writing '*' where a child does not advance.
//
//Returns the score of the far corner cell; 0 when the parent has no children
//or exactly one child (that child's rows are copied through); -MAXINT when a
//cell cannot be scored or the alignment would not fit in
//WINDOW_ALIGNMENT_SIZE_K columns. In the -MAXINT case the parent's rows are
//left as empty strings. Terminates with exit(1) if memory cannot be allocated.
int setOptimalAlignment_k(int parent_group_index)
{
	int dimensionCount;
	//number of dimensions, i.e. number of children being aligned
	int* window[MAX_SEQ_NUM];
	//window[d][p] is the child column that position p of dimension d refers to

	int dimensionLength[MAX_SEQ_NUM];
	//alignment length of each child
	int* multipleDimensionScore;
	//flat score lattice, addressed through dimensionPos
	int dimensionPos[MAX_SEQ_NUM];
	//current coordinates in the lattice

	short int* multipleDimensionTrack;
	//direction chosen for each lattice cell, used by the traceback
	int i,j;
	int multipleDirection[MAX_SEQ_NUM];
	//per-dimension decoding of a direction:
	//1 means a step along the dimension, 0 means no step (a gap)

	int direction;
	int directionCount;
	//2^dimensionCount; directions 1 .. directionCount-1 are tried

	int alignmentScore;
	int cellValue;
	int maxValue;
	int maxDirection;
	int temp[MAX_SEQ_NUM][WINDOW_ALIGNMENT_SIZE_K];
	//traceback result, filled from the right end: child column or -1 for a gap
	int alignment_start_pos;
	int alignmentLength;
	int failed;

	GROUP_NODE* p_parent_group;
	int N;
	int col;
	int row_start_index;
	int child_group_index;
	int col_pos;

    int max_length;

	// initialize
	p_parent_group = group_nodes_k[parent_group_index];
	if(p_parent_group->childrenCount <= 0)
	{
		return 0;
	}

	//the parent holds one row per sequence contained in any of its children
	p_parent_group->alignment_sequences_count = 0;
	for( i = 0; i < p_parent_group->childrenCount; i ++)
	{
		p_parent_group->alignment_sequences_count += p_parent_group->children_group_nodes[i]->alignment_sequences_count;
	}

	for( i = 0; i < p_parent_group->alignment_sequences_count; i ++)
	{
		if((p_parent_group->alignment[i]=(char*) malloc((WINDOW_ALIGNMENT_SIZE_K + 1)*sizeof(char)))==NULL)
		//add 1 for the terminator
		{
			printf("unable to allocate memory for alignment.");
			exit(1);
		}
	}

	//a single child needs no alignment: copy its rows and indices through
	if(p_parent_group->childrenCount == 1)
	{
		for( i = 0; i < p_parent_group->alignment_sequences_count; i ++)
		{
			strcpy(p_parent_group->alignment[i],p_parent_group->children_group_nodes[0]->alignment[i]);
			p_parent_group->orginal_index[i] = p_parent_group->children_group_nodes[0]->orginal_index[i];
		}
		p_parent_group->alignmentLength = p_parent_group->children_group_nodes[0]->alignmentLength;

		return 0;
	}

	current_parent_group_node = p_parent_group;
	failed = 0;

	dimensionCount = p_parent_group->childrenCount;

	max_length = -1;
	for ( i = 0; i < dimensionCount ; i ++)
	{
		dimensionPos[i] = 0;
		dimensionLength[i] = p_parent_group->children_group_nodes[i]->alignmentLength;
		if(max_length < dimensionLength[i])
		{
			max_length = dimensionLength[i];
		}
	}
	for ( i = 0; i < dimensionCount ; i ++)
	{
		if((window[i]=(int*) malloc((max_length + 1)*sizeof(int)))==NULL)
		//add 1 to prevent overflow
		{
			printf("unable to allocate memory for window.");
			exit(1);
		}
	}

	//identity window: position j of every dimension is child column j
	for ( i = 0; i < dimensionCount ; i ++)
	{
		for( j = 0 ; j < dimensionLength[i]; j ++)
		{
			window[i][j] = j;
		}
	}

	//N becomes the flat index of the far corner: prod(length + 1) - 1
	N = 1;
	directionCount = 1;
	for ( i = 0; i < dimensionCount; i ++)
	{
		N = (dimensionLength[i] + 1)* N;
		directionCount = 2*directionCount;
	}
	N--;

	if((multipleDimensionScore=(int*) malloc((N + 2)*sizeof(int)))==NULL)
	//add 1 to prevent overflow
	{
		printf("unable to allocate memory for multipleDimensionScore.");
		exit(1);
	}
	if((multipleDimensionTrack=(short int*) malloc((N + 2)*sizeof(short int)))==NULL)
	//add 1 to prevent overflow
	{
		printf("unable to allocate memory for multipleDimensionTrack.");
		exit(1);
	}
	for( i = 0; i < N + 1; i ++)
	{
		multipleDimensionScore[i] = 0;
		multipleDimensionTrack[i] = 0;
	}

	//the origin [0,0,..,0] scores zero
    multipleDimensionScore[0] = 0;

    //fill the lattice in flat-index order
	for (i = 1; i <= N; i ++)
    {
		//coordinates of cell i
		setMultipleDimensionPointInArrayFromPosition_k(i, dimensionPos,dimensionCount,dimensionLength);

		//try every non-empty direction and keep the best
		maxValue = -MAXINT;
		maxDirection = 0;
		for ( direction  = 1; direction < directionCount ; direction ++)
		{
			setMultipleDirection_k(direction,multipleDirection,dimensionCount);
			cellValue = getScoreOfCell_k(multipleDimensionScore,dimensionPos,multipleDirection,dimensionCount,dimensionLength,window);
			if(cellValue > maxValue)
			{
				maxValue = cellValue;
				maxDirection = direction;
			}
			if(debug_k > 10)
			{
				printf("for i = %d, direction: %d: cellValue = %d\n",i,direction,cellValue);
			}
		}
		if(maxValue > -MAXINT)
		{
			multipleDimensionScore[i] = maxValue;
			multipleDimensionTrack[i] = maxDirection;
		}
		else
		{
			//no direction could be scored; release the scratch buffers below
			failed = 1;
			goto cleanup;
		}
	}//for i

	//traceback from the far corner to the origin
	for ( i = 0; i < dimensionCount; i ++)
	{
		dimensionPos[i] = dimensionLength[i];
	}
	i = getPositionOfMultipleDimensionPointInArray_k(dimensionPos,dimensionCount,dimensionLength);
	alignment_start_pos = WINDOW_ALIGNMENT_SIZE_K - 1;
	alignmentLength = 0;
	while( i > 0)
	{
		if(alignment_start_pos < 0)
		{
			//the alignment is longer than temp and the parent rows can hold
			failed = 1;
			goto cleanup;
		}

		direction = multipleDimensionTrack[i];
		setMultipleDirection_k(direction, multipleDirection,dimensionCount);
		for ( j = 0; j < dimensionCount; j ++)
		{
			if(multipleDirection[j] == 1)
			{
				//a step along this dimension consumes one child column
				temp[j][alignment_start_pos] = window[j][dimensionPos[j] -1];
				dimensionPos[j] --;
			}
			else
			{
				//a gap along this dimension
				temp[j][alignment_start_pos] = -1;
			}
		}
		alignment_start_pos --;
		alignmentLength ++;
		i = getPositionOfMultipleDimensionPointInArray_k(dimensionPos,dimensionCount,dimensionLength);
	}

	//write the aligned columns into the parent's rows, child after child

	for ( col = 0; col < alignmentLength; col ++)
	{

		row_start_index = 0;
		for ( child_group_index = 0; child_group_index < dimensionCount; child_group_index ++)
		{
			if(temp[child_group_index][alignment_start_pos +1  + col] == -1)
			{
				for( i = 0; i < p_parent_group->children_group_nodes[child_group_index]->alignment_sequences_count; i ++)
				{
					p_parent_group->alignment[row_start_index + i][col] = '*';
				}
			}
			else if(temp[child_group_index][alignment_start_pos +1  + col]  >= 0)
			{
				col_pos = temp[child_group_index][alignment_start_pos +1  + col];
				for( i = 0; i < p_parent_group->children_group_nodes[child_group_index]->alignment_sequences_count; i ++)
				{
					p_parent_group->alignment[row_start_index + i][col] = p_parent_group->children_group_nodes[child_group_index]->alignment[i][col_pos];
				}
			}
			else
			{
				//the traceback stores only -1 or a window value in temp
				exit(0);
			}
			row_start_index += p_parent_group->children_group_nodes[child_group_index]->alignment_sequences_count;
		}
	}
	//the parent's rows keep the original sequence numbers of the children's rows
	row_start_index = 0;
	for ( child_group_index = 0; child_group_index < dimensionCount; child_group_index ++)
	{
		for( i = 0; i < p_parent_group->children_group_nodes[child_group_index]->alignment_sequences_count; i ++)
		{
			p_parent_group->orginal_index[row_start_index + i] = p_parent_group->children_group_nodes[child_group_index]->orginal_index[i];
		}
		row_start_index += p_parent_group->children_group_nodes[child_group_index]->alignment_sequences_count;
	}
	p_parent_group->alignmentLength = alignmentLength;
	for ( i = 0; i < p_parent_group->alignment_sequences_count; i ++)
	{
		p_parent_group->alignment[i][p_parent_group->alignmentLength] = 0;
	}
	alignmentScore = multipleDimensionScore[N];

cleanup:
	if(failed)
	{
		//leave well-formed (empty) rows behind instead of uninitialised memory
		for ( i = 0; i < p_parent_group->alignment_sequences_count; i ++)
		{
			p_parent_group->alignment[i][0] = 0;
		}
		p_parent_group->alignmentLength = 0;
		alignmentScore = -MAXINT;
	}
	free(multipleDimensionScore);
	free(multipleDimensionTrack);
	for ( i = 0; i < dimensionCount ; i ++)
	{
		free(window[i]);
	}
	return alignmentScore;
}

//converts the flat lattice index pos into per-dimension coordinates
//
//All MAX_SEQ_NUM entries of dimensionPos are first set to -MAXINT. The index
//is then decoded as a mixed-radix number whose digit for dimension d has
//radix dimensionLength[d] + 1, the last dimension being the least significant
//digit; whatever remains is stored as the coordinate of dimension 0.
void setMultipleDimensionPointInArrayFromPosition_k(int pos, int dimensionPos[MAX_SEQ_NUM],int dimensionCount,int dimensionLength[MAX_SEQ_NUM])
{
	int axis;
	int radix;
	int remaining = pos;

	for( axis = MAX_SEQ_NUM - 1; axis >= 0; axis --)
	{
		dimensionPos[axis] = -MAXINT;
	}

	for( axis = dimensionCount - 1; axis > 0; axis --)
	{
		radix = dimensionLength[axis] + 1;
		dimensionPos[axis] = remaining % radix;
		remaining = remaining / radix;
	}
	dimensionPos[axis] = remaining;
}

// Score obtained when the current cell is reached from one particular
// predecessor cell.
//
// multipleDimensionScore : flattened score table (read only here)
// dimensionPos           : coordinates of the current cell
// multipleDirection      : per-dimension step; 1 = take a step in that
//                          dimension, 0 = gap (see the comment further down)
// dimensionCount         : number of dimensions in use
// dimensionLength        : extent of each dimension; dimension i has stride
//                          dimensionLength[i]+1 in the flattened table
// window                 : window[i][p-1] is the column position handed to
//                          set_group_column for dimension i at coordinate p
//
// Returns -MAXINT+1 when some dimensionPos[i] < multipleDirection[i] (the
// predecessor would lie outside the table). Otherwise returns the
// predecessor's entry of multipleDimensionScore plus the sum, over every
// pair of dimensions i < j, of getSPScoreOfTwoColumns for that pair.
int getScoreOfCell_k(int multipleDimensionScore[SPACE_SIZE],int dimensionPos[MAX_SEQ_NUM],int multipleDirection[MAX_SEQ_NUM],int dimensionCount,int dimensionLength[MAX_SEQ_NUM],int* window[MAX_SEQ_NUM])
//this gets the value of the cell in the multiple dimensional space
//along the particular direction (indicated by multipleDirection)
//the cell to compute from is the current cell (indicated by dimensionPos)
//offset backwards along the direction (indicated by multipleDirection)
{
	int pos;
	int i;
	int j;
	int gapCount;
	int func;
	char column1[MAX_SEQ_NUM];
	char column2[MAX_SEQ_NUM];
	int pos1;
	int pos2;

	int count1;
	int count2;
	//int gapPenalty ;

	i = 0;
	pos = 0;
	// gapCount is assigned but never read in this function
	gapCount = 0;
	//gapPenalty = getWeight_K('a','-');
	// flatten (dimensionPos - multipleDirection) into an index of the score table
	while(i < dimensionCount)
	{
		if(dimensionPos[i] < multipleDirection[i])
		{
			// the predecessor would have a negative coordinate: no such cell
			return -MAXINT+1;
		}
		else
		{
			if( i > 0)
			{
				pos = pos*(dimensionLength[i]+1) + dimensionPos[i]  - multipleDirection[i];
			}
			else
			{
				pos = dimensionPos[i]  - multipleDirection[i];
			}
			i ++;
		}
	}
	// debug dump of the column of every dimension
	// NOTE(review): this reads window[i][dimensionPos[i]-1] without checking
	// dimensionPos[i] >= 1, i.e. index -1 when the coordinate is 0 - confirm
	// before enabling debug_k > 101
	if(debug_k > 101)
	{
		for( i = 0; i < dimensionCount ; i ++)
		{
			pos1 = window[i][dimensionPos[i]-1];
			if(pos1 < 0)
			{
				pos1 = -1;
			}
			count1 = set_group_column(column1,i,pos1);
			printf(" group: %d: ",i);
			for( j = 0; j < count1 ; j ++)
			{
				printf("%c",column1[j]);
			}
			printf(":\n");
		}
		
	}
	//now we have the position of the cell along the direction,it is at multipleDimensionScore[pos]
	//calculate the value from the cell and from the direction
	func = 0;
	//for each dimension, 0 means gap;
	//1 means a step!

	for( i = 0; i < dimensionCount ; i ++)
	{
		for ( j = i+1 ; j < dimensionCount; j ++)
		{
			if((multipleDirection[i] == 0) && (multipleDirection[j] == 0))
			{
				//neither dimension moves: this pair contributes nothing
				continue;
			}
			else if((multipleDirection[i] == 1) && (multipleDirection[j] == 1))
			{
				//both dimensions step: score their two columns against each other
				if((dimensionPos[i] < 1)||(dimensionPos[j] < 1))
				{
					//do nothing
					continue;
				}
				pos1 = window[i][dimensionPos[i]-1];
				pos2 = window[j][dimensionPos[j]-1];
				// a negative position makes set_group_column fill the column with '*'
				count1 = set_group_column(column1,i,pos1);
				count2 = set_group_column(column2,j,pos2);
				func = func + getSPScoreOfTwoColumns(column1,count1,column2,count2);
			}//if else
			else if((multipleDirection[i] == 1) || (multipleDirection[j] == 1))
			{
				//exactly one of the two dimensions steps: its column is scored
				//against a '*' column of the other dimension; the pair is skipped
				//when the stepping column has a negative position or is all gaps
				if(multipleDirection[i] == 1)
				{
					//along this direction, there is a step
					if(dimensionPos[i] < 1)
					{
						continue;
					}
					//skip the pair if the stepping column is itself only gaps
					if(dimensionPos[i] >= 1)
					{
						pos1 = window[i][dimensionPos[i]-1];
						if(pos1 < 0)
						{
							continue;
						}
						count1 = set_group_column(column1,i,pos1);
						
						if(isGapColumn(column1,count1))
						{
							continue;
						}
						count2 = set_group_column(column2,j,-1);
					}
				}
				if(multipleDirection[j] == 1)
				{
					// along this direction, there is a step
					if(dimensionPos[j] < 1)
					{
						continue;
					}
					//skip the pair if the stepping column is itself only gaps
					if(dimensionPos[j] >= 1)
					{
						pos1 = window[j][dimensionPos[j]-1];
						if(pos1 < 0)
						{
							continue;
						}
						count1 = set_group_column(column1,j,pos1);
						if(isGapColumn(column1,count1))
						{
							continue;
						}
						count2 = set_group_column(column2,i,-1);
					}
				}
				//func = func + gapPenalty*count1;
				func = func + getSPScoreOfTwoColumns(column1,count1,column2,count2);
			}

		}//for j
	}//for i

	return (multipleDimensionScore[pos] + func);
}

// Flatten per-dimension coordinates into a single table index.
// Dimension 0 contributes its coordinate directly; every later dimension i
// multiplies the running index by dimensionLength[i]+1 and adds its own
// coordinate. Returns 0 when dimensionCount <= 0.
int getPositionOfMultipleDimensionPointInArray_k(int dimensionPos[MAX_SEQ_NUM],int dimensionCount,int dimensionLength[MAX_SEQ_NUM])
{
	int flat = 0;
	int d;

	for( d = 0; d < dimensionCount; d ++)
	{
		if(d == 0)
		{
			flat = dimensionPos[0];
			continue;
		}
		flat = flat*(dimensionLength[d]+1) + dimensionPos[d];
	}
	return flat;
}

// Expand a direction code into one step flag per dimension.
// The last dimension receives direction%2, the one before it (direction/2)%2,
// and so on down to dimension 0.
void setMultipleDirection_k(int direction, int multipleDirection[MAX_SEQ_NUM], int dimensionCount)
{
	int remaining = direction;
	int d;

	for( d = dimensionCount - 1; d >= 0; d --)
	{
		multipleDirection[d] = remaining%2 ;
		remaining = remaining/2 ;
	}
}

//this will set the corresponding chars to column array
//the count will be returned
int set_group_column(char column[MAX_SEQ_NUM],int child_group_index,int col_pos)
{
	int i;
	if(col_pos >= 0)
	{
		for( i = 0; i < current_parent_group_node->children_group_nodes[child_group_index]->alignment_sequences_count; i ++)
		{
			column[i] = current_parent_group_node->children_group_nodes[child_group_index]->alignment[i][col_pos];
		}
	}
	else
	{
		for( i = 0; i < current_parent_group_node->children_group_nodes[child_group_index]->alignment_sequences_count; i ++)
		{
			column[i] = '*';
		}
	}
	column[i] = 0;
	return i;
}

// Returns 1 when none of the first `count` characters of `column` is a
// residue, 0 otherwise. A 0 byte is ignored in the same way as a gap.
int isGapColumn(char column[MAX_SEQ_NUM],int count)
{
	int k = 0;

	while( k < count)
	{
		if((column[k] != 0) && (isGap_k(column[k]) == 0))
		{
			// found a real residue
			return 0;
		}
		k ++;
	}
	return 1;
}
// Returns 1 for the gap symbols '.', '-' and '*', 0 for anything else.
int isGap_k(char letter)
{
	switch(letter)
	{
		case '.':
		case '-':
		case '*':
			return 1;
		default:
			return 0;
	}
}

// Sum-of-pairs score of two columns taken together: the characters of
// column1 followed by those of column2 are placed in one buffer and
// getWeight_K is summed over every unordered pair of that buffer.
// With debug_k > 10 the merged column and its score are printed.
// NOTE(review): the merged buffer holds MAX_SEQ_NUM chars and nothing here
// checks count1 + count2 against that - confirm callers guarantee it.
int getSPScoreOfTwoColumns(char column1[MAX_SEQ_NUM],int count1,char column2[MAX_SEQ_NUM],int count2)
{
	char merged[MAX_SEQ_NUM];
	int filled = 0;
	int total;
	int sum = 0;
	int a;
	int b;

	for( a = 0; a < count1; a ++)
	{
		merged[filled] = column1[a];
		filled ++;
	}
	for( b = 0; b < count2; b ++)
	{
		merged[filled] = column2[b];
		filled ++;
	}
	total = count1 + count2;

	a = 0;
	while( a < total)
	{
		b = a + 1;
		while( b < total)
		{
			sum += getWeight_K(merged[a],merged[b]);
			b ++;
		}
		a ++;
	}

	if(debug_k > 10)
	{
		printf("column: ");
		for( a = 0; a < total; a ++)
		{
			printf("%c",merged[a]);
		}
		printf("\nscore = %d\n",sum);
	}
	return sum;
}

// Column-by-column score of two already aligned sequences: getWeight_K is
// summed over the positions both strings have (the shorter length).
int getInducedSPScore(char* sequence1, char* sequence2)
{
	int len1 = (int)strlen(sequence1);
	int len2 = (int)strlen(sequence2);
	int overlap = (len1 > len2) ? len2 : len1;
	int total = 0;
	int weight;
	int col;

	for( col = 0; col < overlap; col++)
	{
		weight = getWeight_K(sequence1[col],sequence2[col]);
		total = total + weight;
	}
	return total;
}
// Strip every gap symbol from both sequences and return the score that
// alignTwoSequence_K gives for the two stripped strings.
// NOTE(review): the stripped copies live in WINDOW_ALIGNMENT_SIZE_K-byte
// buffers and the copy is not bounds-checked - confirm inputs never carry
// that many residues.
int getUpperBoundScore(char* sequence1, char* sequence2)
{
	char stripped1[WINDOW_ALIGNMENT_SIZE_K];
	char stripped2[WINDOW_ALIGNMENT_SIZE_K];
	int used1 = 0;
	int used2 = 0;
	const char* cursor;

	for(cursor = sequence1; *cursor != 0; cursor ++)
	{
		if(!isGap_k(*cursor))
		{
			stripped1[used1] = *cursor;
			used1 ++;
		}
	}
	for(cursor = sequence2; *cursor != 0; cursor ++)
	{
		if(!isGap_k(*cursor))
		{
			stripped2[used2] = *cursor;
			used2 ++;
		}
	}
	stripped1[used1] = 0;
	stripped2[used2] = 0;

	return alignTwoSequence_K(stripped1,stripped2);
}

// Sum-of-pairs score of an alignment: for each of the first `length`
// columns, getWeight_K is summed over every unordered pair of rows.
int getAlignmentSPScore_k(char** alignment,int sequencesCount,int length)
{
	int total = 0;
	int col = 0;
	int upper;
	int lower;

	while( col < length)
	{
		for( upper = 0; upper < sequencesCount; upper ++)
		{
			for( lower = upper + 1; lower < sequencesCount; lower ++)
			{
				total += getWeight_K(alignment[upper][col],alignment[lower][col]);
			}
		}
		col ++;
	}
	return total;
}

// Average number of non-gap characters per alignment row of a group.
// The rows examined are the first childrenCount rows, or just row 0 when
// childrenCount <= 0; the result is the integer quotient.
int get_group_length_k(GROUP_NODE* group_node)
{
	int rows = group_node->childrenCount;
	int residues = 0;
	int r;
	const char* p;

	if(rows <= 0)
	{
		rows = 1;
	}
	for( r = 0; r < rows; r ++)
	{
		for(p = group_node->alignment[r]; *p != 0; p ++)
		{
			if(!isGap_k(*p))
			{
				residues ++;
			}
		}
	}
	return residues/rows;
}
//set group flag by length
// Orders the group_number_k current groups by get_group_length_k (shortest
// first) and assigns them, in that order, to upper-level groups of
// max_number_in_group_k members each.
//
// group_flag : output; group_flag[g] receives the upper-level group index
//              chosen for group g
// Returns the number of upper-level groups used, 0 when there is no group
// at all (the previous code returned an uninitialized value in that case).
int set_group_flag_by_length(int* group_flag)
{
	int i,j;
	int group_index_array[MAX_SEQ_NUM];
	int group_index;

	int index;
	int group_lengths[MAX_SEQ_NUM];
	int holder;
	int group_count;

	// stays 0 when the assignment loop below never runs
	group_count = 0;

	for(i = 0; i < group_number_k; i ++)
	{
		group_lengths[i] = get_group_length_k(group_nodes_k[i]);
		group_index_array[i] = i;
	}

	// bubble sort the lengths ascending, carrying the group indices along
	for (i = (group_number_k - 1); i >= 0; i--)
	{
		for (j = 1; j <= i; j++)
		{
			if (group_lengths[j-1] > group_lengths[j])
			{
				holder = group_lengths[j - 1];
				group_lengths[j - 1] = group_lengths[j];
				group_lengths[j] = holder;

				index = group_index_array[j - 1];
				group_index_array[j - 1] = group_index_array[j];
				group_index_array[j] = index;
			}
		}
	}

	// hand out upper-level indices in chunks of max_number_in_group_k
	i = 0;
	group_index = 0;
	while(i < group_number_k)
	{
		j = group_index_array[i];
		group_flag[j] = group_index;
		group_count = group_index + 1;
		if((i % max_number_in_group_k) == (max_number_in_group_k - 1))
		{
			// this chunk is full: the next group opens a new one
			group_index ++;
		}
		i ++;
	}
	return group_count;
}

// Sort `numbers[0..array_size-1]` into ascending order in place
// (bubble sort). Nothing happens for array_size <= 1.
void bubbleSort_int(int numbers[], int array_size)
{
  int last;
  int k;
  int swap;

  for (last = array_size - 1; last >= 0; last--)
  {
    // one pass pushes the largest of numbers[0..last] up to numbers[last]
    for (k = 0; k < last; k++)
    {
      if (numbers[k] > numbers[k + 1])
      {
        swap = numbers[k];
        numbers[k] = numbers[k + 1];
        numbers[k + 1] = swap;
      }
    }
  }
}

// Sort `numbers[0..array_size-1]` into ascending order in place
// (bubble sort). Nothing happens for array_size <= 1.
void bubbleSort_double(double numbers[], int array_size)
{
  int last;
  int k;
  double swap;

  for (last = array_size - 1; last >= 0; last--)
  {
    // one pass pushes the largest of numbers[0..last] up to numbers[last]
    for (k = 0; k < last; k++)
    {
      if (numbers[k] > numbers[k + 1])
      {
        swap = numbers[k];
        numbers[k] = numbers[k + 1];
        numbers[k + 1] = swap;
      }
    }
  }
}

//set group by minimize the gaps
// Partitions the group_number_k current groups into upper-level groups.
//
// The edge weight between two groups starts as
//   get_upperbound_of_2_group_nodes - get_induced_sp_of_2_group_nodes.
// The partition (set_flag_array_by_metis_from_matrix followed by equal_size)
// is then recomputed repeatedly. A round is kept while its
// get_sum_weight_edges_k score has not dropped below the previous round's.
// The loop stops at the first drop or once iteration_times_k exceeds 10.
//
// group_flag : output, MAX_SEQ_NUM entries; receives the last kept partition
// Returns the group count that belongs to that kept partition. (Previously
// the count of the discarded, most recent partition was returned, and the
// kept flags could be read before ever being stored.)
int set_group_flag_by_min_gaps(int* group_flag)
{
	int group_count;
	int group_count_last;

	int i;
	int j;
	int score_before,score_after;

	int edge_weights[MAX_SEQ_NUM][MAX_SEQ_NUM];
	int group_flag_last[MAX_SEQ_NUM];

	for(i = 0; i < MAX_SEQ_NUM; i ++)
	{
		group_flag[i] = -1;
	}
	//first initialize the edge weight matrix
	for(i = 0; i < MAX_SEQ_NUM; i ++)
	{
		for(j = 0; j < MAX_SEQ_NUM; j ++)
		{
			edge_weights[i][j] = -1;
		}
	}

	for(i = 0; i < group_number_k; i ++)
	{
		for(j = i + 1; j < group_number_k; j ++)
		{
			edge_weights[i][j] = get_upperbound_of_2_group_nodes(group_nodes_k[i],group_nodes_k[j]);
			edge_weights[i][j] -= get_induced_sp_of_2_group_nodes(group_nodes_k[i],group_nodes_k[j]);

			edge_weights[j][i] = edge_weights[i][j];
		}
	}

	group_count = set_flag_array_by_metis_from_matrix(group_flag,edge_weights);
	equal_size(group_flag);

	// the initial partition is the fallback if no round below is kept
	for(i = 0; i < MAX_SEQ_NUM; i ++)
	{
		group_flag_last[i] = group_flag[i];
	}
	group_count_last = group_count;

	//now to iterate
	//score_before = get_sum_weight_edges_k(group_flag);
	score_before = -MAXINT;
	iteration_times_k = 0;
	while(1)
	{
		group_count = set_flag_array_by_metis_from_matrix(group_flag,edge_weights);
		equal_size(group_flag);
		score_after = get_sum_weight_edges_k(group_flag);

		if(score_after < score_before)
		{
			// the score dropped: discard this round
			break;
		}
		iteration_times_k ++;
		if(iteration_times_k > 10)
		{
			break;
		}

		// keep this round: flags and count are saved together
		for(i = 0; i < MAX_SEQ_NUM; i ++)
		{
			group_flag_last[i] = group_flag[i];
		}
		group_count_last = group_count;

		score_before = score_after;

		// refresh the weights from the pair scores of the partition just kept
		for(i = 0; i < group_count; i ++)
		{
			for(j = i + 1; j < group_count; j ++)
			{
				edge_weights[i][j] = get_pair_score_of_2_group_index_from_group_flag(i, j, group_flag);
				edge_weights[j][i] = edge_weights[i][j];
			}
		}
	}

	for(i = 0; i < MAX_SEQ_NUM; i ++)
	{
		group_flag[i] = group_flag_last[i];
	}

	return group_count_last;
}
// Sum of sequence_pair_upperbound_scores_k over every pair made of one
// sequence of group1 and one sequence of group2 (looked up by the sequences'
// orginal_index values).
int get_upperbound_of_2_group_nodes(GROUP_NODE* group1,GROUP_NODE* group2)
{
	int total = 0;
	int a;
	int b;
	int seq_a;
	int seq_b;

	a = 0;
	while( a < group1->alignment_sequences_count)
	{
		seq_a = group1->orginal_index[a];
		b = 0;
		while( b < group2->alignment_sequences_count)
		{
			seq_b = group2->orginal_index[b];
			total += sequence_pair_upperbound_scores_k[seq_a][seq_b];
			b ++;
		}
		a ++;
	}
	return total;
}

// Sum of sequence_pair_weight_gap_penalty_k over every pair made of one
// sequence of group1 and one sequence of group2 (looked up by the sequences'
// orginal_index values).
int get_weight_gap_penalty_of_2_group_nodes(GROUP_NODE* group1,GROUP_NODE* group2)
{
	int total = 0;
	int a;
	int b;
	int seq_a;
	int seq_b;

	a = 0;
	while( a < group1->alignment_sequences_count)
	{
		seq_a = group1->orginal_index[a];
		b = 0;
		while( b < group2->alignment_sequences_count)
		{
			seq_b = group2->orginal_index[b];
			total += sequence_pair_weight_gap_penalty_k[seq_a][seq_b];
			b ++;
		}
		a ++;
	}
	return total;
}

// Partition the current groups with group_k_metis.
//
// group_flag        : output; receives the partition index of each group
// group_pair_scores : pairwise weights; the leading
//                     group_number_k x group_number_k part is copied into a
//                     temporary matrix handed to group_k_metis
//
// When group_number_k <= max_number_in_group_k everything fits in one
// partition: entries 0..group_number_k of group_flag are set to 0 and 1 is
// returned. Otherwise the return value of group_k_metis is returned.
// The temporary matrix has group_number_k+1 rows and columns; it is now
// zero-filled so the extra row/column passed along is never indeterminate.
// Exits with status 1 (message on stderr) when memory cannot be allocated.
int set_flag_array_by_metis_from_matrix(int* group_flag,int group_pair_scores[MAX_SEQ_NUM][MAX_SEQ_NUM])
{
	int** p_weight_matrix;
	int dimension;
	int i;
	int j;
	int ret;

	if( group_number_k <= max_number_in_group_k)
	{
		for(i = 0; i < group_number_k+1; i ++)
		{
			group_flag[i] = 0;
		}
		return 1;
	}

	dimension = group_number_k + 1;
	if((p_weight_matrix=(int**) calloc((size_t)dimension,sizeof(int*)))==NULL)
	{
		fprintf(stderr,"unable to allocate memory.");
		exit(1);
	}
	for(i = 0; i < dimension; i ++)
	{
		if((p_weight_matrix[i] =(int*) calloc((size_t)dimension,sizeof(int)))==NULL)
		{
			fprintf(stderr,"unable to allocate memory.");
			exit(1);
		}
	}

	for(i = 0; i < group_number_k; i ++)
	{
		for(j = 0; j < group_number_k; j ++)
		{
			p_weight_matrix[i][j] = group_pair_scores[i][j];
		}
	}
	strcpy(unique_string_k_group_metis,unique_string_k);
	ret = group_k_metis(p_weight_matrix,group_number_k,max_number_in_group_k,group_flag,dos_k_group_metis);

	for(i = 0; i < dimension; i ++)
	{
		free(p_weight_matrix[i]);
	}
	free(p_weight_matrix);
	return ret;
}
// Largest number of non-gap characters found in any examined alignment row
// of the group. The rows examined are the first childrenCount rows, or just
// row 0 when childrenCount <= 0.
int get_max_length_of_group(GROUP_NODE* group_node)
{
	int rows = group_node->childrenCount;
	int longest = -MAXINT;
	int residues;
	int r;
	const char* p;

	if(rows <= 0)
	{
		rows = 1;
	}
	for( r = 0; r < rows; r ++)
	{
		residues = 0;
		for(p = group_node->alignment[r]; *p != 0; p ++)
		{
			if(!isGap_k(*p))
			{
				residues ++;
			}
		}
		if(residues > longest)
		{
			longest = residues;
		}
	}
	return longest;
}

// Smallest number of non-gap characters found in any examined alignment row
// of the group. The rows examined are the first childrenCount rows, or just
// row 0 when childrenCount <= 0.
int get_min_length_of_group(GROUP_NODE* group_node)
{
	int rows = group_node->childrenCount;
	int shortest = MAXINT;
	int residues;
	int r;
	const char* p;

	if(rows <= 0)
	{
		rows = 1;
	}
	for( r = 0; r < rows; r ++)
	{
		residues = 0;
		for(p = group_node->alignment[r]; *p != 0; p ++)
		{
			if(!isGap_k(*p))
			{
				residues ++;
			}
		}
		if(residues < shortest)
		{
			shortest = residues;
		}
	}
	return shortest;
}
// Sum of sequence_pair_induced_sp_scores_k over every pair made of one
// sequence of group1 and one sequence of group2 (looked up by the sequences'
// orginal_index values).
int get_induced_sp_of_2_group_nodes(GROUP_NODE* group1,GROUP_NODE* group2)
{
	int total = 0;
	int a;
	int b;
	int seq_a;
	int seq_b;

	a = 0;
	while( a < group1->alignment_sequences_count)
	{
		seq_a = group1->orginal_index[a];
		b = 0;
		while( b < group2->alignment_sequences_count)
		{
			seq_b = group2->orginal_index[b];
			total += sequence_pair_induced_sp_scores_k[seq_a][seq_b];
			b ++;
		}
		a ++;
	}
	return total;
}

// Objective value of a grouping:
//   sum_upper_bound_k - sum_induced_sp_k + (sum of the gap weights of all
//   sequence pairs under group_flag).
// Side effect: sequence_pair_weight_gap_penalty_k is cleared and then filled
// symmetrically with the gap weight computed for each sequence pair.
int get_sum_weight_edges_k(int* group_flag)
{
	int row;
	int col;
	int weight;
	int total = 0;

	for(row = 0; row < MAX_SEQ_NUM; row ++)
	{
		for(col = 0; col < MAX_SEQ_NUM; col ++)
		{
			sequence_pair_weight_gap_penalty_k[row][col] = 0;
		}
	}

	row = 0;
	while(row < sequenceCount_k)
	{
		for(col = row + 1; col < sequenceCount_k; col ++)
		{
			weight = get_gap_weight_of_2_sequences_from_group_flag(row,col,group_flag);
			sequence_pair_weight_gap_penalty_k[row][col] = weight;
			sequence_pair_weight_gap_penalty_k[col][row] = weight;
			total += weight;
		}
		row ++;
	}

	return sum_upper_bound_k - sum_induced_sp_k + total;
}

// Gap weight of one sequence pair under a grouping:
//   (expected gaps from get_EG_gap_score_for_2_groups
//      - gaps from get_mumGap_score_for_2_seuquences) * get_gap_penalty_k(),
// truncated to int.
int get_gap_weight_of_2_sequences_from_group_flag(int sequence_index1,int sequence_index2,int* group_flag)
{
	int owner1 = get_group_index_by_sequnce_index(sequence_index1);
	int owner2 = get_group_index_by_sequnce_index(sequence_index2);
	double expected_gaps = get_EG_gap_score_for_2_groups(sequence_index1,sequence_index2,owner1,owner2,group_flag);
	int induced_gaps = get_mumGap_score_for_2_seuquences(sequence_index1,sequence_index2);
	int penalty = get_gap_penalty_k();

	return (int)((expected_gaps - induced_gaps) * (penalty));
}
// The fixed gap penalty used by the gap-weight computation: always -4.
int get_gap_penalty_k()
{
	const int fixed_gap_penalty = -4;

	return fixed_gap_penalty;
}
//this function return the expecation of gaps
double get_EG_gap_score_for_2_groups(int sequence_index1, int sequence_index2,int group_index1,int group_index2,int* group_flag)
{

	int l_i;
	int l_j;
	int g_if;
	int g_jh;
	int length_f;
	int length_h;

	double score;

	l_i = get_length_of_sequence_by_index(sequence_index1);
	l_j = get_length_of_sequence_by_index(sequence_index2);
	length_f = get_max_length_of_group_index_from_group_flag(group_index1,group_flag);
	length_h = get_max_length_of_group_index_from_group_flag(group_index2,group_flag);

	g_if = length_f - l_i;
	g_jh = length_h - l_j;

	score = ((double)(l_i * l_j + g_if * g_jh)) /
			((double)((l_i + g_if) * (l_j + g_jh)));
	score = 1 - score;
	//score = score * (MAX((l_i + g_if),(l_j + g_jh)));
	score = score * length_k;
	return score;
}

// For two already aligned sequences, count the columns (within the shorter
// length) where exactly one of the two characters is a gap.
int getInducedGapNumber(char* sequence1, char* sequence2)
{
	int len1 = (int)strlen(sequence1);
	int len2 = (int)strlen(sequence2);
	int overlap = (len1 > len2) ? len2 : len1;
	int single_gaps = 0;
	int col;

	for( col = 0; col < overlap; col++)
	{
		single_gaps += is_single_gap_column_K(sequence1[col],sequence2[col]);
	}
	return single_gaps;
}
// Returns 1 when exactly one of the two characters is a gap symbol,
// 0 when both or neither are.
int is_single_gap_column_K(char a,char b)
{
	int a_is_gap = isGap_k(a) ? 1 : 0;
	int b_is_gap = isGap_k(b) ? 1 : 0;

	if(a_is_gap == b_is_gap)
	{
		return 0;
	}
	return 1;
}
// Index of the first group in group_nodes_k that contains the given
// sequence, or -1 when no group contains it.
int get_group_index_by_sequnce_index(int sequence_index)
{
	int g = 0;

	while( g < group_number_k)
	{
		if( is_sequence_in_group(sequence_index,group_nodes_k[g]))
		{
			return g;
		}
		g ++;
	}
	return -1;
}
// Returns 1 when sequence_index appears among the group's orginal_index
// entries, 0 otherwise.
int is_sequence_in_group(int sequence_index,GROUP_NODE* group_node)
{
	int k = 0;

	while( k < group_node->alignment_sequences_count)
	{
		if( group_node->orginal_index[k] == sequence_index)
		{
			return 1;
		}
		k ++;
	}
	return 0;
}
// Largest get_max_length_of_group value among all groups that carry the
// same flag as group `group_index` (that group included).
int get_max_length_of_group_index_from_group_flag(int group_index,int* group_flag)
{
	int target = group_flag[group_index];
	int longest = -MAXINT;
	int candidate;
	int g;

	for( g = 0; g < group_number_k; g ++)
	{
		if(group_flag[g] == target)
		{
			candidate = get_max_length_of_group(group_nodes_k[g]);
			if(candidate > longest)
			{
				longest = candidate;
			}
		}
	}
	return longest;
}

// Looks up input_sequence_actual_lengths_k for the given sequence.
int get_length_of_sequence_by_index(int sequence_index)
{
	int actual_length = input_sequence_actual_lengths_k[sequence_index];

	return actual_length;
}
// Looks up sequence_pair_induced_gap_number_k for the given sequence pair.
int get_mumGap_score_for_2_seuquences(int sequence_index1,int sequence_index2)
{
	int induced_gaps = sequence_pair_induced_gap_number_k[sequence_index1][sequence_index2];

	return induced_gaps;
}

int get_pair_score_of_2_group_index_from_group_flag(int group_index1, int group_index2, int* group_flag)
{
	GROUP_NODE* group_nodes_array1[MAX_SEQ_NUM];
	GROUP_NODE* group_nodes_array2[MAX_SEQ_NUM];
	int i,j;
	int count1;
	int count2;
	GROUP_NODE* group_node1;
	GROUP_NODE* group_node2;

	int upper_bound;
	int induced_sp;
	int weight_gap_penalty;


	count1 = 0;
	count2 = 0;

	for( i = 0; i < group_number_k; i ++)
	{
		if( group_flag[i] == group_index1)
		{
			group_nodes_array1[count1] = group_nodes_k[i];
			count1 ++;
		}
		if( group_flag[i] == group_index2)
		{
			group_nodes_array2[count2] = group_nodes_k[i];
			count2 ++;
		}
	}

	upper_bound = 0;
	induced_sp = 0;
	weight_gap_penalty = 0;
	for(i = 0; i < count1; i ++)
	{
		group_node1 = group_nodes_array1[i];
		for(j =  0; j < count2; j ++)
		{
			group_node2 = group_nodes_array2[j];
			upper_bound += get_upperbound_of_2_group_nodes(group_node1,group_node2);
			induced_sp += get_induced_sp_of_2_group_nodes(group_node1,group_node2);
			weight_gap_penalty += get_weight_gap_penalty_of_2_group_nodes(group_node1,group_node2);
		}
	}
	return upper_bound - induced_sp + weight_gap_penalty;
}

//this function is to make up the grouping equal size
//it is obsoleted
//this is just a backup
//
// Moves nodes out of every upper-level group that holds more than
// max_number_in_group_k nodes into groups that hold fewer, one node at a
// time. Each move picks the (node, target group) pair with the largest
//   (sum of p_pairwise_score_function between the node and the target's
//    members) - (same sum between the node and its current group mates).
//
// group_flag : in/out; group_flag[n] is the upper-level group of node n
//
// Fixes over the previous version:
//  - the best candidate is tracked across all nodes of the over-full group
//    (the maximum used to be reset for every node, so only the last node
//    was ever considered);
//  - when no under-full group is left the function returns instead of using
//    uninitialized selection variables;
//  - the member list of the over-full group is refreshed after every move.
void equal_size_without_gap_exp(int* group_flag)
{
	int set_greater_k[MAX_SEQ_NUM];
	int set_less_k[MAX_SEQ_NUM];
	int count_greater_k;
	int count_less_k;

	int group_count;

	int current_group_greater_k[MAX_SEQ_NUM];
	int current_group_less_k[MAX_SEQ_NUM];

	int i,j,k,m;
	int occupancy;

	int found;
	int max_score;
	int max_less_group_index;
	int max_greater_index_in_group;

	int count_current_group_greater_k;
	int count_current_group_less_k;

	int score_out_from_greater;
	int score_out_from_less;
	int change;

	count_greater_k = 0;
	count_less_k = 0;
	group_count = -1;

	//how many groups this array has
	for(i = 0; i < group_number_k; i++)
	{
		if(group_count < group_flag[i])
		{
			group_count = group_flag[i];
		}
	}
	group_count ++;

	//split the groups into over-full and under-full ones
	for(i = 0; i < group_count; i++)
	{
		occupancy = count_in_array_k(i,group_flag);
		if(occupancy > max_number_in_group_k)
		{
			set_greater_k[count_greater_k] = i;
			count_greater_k ++;
		}
		else if(occupancy < max_number_in_group_k)
		{
			set_less_k[count_less_k] = i;
			count_less_k ++;
		}
	}

	for(i = 0; i < count_greater_k; i ++)
	{
		count_current_group_greater_k = set_group_array_from_flag_array(current_group_greater_k, set_greater_k[i], group_flag);

		while(count_current_group_greater_k > max_number_in_group_k)
		{
			found = 0;
			max_score = -MAXINT;
			max_less_group_index = -1;
			max_greater_index_in_group = -1;

			//try every node j of the over-full group against every open target k
			for( j = 0; j < count_current_group_greater_k; j ++)
			{
				for( k = 0; k < count_less_k; k ++)
				{
					if(set_less_k[k] < 0)
					{
						//this target has already been filled up
						continue;
					}

					count_current_group_less_k = set_group_array_from_flag_array(current_group_less_k, set_less_k[k], group_flag);

					//score of node j against the members of target k
					score_out_from_greater = 0;
					for( m = 0; m < count_current_group_less_k; m ++)
					{
						score_out_from_greater += p_pairwise_score_function(current_group_less_k[m],current_group_greater_k[j]);
					}

					//score of node j against its current group mates
					score_out_from_less = 0;
					for( m = 0; m < count_current_group_greater_k; m ++)
					{
						if(  m == j )
						{
							continue;
						}
						score_out_from_less += p_pairwise_score_function(current_group_greater_k[m],current_group_greater_k[j]);
					}
					change = score_out_from_greater - score_out_from_less;

					if((!found) || (change > max_score))
					{
						found = 1;
						max_score = change;
						max_less_group_index = k;
						max_greater_index_in_group = current_group_greater_k[j];
					}
				}// for k
			}//for j

			if(!found)
			{
				//no under-full group left: nothing more can be balanced
				return;
			}

			//move the chosen node into the chosen target
			group_flag[max_greater_index_in_group] = set_less_k[max_less_group_index];

			//close the target once it has reached the size limit
			if(count_in_array_k(set_less_k[max_less_group_index],group_flag) == max_number_in_group_k)
			{
				set_less_k[max_less_group_index] = -1;
			}

			//refresh the members of the over-full group after the move
			count_current_group_greater_k = set_group_array_from_flag_array(current_group_greater_k, set_greater_k[i], group_flag);
		}//while(count_current_group_greater_k )
	}
}
//this function is to make up the grouping equal size
//currently used after clustering by metis
//
// Moves nodes out of every upper-level group that holds more than
// max_number_in_group_k nodes into groups that hold fewer, one node at a
// time. Each move picks the (node, target group) pair whose trial grouping
// gives the largest get_sum_weight_edges_k value.
//
// group_flag : in/out; group_flag[n] is the upper-level group of node n
//
// Note: every trial calls get_sum_weight_edges_k, which rewrites
// sequence_pair_weight_gap_penalty_k as a side effect.
//
// Fixes over the previous version:
//  - the best candidate is tracked across all nodes of the over-full group
//    (the maximum used to be reset for every node, so only the last node
//    was ever considered);
//  - when no under-full group is left the function returns instead of using
//    uninitialized selection variables;
//  - the member list of the over-full group is refreshed after every move.
void equal_size(int* group_flag)
{
	int set_greater_k[MAX_SEQ_NUM];
	int set_less_k[MAX_SEQ_NUM];
	int count_greater_k;
	int count_less_k;

	int group_count;

	int current_group_greater_k[MAX_SEQ_NUM];
	int group_flag_to_test[MAX_SEQ_NUM];

	int i,j,k,m;
	int occupancy;

	int found;
	int max_score;
	int max_less_group_index;
	int max_greater_index_in_group;

	int count_current_group_greater_k;
	int current_sw;

	count_greater_k = 0;
	count_less_k = 0;
	group_count = -1;

	//how many groups this array has
	for(i = 0; i < group_number_k; i++)
	{
		if(group_count < group_flag[i])
		{
			group_count = group_flag[i];
		}
	}
	group_count ++;

	//split the groups into over-full and under-full ones
	for(i = 0; i < group_count; i++)
	{
		occupancy = count_in_array_k(i,group_flag);
		if(occupancy > max_number_in_group_k)
		{
			set_greater_k[count_greater_k] = i;
			count_greater_k ++;
		}
		else if(occupancy < max_number_in_group_k)
		{
			set_less_k[count_less_k] = i;
			count_less_k ++;
		}
	}

	for(i = 0; i < count_greater_k; i ++)
	{
		//members of the over-full group indexed by i
		count_current_group_greater_k = set_group_array_from_flag_array(current_group_greater_k, set_greater_k[i], group_flag);

		while(count_current_group_greater_k > max_number_in_group_k)
		{
			found = 0;
			max_score = -MAXINT;
			max_less_group_index = -1;
			max_greater_index_in_group = -1;

			//try every node j of the over-full group against every open target k
			for( j = 0; j < count_current_group_greater_k; j ++)
			{
				for( k = 0; k < count_less_k; k ++)
				{
					if(set_less_k[k] < 0)
					{
						//this target has already been filled up
						continue;
					}

					//trial grouping: same as group_flag except node j sits in target k
					for( m = 0; m < group_number_k; m ++)
					{
						group_flag_to_test[m] = group_flag[m];
					}
					group_flag_to_test[current_group_greater_k[j]] = set_less_k[k];

					current_sw = get_sum_weight_edges_k(group_flag_to_test);

					if((!found) || (current_sw > max_score))
					{
						found = 1;
						max_score = current_sw;
						max_less_group_index = k;
						max_greater_index_in_group = current_group_greater_k[j];
					}
				}// for k
			}//for j

			if(!found)
			{
				//no under-full group left: nothing more can be balanced
				return;
			}

			//move the chosen node into the chosen target
			group_flag[max_greater_index_in_group] = set_less_k[max_less_group_index];

			//close the target once it has reached the size limit
			if(count_in_array_k(set_less_k[max_less_group_index],group_flag) == max_number_in_group_k)
			{
				set_less_k[max_less_group_index] = -1;
			}

			//refresh the members of the over-full group after the move
			count_current_group_greater_k = set_group_array_from_flag_array(current_group_greater_k, set_greater_k[i], group_flag);
		}//while(count_current_group_greater_k )
	}
}
// Collect, in ascending order, the indices n < group_number_k whose
// group_flag[n] equals group_index. They are written to group_array_to_set
// and their number is returned.
int set_group_array_from_flag_array(int* group_array_to_set,int group_index, int* group_flag)
{
	int stored = 0;
	int n = 0;

	while( n < group_number_k)
	{
		if(group_flag[n] == group_index)
		{
			group_array_to_set[stored] = n;
			stored ++;
		}
		n ++;
	}
	return stored;
}
// Number of indices n < group_number_k whose group_flag[n] equals
// group_index.
int count_in_array_k(int group_index, int* group_flag)
{
	int matches = 0;
	int n = 0;

	while( n < group_number_k)
	{
		if(group_flag[n] == group_index)
		{
			matches ++;
		}
		n ++;
	}
	return matches;
}

