//March 2016, Etemadi and Lu 
//EG mthod  
#include<iostream>
#include<fstream>
#include<string.h>
#include<iomanip>
#include<omp.h>
#include<cstdlib>
#include<time.h>
#include <cmath>
#include <iomanip>
using namespace std;             
char Dataset_name[20][30]={"Ego-facebook","Enron-email","Brightkite","Dblp-Coau"
                           ,"Amazon","Web-NotreDame","Citeseer","Dogster","Web-Google","Youtube","Dblp","As-skitter","FLicker","Orkut","Livejournal"
                           ,"Orkut2","Web-Arabic","MicrosoftAG","Twitter","Friendster"};
// 0 "ego-Facebook"  ,1   "Enron-email",2   "Brightkite" ,3   "DBLP-coauthor",4   "amazon"      
// 5  "web-NotreDame", 6  "citeseer"     , 7 "Petster"     ,8 "web-Google" ,9  "Youtube"    ,10  "dblp"       ,11  "as-skitter"   ,12  "FLicker"
// 13 "Orkut"        , 14 "Livejournal"  ,15 "Orkut2"     ,16  "Web-Arabic",17  "MicrosoftAG",18  "twitter"      ,19  "friendster"                          
   
double min_RSE[20]={0.34, 0.34 ,0.34 ,0.34 ,0.34 ,0.34 ,0.34 ,0.34 ,0.34 ,0.34 ,0.34 ,0.34 ,0.34 ,0.34 ,0.34 ,0.34 ,0.34 ,0.34 ,0.34 ,0.34 };
double step[20]={0.03, 0.03 ,0.03 ,0.03 ,0.03 ,0.03 ,0.03 ,0.03 ,0.03 ,0.03 ,0.03 ,0.03 ,0.03 ,0.03 ,0.03 ,0.03 ,0.03 ,0.03 ,0.03 ,0.03 };
double max_RSE[20]={0.04,0.04,0.04,0.04,0.04,0.04,0.04,0.04,0.04,0.04,0.04,0.04,0.04,0.04,0.04,0.04,0.04,0.04,0.04,0.04};
long repeat[20]={1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000};

class node
{
  public:
     long nid;
     node * link;
};

long count_closed_wedge(node **,long);
void run_dataset(long);
void load_graph(long,node **,long **,long,long);
void print_edges(node **,long);
long count_closed_wedge_based_G(node **,node **,long,long &);
void create_sample_graph_g(long **,node **,double,long &,long,long);
void delete_nodes(node **,long);
double find_p(double p1,double (*f)(double,double,double,double),double T,double K,double RSE );
double RSE_EG(double T,double K,double p,double RSE);
long count_closure_check(node **,long,node **);
int main()
{  srand (time(NULL));

   // #pragma omp  parallel for schedule(dynamic,1)   /*uncomment for parallel run*/
   for(long i=0;i<=7;i++)  
     run_dataset(i);     
   return 0; 
}

void run_dataset(long i)
{   unsigned long N=0,M=0,CW=0,T=0,K=0,W=0;double CC=0,degree_ratio=0; string dataname;long M_g=0,W_g=0;
    char path[300]="../";
    strcat(path,Dataset_name[i]);
    strcat(path,"/properties.txt");
    ifstream fp;
    fp.open(path);
    fp>>dataname; fp>>N; fp>>M; fp>>CW; fp>>CC;fp>>W;fp>>T;fp>>K;fp>>degree_ratio;
    fp.close();
    //cout<<"\ndata"<<dataname<<"N="<<N<<"M="<<M<<"Cw="<<CW<<"W="<<W<<"T="<<T<<"K="<<K<<"deg="<<degree_ratio;
    node **nodes_G=new node*[N];    
    for(long t=0;t<N;t++){
	   nodes_G[t]=new node();
     nodes_G[t]->nid=0;
	   nodes_G[t]->link=NULL;
    }
    long **edges_G=new long*[M+1];
    for(long u = 0; u <=M; u++)
         edges_G[u] = new long[2];
    load_graph(i,nodes_G,edges_G,M,N); 
   // print_edges(nodes_G,N);
    
    long CW_REg=0,CW_REG=0;   //"intermidiate_data_RE_g3/"
    char path_results_REG[100]="intermidiate_data_REG/";
    
    char file_name2[400]="";strcat(file_name2,path_results_REG);strcat(file_name2,Dataset_name[i]);strcat(file_name2,"-raw-data-REG.txt"); 
    ofstream fp_raw_REG;fp_raw_REG.open(file_name2);  
    char file_name3[400]="";strcat(file_name3,path_results_REG);strcat(file_name3,Dataset_name[i]);
    strcat(file_name3,"-raw-data-plot-REG.txt"); 
    ofstream fp_raw_plot_REG;fp_raw_plot_REG.open(file_name3);  
    
    
    fp_raw_REG<<"****% Etemadi and Lu   Date: " <<__TIMESTAMP__;fp_raw_plot_REG<<"****% Etemadi and Lu   Date: " <<__TIMESTAMP__;
   
    fp_raw_REG<<"\nGraph name= "<<Dataset_name[i]<< " C= "<<CC<<" N= "<<N<<" M= "<<M<<" Triangles= "<<T<<" Pairs of dependent tiangles(K)= "
              << K<<"  CW= "<<CW<<"  W= "<<W<<" degree ratio="<<degree_ratio<<"\n\n # iteration= "<< repeat[i]<<"\n\n";
    fp_raw_plot_REG<<"\nGraph name= "<<Dataset_name[i]<< " C= "<<CC<<" N= "<<N<<" M= "<<M<<" Triangles= "<<T<<" Pairs of dependent tiangles(K)= "
                   << K<<"  CW= "<<CW<<"  W= "<<W<<" degree ratio="<<degree_ratio<<"\n\n # iteration= "<< repeat[i]<<"\n\n";
    
    fp_raw_REG<<CC<<","<<N<<","<<M<<","<<T<<","<< K<<","<<CW<<","<<W<<","<<degree_ratio<<"\n"<< repeat[i]<<"\n";
    fp_raw_plot_REG<<CC<<","<<N<<","<<M<<","<<T<<","<< K<<","<<CW<<","<<W<<","<<degree_ratio<<"\n"<< repeat[i]<<"\n";
    fp_raw_plot_REG<<"#run                   # sampled edges        CW             W                  CW in g"
                   <<"          CW checked in G \n";  
    fp_raw_plot_REG<<"---------------------  ---------------------  -------------  -----------------  ---------------  --------------- \n";
    
    
    double Ecc=0;
    node **nodes_g=new node*[N];
    for(long t=0;t<N;t++){
	          nodes_g[t]=new node();
            nodes_g[t]->nid=0;
	          nodes_g[t]->link=NULL;
         }
    node **nodes_g1=new node*[N];
    for(long t=0;t<N;t++){
	          nodes_g1[t]=new node();
            nodes_g1[t]->nid=0;
	          nodes_g1[t]->link=NULL;
         }     
    double Rerroe_tri=0,est_tri=0,ki=1.0,prob=0.0/*sampling probability*/; double step1=step[i];
    double avr_edge,avr_CW,avr_W,avr_CW_g,avr_CW_G,avr_REtri,Delta_g,Delta_G; 
    long closure_check=0;
    for(double sz=min_RSE[i];sz>=max_RSE[i];sz=sz-step1){
               
       // select Delta|_G=sz;
        // find probability  
        Delta_G=(1.0/(sz*sz));
        prob = pow((2.0 * Delta_G) / (N *  degree_ratio * CC),(1.0/2.0));
        prob = (prob * N) / (2.0 * M); 
        prob=find_p(prob,RSE_EG,T,K,sz);
        
        
       fp_raw_plot_REG<<prob<<",0,0,0,0,0,0,0,0\n";      
       fp_raw_REG<<"\n\np="<<setprecision(15)<<prob<<"\n       #run                   # sampled edges        CW"
                 <<"             W                  CW in g          CW checked in G   closure check   \n";  
       fp_raw_REG<<"---------------------  ---------------------  -------------  -----------------  ---------------  ---------------   ---------------\n";
                 
       
       avr_edge=avr_CW=avr_W=avr_CW_g=avr_CW_G=avr_REtri=0;W_g=0;
       for(long r=0;r<repeat[i];r++){
         
         create_sample_graph_g(edges_G,nodes_g,prob,M_g,N,M);
         CW_REg=count_closed_wedge(nodes_g,N);
         CW_REG=count_closed_wedge_based_G(nodes_g,nodes_G,N,W_g);
         closure_check=count_closure_check(nodes_g,N,nodes_g1);
         Ecc=(1.0*CW_REG)/(1.0*W_g);
         est_tri=(1.0*CW_REG)/(3*prob*prob);
         Rerroe_tri=(est_tri-T)/T;
         avr_edge+=M_g;avr_CW+=CW_REG;avr_W+=W_g;avr_CW_G+=(CW_REG-CW_REg);avr_REtri+=abs(Rerroe_tri);
         fp_raw_plot_REG<<r<<","<<M_g<<","<<CW_REG<<","<<W_g<<","<<CW_REg<<","<<CW_REG-CW_REg<<","<<closure_check<<"\n";
         fp_raw_REG<<setw(21)<<r<<setw(21)<<M_g<<setw(13)<<CW_REG<<setw(17)<<W_g<<setw(21)<<CW_REg<<setw(21)<<CW_REG-CW_REg<<setw(21)<<closure_check<<"\n";      
         delete_nodes(nodes_g,N);
         delete_nodes(nodes_g1,N);
       }//for(long r=0;
       avr_edge/=(1.0*repeat[i]);avr_CW/=(1.0*repeat[i]);avr_W/=(1.0*repeat[i]);avr_CW_g/=(1.0*repeat[i]);avr_CW_G/=(1.0*repeat[i]);avr_REtri/=(1.0*repeat[i]);
       fp_raw_REG<<"---------------------  ---------------------  -------------  -----------------  ---------------  ---------------\n";           
       fp_raw_REG<<setw(21)<<repeat[i]<<setw(21)<<avr_edge<<setw(13)<<avr_CW<<setw(17)<<avr_W<<setw(21)<<avr_CW_g<<setw(21)<<avr_CW_G<<"\n";            
       
    }//for(double sz=min_cw 
    delete_nodes(nodes_G,N);
    delete [] nodes_G;
    delete [] nodes_g;
    fp_raw_REG.close();
    fp_raw_plot_REG.close();
}
//*************************

double find_p(double p1,double (*f)(double,double,double,double),double T,double K,double RSE ){
  double p0=0.8,p_new=0;
   do
  {  
     p_new=(p0+p1)/2.0;
     if(abs(f(T,K,p_new,RSE))<=0.000000001)
        return p_new;
     else if (f(T,K,p_new,RSE)*f(T,K,p1,RSE)<0){
        p0=p_new;
     }
     else{
     p1=p_new;
     }
  }while(1);   
}
double RSE_EG(double T,double K,double p,double RSE){
   double a=RSE-pow((1.0/(3.0*T*p*p))*((1-p*p)+((8.0*K)/(3.0*T))*(p-p*p)),(1.0/2.0));
   return a; 
}

//******load graph*********
void load_graph(long ii,node *nodes[],long **edges_G, long M,long N)
{   ifstream fp;long s,e,k=1;node *p=NULL; node *ne=NULL,*le=NULL;
    char path1[300]="../";
    strcat(path1,Dataset_name[ii]);
    strcat(path1,"/edges.txt");
    fp.open(path1);   
    for(long j=0;j<M;j++){
       fp>>s;fp>>e;     
       if(s<e){//aa e
         edges_G[j+1][0]=s; edges_G[j+1][1]=e;
         p=new node();p->nid=e;
         nodes[s]->nid++;
         p->link=nodes[s]->link;
         nodes[s]->link=p;     
         }
      else if(s>e) {   // add s
         edges_G[j+1][0]=e; edges_G[j+1][1]=s;
         p=new node();
         p->nid=s;
         nodes[e]->nid++;
         p->link=nodes[e]->link;
         nodes[e]->link=p;
       }  
     }      
    fp.close();
}
void print_edges(node *nodes[],long N)
{    node *p;
     for(long i=0;i<N;i++)
     {  p=nodes[i]->link;
        while(p!=NULL){
          cout<<"\nedge="<<i<<" "<<p->nid;
          p=p->link;
        }
     }
} 
long count_closed_wedge(node * nodes_g[],long N)
{   long s,e,com=0,CW=0;node *pn,*pn1,*fs;
    for(long i=0;i<N;i++){
     pn= nodes_g[i]->link;
     while(pn!=NULL){
       s=pn->nid;pn1= pn->link;
       while(pn1!=NULL){
         e=pn1->nid;
         fs=nodes_g[s]->link; 
         com=0;
         while(fs!=NULL && com==0){
           if(fs->nid==e){  
              com++; break;
            }
           else{
              fs=fs->link;
           }           
         }  
         if(com!=0)     
            CW=CW+1;  
        pn1=pn1->link;         
       }       
       pn=pn->link;
     }// end while(pn!=NULL)
    }// end for i
    return CW;
}
//create sample graph g with probability p
void create_sample_graph_g(long **edges_G,node *nodes_g[],double p,long &num_sampled_edges,long N,long M){   
    double ran_num,a1=0,a2=0,a3=0;long s,e; node *pn,*fnew; num_sampled_edges=0;  
    long ki=0,rand1; 
    while (ki<M){  
       rand1=rand()%RAND_MAX;
       ran_num=(double) rand1 / (double) (RAND_MAX);
       a1= log((1-ran_num)/(1-p));
       a2=log(1-p);
       a3=ceil(((a1/a2)+1));
       ki=ki+a3;
       if(ki>0 && ki<=M){
         s=edges_G[ki][0];
         e=edges_G[ki][1];
         num_sampled_edges++;
         //if(s<=e){//aa e
         fnew=new node();fnew->nid=e;nodes_g[s]->nid++;fnew->link=NULL;
         fnew->link=nodes_g[s]->link;
         nodes_g[s]->link=fnew;     
        // }
        // else {   // add s
        fnew=new node();fnew->nid=s;nodes_g[e]->nid++;fnew->link=NULL;
        fnew->link=nodes_g[e]->link;
        nodes_g[e]->link=fnew;
       
       } // end if(ki<M)   
    }// end while  
}

// check the cost of closure checks
long count_closure_check(node * nodes_g[],long N,node * nodes_g1[])
{  
   node *p,*pn,*pn1,*fs,*fe,*starts,*starte;
   long s,e,t,flag,ck=0;
    for(long i=0;i<N;i++){
     pn= nodes_g[i]->link;
     while(pn!=NULL){
       pn1= pn->link;
       s=pn->nid;
       while(pn1!=NULL){
         e=pn1->nid;
         if(s<e){t=e;e=s;s=t;}
         fs=nodes_g1[s]->link;
         flag=0;
         while(fs!=NULL){
         if(fs->nid==e){flag=1;break;}
         fs=fs->link;
         }
         if(flag==0){  
           ck++;
           p=new node();p->nid=e;
           nodes_g1[s]->nid++;
           p->link=nodes_g1[s]->link;
           nodes_g1[s]->link=p;
         }
         
         pn1=pn1->link;
         }
       pn=pn->link;
     }
     }     
   return ck;
     
}

// count the number of closed wedges based on original graph G
long count_closed_wedge_based_G(node * nodes_g[],node * nodes_G[],long N,long &W_g)
{
    long s,e,com=0,CW=0,srn;node *pn,*pn1,*fs,*fe;W_g=0;
    for(long i=0;i<N;i++){
     pn= nodes_g[i]->link;
     while(pn!=NULL){
       s=pn->nid;pn1= pn->link;
       while(pn1!=NULL){
         e=pn1->nid;
         W_g++;
         if(s<e){fs=nodes_G[s]->link;srn=e;}
         else{fs=nodes_G[e]->link;srn=s;} 
         com=0;
         while(fs!=NULL && com==0){
           if(fs->nid==srn){  
              com++; break;
            }
           else{
              fs=fs->link;
           }           
         }  
         if(com!=0)     
            CW=CW+1;  
        pn1=pn1->link;         
       }       
       pn=pn->link;
     }// end while(pn!=NULL)
    }// end for i
    return CW;

} 
void delete_nodes(node *nodes[],long N)
{  node *pn,*pn1;
   for(long i=0;i<N;i++){
      pn= nodes[i]->link;
      nodes[i]->link=NULL;
      nodes[i]->nid=0;
      while(pn!=NULL){
         pn1=pn;         
         pn=pn->link;
         delete pn1;
      }      
   }
}

