#!/usr/bin/env python

#------------------------------------------------------------------------------------
# Import modules
#------------------------------------------------------------------------------------

import string, re, getopt
import os, sys, commands

def Usage():
	"""Clear the terminal and print the command line help for mb_burnin.py.

	The defaults quoted in the text mirror the class-level defaults of
	analysisClass; the sampling frequency default is 500.
	"""
	os.system("clear")

	# One entry per printed line; joining with newlines gives the same output
	# as printing each entry separately.
	helplines=[
		"\n\nmb_burnin.py\n\nA python script to run mrBayes, check for convergence and calculate a sensible burnin.",
		'Copyright Simon R Harris, Newcastle University, Newcastle Upon Tyne, UK. 2008',
		'\nmb_burnin.py Usage:\n',
		'\tmb_burnin.py [options]\n',
		"\tMrBayes options:",
		"\t\t-i\tInput file in any format readable by readseq",
		"\t\t-l\tLog file name",
		"\t\t-o\tOutput file name",
		"\t\t-d\tData type\t[protein, DNA, RNA, nucleotide, standard, continuous]\n\t\t\t\t\t(default=protein)",
		"\t\t-m\tEvolutionary model\t[if datatype=protein: WAG, Poisson, Dayhoff, Mtrev, Mtmam, Rtrev, Cprev, Vt, Blosum62]",
		"\t\t\t\t\t\t(default=WAG)",
		"\t\t\t\t\t\t[if datatype=DNA/RNA/nucleotide: 4by4, doublet, codon]",
		"\t\t\t\t\t\t(default=4by4)",
		"\t\t-t\tNumber of states\t[if datatype=DNA/RNA/nucleotide: 6, 2, 1]\n\t\t\t\t\t\t(default=6)",
		"\t\t-v\tUse covarion model",
		"\t\t-g\tNumber of generations in initial analysis (default=1000000)",
		"\t\t-e\tEstimate gamma",
		"\t\t-n\tNumber of gamma categories (default=4)",
		"\t\t-p\tEstimate proportion of invariant sites",
		"\t\t-f\tPrint frequency (default=5000)",
		"\t\t-s\tSampling frequency (default=500)",
		"\t\t-r\tNumber of runs (default=2)",
		"\t\t-c\tNumber of chains per run (default=4)",
		"\tOther options:",
		"\t\t-a\tAverage standard deviation of split frequencies (ASDSF) convergence cutoff (default=0.1)",
		"\t\t-z\tMaximum number of generations to run to try to reach convergence (default=5000000)",
		"\t\t-x\tMinimum number of samples/trees for consensus calculation (default=1000)",
		"\t\t-b\tRound burn-in up to the nearest n generations (default=50000)",
		"\t\t-h\tShow this help screen",
		"\t\t-I\tEnter interactive mode",
	]
	print("\n".join(helplines))
	


#------------------------------------------------------------------------------------
# Analysis settings: command line parsing, interactive menu and MrBayes block generation
#------------------------------------------------------------------------------------





class analysisClass:
	"""Settings for one mb_burnin analysis plus the helpers that act on them.

	The class-level assignments are the defaults; getcommands() and
	changeoption() override them on the instance.  The other public methods
	write MrBayes command blocks into the NEXUS input file (creatembblock,
	creatembsumtblock), read the data type from that file (getinput), show the
	interactive menu (menu) and adjust the heated-chain temperature from the
	MrBayes log (calctemp).
	"""
	inputfile=""		# sequence file named by the user
	logfile=""		# MrBayes log file name
	outputfile=""		# base name passed to MrBayes as "filename="
	datatype="protein"
	mbinputfile=""		# NEXUS file actually handed to MrBayes (set by getinput)
	printfreq=5000
	samplefreq=500
	gens=1000000		# generations to run in the next round
	gamma='no'
	ngamma=4
	pinvar='no'
	nruns=2
	nchains=4
	maxit=5000000		# upper limit on the total number of generations
	model="WAG"
	covarion="no"
	nst="6"
	run=1			# number written into the next "[mb_burnin run N.]" comment
	temp=0.2		# value written as Temp= on the mcmc command
	checkfreq=100
	remaining=1000		# minimum number of samples wanted after the burn-in
	completeruns=0		# generations completed so far
	burnin=-1
	burnround=50000
	interactive='n'
	path="./"		# directory prefix used to locate readseq.jar
	textstring=""		# note appended to the comment heading each generated block
	asdsf=0.1

	# Accepted values, shared by validation, the menu cycles and block writing
	_proteinmodels=["WAG", "Poisson", "Dayhoff", "Mtrev", "Mtmam", "Rtrev", "Cprev", "Vt", "Blosum62"]
	_nucmodels=["4by4", "doublet", "codon"]
	_nuctypes=["DNA", "RNA", "nucleotide"]
	_datatypes=["protein", "DNA", "RNA", "nucleotide", "standard", "continuous"]
	_nstvalues=["6", "2", "1"]
	# Model selected automatically when the menu cycles onto these data types
	_modelfordatatype={"DNA": "4by4", "standard": "standard discrete morphological", "protein": "WAG"}


	def _fail(self, message):
		"""Print an error message and exit with a non-zero status."""
		print(message)
		sys.exit(2)


	def _readfile(self, filename):
		"""Return the whole contents of filename, closing the file afterwards."""
		handle=open(filename,"rU")
		try:
			return handle.read()
		finally:
			handle.close()


	def _cycle(self, choices, current):
		"""Return the entry after current in choices (wrapping round), or current if it is not listed."""
		if current not in choices:
			return current
		return choices[(choices.index(current)+1)%len(choices)]


	def _toggle(self, value):
		"""Swap 'yes' and 'no'; any other value is returned unchanged."""
		if value=="yes":
			return "no"
		if value=="no":
			return "yes"
		return value


	def _asknumber(self, prompt, lowest, highest, convert=int):
		"""Prompt until the user enters a number between lowest and highest (inclusive).

		Input that cannot be converted is treated like an out-of-range value
		and the prompt is simply repeated.
		"""
		while True:
			try:
				value=convert(raw_input(prompt))
			except ValueError:
				continue
			if lowest<=value<=highest:
				return value


	def _adjusttogens(self):
		"""Bring the settings that depend on the number of generations back into line with it."""
		if self.gens>self.maxit:
			self.maxit=self.gens
		if self.gens//100<self.samplefreq:
			self.samplefreq=self.gens//100
		if self.gens<self.printfreq:
			self.printfreq=self.gens
		if self.remaining>(self.maxit//self.samplefreq)//2:
			self.remaining=(self.maxit//self.samplefreq)//2
		if self.gens<self.burnround:
			self.burnround=self.gens//5


	def _validate(self):
		"""Exit with an error message if any setting is outside its permitted range."""
		if self.nruns<2 or self.nruns>100:
			self._fail("\nError: Numer of mcmc runs (r) must be between 2 and 100\n")
		# Zero is rejected as well: the burn-in rounding loops step by this value
		if self.burnround<1:
			self._fail("\nError: Round burn-in (b) must be greater than zero\n")
		if self.printfreq<1:
			self._fail("\nError: Print frequency (f) must be greater than zero\n")
		if self.samplefreq<1:
			self._fail("\nError: Sampling frequency (s) must be greater than zero\n")
		if self.remaining<1:
			self._fail("\nError: Minimum number of samples/trees after burn-in (x) must be greater than zero\n")
		if self.asdsf<0.001 or self.asdsf>1:
			self._fail("\nError: ASDSF cutoff (a) must be between 0.001 and 1\n")
		if self.nchains<1 or self.nchains>100:
			self._fail("\nError: Numer of chains (c) must be between 1 and 100\n")
		if self.gens<1000 or self.gens>1000000000:
			self._fail("\nError: Numer of generations (g) must be between 1000 and 1000000000\n")
		if self.maxit>1000000000:
			self._fail("\nError: Maximum numer of generations (z) must be under 1000000000\n")
		if self.datatype not in self._datatypes:
			self._fail("\nError: Data type (d) must be one of: standard, DNA, RNA, nucleotide, protein, continuous\n")
		if self.datatype=="protein" and self.model not in self._proteinmodels:
			self._fail("\nError: Model (m) must be one of: WAG, Poisson, Dayhoff, Mtrev, Mtmam, Rtrev, Cprev, Vt or Blosum62 if datatype is protein\n")
		if self.datatype in self._nuctypes and self.model not in self._nucmodels:
			self._fail("\nError: Model (m) must be one of: 4by4, doublet or codon if datatype is DNA, RNA or nucleotide\n")
		if self.datatype in self._nuctypes and self.nst not in self._nstvalues:
			self._fail("\nError: Number of states (t) must be one of: 6, 2 or 1\n")
		if self.gamma=="yes" and (self.ngamma<1 or self.ngamma>20):
			self._fail("\nError: Number of gamma categories (n) must be between 1 and 20\n")


	def getcommands(self, arg):
		"""Read the command line options in arg into this object and validate them.

		arg is the argument list without the program name (sys.argv[1:]).
		-v, -e, -p and -I (and their long forms) are flags and take no value.
		An unknown option prints the usage text; an invalid value prints an
		error.  Both exit with status 2.  If no input file is given the user is
		prompted for one and interactive mode is switched on.
		"""
		try:
			opts, args = getopt.getopt(arg, "hi:l:o:d:m:t:vg:en:pf:s:r:c:z:b:x:a:I", ["help", "in=", "log=", "out=", "data=", "model=", "nst=", "covarion", "gens=", "gamma", "gammacats=", "pinvar", "printfreq=", "samplefreq=", "runs=", "chains=", "maxgens=", "minsamples=", "roundburn=", "asdsf=", "interactive"])
		except getopt.GetoptError:
			Usage()
			sys.exit(2)

		# Lower-case spelling -> the capitalisation used for protein models
		canonicalmodel=dict((name.lower(), name) for name in self._proteinmodels)

		for opt, value in opts:
			try:
				if opt in ("-h", "--help"):
					Usage()
					sys.exit()
				elif opt in ("-i", "--in"):
					self.inputfile=value
				elif opt in ("-o", "--out"):
					self.outputfile=value
				elif opt in ("-l", "--log"):
					self.logfile=value
				elif opt in ("-d", "--data"):
					self.datatype=value.lower()
					if self.datatype in ["dna", "rna", "nucleotide"] and self.model not in self._nucmodels:
						self.model="4by4"
					if self.datatype in ["dna", "rna"]:
						self.datatype=self.datatype.upper()
				elif opt in ("-m", "--model"):
					lowered=value.lower()
					self.model=canonicalmodel.get(lowered, lowered)
				elif opt in ("-t", "--nst"):
					self.nst=value
				elif opt in ("-v", "--covarion"):
					self.covarion="yes"
				elif opt in ("-g", "--gens"):
					self.gens=int(value)
				elif opt in ("-e", "--gamma"):
					self.gamma="yes"
				elif opt in ("-n", "--gammacats"):
					self.ngamma=int(value)
				elif opt in ("-p", "--pinvar"):
					self.pinvar="yes"
				elif opt in ("-f", "--printfreq"):
					self.printfreq=int(value)
				elif opt in ("-s", "--samplefreq"):
					self.samplefreq=int(value)
				elif opt in ("-r", "--runs"):
					self.nruns=int(value)
				elif opt in ("-c", "--chains"):
					self.nchains=int(value)
				elif opt in ("-z", "--maxgens"):
					self.maxit=int(value)
				elif opt in ("-x", "--minsamples"):
					self.remaining=int(value)
				elif opt in ("-b", "--roundburn"):
					self.burnround=int(value)
				elif opt in ("-a", "--asdsf"):
					self.asdsf=float(value)
				elif opt in ("-I", "--interactive"):
					self.interactive='y'
			except ValueError:
				self._fail("\nError: Invalid value '"+value+"' for option "+opt+"\n")

		if self.inputfile=='':
			self.interactive='y'
			self.changeoption("i")

		# Validate first: the adjustments below divide by the sampling frequency
		self._validate()
		self._adjusttogens()


	def _disableoldblocks(self):
		"""Return the MrBayes input file text with earlier mrbayes and trees blocks turned into NEXUS comments."""
		data=self._readfile(self.mbinputfile)
		data=data.replace("\nBEGIN mrbayes;","\n[BEGIN mrbayes;").replace("\tquit;\nendblock;\n","\tquit;\nendblock;]\n").replace("\tquit;\n\tendblock;\n","\tquit;\nendblock;]\n")
		data=data.replace("end;\n[BEGIN mrbayes;","end;]\n[BEGIN mrbayes;").replace("\nbegin trees;","\n[begin trees;")
		return data


	def _writelines(self, lines):
		"""Overwrite the MrBayes input file with lines, one per line."""
		output=open(self.mbinputfile,"w")
		try:
			output.write("\n".join(lines)+"\n")
		finally:
			output.close()


	def creatembblock(self):
		"""Append a mrbayes block for the next round of mcmc to the MrBayes input file.

		Blocks from earlier rounds are commented out.  From the second round on,
		text from the <outputfile>.ckp checkpoint is copied in so the run
		continues from where it stopped.  checkfreq is raised to the sampling
		frequency if it is smaller.
		"""
		data=self._disableoldblocks()

		checklines=None
		if self.run>1:
			# Read the checkpoint before the input file is truncated for rewriting
			checklines=self._readfile(self.outputfile+".ckp").split("begin mrbayes;")

		if self.samplefreq>self.checkfreq:
			self.checkfreq=self.samplefreq

		lines=[data]
		lines.append("[mb_burnin run "+str(self.run)+"."+self.textstring+"]")

		if self.run>1:
			lines.append("[importing trees from last mb_burnin run.]")
			lines.extend(checklines[0].split('\n')[4:-2])

		lines.append("BEGIN mrbayes;")
		lines.append("\tset autoclose=yes;")
		if self.run==1:
			lines.append("\tlog start filename="+self.logfile+" replace;")
		else:
			lines.append("\tlog start filename="+self.logfile+" append;")

		if self.datatype=="protein":
			lines.append("\tprset aamodelpr=fixed("+self.model+") statefreqpr=fixed(empirical);")
		elif self.datatype in self._nuctypes:
			nucmodel=self.model
			if nucmodel not in self._nucmodels:
				# Anything else (e.g. "GTR") is not a nucmodel value; the
				# substitution scheme is selected through nst instead
				nucmodel="4by4"
			lines.append("\tlset nucmodel="+nucmodel+" nst="+str(self.nst)+";")

		if self.gamma=="no" and self.pinvar=="no":
			lines.append("\tlset rates=equal;")
		elif self.gamma=="yes" and self.pinvar=="no":
			lines.append("\tlset rates=gamma Ngammacat="+str(self.ngamma)+";")
		elif self.gamma=="no" and self.pinvar=="yes":
			lines.append("\tlset rates=propinv;")
		else:
			lines.append("\tlset rates=invgamma Ngammacat="+str(self.ngamma)+";")

		# The covarion option applies to nucleotide and amino acid data
		if self.covarion=="yes" and (self.datatype=="protein" or self.datatype in self._nuctypes):
			lines.append("\tlset covarion=yes;")

		if self.run>1:
			lines.append("\tmcmcp Nruns="+str(self.nruns)+";")
			lines.extend(checklines[1].split('\n')[:-3])

		# The mcmc command continues onto the following line, which ends it with ";"
		lines.append("\tmcmc ngen="+str(self.gens)+" printfreq="+str(self.printfreq)+" samplefreq="+str(self.samplefreq)+" Nruns="+str(self.nruns)+" nchains="+str(self.nchains)+" Checkpoint=yes Checkfreq="+str(self.checkfreq)+" Temp="+str(self.temp))
		if self.run==1:
			lines.append("\tstarttree=random filename="+self.outputfile+";")
		else:
			lines.append("\tfilename="+self.outputfile+";")
		lines.append("\tlog stop;\n\tquit;\nendblock;\n")

		self._writelines(lines)


	def creatembsumtblock(self):
		"""Append a mrbayes block that runs sumt on <outputfile>.all with the chosen burn-in.

		Blocks from earlier rounds are commented out first.
		"""
		lines=[self._disableoldblocks()]
		lines.append("[mb_burnin calculating consensus tree."+self.textstring+"]")
		lines.append("BEGIN mrbayes;")
		lines.append("\tset autoclose=yes;")
		lines.append("\tlog start filename="+self.logfile+" append;")
		lines.append("sumt filename= "+self.outputfile+".all Contype=Allcompat relburnin=no Nruns="+str(self.nruns)+" burnin="+str(self.burnin)+";")
		lines.append("\tlog stop;\n\tquit;\nendblock;\n")
		self._writelines(lines)


	def _parsedatatype(self, nexustext):
		"""Return the data type named after 'datatype=' in nexustext, or None.

		The match is case-insensitive and ignores a trailing semicolon; the
		result uses the spelling of the rest of the class ("DNA", "RNA",
		"nucleotide", "protein", "standard", "continuous").
		"""
		lowered=nexustext.lower()
		start=lowered.find('datatype')
		if start==-1:
			return None
		pieces=lowered[start:].split("=")
		if len(pieces)<2:
			return None
		words=pieces[1].split()
		if not words:
			return None
		value=words[0].strip(";")
		for name in self._datatypes:
			if name.lower()==value:
				return name
		return None


	def getinput(self):
		"""Choose the NEXUS file to give to MrBayes and read its data type.

		An input file that does not start with "#nexus" is converted by running
		readseq.jar through java, and <inputfile>.nexus is used instead.  If
		the data type cannot be read a message is printed and the current
		datatype is kept.
		"""
		indata=self._readfile(self.inputfile)

		if indata[:6].lower()!="#nexus":
			# NOTE(review): the file name is passed to the shell unquoted
			os.system("java -cp "+self.path+"readseq.jar run -a -f 17 "+self.inputfile)
			self.mbinputfile=self.inputfile+".nexus"
		else:
			self.mbinputfile=self.inputfile

		found=self._parsedatatype(self._readfile(self.mbinputfile))
		if found is None:
			print("Cannot find datatype in NEXUS file")
		else:
			self.datatype=found


	def menu(self):
		"""Clear the terminal and print the current settings with their menu letters."""
		os.system("clear")

		print("\n\nmb_burnin.py\n\nA python script to run mrBayes, check for convergence and calculate a sensible burnin.\nSimon Harris 2008\n")

		print("MrBayes options:")
		print("\ti: Input file\t\t\t\t\t\t\t"+str(self.inputfile))
		print("\tl: Log file\t\t\t\t\t\t\t"+str(self.logfile))
		print("\to: Output file\t\t\t\t\t\t\t"+str(self.outputfile))
		print("\td: Data type\t\t\t\t\t\t\t"+str(self.datatype))
		print("\tm: Model\t\t\t\t\t\t\t"+str(self.model))
		if self.datatype in self._nuctypes:
			print("\tt: Number of states\t\t\t\t\t\t"+str(self.nst))
		print("\tv: Use covarion model?\t\t\t\t\t\t"+str(self.covarion))
		print("\tg: Number of generations in initial analysis\t\t\t"+str(self.gens))
		print("\te: Estimate gamma?\t\t\t\t\t\t"+str(self.gamma))
		if self.gamma=='yes':
			print("\tn: Number of gamma categories\t\t\t\t\t"+str(self.ngamma))
		print("\tp: Estimate proportion of invariant sites?\t\t\t"+str(self.pinvar))
		print("\tf: Print frequency?\t\t\t\t\t\t"+str(self.printfreq))
		print("\ts: Sampling frequency?\t\t\t\t\t\t"+str(self.samplefreq))
		print("\tr: Number of runs?\t\t\t\t\t\t"+str(self.nruns))
		print("\tc: Number of chains?\t\t\t\t\t\t"+str(self.nchains))
		print("Other options:")
		print("\ta: ASDSF convergence cutoff\t\t\t\t\t"+str(self.asdsf))
		print("\tz: Maximum number of generations to try to reach convergence\t"+str(self.maxit))
		print("\tx: Minimum number of samples/trees remaining after burn-in\t"+str(self.remaining))
		print("\tb: Round burn-in to nearest\t\t\t\t\t"+str(self.burnround))
		print("\tq: Quit")


	def changeoption(self, option):
		"""Change the setting selected by a one-letter menu option.

		Choice settings (d, m, t, e, p, v) step to their next value; the others
		prompt the user until an acceptable value is entered.  Letters that do
		not name a setting are ignored.
		"""
		if option=='i':
			foo=''
			while not os.path.isfile(foo) and foo.lower()!='q':
				foo=raw_input('\nEnter the name of an input file: ')
			if foo.lower()!='q':
				self.inputfile=foo
				basename=self.inputfile.split('/')[-1].split('.')[0]
				self.logfile=basename+".logmb"
				self.outputfile=basename+".mrbayes"
		elif option=='l':
			self.logfile=raw_input('\nEnter the name of a log file: ')
		elif option=='o':
			self.outputfile=raw_input('\nEnter the name of an output file: ')
		elif option=="d":
			self.datatype=self._cycle(self._datatypes, self.datatype)
			if self.datatype in self._modelfordatatype:
				self.model=self._modelfordatatype[self.datatype]
		elif option=="m":
			if self.datatype=="protein":
				self.model=self._cycle(self._proteinmodels, self.model)
			elif self.datatype in self._nuctypes:
				self.model=self._cycle(self._nucmodels, self.model)
		elif option=="t":
			self.nst=self._cycle(self._nstvalues, self.nst)
		elif option=="g":
			self.gens=self._asknumber('\nEnter the number of generations (between 1000 and 1000000000): ', 1000, 1000000000)
			self._adjusttogens()
		elif option=="e":
			self.gamma=self._toggle(self.gamma)
		elif option=="n" and self.gamma=="yes":
			self.ngamma=self._asknumber('\nEnter the number of gamma categories (between 1 and 20): ', 1, 20)
		elif option=="p":
			self.pinvar=self._toggle(self.pinvar)
		elif option=="v":
			self.covarion=self._toggle(self.covarion)
		elif option=="f":
			self.printfreq=self._asknumber('\nEnter frequency to print to the screen (between 1 and '+str(self.gens)+'): ', 1, self.gens)
		elif option=="s":
			self.samplefreq=self._asknumber('\nEnter frequency to sample chains (between 1 and '+str(self.gens//100)+'): ', 1, self.gens//100)
			if self.remaining>(self.gens//self.samplefreq)//2:
				self.remaining=(self.gens//self.samplefreq)//2
		elif option=="r":
			self.nruns=self._asknumber('\nEnter number of mcmc runs (between 2 and 100): ', 2, 100)
		elif option=="c":
			self.nchains=self._asknumber('\nEnter number of chains per run (between 1 and 100): ', 1, 100)
		elif option=="z":
			self.maxit=self._asknumber('\nEnter maximum number of generations to run to attempt to reach convergence (between '+str(self.gens)+' and 1000000000): ', self.gens, 1000000000)
			if self.remaining>(self.maxit//self.samplefreq)//2:
				self.remaining=(self.maxit//self.samplefreq)//2
		elif option=="x":
			most=(self.maxit//self.samplefreq)//2
			self.remaining=self._asknumber('\nEnter minimum number of samples that must remian after the burn-in is removed (between 1 and '+str(most)+'): ', 1, most)
		elif option=="b":
			self.burnround=self._asknumber('\nRound burn-in to the nearest (between 1000 and '+str(self.gens)+' generations): ', 1000, self.gens)
		elif option=="a":
			# The cutoff belongs in asdsf, not in burnround
			self.asdsf=self._asknumber('\nAverage standard deviation of split frequencies (ASDSF) convergence cutoff (between 0.001 and 1): ', 0.001, 1, float)


	def calctemp(self):
		"""Set self.temp for the next round from the MrBayes log file.

		temp starts at 0.2 and is replaced by the fifth word of the
		(run-1)th log line whose second word is "heating".  Log lines
		containing "|" are then counted, skipping those from earlier rounds; on
		each remaining row the value in column swapno after the bar is tested.
		A value under 0.1 lowers temp by 0.05, a value over 0.7 raises it by
		0.05, and the method returns after the first change, noting it in
		textstring.
		"""
		self.temp=0.2
		handle=open(self.logfile,"rU")
		try:
			loglines=handle.readlines()
		finally:
			handle.close()

		# NOTE(review): assumes the "|" lines are swap tables with one row per
		# chain for every run - confirm against a real MrBayes log
		rowsperround=self.nruns*self.nchains
		swapno=0
		heatingcount=0
		barcount=0
		for line in loglines:
			words=line.strip().split()
			if len(words)>1 and words[1]=="heating":
				heatingcount=heatingcount+1
				if heatingcount==self.run-1:
					self.temp=float(words[4])
			elif len(line.split("|"))>1:
				barcount=barcount+1
				if barcount>(self.run-2)*rowsperround:
					# The last row of each table is not tested
					if swapno<self.nchains-1:
						rate=float(line.split("|")[1].split()[swapno])
						if rate<0.1:
							self.temp=self.temp-0.05
							self.textstring=self.textstring+". Temperature of heated chains decreased by 0.05"
							return
						if rate>0.7:
							self.temp=self.temp+0.05
							self.textstring=self.textstring+". Temperature of heated chains increased by 0.05"
							return
					swapno=swapno+1
					if swapno==self.nchains:
						swapno=0





	

#------------------------------------------------------------------------------------
# Run main() with command line arguments
#------------------------------------------------------------------------------------
if __name__ == "__main__":
	# Overall flow: read the options, prepare the NEXUS input, optionally show
	# the interactive menu, then run MrBayes in rounds.  After every round the
	# new output is merged into <outputfile>.all.* files and a burn-in is
	# worked out.  The rounds stop when the analysis has converged with enough
	# samples left after the burn-in, or when the generation limit is reached.

	analysis=analysisClass()

	# Kept as a module-level name: getcommands() has read the global argv
	argv=sys.argv[1:]
	analysis.getcommands(argv)

	if not os.path.isfile(analysis.inputfile):
		print("\nInvalid filename")
		analysis.changeoption("i")
	if not os.path.isfile(analysis.inputfile):
		# The user gave up on choosing a file; nothing can be analysed
		sys.exit(1)

	# readseq.jar and mrbayes/mb are looked up relative to this script
	scriptdir=os.path.dirname(sys.argv[0])
	if scriptdir:
		analysis.path=os.path.join(scriptdir,"")

	analysis.getinput()

	# Fill in file names only where the user did not supply them
	basename=analysis.inputfile.split('/')[-1].split('.')[0]
	if not analysis.logfile:
		analysis.logfile=basename+".logmb"
	if not analysis.outputfile:
		analysis.outputfile=basename+".mrbayes"

	# getinput() may have changed the data type: keep the chosen model when it
	# suits that type, otherwise fall back to the default for the type
	if analysis.datatype=="protein":
		if analysis.model not in ["WAG", "Poisson", "Dayhoff", "Mtrev", "Mtmam", "Rtrev", "Cprev", "Vt", "Blosum62"]:
			analysis.model="WAG"
	elif analysis.datatype in ["DNA","RNA","nucleotide"]:
		if analysis.model not in ["4by4", "doublet", "codon"]:
			analysis.model="4by4"

	if analysis.interactive=='n':
		option='y'
	else:
		option='h'

	while option!='y':
		analysis.menu()
		foo=raw_input('\nSelect an option to change or type y to run analysis: ')
		# Slicing keeps an empty answer from raising IndexError
		option=foo[:1].lower()
		if option=='q':
			sys.exit()
		if option !='y':
			analysis.changeoption(option)

	analysis.burnin=-1
	initialgens=analysis.gens

	converged='n'
	enoughtrees='n'
	consensusmade='n'

	# A step of zero would make the rounding loops below run forever
	burnstep=max(analysis.burnround, 1)

	while (enoughtrees=='n' or converged=='n') and analysis.completeruns<analysis.maxit:

		# Never run past the overall generation limit
		if (analysis.maxit-analysis.completeruns)<analysis.gens:
			analysis.gens=(analysis.maxit-analysis.completeruns)

		analysis.creatembblock()
		analysis.run=analysis.run+1

		os.system(analysis.path+"mrbayes/mb "+analysis.mbinputfile)

		converged='y'
		enoughtrees='y'

		# ---- merge this round's .mcmc diagnostics into <outputfile>.all.mcmc ----
		if analysis.run>2:
			# The merged file is written without a final newline, so strip
			# whitespace only; a fixed-length slice would cut into the data
			mcmcdata=open(analysis.outputfile+".all.mcmc","rU").read().rstrip()
			startnum=int(mcmcdata.split("\n")[-1].split()[0])
			newmcmcdata=open(analysis.outputfile+".mcmc","rU").readlines()
			for line in newmcmcdata[8:]:
				if not line.strip():
					continue
				words=line.strip().split('\t')
				# Renumber so the generations continue from the earlier rounds
				words[0]=str(int(words[0])+startnum)
				mcmcdata=mcmcdata+"\n"+'\t'.join(words)
		else:
			mcmcdata=open(analysis.outputfile+".mcmc","rU").read()

		mcmcfile=open(analysis.outputfile+".all.mcmc","w")
		mcmcfile.write(mcmcdata)
		mcmcfile.close()

		# ---- find the last diagnostic row whose final column is at or above the cutoff ----
		burnins=[]
		for i in range(0,analysis.nruns):
			burnins.append([0,0,0,0,0])
		asdsf=0
		asdsfburnin=0
		analysis.burnin=-1

		mcmclines=open(analysis.outputfile+".all.mcmc","rU").readlines()
		count=0
		for mcmcline in mcmclines[8:]:
			fields=mcmcline.strip().split("\t")
			if len(fields)>5:
				count=int(fields[0])
				if float(fields[-1])>=analysis.asdsf:
					asdsf=count+1000

		analysis.completeruns=analysis.completeruns+analysis.gens
		print(str(asdsf)+" "+str(count)+" "+str(analysis.completeruns))

		if count<analysis.completeruns:
			print("WARNING, this analysis is not complete!")
			sys.exit(1)

		elif asdsf==(count+1000):
			# The final row was still at or above the cutoff: go round again
			analysis.textstring=" Analysis had not converged after last round."
			analysis.gens=initialgens
			analysis.calctemp()
			converged='n'
			enoughtrees='n'
		else:
			rounded=0
			while asdsf>rounded:
				rounded=rounded+burnstep
				# Burn-in is a number of samples (it is multiplied by the
				# sampling frequency further down), so divide by that frequency
				asdsfburnin=(rounded//analysis.samplefreq)+1

		# ---- merge each run's .p and .t files and work out a likelihood-based burn-in ----
		for i in range(1,analysis.nruns+1):

			if analysis.run>2:
				pdata=open(analysis.outputfile+".all.run"+str(i)+".p","rU").read().rstrip()
				startnum=int(pdata.split("\n")[-1].split()[0])

				newpdata=open(analysis.outputfile+".run"+str(i)+".p","rU").readlines()
				for line in newpdata[3:]:
					words=line.strip().split()
					if not words:
						continue
					words[0]=str(int(words[0])+startnum)
					pdata=pdata+"\n"+'\t'.join(words)

				treedata=open(analysis.outputfile+".all.run"+str(i)+".t","rU").read()
				treedata=treedata.replace("\nend;\n","")

				newtreedata=open(analysis.outputfile+".run"+str(i)+".t","rU").readlines()
				for line in newtreedata[3:]:
					words=line.strip().split()
					if len(words)>1:
						splitwordzero=words[1].split('.')
						if len(splitwordzero)>1:
							try:
								generation=int(splitwordzero[1])
							except ValueError:
								# Not a numbered tree line (for example a name containing a dot)
								continue
							if generation>1:
								splitwordzero[1]=str(generation+startnum)
								words[1]=".".join(splitwordzero)
								treedata=treedata+"\n\t"+' '.join(words)

				treedata=treedata+"\nend;\n"

			else:
				treedata=open(analysis.outputfile+".run"+str(i)+".t","rU").read()
				pdata=open(analysis.outputfile+".run"+str(i)+".p","rU").read()

			treefile=open(analysis.outputfile+".all.run"+str(i)+".t","w")
			pfile=open(analysis.outputfile+".all.run"+str(i)+".p","w")
			pfile.write(pdata)
			treefile.write(treedata)
			treefile.close()
			pfile.close()

			# Mean of (1 - second column) over the last 100 rows, then the first
			# row that falls below that mean marks the burn-in for this run
			sumlikes=0.0
			likelines=open(analysis.outputfile+".all.run"+str(i)+".p","rU").readlines()
			for likeline in likelines[-100:]:
				words=likeline.split()
				sumlikes=sumlikes+(1-float(words[1]))
			sumlikes=sumlikes/100
			for j, likeline in enumerate(likelines[2:]):
				words=likeline.split()
				if (1-float(words[1]))<sumlikes:
					rounded=0
					while int(words[0])>rounded:
						rounded=rounded+burnstep
					burnins[i-1]=[j+1,float(words[1]),words[0],rounded,(rounded//analysis.samplefreq)+1]
					break

		analysis.burnin=asdsfburnin
		analysis.textstring=" Burnin = "+str(analysis.burnin)+" based on average standard deviation of split frequencies"

		# Use the largest burn-in suggested by any run if it exceeds the ASDSF one
		if converged =="y":
			for runindex in range(0,analysis.nruns):
				if burnins[runindex][4]>analysis.burnin and burnins[runindex][4]>asdsfburnin:
					analysis.burnin=burnins[runindex][4]
					analysis.textstring=" Burnin = "+str(analysis.burnin)+" based on average of final 100 generations of chain "+str(runindex+1)
				elif burnins[runindex][4]==analysis.burnin and burnins[runindex][4]>asdsfburnin:
					analysis.textstring=analysis.textstring.replace("chain ", "chains ")+" and "+str(runindex+1)

		if (analysis.completeruns-(analysis.burnin*analysis.samplefreq))<(analysis.remaining*analysis.samplefreq) and converged=='y':
			analysis.textstring=" Analysis appears to have converged, but more generations are needed to reach a sample of "+str(analysis.remaining)+" trees after the burn-in"
			analysis.calctemp()
			analysis.gens=((analysis.remaining+10)*analysis.samplefreq)-(analysis.completeruns-(analysis.burnin*analysis.samplefreq))
			enoughtrees='n'
		elif converged=='y':
			enoughtrees='y'

		# Round the next round's length up to a whole number of thousands
		rounded=0
		while analysis.gens>rounded:
			rounded=rounded+1000
		analysis.gens=rounded

	if converged=="n":
		print("WARNING!!! This analysis appears not to have converged. Try rerunning it with a larger maximum number of generations.")
	elif enoughtrees=="n":
		print("WARNING!!! This analysis appears to have converged, but more generations are needed to reach a sample of "+str(analysis.remaining)+" trees after the burn-in. Try rerunning it with a larger maximum number of generations.")
	else:
		analysis.creatembsumtblock()
		os.system(analysis.path+"mrbayes/mb "+analysis.mbinputfile)
		consensusmade='y'

	# Give the merged files their final names
	for i in range(1,analysis.nruns+1):
		os.system("mv "+analysis.outputfile+".all.run"+str(i)+".p "+analysis.outputfile+".run"+str(i)+".p")
		os.system("mv "+analysis.outputfile+".all.run"+str(i)+".t "+analysis.outputfile+".run"+str(i)+".t")
	if consensusmade=="y":
		# These files exist only when sumt was actually run
		os.system("mv "+analysis.outputfile+".all.con "+analysis.outputfile+".con")
		os.system("mv "+analysis.outputfile+".all.trprobs "+analysis.outputfile+".trprobs")
		os.system("mv "+analysis.outputfile+".all.parts "+analysis.outputfile+".parts")
	os.system("mv "+analysis.outputfile+".all.mcmc "+analysis.outputfile+".mcmc")
	# The checkpoint files are left in place: moving <outputfile>.ckp~ onto
	# <outputfile>.mcmc would overwrite the merged diagnostics moved just above
	
	

		