#!/usr/bin/perl -w # # cel2ratios, written by Felix Naef (2001-2002) # # Takes two processed cell files (.PCEL files) and computes ratios plus other stuff # use strict; use Getopt::Long; my($cell_file1, $cell_file2, $pval, $noisecut, $adjust, $quantile, $max_cells, $outfile, $noout, $sgf, $mmfl); #defaults $pval=0; $adjust=0; $noout=0; $sgf=0; $mmfl=0, $max_cells=24, $quantile=0; $noisecut=3; &GetOptions("f1=s" => \$cell_file1, "f2=s" => \$cell_file2, "o=s" => \$outfile, "noout" => \$noout, "p" => \$pval, "adj=i" => \$adjust, "sgf" => \$sgf, "max=i" => \$max_cells, "nc=f" => \$noisecut, "qu=f" => \$quantile, "mm=i" => \$mmfl); if ( (not defined $outfile) || substr($outfile, -4) ne ".RAT") {die "Output file must have .RAT extension\n Usage: cel2ratios OPTIONS where OPTIONS are: -f1 first .PCEL file -f2 second .PCEL file -o output .RAT file -noout do not output ratio file, only output the .REP file -p compute Wilcoxon p-values for ratios -adj adjust (normalization), 1 for global, 2 for local (robust but slow cf. \$Nloess) -sgf computes the log-ratio divided by the local variance (robust but slow \$Nloess) -mm default is PM only; 1 for PM-MM; 2 for MM only; 3 for all PM,MMs -max default=24, maximum number of probepairs per probeset (some probesets have more, i.e. in HG_U95) -nc default is 3, noise cutoff, probes below nc*sigma_eff are discarded -qu default is .5, quantile to use in LTS regression \n";} # Parameters my $log2=log(2.0); my $min_adjust=50; my $fc_adjust=1.2; my $tol=0.00001; #convergence can be slow my $max_iter=5000; my $span = .75; # span used in loess function my $Nloess = 4000; # number of randomly chosen points in loess [0: all] my $loess_it = 4; # number of iterations in loess $max_cells *=2 if $mmfl==3; # Open various files $outfile =~ s/.RAT/_PM.RAT/ if not $mmfl; $outfile =~ s/.RAT/_SUB.RAT/ if $mmfl==1; $outfile =~ s/.RAT/_MM.RAT/ if $mmfl==2; $outfile =~ s/.RAT/_APP.RAT/ if $mmfl==3; my $outrep = $outfile; $outrep =~ s/RAT/REP/; my $addrep = $outfile; $addrep =~ s/RAT/AREP/; open(REP, ">$outrep") || die "Can't open $outrep\n"; #report file open(OUT, ">$outfile") || die "Can't open $outfile\n" unless $noout; #output file #write params to .REP file print REP "PM only\n" if not $mmfl; print REP "MM only\n" if $mmfl==2; print REP "PM-MM \n" if $mmfl==1; print REP "PM,MM \n" if $mmfl==3; print REP "File1: $cell_file1\n"; print REP "File2: $cell_file2\n"; print REP "Maximum number of probepairs: $max_cells\n"; print REP "P-values: $pval\n"; print REP "Adjust: $adjust\n"; print REP "No output: $noout\n"; print REP "Local STD: $sgf\n"; print REP "Output file: $outfile\n"; print REP "Noise cutoff: $noisecut\n"; print REP "LTS Quantile: $quantile\n"; print REP "Loess span: $span\n"; print REP "NLoess: $Nloess\n"; my @files=($cell_file1, $cell_file2); my $nfiles=2; my ($file, $pm, $flag, $n, $dummy, $nf, $mm, $pmmm); my (@label, @pm, @mm, @pmmm, @flag, @data_cell); my %data; my ($sig_file, $bkg, $sig, $sig1, $sat, $sig_eff, $int, $ints); my %sat; $nf=0; foreach $file (@files) { open(F1, $file) || die "Can't open $file\n"; $sig_file = $file; $sig_file =~ s/.PCEL/.REP/; ($bkg, $sig, $sat)=&read_sigma_sat($sig_file); printf REP "$file: Bare sigma: %.3f\n", $sig; $sig1=$sig if $file eq $cell_file1; $sig_eff = $sig; $sig_eff = sqrt(2.0)*$sig if ($mmfl==1); $sat -= $bkg * 1.05; #this is arbitrary for now while () { chomp; @label=split(/\t/); $pm=; chomp $pm; @pm=split(/\t/, $pm); $mm=; chomp $mm; @mm=split(/\t/, $mm); $pmmm=; chomp $pmmm; @pmmm=split(/\t/, $pmmm); $flag=; unless ( $mmfl==3 ) { unless ( $label[1] > $max_cells ) { for ($n=0; $n < $label[1]; $n++) { my $nsat=0; if($mmfl==0) { my $m; for ($m=0; $m < $label[1]; $m++) { $nsat++ if $pm[$m] > $sat; } } if($mmfl==0) { $sat{$label[0]} = "PM" if $nsat <= $label[1]/2; $sat{$label[0]} = "MM" if $nsat > $label[1]/2; } ($int, $ints) = ($pm[$n], $pm[$n]) if (not $mmfl) && $nsat <= $label[1]/2; ($int, $ints) = ($mm[$n], $mm[$n]) if (not $mmfl) && $nsat > $label[1]/2; ($int, $ints) = ($pm[$n]-$mm[$n], $pm[$n]) if $mmfl == 1; ($int, $ints) = ($mm[$n], $mm[$n]) if $mmfl == 2; $flag = flag($ints, $sig_eff, $sat); @data_cell=($int, $flag); $data{$label[0]} -> [$n] -> [$nf] = [@data_cell]; } } } else { unless ( $label[1] > $max_cells/2 ) { for ($n=0; $n < $label[1]; $n++) { ($int, $ints) = ($pm[$n], $pm[$n]); $flag = flag($ints, $sig_eff, $sat); @data_cell=($ints, $flag); $data{$label[0]} -> [2*$n] -> [$nf] = [@data_cell]; ($int, $ints) = ($mm[$n], $mm[$n]); $flag = flag($ints, $sig_eff, $sat); @data_cell=($int, $flag); $data{$label[0]} -> [2*$n+1] -> [$nf] = [@data_cell]; } } } } close(F1); $nf++; } # Compute ratios, run through all affy_labels my $tmpf1 = $outfile . ".tmp"; my $tmpf2 = $outfile . ".tmpl"; my $outfile_Rout = $outfile . ".Rout"; my %above; count_noise(); write_cells(); call_R_regression(); my %output; read_R_regression(); unlink $tmpf1, $tmpf2, $outfile_Rout; #adjust my @all_ratios; my ($label, $ratio); if ($adjust) { foreach $label (keys %output) { $ratio = $output{$label} -> [2]; unless ($ratio eq "NA") { push @all_ratios, $ratio; } } } # Adjust scaling by centering the ratio histogram and optionnally loess my ($log_adj, $del, $n_iter, $scale_global); my (%scale_local, %sgf); if ($adjust) { ($log_adj, $del, $n_iter)=adjust(@all_ratios) if $cell_file1 ne $cell_file2; ($log_adj, $del, $n_iter)=(0,0,0) if $cell_file1 eq $cell_file2; printf REP "\nAdjusted scale (log2 base): %.3f\n", $log_adj; print REP "Number of iterations: $n_iter\n"; printf REP "Delta: %.6e\n",$del; $scale_global=$log_adj; printf REP "\nScale factor for $cell_file1: %.3f\n", 2**(- $log_adj); printf REP "\nScaled sigma 1: %.3f\n", $sig1*2**(-$log_adj); &scale_global(); } my ($R_local_reg_dat, $R_local_reg, $R_out_local_reg); if ($adjust == 2) { unless($cell_file1 eq $cell_file2) { $R_local_reg_dat = $outfile; $R_local_reg_dat =~ s/RAT/local.dat/; $R_local_reg = $outfile; $R_local_reg =~ s/RAT/local.R/; $R_out_local_reg = $outfile; $R_out_local_reg =~ s/RAT/local.R_out/; &local_scaling_R; &read_local_scaling_R; print REP "\nComputed local regression scaling."; &scale_local; unlink ($R_local_reg_dat, $R_local_reg, $R_out_local_reg); } } my (%pvals, %reg); my ($R_script_pval, $R_script_out_pval); my ($R_script_sgf, $R_script_sgf_dat, $R_script_out_sgf); if ($pval) { $R_script_pval = $outfile; $R_script_pval =~ s/RAT/pval.R/; $R_script_out_pval = $outfile; $R_script_out_pval =~ s/RAT/pval.R_out/; &write_cells(); &call_R_pvalues; &read_R_pvalues; print REP "\nComputed the paired Wilcoxin test p-values.\n"; unlink $tmpf1, $tmpf2, $R_script_pval, $R_script_out_pval; } if ($sgf) { $R_script_sgf_dat = $outfile; $R_script_sgf_dat =~ s/RAT/sgf.dat/; $R_script_sgf = $outfile; $R_script_sgf =~ s/RAT/sgf.R/; $R_script_out_sgf = $outfile; $R_script_out_sgf =~ s/RAT/sgf.R_out/; &call_R_sgf; &read_R_sgf; print REP "Computed the local STD.\n"; unlink $R_script_sgf_dat, $R_script_sgf, $R_script_out_sgf; } # Write output my (@result1, @result); my $line0 = "Label\tI1\tI2\tRatio\tLog2 Ratio\tRes\tRes2\tNcells\tNabove"; $line0 .= "\tp-value" if $pval; $line0 .= "\tLog-Ratio/Std" if $sgf; $line0 .= "\tlocal scale" if $adjust==2; $line0 .= "\tlocal sigma" if $sgf; $line0 .= "\tPM|MM" if $mmfl==0; $line0 .= "\n"; unless ($noout) { print OUT $line0; foreach $label (keys %output) { @result=@{$output{$label}}; my $format1="%s\t"; if ($result[0] ne "NA") { @result1=( $result[0], $result[1], 2**$result[2], @result[2..5]); $format1 .= ("%.5f\t" x 6) . "%d\t"; } else { @result1 = qw (NA NA NA NA NA NA NA); $format1 .= ("%s\t" x 7); } push @result1, $above{$label}; $format1 .= "%s\t"; if ($pval) { if ($pvals{$label} =~ /NA/) {$format1 .= "%s\t";} else {$format1 .= "%.3e\t";} push @result1, $pvals{$label}; } if ($sgf) { if (exists $sgf{$label}) { if ($sgf{$label} ne "NA" && $sgf{$label} > 0 ) { $format1 .= "%.3e\t"; push @result1, $result[2] / $sgf{$label} ; } else { $format1 .= "%s\t"; push @result1, "NA"; } } else { $format1 .= "%s\t"; push @result1, "NA"; } } if ($adjust==2) { if (exists $scale_local{$label}) { if ($scale_local{$label} ne "NA" ) { $format1 .= "%.3e\t"; push @result1, $scale_local{$label} ; } else { $format1 .= "%s\t"; push @result1, "NA"; } } else { $format1 .= "%s\t"; push @result1, "NA"; } } if ($sgf) { if (exists $sgf{$label}) { if ($sgf{$label} ne "NA" && $sgf{$label} > 0 ) { $format1 .= "%.3e\t"; push @result1, $sgf{$label} ; } else { $format1 .= "%s\t"; push @result1, "NA"; } } else { $format1 .= "%s\t"; push @result1, "NA"; } } if ($mmfl == 0) { $format1 .= "%s\t"; push @result1, $sat{$label}; } chop $format1; $format1 .= "\n"; printf OUT $format1, $label, @result1; } } sub mean { use strict; if (scalar @_ == 0) {return "NA"}; my $mean=0; my $n=0; my $el; foreach $el (@_) { $n++; $mean += $el; } $mean /= $n; return $mean; } sub by_number { $a <=> $b; } sub by_ratio { use strict; my @c=split(":", $a); my @d=split(":", $b); $c[0] <=> $d[0]; } sub adjust { use strict; if (scalar @_ < $min_adjust) {return (0, 0, 0);} my @l_ratios=sort by_number @_; my @histo=(); my $len = scalar @l_ratios; my $win = int(sqrt($len)/2); my $dy = 2*$win; my $sl_max = -1; my $i_max; my ($i, $diff, $slope); #open(HIS, ">hfc"); for ($i=$win; $i < $len - $win; $i+=$win) { $diff = $l_ratios[$i+$win] - $l_ratios[$i-$win]; if ($diff > 0) { $slope = $dy / $diff; $histo[$i]=$slope; if ($slope > $sl_max) { $sl_max=$slope; $i_max=$i;} # print HIS $l_ratios[$i], "\t", $slope, "\n"; } } #close(HIS); #center the histogram recurively my $fc_cutoff=log($fc_adjust)/$log2; my $delta=1; my $center_new; my $nc; my $center=$l_ratios[$i_max]; my $n_iter=0; while ($delta > $tol && $n_iter < $max_iter) { $n_iter++; $center_new=0; $nc=0; for ($i=0; $i < $len; $i++) { if ( abs($l_ratios[$i]-$center) < $fc_cutoff) { $center_new += $l_ratios[$i]; $nc++; } } $center_new /= $nc; $delta = abs($center-$center_new); $center = $center_new; } return ($center, $delta, $n_iter) ; } sub write_cells { use strict; open(TMP, ">$tmpf1") || die "Can't open $tmpf1\n"; open(TMPL, ">$tmpf2") || die "Can't open $tmpf2\n"; my $ncells; foreach $label (keys %data) { print TMPL $label, "\n"; my ($n, $i1, $i2, $f1, $f2); my @int1=(); my @int2=(); $ncells = scalar @{$data{$label}}; my $ngood=0; for ($n=0; $n < $ncells; $n++) { $i1 = $data{$label} -> [$n] -> [0] -> [0]; $i2 = $data{$label} -> [$n] -> [1] -> [0]; $f1 = $data{$label} -> [$n] -> [0] -> [1]; $f2 = $data{$label} -> [$n] -> [1] -> [1]; if ( ($f1 =~ /^S/ && $f2 =~ /^S/) || ($f1 =~ /^N/ && $f2 =~ /^N/) || ($i1 <= 0 || $i2 <= 0) || ($f1 =~ /NA/ || $f2 =~ /NA/) ) { # push @int1, "NA"; # push @int2, "NA"; } else { $ngood++; push @int1, $i1; push @int2, $i2; } } # Fill in the blanks for ($n = $ngood; $n < $max_cells; $n++) { push @int1, "NA"; push @int2, "NA"; } print TMP join("\t", @int1), "\n"; print TMP join("\t", @int2), "\n"; } close(TMP); close(TMPL) } sub count_noise { use strict; my $ncells; foreach $label (keys %data) { my ($f1, $f2); my $n1=0; my $n2=0; $ncells = scalar @{$data{$label}}; for ($n=0; $n < $ncells; $n++) { $f1 = $data{$label} -> [$n] -> [0] -> [1]; $f2 = $data{$label} -> [$n] -> [1] -> [1]; $n1++ if $f1 !~ /^N/; $n2++ if $f2 !~ /^N/; } $above{$label} = "$n1:$n2"; } } sub call_R_pvalues { use strict; open(TMP, ">$R_script_pval") || die "Can't open \n"; print TMP "x <- matrix(scan(\"$tmpf1\"), ncol = $max_cells, byrow = T)\n"; print TMP "ncells<-dim(x)[2]\n"; print TMP "nprobes<-dim(x)[1] / 2\n"; print TMP "p <- 1:nprobes\n"; print TMP "for (i in 1:nprobes) { \n"; print TMP "e1<-x[2*i-1,]\n"; print TMP "e2<-x[2*i,]\n"; print TMP "if (length(e1[e1==\"NA\"]) == ncells) {\n"; print TMP "p[i] <- NA}\n"; print TMP "else {\n"; print TMP "p[i] <- wilcox.test(e1, e2, paired = T)\$p.value\n"; print TMP "}\n"; print TMP "}\n"; print TMP "write.table(p, file = \"$R_script_out_pval\", quote=F, row.names=F, col.names=F)\n"; close(TMP); system("R --vsize=40M --no-save --slave --quiet < $R_script_pval\n"); } sub call_R_regression { use strict; open(TMP, ">$outfile.Reg") || die "Can't open $outfile.R\n"; print TMP "library(lqs)\n"; print TMP "l2 <- log(2.0)\n"; print TMP "x <- matrix(scan(\"$tmpf1\"), ncol = $max_cells, byrow = T)\n"; print TMP "ncells<-dim(x)[2]\n"; print TMP "nprobes<-dim(x)[1] / 2\n"; print TMP "i1 <- 1:nprobes\n"; print TMP "i2 <- 1:nprobes\n"; print TMP "sl <- 1:nprobes\n"; print TMP "r2 <- 1:nprobes\n"; print TMP "r2t <- 1:nprobes\n"; print TMP "nt <- 1:nprobes\n"; print TMP "for (n in 1:nprobes) { \n"; print TMP "e1<-x[2*n-1,]\n"; print TMP "e2<-x[2*n,]\n"; print TMP "e1 <- e1[e1!=\"NA\"]\n"; print TMP "e2 <- e2[e2!=\"NA\"]\n"; print TMP "l1 <- length(e1)\n"; print TMP "if (l1 == 0) {\n"; print TMP "i1[n] <- NA\n"; print TMP "i2[n] <- NA\n"; print TMP "sl[n] <- NA\n"; print TMP "r2[n] <- NA\n"; print TMP "r2t[n] <- NA\n"; print TMP "nt[n] <- NA\n"; print TMP "}\n"; print TMP "else {\n"; print TMP "e1 <- log(e1)\n"; print TMP "e2 <- log(e2)\n"; print TMP "ro <- ltsreg(e1-e2 ~ +1, quantile=ceiling(length(e1)*$quantile))\n" if $quantile > 0; print TMP "ro <- ltsreg(e1-e2 ~ +1)\n" if $quantile == 0; print TMP "sl[n] <- ro\$coefficients[[1]] / l2\n"; # #print TMP "if (sl[n] < 0) sl[n] <- min(e1-e2) / l2\n"; #print TMP "if (sl[n] > 0) sl[n] <- max(e1-e2) / l2\n"; # print TMP "res <- ro\$residuals\n"; print TMP "ord <- order(res^2)\n"; print TMP "nn <- floor(l1/2)+1\n"; print TMP "nt[n] <- l1\n"; print TMP "i1[n] <- exp(mean(e1[ord[1:nn]]))\n"; print TMP "i2[n] <- exp(mean(e2[ord[1:nn]]))\n"; print TMP "r2[n] <- sqrt(mean(res[ord[1:nn]]^2)/l2)\n"; print TMP "r2t[n] <- sqrt(mean(res^2)/l2)\n"; print TMP "}\n"; print TMP "}\n"; print TMP "reg <- data.frame(i1,i2,sl,r2,r2t,nt)\n"; print TMP "write.table(reg, file = \"$outfile_Rout\", sep=\"\\t\", quote=F, row.names=F, col.names=F)\n"; close(TMP); system("R --vsize=80M --no-save --slave --quiet < $outfile.Reg\n"); unlink("$outfile.Reg"); } sub call_R_sgf { use strict; open(RDAT, ">$R_script_sgf_dat"); print RDAT "Label\tI1\tI2\tNgood\n"; foreach $label (keys %output) { unless (@{$output{$label}}[2] eq "NA") { print RDAT $label, "\t", @{$output{$label}}[0], "\t", @{$output{$label}}[1], "\t", @{$output{$label}}[5], "\n"; } } close(RDAT); my $ng_co = 3; open(RS, ">$R_script_sgf"); print RS "l2 <- log(2.0)\n"; print RS "dat <- read.table(\"$R_script_sgf_dat\", sep=\"\\t\", header=T, as.is=c(1))\n"; print RS "lgm0 <- ( log(dat\$I1) + log(dat\$I2) )/2\n"; print RS "lab <- dat\$Label\n"; print RS "dat <- subset(dat, Ngood != \"NA\" & Ngood > $ng_co)\n"; print RS "lgm <- ( log(dat\$I1) + log(dat\$I2) )/2\n"; print RS "lrat <- log(dat\$I1) - log(dat\$I2)\n"; print RS "lrat <- lrat^2\n"; print RS "library(modreg)\n"; print RS "if ($Nloess > 0) {\n"; print RS " ri <- floor(runif($Nloess, 2, length(lrat)))\n"; print RS " ri <- c(ri, 1, length(lrat))\n"; print RS " lgm <- lgm[ri]\n"; print RS " lrat <- lrat[ri]\n"; print RS " w<-rep(1, length(lrat))\n"; print RS "}\n"; print RS "w[1]<-0\n"; print RS "w[length(lrat)]<-0\n"; print RS "w<-rep(1, length(lrat))\n"; print RS "w[0:10]<-0\n"; print RS "w[(length(lrat)-10):length(lrat)]<-0\n"; print RS "l <- loess(lrat ~ lgm, weights=w, span=$span, family=\"symmetric\", iterations=$loess_it, trace.hat=\"approximate\")\n"; print RS "min <- min(lgm)\n"; print RS "max <- max(lgm)\n"; print RS "lgm0[lgm0 > max] <- max-0.001\n"; print RS "lgm0[lgm0 < min] <- min+0.001\n"; print RS "locsig <- predict(l, newdata=lgm0)\n"; print RS "minp <- min(locsig[locsig>0])\n"; print RS "locsig[locsig<=0] <- minp\n"; print RS "out <- data.frame(lab, sqrt(locsig)/l2)\n"; print RS "write.table(out, file=\"$R_script_out_sgf\", row.names=F, quote=F, sep=\"\\t\")\n"; close(RS); system("R --vsize=128M --no-save --slave --quiet < $R_script_sgf\n"); } sub read_R_sgf{ open (ROUT, $R_script_out_sgf); my @local=; close(ROUT); chomp @local; my @sgf; foreach (@local) { @sgf = split(/\t/); $sgf{$sgf[0]} = $sgf[1]; } } sub local_scaling_R{ use strict; open(RDAT, ">$R_local_reg_dat"); print RDAT "Label\tI1\tI2\tl2ratio\tNgood\n"; foreach $label (keys %output) { unless (@{$output{$label}}[2] eq "NA") { print RDAT $label, "\t", @{$output{$label}}[0], "\t", @{$output{$label}}[1], "\t", @{$output{$label}}[2], "\t", @{$output{$label}}[5],"\n"; } } close(RDAT); my $ng_co = 3; my $l2r_co = 1; open(RS, ">$R_local_reg"); print RS "dat <- read.table(\"$R_local_reg_dat\", sep=\"\\t\", header=T, as.is=c(1))\n"; print RS "lgm0 <- ( log(dat\$I1) + log(dat\$I2) )/2\n"; print RS "lab <- dat\$Label\n"; print RS "dat <- subset(dat, l2ratio != \"NA\" & abs(l2ratio) < $l2r_co & Ngood > $ng_co)\n"; print RS "lgm <- ( log(dat\$I1) + log(dat\$I2) )/2\n"; print RS "lrat <- log(dat\$I1) - log(dat\$I2)\n"; print RS "library(modreg)\n"; print RS "if ($Nloess > 0) {\n"; print RS " ri <- floor(runif($Nloess, 2, length(lrat)))\n"; print RS " ri <- c(ri, 1, length(lrat))\n"; print RS " lgm <- lgm[ri]\n"; print RS " lrat <- lrat[ri]\n"; print RS " w<-rep(1, length(lrat))\n"; print RS "}\n"; print RS "w<-rep(1, length(lrat))\n"; print RS "w[1]<-0\n"; print RS "w[length(lrat)]<-0\n"; print RS "l <- loess(lrat ~ lgm, weights=w, family=\"symmetric\", trace.hat=\"approximate\")\n"; print RS "min <- min(lgm)\n"; print RS "max <- max(lgm)\n"; print RS "lgm0[lgm0 > max] <- max-0.001\n"; print RS "lgm0[lgm0 < min] <- min+0.001\n"; print RS "locscale <- predict(l, newdata=lgm0)\n"; print RS "out <- data.frame(lab, locscale)\n"; print RS "write.table(out, file=\"$R_out_local_reg\", row.names=F, quote=F, sep=\"\\t\")\n"; close(RS); system("R --vsize=128M --no-save --slave --quiet < $R_local_reg\n"); } sub read_local_scaling_R{ open (ROUT, $R_out_local_reg); my @local=; close(ROUT); chomp @local; my @local_reg; foreach (@local) { @local_reg = split(/\t/); $scale_local{$local_reg[0]} = $local_reg[1]; } } sub read_R_pvalues { open (ROUT, $R_script_out_pval); open (LABELS, $tmpf2); my @pvalues=; chomp @pvalues; my @labels=; chomp @labels; if (scalar @pvalues != scalar @labels) {print REP "Error in the p- values!\n"} my $n; for($n=0; $n; chomp @reg; my @labels=; chomp @labels; my @line; if (scalar @reg != scalar @labels) {print REP "Error in the regression!\n"} my $n; for($n=0; $n){ if (/^Background=/) { $bkg=(split(/=/))[1]; } if (/^Std_Dev_Background=/) { $sigma=(split(/=/))[1]; } if (/^Saturation_Cutoff=/) { $sat=(split(/=/))[1]; last; } } close(TMP); die "Could not read the background!" if not defined $bkg; die "Could not read sigma!" if not defined $sigma; die "Could not read saturation!" if not defined $sat; return ($bkg, $sigma, $sat); } sub scale_global () { use strict; my $scale = 2**$scale_global; foreach $label (keys %data) { my $n; my $ncells = scalar @{$data{$label}}; for ($n=0; $n < $ncells; $n++) { $data{$label} -> [$n] -> [0] -> [0] /= $scale; } unless ( @{$output{$label}}[2] eq "NA") { @{$output{$label}}[0] /= $scale; @{$output{$label}}[2] -= $scale_global; } } } sub scale_local () { use strict; foreach $label (keys %data) { my ($n, $scale); if (exists $scale_local{$label}) { $scale = 2**$scale_local{$label}; my $ncells = scalar @{$data{$label}}; for ($n=0; $n < $ncells; $n++) { $data{$label} -> [$n] -> [0] -> [0] /= $scale; } unless ( @{$output{$label}}[2] eq "NA") { @{$output{$label}}[0] /= $scale; @{$output{$label}}[2] -= $scale_local{$label}; } } } } sub flag { return "N" if $_[0] eq "NA"; return "N" if $_[0] < $noisecut*$_[1]; return "G" if $_[0] > $noisecut*$_[1] && $_[0] <= int($_[2]); return "S" if $_[0] >= int($_[2]); } sub perc { my @good=(); my $length=0; my $el; foreach $el (@_) { if ($el ne "NA"){ push @good, $el; $length++;} } if ($length==0) {return qw (NA NA NA NA NA);} my @b = sort {$a <=> $b} @good; my ($med, $first_q, $third_q); if ($length % 2 == 1) { my $mid=int($length/2); $med = $b[$mid] ; } else { my $mid=$length/2; $med = ($b[$mid-1] + $b[$mid]) / 2; } if ($length 0 ; return "NA"; }