#!/usr/bin/perl -w

# cdf2psc - extract perfect-match (PM) probe coordinates from a .cdf file.
#
# INPUT:  .cdf file
# OUTPUT: .psc file
#   creates a tab-separated table:  probesetid  x1  y1   (PM only)
#                                       "       x2  y2
#                                       "       x3  y3
#
# Assumes each PM (at y) has a corresponding MM (at y+1) in the same
# x column, so that within the sorted output order cells strictly alternate
# PM, MM, PM, MM, ...

use strict;
use warnings;
use Getopt::Long;

my $usage = "Usage: cdf2psc OPTIONS\n"
          . "where OPTIONS are:\n"
          . "-cdf .CDF FILE\n"
          . "-out output (.PSC FILE)\n"
          . "\n";

my ($cdf_file, $psc_file);
GetOptions(
    "cdf=s" => \$cdf_file,
    "out=s" => \$psc_file,
) or die $usage;

# Test with defined() rather than truthiness so a file literally named "0"
# is still accepted.
(defined $cdf_file && length $cdf_file && defined $psc_file && length $psc_file)
    or die $usage;

# $probeset{probeset id}{x}{y} = number of times that cell was seen.
my %probeset;

open(my $cdf_fh, '<', $cdf_file) or die "Can't open $cdf_file: $!\n";

# Filter out everything except "CellN=..." lines (the CellHeader line is
# skipped too) and pair each x,y coordinate with its probeset id.
while (my $line = <$cdf_fh>) {
    next unless $line =~ /^Cell/ && $line !~ /^CellHeader/;

    # Strip the line terminator (LF or CRLF) so it can never end up inside
    # the last field of a short line.
    $line =~ s/\r?\n\z//;

    # Tab-separated fields: field 0 is "CellN=<x>", field 1 is y and
    # field 4 is used as the probeset id.
    my ($x, $y, $probe) = (split /\t/, $line)[0, 1, 4];
    next unless defined $x && defined $y && defined $probe;

    # Keep only the value to the right of "CellN=".
    ($x) = (split /=/, $x, 2)[1];
    next unless defined $x;

    # The probeset id must contain at least one character that is neither
    # a digit nor whitespace.
    next unless $probe =~ /[^\d\s]/;

    $probeset{$probe}{$x}{$y}++;
}

close($cdf_fh) or die "Can't close $cdf_file: $!\n";

open(my $psc_fh, '>', $psc_file) or die "Can't create $psc_file: $!\n";

# Sort by probeset id (as a string), then x coordinate, then y coordinate
# (both numerically), and print every other cell: the first cell of each
# pair is taken to be the PM, the following one the MM.
#
# NOTE(review): the PM/MM toggle deliberately carries over between x
# columns and between probesets, exactly as in the original script. If the
# input ever contains an unpaired cell, every later row is shifted by one.
my $skip_next = 0;
for my $probe_id (sort keys %probeset) {
    my $ys_for_x = $probeset{$probe_id};
    for my $x (sort { $a <=> $b } keys %{$ys_for_x}) {
        for my $y (sort { $a <=> $b } keys %{ $ys_for_x->{$x} }) {
            if ($skip_next) {
                $skip_next = 0;    # skip MM
                next;
            }
            print {$psc_fh} "$probe_id\t$x\t$y\n"    # PM
                or die "Can't write to $psc_file: $!\n";
            $skip_next = 1;
        }
    }
}

# Buffered write errors surface at close, so this check matters.
close($psc_fh) or die "Can't close $psc_file: $!\n";