% Lung data preparation script.
%
% Runs a sequence of independent file-preparation tasks using the project
% "dataframe" package (TextArray, Tools):
%   1. build a chip-file-name <-> clinical-id mapping file
%   2. write a clinical file keyed by 'Genome #' and normalised via the map
%   3. build the RMA expression text array
%   4. create/apply a code table for the lung clinical columns
%   5. merge the clinical and gene expression files
%   6. create/apply a code table for 'clin_s.dfm'
%
% All inputs and outputs are files in the current directory; the script
% clears the workspace first.

clear
import dataframe.*;

%-------------------------------------------------------------------
% task 1
%-------------------------------------------------------------------
% make chip--clinical id mapping file
ChipInfo = TextArray('listname', '\t', 'NaN', 0);

% the first column stores the chip file name
ChipNames = ChipInfo.columnm(1);

% parse the chip file names into useful information (split on '_' and '.')
ChipInfo = TextArray(ChipNames, '_.');

% NOTE(review): the original comment said the *fourth* column holds the
% unique id matching the clinical names, but the code reads column 2.
% ChipID is not used again in this script; it is kept so that it remains
% available in the workspace. Confirm which column is intended.
ChipID = ChipInfo.columnm(2);

ClinInfo  = TextArray('Lung_fixed.clinical.txt', '\t', 'NaN', 1);
ClinNames = deblank(cellstr(char(ClinInfo.columnm('Genome #'))));

% The map pairs entries by position, so both lists must be the same size.
if numel(ClinNames) ~= numel(ChipNames)
    error('The number of chips do not match the number of samples');
end

columns = {ChipNames; ClinNames};            % 2x1, one cell per output column
headers = {'chip_file_name', 'clin_id'};

Map = TextArray(columns, headers);
Map.set('\t', 'NA', 1);
Map.save('lung_chip_clin_map.txt');

%-------------------------------------------------------------------
% task 2
%-------------------------------------------------------------------
% make normalized clinical file
clin = TextArray('Lung_fixed.clinical.txt', '\t', 'NaN', 1);
clin.setKeys(clin.columnm('Genome #'));
clin.saveAll('temp.txt');                    % intermediate file for the next call

% presumably re-keys temp.txt using columns 1 and 2 of the map file --
% TODO confirm against Tools.normaliseClinKeys
clin = Tools.normaliseClinKeys('lung_chip_clin_map.txt', 'temp.txt', 1, 2);
clin.save('Lung_fixed.clinical.dfm', true, false, true);

%-------------------------------------------------------------------
% task 3
%-------------------------------------------------------------------
% make standard rma text array
y = Tools.makeTextArray('lung_rma.dat', 'listname', 'h133+.genedescriptors');
y.save('lung_rma.dfm', true, false, true);

%-------------------------------------------------------------------
% task 4
%-------------------------------------------------------------------
% use code table on the lung clinical data
d = TextArray('Lung_fixed.clinical.dfm', 'tab', 'NaN', 1);

% clinical columns to be coded (1x9 cell row)
columns = {'Race', 'Sex', 'Stg', 'Diff', 'CellType', ...
           'VascInv', 'LymphInv', 'LN Inv', 'Pleural Inv'};

codetable = d.createCodeTablem(columns, true);
codetable.save('Lung.Clinical.CodeTable.dfm', true, false, true);

% then edit the codetable file as desired
% Reload the table that was just written. The original reloaded
% 'Duke.Clinical.CodeTable.dfm', which is only created by task 6 and is
% built from different columns.
codetable = TextArray('Lung.Clinical.CodeTable.dfm', 'tab', 'NaN', true);

% That sets up the code table, and now change the clinical data in d to
% numeric using this codetable.
% NOTE(review): in the collapsed original this call directly followed a
% bare '%'; it is treated as active here -- confirm it was not meant to be
% commented out.
d.applyCodeTable(codetable);

% NOTE(review): these output names look copied from a breast cancer script
% and are written again by task 6, which overwrites them. They are kept
% unchanged in case other scripts read them; consider lung-specific names.
d.saveAll('BreastCancer.Duke101.Clinical.Dec.20.04.dfm');   % saves as text file
d1 = d.getDataArray;
d1.saveAll('d1.dfm');

%-------------------------------------------------------------------
% task 5
%-------------------------------------------------------------------
% merge clinical and gene expression data
Tools.mergeCG('Lung_fixed.clinical.dfm', 'lung_rma.dfm', 'lung_combined.dfm');

%-------------------------------------------------------------------
% task 6
%-------------------------------------------------------------------
% use code table on clin_s.dfm
d = TextArray('clin_s.dfm', 'tab', 'NaN', 1);

% clinical columns to be coded (1x2 cell row)
columns = {'Study', 'Path Stg'};

codetable = d.createCodeTablem(columns, true);
codetable.save('Duke.Clinical.CodeTable.dfm', true, false, true);

% then edit the codetable file as desired
codetable = TextArray('Duke.Clinical.CodeTable.dfm', 'tab', 'NaN', true);

% That sets up the code table, and now change the clinical data in d to
% numeric using this codetable.
% NOTE(review): treated as an active call, same as in task 4 -- confirm.
d.applyCodeTable(codetable);

d.saveAll('BreastCancer.Duke101.Clinical.Dec.20.04.dfm');   % saves as text file
d1 = d.getDataArray;
d1.saveAll('d1.dfm');
%-------------------------------------------------------------------