|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +""" |
| 3 | +Created on Wed Aug 26 09:39:56 2017 |
| 4 | +
|
| 5 | +@author: zmzhang |
| 6 | +""" |
| 7 | + |
| 8 | +import pandas as pd |
| 9 | +import subprocess, os, sys |
| 10 | +import pyopenms |
| 11 | +from _pymass import mzXMLParser |
| 12 | +import _pymass as pm |
| 13 | +from FPIC import data2mzxml, tic, toc |
| 14 | +import numpy as np |
| 15 | + |
| 16 | +df_std = pd.DataFrame(columns=['name','rt', 'mz']) |
| 17 | +df_std.loc[len(df_std)] = ['propionyl', 1.4, 221.15751] |
| 18 | +df_std.loc[len(df_std)] = ['nialamide', 5.7, 299.15025] |
| 19 | +df_std.loc[len(df_std)] = ['sulfadimethoxine', 8.4, 317.11851] |
| 20 | +df_std.loc[len(df_std)] = ['reserpine', 10.6, 609.28065] |
| 21 | +df_std.loc[len(df_std)] = ['terfenadine', 12.4, 472.32099] |
| 22 | +df_std.loc[len(df_std)] = ['hexadecanoyl', 16.3, 403.36096] |
| 23 | +df_std.loc[len(df_std)] = ['octadecanoyl', 18.1, 431.39227] |
| 24 | + |
| 25 | +df_std.rt = df_std.rt * 60 |
| 26 | + |
| 27 | + |
| 28 | +def FeatureFindingMetabo(mzfile, noise_threshold_int, snr): |
| 29 | + finder = 'C:/Program Files/OpenMS/bin/FeatureFinderMetabo.exe' |
| 30 | + feature_file = 'tmp.featureXML' |
| 31 | + noise_threshold_int = noise_threshold_int / snr |
| 32 | + subprocess.call([finder, '-in', mzfile, '-out', feature_file, |
| 33 | + '-algorithm:common:noise_threshold_int', f'{noise_threshold_int}', |
| 34 | + '-algorithm:common:chrom_peak_snr', f'{snr}', |
| 35 | + '-algorithm:common:chrom_fwhm', '10', |
| 36 | + '-algorithm:mtd:mass_error_ppm', '20', |
| 37 | + '-algorithm:mtd:reestimate_mt_sd', 'true', |
| 38 | + '-algorithm:mtd:min_sample_rate', '0', |
| 39 | + '-algorithm:mtd:min_trace_length', '2', |
| 40 | + '-algorithm:epd:width_filtering', 'off', |
| 41 | + '-algorithm:ffm:charge_lower_bound', '1', |
| 42 | + '-algorithm:ffm:charge_lower_bound', '5']) |
| 43 | + featuremap = pyopenms.FeatureMap() |
| 44 | + featurexml = pyopenms.FeatureXMLFile() |
| 45 | + featurexml.load(feature_file, featuremap) |
| 46 | + os.remove(feature_file) |
| 47 | + return featuremap |
| 48 | + |
| 49 | + |
| 50 | +def parse_featureXML_FFM(featuremap): |
| 51 | + df = pd.DataFrame(columns=['rt', 'mz', 'intensity']) |
| 52 | + for i in range(featuremap.size()): |
| 53 | + feature = featuremap[i] |
| 54 | + isotope_distances = feature.getMetaValue(b'isotope_distances') |
| 55 | + rt = feature.getRT() |
| 56 | + mz = feature.getMZ() |
| 57 | + intensity = feature.getIntensity() |
| 58 | + for j in range(feature.getMetaValue(b'num_of_masstraces')): |
| 59 | + if j == 0: |
| 60 | + df.loc[len(df)] = [rt, mz, intensity] |
| 61 | + else: |
| 62 | + mz_delta = isotope_distances[j-1] |
| 63 | + mz = mz + mz_delta |
| 64 | + df.loc[len(df)] = [rt, mz, intensity] |
| 65 | + return df |
| 66 | + |
| 67 | +def pics2df(pics): |
| 68 | + df = pd.DataFrame(columns=['rt', 'mz', 'intensity']) |
| 69 | + for i,pic in enumerate(pics): |
| 70 | + idx = pic[:,2].argmax() |
| 71 | + rt = pic[idx,0] |
| 72 | + mz = pic[idx,1] |
| 73 | + intensity = pic[idx,2] |
| 74 | + df.loc[len(df)] = [rt, mz, intensity] |
| 75 | + return df |
| 76 | + |
| 77 | +cs = ['1000', '0500', '0200', '0100', '0050', '0020', '0010', '0005', '0002', '0001', '0000'] |
| 78 | + |
| 79 | +result_ffm = pd.DataFrame(np.float64('NaN'),index = range(len(cs)), |
| 80 | + columns=df_std.name.tolist() + ['concentration']) |
| 81 | +result_fpic = pd.DataFrame(np.float64('NaN'),index = range(len(cs)), |
| 82 | + columns=df_std.name.tolist() + ['concentration']) |
| 83 | + |
| 84 | +for k,c in enumerate(cs): |
| 85 | + result_ffm.at[k,'concentration'] = c |
| 86 | + result_fpic.at[k,'concentration'] = c |
| 87 | + name = f'2012_02_03_PStd_{c}_3' |
| 88 | + mzMLfile = f'MTBLS234/{name}.mzML' |
| 89 | + mzXMLfile = f'MTBLS234/{name}.mzxml' |
| 90 | + intensity = 30 |
| 91 | + feature_map = FeatureFindingMetabo(mzMLfile, intensity, 3) |
| 92 | + df_ffm = parse_featureXML_FFM(feature_map) |
| 93 | + |
| 94 | + tic() |
| 95 | + parser=mzXMLParser() |
| 96 | + lcms = parser.parseFile(mzXMLfile.encode(sys.getfilesystemencoding())) |
| 97 | + pics_c = pm.FPICs(lcms, intensity, 200.0, 0.5) |
| 98 | + toc() |
| 99 | + df_fpic = pics2df(pics_c) |
| 100 | + |
| 101 | + for i in range(len(df_std)): |
| 102 | + mz = df_std.at[i, 'mz'] |
| 103 | + rt = df_std.at[i, 'rt'] |
| 104 | + mz_tol = 0.014 |
| 105 | + if i == 6: # peak shape of Octadecanoyl-L-carnitine-d3 is bad |
| 106 | + rt_tol = 60 |
| 107 | + else: |
| 108 | + rt_tol = 30 |
| 109 | + mz_l = mz - mz_tol |
| 110 | + mz_u = mz + mz_tol |
| 111 | + rt_l = rt - rt_tol |
| 112 | + rt_u = rt + rt_tol |
| 113 | + query_str = f'{mz_l} < mz < {mz_u} and {rt_l} < rt < {rt_u}' |
| 114 | + |
| 115 | + |
| 116 | + df_ffm_filtered = df_ffm.query(query_str) |
| 117 | + if len(df_ffm_filtered) >0: |
| 118 | + result_ffm.at[k, df_std.at[i,'name']] = df_ffm_filtered.intensity.values.max() |
| 119 | + |
| 120 | + df_fpic_filtered = df_fpic.query(query_str) |
| 121 | + if len(df_fpic_filtered) >0: |
| 122 | + result_fpic.at[k, df_std.at[i,'name']] = df_fpic_filtered.intensity.values.max() |
| 123 | + |
| 124 | + |
| 125 | + |
| 126 | +cc_ffm_rd = result_ffm.corr().round(4) |
| 127 | +cc_fpic_rd = result_fpic.corr().round(4) |
| 128 | +result_ffm_rd = result_ffm.round(0) |
| 129 | +result_fpic_rd = result_fpic.round(0) |
0 commit comments