Skip to content

Commit 0f64f35

Browse files
committed
Quantitative analysis of MTBLS234 dataset
1 parent c40126a commit 0f64f35

File tree

1 file changed

+129
-0
lines changed

1 file changed

+129
-0
lines changed

python/quantitative.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Wed Aug 26 09:39:56 2017
4+
5+
@author: zmzhang
6+
"""
7+
8+
import pandas as pd
9+
import subprocess, os, sys
10+
import pyopenms
11+
from _pymass import mzXMLParser
12+
import _pymass as pm
13+
from FPIC import data2mzxml, tic, toc
14+
import numpy as np
15+
16+
df_std = pd.DataFrame(columns=['name','rt', 'mz'])
17+
df_std.loc[len(df_std)] = ['propionyl', 1.4, 221.15751]
18+
df_std.loc[len(df_std)] = ['nialamide', 5.7, 299.15025]
19+
df_std.loc[len(df_std)] = ['sulfadimethoxine', 8.4, 317.11851]
20+
df_std.loc[len(df_std)] = ['reserpine', 10.6, 609.28065]
21+
df_std.loc[len(df_std)] = ['terfenadine', 12.4, 472.32099]
22+
df_std.loc[len(df_std)] = ['hexadecanoyl', 16.3, 403.36096]
23+
df_std.loc[len(df_std)] = ['octadecanoyl', 18.1, 431.39227]
24+
25+
df_std.rt = df_std.rt * 60
26+
27+
28+
def FeatureFindingMetabo(mzfile, noise_threshold_int, snr):
29+
finder = 'C:/Program Files/OpenMS/bin/FeatureFinderMetabo.exe'
30+
feature_file = 'tmp.featureXML'
31+
noise_threshold_int = noise_threshold_int / snr
32+
subprocess.call([finder, '-in', mzfile, '-out', feature_file,
33+
'-algorithm:common:noise_threshold_int', f'{noise_threshold_int}',
34+
'-algorithm:common:chrom_peak_snr', f'{snr}',
35+
'-algorithm:common:chrom_fwhm', '10',
36+
'-algorithm:mtd:mass_error_ppm', '20',
37+
'-algorithm:mtd:reestimate_mt_sd', 'true',
38+
'-algorithm:mtd:min_sample_rate', '0',
39+
'-algorithm:mtd:min_trace_length', '2',
40+
'-algorithm:epd:width_filtering', 'off',
41+
'-algorithm:ffm:charge_lower_bound', '1',
42+
'-algorithm:ffm:charge_lower_bound', '5'])
43+
featuremap = pyopenms.FeatureMap()
44+
featurexml = pyopenms.FeatureXMLFile()
45+
featurexml.load(feature_file, featuremap)
46+
os.remove(feature_file)
47+
return featuremap
48+
49+
50+
def parse_featureXML_FFM(featuremap):
51+
df = pd.DataFrame(columns=['rt', 'mz', 'intensity'])
52+
for i in range(featuremap.size()):
53+
feature = featuremap[i]
54+
isotope_distances = feature.getMetaValue(b'isotope_distances')
55+
rt = feature.getRT()
56+
mz = feature.getMZ()
57+
intensity = feature.getIntensity()
58+
for j in range(feature.getMetaValue(b'num_of_masstraces')):
59+
if j == 0:
60+
df.loc[len(df)] = [rt, mz, intensity]
61+
else:
62+
mz_delta = isotope_distances[j-1]
63+
mz = mz + mz_delta
64+
df.loc[len(df)] = [rt, mz, intensity]
65+
return df
66+
67+
def pics2df(pics):
68+
df = pd.DataFrame(columns=['rt', 'mz', 'intensity'])
69+
for i,pic in enumerate(pics):
70+
idx = pic[:,2].argmax()
71+
rt = pic[idx,0]
72+
mz = pic[idx,1]
73+
intensity = pic[idx,2]
74+
df.loc[len(df)] = [rt, mz, intensity]
75+
return df
76+
77+
cs = ['1000', '0500', '0200', '0100', '0050', '0020', '0010', '0005', '0002', '0001', '0000']
78+
79+
result_ffm = pd.DataFrame(np.float64('NaN'),index = range(len(cs)),
80+
columns=df_std.name.tolist() + ['concentration'])
81+
result_fpic = pd.DataFrame(np.float64('NaN'),index = range(len(cs)),
82+
columns=df_std.name.tolist() + ['concentration'])
83+
84+
for k,c in enumerate(cs):
85+
result_ffm.at[k,'concentration'] = c
86+
result_fpic.at[k,'concentration'] = c
87+
name = f'2012_02_03_PStd_{c}_3'
88+
mzMLfile = f'MTBLS234/{name}.mzML'
89+
mzXMLfile = f'MTBLS234/{name}.mzxml'
90+
intensity = 30
91+
feature_map = FeatureFindingMetabo(mzMLfile, intensity, 3)
92+
df_ffm = parse_featureXML_FFM(feature_map)
93+
94+
tic()
95+
parser=mzXMLParser()
96+
lcms = parser.parseFile(mzXMLfile.encode(sys.getfilesystemencoding()))
97+
pics_c = pm.FPICs(lcms, intensity, 200.0, 0.5)
98+
toc()
99+
df_fpic = pics2df(pics_c)
100+
101+
for i in range(len(df_std)):
102+
mz = df_std.at[i, 'mz']
103+
rt = df_std.at[i, 'rt']
104+
mz_tol = 0.014
105+
if i == 6: # peak shape of Octadecanoyl-L-carnitine-d3 is bad
106+
rt_tol = 60
107+
else:
108+
rt_tol = 30
109+
mz_l = mz - mz_tol
110+
mz_u = mz + mz_tol
111+
rt_l = rt - rt_tol
112+
rt_u = rt + rt_tol
113+
query_str = f'{mz_l} < mz < {mz_u} and {rt_l} < rt < {rt_u}'
114+
115+
116+
df_ffm_filtered = df_ffm.query(query_str)
117+
if len(df_ffm_filtered) >0:
118+
result_ffm.at[k, df_std.at[i,'name']] = df_ffm_filtered.intensity.values.max()
119+
120+
df_fpic_filtered = df_fpic.query(query_str)
121+
if len(df_fpic_filtered) >0:
122+
result_fpic.at[k, df_std.at[i,'name']] = df_fpic_filtered.intensity.values.max()
123+
124+
125+
126+
cc_ffm_rd = result_ffm.corr().round(4)
127+
cc_fpic_rd = result_fpic.corr().round(4)
128+
result_ffm_rd = result_ffm.round(0)
129+
result_fpic_rd = result_fpic.round(0)

0 commit comments

Comments
 (0)