Calculate synthetic NBBO from prop feeds
Overview
In this example, we will calculate a synthetic NBBO by taking the best bid and offer seen across the different exchanges. Unlike the official SIP NBBO, which ignores odd lots and is calculated by CTA and UTP in their datacenters, this synthetic NBBO is derived from the direct prop feeds. Databento provides top-of-book coverage for all equity exchanges (except LTSE, which accounts for <0.01% of total market volume). Databento captures all equity data in the NY4 datacenter with PTP timestamps. See our timestamping guide for more information.
Design
We'll use the MBP-1 schema and download data for the different equity exchanges. Next, we'll process the data sequentially based on ts_recv, which is the timestamp when Databento received the data from the exchange. We'll calculate the synthetic NBBO by taking the best bid and offer across these exchanges. After that, we'll plot this over a 100-millisecond window to take a closer look at how the synthetic NBBO reacts when the price moves.
Example
import os
from collections import defaultdict
from dataclasses import dataclass, field
from heapq import merge
from typing import Union

import databento as db
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd


@dataclass()
class PriceLevel:
    """One side of a top-of-book quote: price, size, order count and update time."""

    price: float
    size: int = 0
    count: int = 0
    # Set from the message's ts_recv when built by MbpBook.apply; stays 0 for
    # the synthetic levels produced by MbpMarket.aggregated_bbo.
    update_time: int = 0

    def __str__(self) -> str:
        return f"{self.size:4} @ {self.price:6.2f} | {self.count:2} order(s)"

    @staticmethod
    def _bid_sort(r) -> tuple[float, int, int]:
        """Sort key for bids; under ``max`` it prefers higher price, then larger size, then earlier update."""
        return r.price, r.size, -r.update_time

    @staticmethod
    def _ask_sort(r) -> tuple[float, int, int]:
        """Sort key for offers; under ``max`` it prefers lower price, then larger size, then earlier update."""
        return -r.price, r.size, -r.update_time


@dataclass()
class MbpBook:
    """Top-of-book state (best bid and best offer) for one instrument on one publisher."""

    offer: Union[PriceLevel, None] = field(default=None)
    bid: Union[PriceLevel, None] = field(default=None)

    def bbo(self) -> tuple[Union[PriceLevel, None], Union[PriceLevel, None]]:
        """Return the current ``(bid, offer)`` pair; either side may be ``None``."""
        return self.bid, self.offer

    def apply(self, mbp1: db.MBP1Msg) -> None:
        """Replace both sides of the book with the first level carried by ``mbp1``.

        A side whose price equals ``db.UNDEF_PRICE`` is cleared to ``None``.
        """
        level = mbp1.levels[0]
        ts_recv = mbp1.ts_recv

        if level.bid_px == db.UNDEF_PRICE:
            self.bid = None
        else:
            self.bid = PriceLevel(level.pretty_bid_px, level.bid_sz, level.bid_ct, ts_recv)

        if level.ask_px == db.UNDEF_PRICE:
            self.offer = None
        else:
            self.offer = PriceLevel(level.pretty_ask_px, level.ask_sz, level.ask_ct, ts_recv)


@dataclass()
class MbpMarket:
    """Collection of per-publisher books, keyed by instrument ID and then publisher ID."""

    books: defaultdict[int, defaultdict[int, MbpBook]] = field(
        default_factory=lambda: defaultdict(lambda: defaultdict(MbpBook)),
    )

    def get_book(self, instrument_id: int, publisher_id: int) -> MbpBook:
        """Return the book for the given instrument and publisher, creating it if absent."""
        return self.books[instrument_id][publisher_id]

    def bbo(
        self,
        instrument_id: int,
        publisher_id: int,
    ) -> tuple[Union[PriceLevel, None], Union[PriceLevel, None]]:
        """Return the ``(bid, offer)`` of a single publisher's book."""
        return self.books[instrument_id][publisher_id].bbo()

    def _venue_bbos(
        self,
        instrument_id: int,
    ) -> list[tuple[Union[PriceLevel, None], Union[PriceLevel, None]]]:
        """Collect the ``(bid, offer)`` pair of every book held for ``instrument_id``."""
        return [book.bbo() for book in self.books[instrument_id].values()]

    @staticmethod
    def _aggregate(levels: list[PriceLevel], reducer) -> Union[PriceLevel, None]:
        """Merge all levels sitting at the ``reducer``-best price into one level."""
        if not levels:
            return None
        best_price = reducer(lvl.price for lvl in levels)
        at_best = [lvl for lvl in levels if lvl.price == best_price]
        return PriceLevel(
            price=best_price,
            size=sum(lvl.size for lvl in at_best),
            count=sum(lvl.count for lvl in at_best),
        )

    def aggregated_bbo(
        self,
        instrument_id: int,
    ) -> tuple[Union[PriceLevel, None], Union[PriceLevel, None]]:
        """Calculate the aggregated BBO across all venues.

        Size and order count are summed over every venue quoting at the best
        price. A side is ``None`` when no venue has a quote on it, which
        includes the case where no book exists yet for ``instrument_id``.
        """
        venue_bbos = self._venue_bbos(instrument_id)
        # Split the sides explicitly rather than indexing a transposed zip,
        # which fails when there are no books at all.
        bids = [bid for bid, _ in venue_bbos if bid is not None]
        offers = [offer for _, offer in venue_bbos if offer is not None]
        return self._aggregate(bids, max), self._aggregate(offers, min)

    def consolidated_bbo(
        self,
        instrument_id: int,
    ) -> tuple[Union[PriceLevel, None], Union[PriceLevel, None]]:
        """Return the single best bid and best offer across all venues.

        Ties are broken by ``PriceLevel._bid_sort`` / ``PriceLevel._ask_sort``.
        A side is ``None`` when no venue has a quote on it, which includes the
        case where no book exists yet for ``instrument_id``.
        """
        venue_bbos = self._venue_bbos(instrument_id)
        best_bid = max(
            (bid for bid, _ in venue_bbos if bid is not None),
            key=PriceLevel._bid_sort,
            default=None,
        )
        best_offer = max(
            (offer for _, offer in venue_bbos if offer is not None),
            key=PriceLevel._ask_sort,
            default=None,
        )
        return best_bid, best_offer

    def apply(self, msg: db.MBP1Msg) -> None:
        """Route ``msg`` to the book selected by its instrument and publisher IDs."""
        book = self.books[msg.instrument_id][msg.publisher_id]
        book.apply(msg)


if __name__ == "__main__":
    equity_datasets = [
        "XNAS.ITCH",  # Nasdaq
        "XBOS.ITCH",  # Nasdaq BX
        "XPSX.ITCH",  # Nasdaq PSX
        "XNYS.PILLAR",  # NYSE
        "ARCX.PILLAR",  # NYSE Arca
        "XASE.PILLAR",  # NYSE American
        "XCHI.PILLAR",  # NYSE Texas
        "XCIS.TRADESBBO",  # NYSE National
        "MEMX.MEMOIR",  # Members Exchange
        "EPRL.DOM",  # MIAX Pearl
        "IEXG.TOPS",  # IEX
        "BATS.PITCH",  # Cboe BZX
        "BATY.PITCH",  # Cboe BYX
        "EDGA.PITCH",  # Cboe EDGA
        "EDGX.PITCH",  # Cboe EDGX
    ]
    symbol = "NVDA"
    start = pd.Timestamp(2025, 6, 17, 9, 30, tz="US/Eastern")
    end = pd.Timestamp(2025, 6, 17, 10, 0, tz="US/Eastern")
    schema = "mbp-1"

    client = db.Historical(key="$YOUR_API_KEY")

    # Get data for all datasets, reusing a local file when one already exists
    dataset_data_dict: dict[str, db.DBNStore] = {}
    for dataset in equity_datasets:
        dataset_name = dataset.replace(".", "-").lower()
        data_path = f"{dataset_name}-{symbol}-{start.date().isoformat().replace('-', '')}.{schema}.dbn.zst"

        if os.path.exists(data_path):
            data = db.DBNStore.from_file(data_path)
        else:
            data = client.timeseries.get_range(
                dataset=dataset,
                start=start,
                end=end,
                symbols=symbol,
                schema=schema,
                path=data_path,
            )
        dataset_data_dict[dataset] = data

    # Merge all datasets into one stream sorted by ts_recv
    data = merge(*dataset_data_dict.values(), key=lambda x: x.ts_recv)

    # Iterate over the records and calculate the consolidated BBO
    # NOTE(review): books are grouped by record.instrument_id, so this assumes
    # the symbol carries the same instrument_id in every dataset - confirm.
    cbbo_list: list[tuple[pd.Timestamp, float, float]] = []
    market = MbpMarket()
    for record in data:
        market.apply(record)
        best_bid, best_offer = market.consolidated_bbo(record.instrument_id)
        cbbo_list.append((
            record.pretty_ts_recv,
            best_bid.price if best_bid is not None else float("Nan"),
            best_offer.price if best_offer is not None else float("Nan"),
        ))

    # Create DataFrame
    df = pd.DataFrame(cbbo_list, columns=["Timestamp", "Bid", "Offer"])
    df = df.set_index("Timestamp")
    # A locked market (bid == offer) is flagged together with crossed ones
    df["is_crossed"] = df["Bid"] >= df["Offer"]

    # Now we'll plot a small slice of time when the book is crossed
    start_time = pd.Timestamp(2025, 6, 17, 9, 56, 31, 650000, tz="US/Eastern")
    end_time = pd.Timestamp(2025, 6, 17, 9, 56, 31, 750000, tz="US/Eastern")
    df = df.loc[start_time:end_time]

    fig, ax = plt.subplots(figsize=(11, 6))

    # Shade periods where book is not crossed green
    plt.fill_between(
        df.index,
        df["Bid"],
        df["Offer"],
        where=~df["is_crossed"],
        alpha=0.2,
        linewidth=0,
        color="green",
        step="post",
        label="Normal market",
    )

    # Shade periods where book is crossed red
    plt.fill_between(
        df.index,
        df["Offer"],
        df["Bid"],
        where=df["is_crossed"],
        alpha=0.2,
        linewidth=0,
        color="red",
        step="post",
        label="Crossed market",
    )

    # Plot BBO lines: each quote is drawn as a horizontal segment that lasts
    # until the timestamp of the next row
    for col, color in [("Offer", "C1"), ("Bid", "C0")]:
        plt.hlines(
            y=df[col][:-1],
            xmin=df.index[:-1],
            xmax=df.index[1:],
            colors=color,
            label=col,
        )

    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%H:%M:%S.%f", tz="US/Eastern"))
    plt.ylabel("Price (USD)")
    plt.xlabel("Timestamp (ET)")
    plt.title(f"{symbol} synthetic NBBO")
    plt.legend()
    plt.tight_layout()
    plt.show()
Results
Notice that the synthetic NBBO may occasionally appear crossed, unlike the regulatory NBBO published by the SIPs. This is expected behavior due to two factors: first, proprietary feeds include odd lot quotations, which are excluded from SIP NBBO calculations; second, the feeds originate from different data centers, resulting in receive-time deltas. These characteristics can temporarily produce crossed markets, but they also enable the construction of a more predictive microprice.