shiyu-coder
diff --git a/tests/data/generate_regression_output.py b/tests/data/generate_regression_output.py
Lines changed: 91 additions & 0 deletions
Lines changed: 91 additions & 0 deletions
@@ -0,0 +1,91 @@
+import random
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import torch
+
+from model import Kronos, KronosPredictor, KronosTokenizer
+
+
# Fixture locations: the input CSV sits next to this script, and the generated
# output CSVs are written to the same directory.
TEST_DATA_ROOT = Path(__file__).parent
INPUT_DATA_PATH = TEST_DATA_ROOT.joinpath("regression_input.csv")
OUTPUT_DATA_DIR = TEST_DATA_ROOT

# MAX_CTX_LEN is passed to KronosPredictor as ``max_context``; one output
# fixture is generated for every entry in TEST_CTX_LEN.
MAX_CTX_LEN = 512
TEST_CTX_LEN = [512, 256]
# Number of future rows predicted for each fixture.
PRED_LEN = 8
# Columns fed to the predictor and written (after "timestamps") to the output.
FEATURE_NAMES = ["open", "high", "low", "close", "volume", "amount"]

# Revisions passed to ``from_pretrained`` for the model and the tokenizer.
MODEL_REVISION = "901c26c1332695a2a8f243eb2f37243a37bea320"
TOKENIZER_REVISION = "0e0117387f39004a9016484a186a908917e22426"
# Seed handed to set_seed() before generation.
SEED = 123

# Device handed to KronosPredictor.
DEVICE = "cpu"
+
+
def set_seed(seed: int) -> None:
    """Seed the Python, NumPy and PyTorch random number generators.

    When cuDNN is available, it is additionally switched to deterministic
    mode and its benchmark mode is turned off.

    Args:
        seed: Value passed unchanged to ``random.seed``, ``np.random.seed``
            and ``torch.manual_seed``.
    """
    # Same order as always: stdlib, then NumPy, then PyTorch.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    cudnn = torch.backends.cudnn
    if not cudnn.is_available():
        return
    cudnn.deterministic = True
    cudnn.benchmark = False
+
+
def generate_output(ctx_len: int, df: "pd.DataFrame | None" = None) -> None:
    """Generate and save the regression output fixture for one context length.

    The first ``ctx_len`` rows of ``df`` are used as context, the following
    ``PRED_LEN`` rows supply the future timestamps, and the prediction is
    written to ``OUTPUT_DATA_DIR / f"regression_output_{ctx_len}.csv"`` with
    the columns ``["timestamps"] + FEATURE_NAMES``.

    Args:
        ctx_len: Number of leading rows used as model context. Must be
            positive and no larger than ``MAX_CTX_LEN``.
        df: Input frame containing a ``"timestamps"`` column and every column
            in ``FEATURE_NAMES``. When omitted, ``INPUT_DATA_PATH`` is read,
            so the function no longer relies on a module-level ``df`` that
            only exists when the file is run as a script.

    Raises:
        ValueError: If ``ctx_len`` is out of range, if ``df`` has fewer than
            ``ctx_len + PRED_LEN`` rows, or if the prediction does not have
            shape ``(PRED_LEN, len(FEATURE_NAMES))``.
    """
    if ctx_len > MAX_CTX_LEN:
        raise ValueError(
            f"Context length for output generation ({ctx_len}) "
            f"cannot exceed maximum context length ({MAX_CTX_LEN})."
        )
    if ctx_len <= 0:
        raise ValueError(
            f"Context length for output generation must be positive, "
            f"got {ctx_len}."
        )

    if df is None:
        df = pd.read_csv(INPUT_DATA_PATH, parse_dates=["timestamps"])

    # Without this check a short input would silently yield fewer than
    # PRED_LEN future timestamps instead of failing loudly.
    required_rows = ctx_len + PRED_LEN
    if len(df) < required_rows:
        raise ValueError(
            f"Input data must have at least {required_rows} rows, "
            f"found {len(df)} instead."
        )

    # Re-seed for every fixture so each one starts from the same RNG state,
    # independent of how many fixtures were generated before it.
    set_seed(SEED)

    context_df = df.iloc[:ctx_len].reset_index(drop=True)
    future_timestamps = (
        df["timestamps"].iloc[ctx_len:required_rows].reset_index(drop=True)
    )

    # Revisions are passed explicitly so the loaded weights are pinned.
    tokenizer = KronosTokenizer.from_pretrained(
        "NeoQuasar/Kronos-Tokenizer-base", revision=TOKENIZER_REVISION
    )
    model = Kronos.from_pretrained(
        "NeoQuasar/Kronos-small", revision=MODEL_REVISION
    )
    tokenizer.eval()
    model.eval()

    predictor = KronosPredictor(
        model, tokenizer, device=DEVICE, max_context=MAX_CTX_LEN
    )

    with torch.no_grad():
        pred_df = predictor.predict(
            df=context_df[FEATURE_NAMES],
            x_timestamp=context_df["timestamps"],
            y_timestamp=future_timestamps,
            pred_len=PRED_LEN,
            T=1.0,
            top_k=1,
            top_p=1.0,
            verbose=False,
            sample_count=1,
        )

    expected_shape = (PRED_LEN, len(FEATURE_NAMES))
    if pred_df.shape != expected_shape:
        raise ValueError(f"Unexpected prediction shape: {pred_df.shape}")

    # Drop whatever index the predictor returned so the timestamps column
    # aligns positionally with the predicted rows.
    output_df = pred_df.reset_index(drop=True)
    output_df["timestamps"] = future_timestamps
    output_df = output_df[["timestamps"] + FEATURE_NAMES]

    output_path = OUTPUT_DATA_DIR / f"regression_output_{ctx_len}.csv"
    output_df.to_csv(output_path, index=False)
    print(f"Saved {ctx_len} fixture to {output_path}")
+
+
if __name__ == "__main__":
    # Seed every RNG before any data or model work happens.
    set_seed(SEED)

    # Load the input and check up front that it has enough rows for the
    # largest context window plus the prediction horizon. ``df`` is kept as
    # a module-level name under exactly this spelling.
    df = pd.read_csv(INPUT_DATA_PATH, parse_dates=["timestamps"])
    required_rows = MAX_CTX_LEN + PRED_LEN
    row_count = len(df)
    if row_count < required_rows:
        raise ValueError(
            f"Input data must have at least {required_rows} rows, "
            f"found {row_count} instead."
        )

    # One output fixture per configured context length.
    for ctx_len in TEST_CTX_LEN:
        generate_output(ctx_len)