maxim5
diff --git a/‎predict/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎predict/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎predict/ensemble.py‎
Lines changed: 31 additions & 0 deletions b/‎predict/ensemble.py‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎run_predict.py‎
Lines changed: 8 additions & 60 deletions b/‎run_predict.py‎
Lines changed: 8 additions & 60 deletions
diff --git a/‎run_visual.py‎
Lines changed: 5 additions & 40 deletions b/‎run_visual.py‎
Lines changed: 5 additions & 40 deletions
@@ -2,5 +2,5 @@
 # -*- coding: utf-8 -*-
 __author__ = 'maxim'
 
-from ensemble import Ensemble
+from ensemble import Ensemble, predict_multiple
 from model_io import get_model_info, ModelNotAvailable
@@ -2,9 +2,11 @@
 # -*- coding: utf-8 -*-
 __author__ = 'maxim'
 
+from itertools import izip, count
 import os
 
 import numpy as np
+import pandas as pd
 
 from train.job_info import parse_model_infos
 from util import *
@@ -53,3 +55,32 @@ def ensemble_top_models(job_info, top_n=5):
  models = [get_model_info(path, strict=False) for path in model_paths]
  top_models = [model for model in models if model.is_available()][:top_n]
  return Ensemble(top_models)
+
+
+def predict_multiple(job_info, raw_df, rows_to_predict, top_models_num=5):  # ensemble-predict the last `rows_to_predict` values of job_info.target; returns a DataFrame indexed by 'Time' with 'Prediction' and 'True' columns
+ debug('Predicting %s target=%s' % (job_info.name, job_info.target))
+ # keep one extra raw value: raw_targets[idx] is the base that the change for row idx + 1 is applied to
+ raw_targets = raw_df[job_info.target][-(rows_to_predict + 1):].reset_index(drop=True)
+ changes_df = to_changes(raw_df)  # presumably row-to-row relative changes of raw_df -- confirm against to_changes
+ target_changes = changes_df[job_info.target][-rows_to_predict:].reset_index(drop=True)
+ dates = changes_df.date[-rows_to_predict:].reset_index(drop=True)
+
+ df = changes_df[:-1] # the data for models is shifted by one: the target for the last row is unknown
+
+ ensemble = Ensemble.ensemble_top_models(job_info, top_n=top_models_num)
+ predictions = ensemble.predict_aggregated(df, last_rows=rows_to_predict)
+
+ result = []
+ for idx, date, prediction_change, target_change in izip(count(), dates, predictions, target_changes):
+ debug('%%-change on %s: predict=%+.5f target=%+.5f' % (date, prediction_change, target_change))
+
+ # target_change is approx. raw_targets[idx + 1] / raw_targets[idx] - 1.0, so the predicted change is scaled by the previous raw value
+ raw_target = raw_targets[idx + 1]
+ raw_predicted = (1 + prediction_change) * raw_targets[idx]
+ debug(' value on %s: predict= %.5f target= %.5f' % (date, raw_predicted, raw_target))
+
+ result.append({'Time': date, 'Prediction': raw_predicted, 'True': raw_target})
+ # one dict per predicted row becomes one DataFrame row; 'Time' is moved from a column to the index
+ result_df = pd.DataFrame(result)
+ result_df.set_index('Time', inplace=True)
+ return result_df
@@ -2,80 +2,28 @@
 # -*- coding: utf-8 -*-
 __author__ = 'maxim'
 
-
-import os
-import numpy as np
-
 import poloniex
 from predict import *
 from train import *
-from train.evaluator import Evaluator
 from util import *
 
-
-def try_model(path, data_dir='_data', zoo_dir='_zoo'):
- model_info = get_model_info(path)
- run_params = model_info.run_params
- job = JobInfo(data_dir, zoo_dir, run_params['name'], run_params['target'])
- raw_df = read_df(job.get_source_name())
- changes_df = to_changes(raw_df)
- data_set = to_dataset(changes_df, run_params['k'], run_params['target'], model_info.model_class.DATA_WITH_BIAS)
-
- model = model_info.model_class(**model_info.model_params)
- evaluator = Evaluator()
-
- with model.session():
- model.restore(model_info.path)
- test_eval, test_stats = evaluator.eval(model, data_set)
- info('Result:\n%sEval=%.6f\n' % (evaluator.stats_str(test_stats), test_eval))
-
-
-def predict_model(changes_df, path):
- model_info = get_model_info(path)
- run_params = model_info.run_params
- model = model_info.model_class(**model_info.model_params)
- x = to_dataset_for_prediction(changes_df[:-1], run_params['k'], model_info.model_class.DATA_WITH_BIAS)
- x = x[-1:]
-
- with model.session():
- model.restore(model_info.path)
- predicted = float(model.predict(x))
- info('Predicted change=%.5f' % predicted)
- return predicted
-
-
-def predict_all_models(changes_df, name, accept):
- home_dir = '_zoo/%s' % name
- models = [dir for dir in os.listdir(home_dir) if accept(dir)]
- if not models:
- info('No models found for %s' % name)
- return
-
- predictions = []
- for model in models:
- try:
- value = predict_model(changes_df, os.path.join(home_dir, model))
- predictions.append(value)
- except ModelNotAvailable as e:
- warn('Cannot use model from "%s": class "%s" is not available not this system' % (model, e.model_class))
- warn('Most probable reason is that model dependencies are not met')
- info()
- info('Mean predicted value for %s: %.5f' % (name, np.mean(predictions)))
- info()
-
-
 def main():
- tickers, periods, targets = parse_command_line(default_tickers=[],
+ tickers, periods, targets = parse_command_line(default_tickers=['BTC_ETH'],
  default_periods=['day'],
  default_targets=['high'])
 
  for ticker in tickers:
  for period in periods:
  for target in targets:
+ job = JobInfo('_data', '_zoo', name='%s_%s' % (ticker, period), target=target)
  raw_df = poloniex.get_latest_data(ticker, period=period, depth=100)
- changes_df = to_changes(raw_df)
- predict_all_models(changes_df, '%s_%s' % (ticker, period), lambda name: name.startswith('%s_' % target))
+ result_df = predict_multiple(job, raw_df=raw_df, rows_to_predict=1)  # only the most recent row is predicted
+ # with rows_to_predict=1 the "True" column holds the last target value present in raw_df, hence the rename below
+ raw_df.set_index('date', inplace=True)  # index by date so the tail printed below is labelled by date
+ result_df.rename(columns={"True": "Current-Truth"}, inplace=True)
 
+ info('Latest chart info:', raw_df.tail(2), '', sep='\n')
+ info('Prediction for "%s":' % target, result_df, '', sep='\n')
 
 if __name__ == '__main__':
  main()
@@ -2,50 +2,14 @@
 # -*- coding: utf-8 -*-
 __author__ = 'maxim'
 
-from itertools import izip, count
-
 import matplotlib.pyplot as plt
-import pandas as pd
 
-from predict import *
-from train import *
-from util import *
+from predict import predict_multiple
+from train import JobInfo
+from util import parse_command_line, read_df
 
 plt.style.use('ggplot')
 
-
-def predict_multiple(job_info, last_rows):
- debug('Predicting %s target=%s' % (job_info.name, job_info.target))
-
- raw_df = read_df(job_info.get_source_name())
- raw_targets = raw_df[job_info.target][-(last_rows + 1):].reset_index(drop=True)
-
- changes_df = to_changes(raw_df)
- target_changes = changes_df[job_info.target][-last_rows:].reset_index(drop=True)
- dates = changes_df.date[-last_rows:].reset_index(drop=True)
-
- df = changes_df[:-1] # the data for models is shifted by one: the target for the last row is unknown
-
- ensemble = Ensemble.ensemble_top_models(job_info)
- predictions = ensemble.predict_aggregated(df, last_rows=last_rows)
-
- result = []
- for idx, date, prediction_change, target_change in izip(count(), dates, predictions, target_changes):
- debug('%%-change on %s: predict=%+.5f target=%+.5f' % (date, prediction_change, target_change))
-
- # target_change is approx. raw_targets[idx + 1] / raw_targets[idx] - 1.0
- raw_target = raw_targets[idx + 1]
- raw_predicted = (1 + prediction_change) * raw_targets[idx]
- debug(' value on %s: predict= %.5f target= %.5f' % (date, raw_predicted, raw_target))
-
- result.append({'Time': date, 'Prediction': raw_predicted, 'True': raw_target})
-
- result_df = pd.DataFrame(result)
- result_df.set_index('Time', inplace=True)
- result_df.index.names = ['']
- return result_df
-
-
 def main():
  train_date = None
  tickers, periods, targets = parse_command_line(default_tickers=['BTC_ETH', 'BTC_LTC'],
@@ -56,7 +20,8 @@ def main():
  for period in periods:
  for target in targets:
  job = JobInfo('_data', '_zoo', name='%s_%s' % (ticker, period), target=target)
- result_df = predict_multiple(job, last_rows=120)
+ result_df = predict_multiple(job, raw_df=read_df(job.get_source_name()), rows_to_predict=120)
+ result_df.index.names = ['']
  result_df.plot(title=job.name)
 
  if train_date is not None: