
Commit f86e66c

Batch inference uses bundles, not endpoints (and also functions) (#18)
* does batch task work? let's find out!
* typo
* make batch file
* kinda wip: pass in a list of urls instead of a file to batch inference
* batch file location
* comment the deficiencies
* mypy
* rename route to match TDD
* use the batch task input signedURL endpoint
* make the change in the right code place, oops
1 parent dbb0e77 commit f86e66c

4 files changed: +105 -5 lines changed

launch/client.py

Lines changed: 72 additions & 5 deletions
@@ -3,6 +3,7 @@
 import os
 import shutil
 import tempfile
+from io import StringIO
 from typing import Any, Callable, Dict, List, Optional, TypeVar, Union

 import cloudpickle
@@ -13,13 +14,17 @@
 from launch.constants import (
     ASYNC_TASK_PATH,
     ASYNC_TASK_RESULT_PATH,
+    BATCH_TASK_INPUT_SIGNED_URL_PATH,
+    BATCH_TASK_PATH,
+    BATCH_TASK_RESULTS_PATH,
     ENDPOINT_PATH,
     MODEL_BUNDLE_SIGNED_URL_PATH,
     SCALE_LAUNCH_ENDPOINT,
     SYNC_TASK_PATH,
 )
 from launch.errors import APIError
 from launch.find_packages import find_packages_from_imports, get_imports
+from launch.make_batch_file import make_batch_input_file
 from launch.model_bundle import ModelBundle
 from launch.model_endpoint import (
     AsyncEndpoint,
@@ -84,6 +89,7 @@ def __init__(
         self.connection = Connection(api_key, endpoint)
         self.self_hosted = self_hosted
         self.upload_bundle_fn: Optional[Callable[[str, str], None]] = None
+        self.upload_batch_csv_fn: Optional[Callable[[str, str], None]] = None
         self.endpoint_auth_decorator_fn: Callable[
             [Dict[str, Any]], Dict[str, Any]
         ] = lambda x: x
@@ -108,11 +114,27 @@ def register_upload_bundle_fn(
         See register_bundle_location_fn for more notes on the signature of upload_bundle_fn

         Parameters:
-            upload_bundle_fn: Function that takes in a serialized bundle, and uploads that bundle to an appropriate
+            upload_bundle_fn: Function that takes in a serialized bundle (bytes type) and uploads that bundle to an appropriate
                 location. Only needed for self-hosted mode.
         """
         self.upload_bundle_fn = upload_bundle_fn

+    def register_upload_batch_csv_fn(
+        self, upload_batch_csv_fn: Callable[[str, str], None]
+    ):
+        """
+        For self-hosted mode only. Registers a function that handles batch text upload. This function is called as
+
+            upload_batch_csv_fn(csv_text, csv_url)
+
+        This function should directly write the contents of csv_text as a text string into csv_url.
+
+        Parameters:
+            upload_batch_csv_fn: Function that takes in CSV text (string type) and uploads that text to an appropriate
+                location. Only needed for self-hosted mode.
+        """
+        self.upload_batch_csv_fn = upload_batch_csv_fn
+
     def register_bundle_location_fn(
         self, bundle_location_fn: Callable[[], str]
     ):
@@ -744,20 +766,62 @@ def get_async_response(self, async_task_id: str) -> Dict[str, Any]:
         )
         return resp

-    def batch_async_request(self, endpoint_id: str, urls: List[str]):
+    def batch_async_request(
+        self,
+        bundle_name: str,
+        urls: List[str],
+        batch_url_file_location: Optional[str] = None,
+        serialization_format: str = "json",
+    ):
         """
         Sends a batch inference request to the Model Endpoint at endpoint_id, returns a key that can be used to retrieve
         the results of inference at a later time.

         Parameters:
-            endpoint_id: The id of the endpoint to make the request to
+            bundle_name: The name of the bundle to make the request to
+            serialization_format: Serialization format of output, either 'pickle' or 'json'.
+                'pickle' corresponds to pickling results + returning
             urls: A list of urls, each pointing to a file containing model input.
                 Must be accessible by Scale Launch, hence urls need to either be public or signedURLs.
+            batch_url_file_location: In self-hosted mode, the input to the batch job will be uploaded
+                to this location if provided. Otherwise, one will be determined from bundle_location_fn()

         Returns:
             An id/key that can be used to fetch inference results at a later time
         """
-        raise NotImplementedError
+        f = StringIO()
+        make_batch_input_file(urls, f)
+        f.seek(0)
+
+        if self.self_hosted:
+            # TODO make this not use bundle_location_fn()
+            if batch_url_file_location is None:
+                file_location = self.bundle_location_fn()  # type: ignore
+            else:
+                file_location = batch_url_file_location
+            self.upload_batch_csv_fn(  # type: ignore
+                f.getvalue(), file_location
+            )
+        else:
+            model_bundle_s3_url = self.connection.post(
+                {}, BATCH_TASK_INPUT_SIGNED_URL_PATH
+            )
+            s3_path = model_bundle_s3_url["signedUrl"]
+            requests.put(s3_path, data=f.getvalue())
+            file_location = f"s3://{model_bundle_s3_url['bucket']}/{model_bundle_s3_url['key']}"
+
+        logger.info("Writing batch task csv to %s", file_location)
+
+        payload = dict(
+            input_path=file_location,
+            serialization_format=serialization_format,
+        )
+        payload = self.endpoint_auth_decorator_fn(payload)
+        resp = self.connection.post(
+            route=f"{BATCH_TASK_PATH}/{bundle_name}",
+            payload=payload,
+        )
+        return resp["job_id"]

     def get_batch_async_response(self, batch_async_task_id: str):
         """
@@ -770,4 +834,7 @@ def get_batch_async_response(self, batch_async_task_id: str):
         Returns:
             TODO Something similar to a list of signed s3URLs
         """
-        raise NotImplementedError
+        resp = self.connection.get(
+            route=f"{BATCH_TASK_RESULTS_PATH}/{batch_async_task_id}"
+        )
+        return resp
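
Taken together, batch_async_request now covers both modes: hosted users get a signed URL from BATCH_TASK_INPUT_SIGNED_URL_PATH and the client PUTs the CSV there itself, while self-hosted users supply their own uploader via register_upload_batch_csv_fn. A minimal usage sketch of the self-hosted path follows; the LaunchClient class name, the boto3 uploader, and all bucket/bundle names are illustrative assumptions, not part of this commit.

import boto3

from launch.client import LaunchClient  # assumed import path for the client class

client = LaunchClient(api_key="fake-key", self_hosted=True)

def upload_batch_csv_fn(csv_text: str, csv_url: str):
    # Hypothetical uploader matching the (csv_text, csv_url) signature:
    # writes the CSV text verbatim to an s3://bucket/key location.
    bucket, key = csv_url[len("s3://"):].split("/", 1)
    boto3.client("s3").put_object(Bucket=bucket, Key=key, Body=csv_text)

client.register_upload_batch_csv_fn(upload_batch_csv_fn)

job_id = client.batch_async_request(
    bundle_name="my-bundle",  # assumed bundle name
    urls=["s3://my-bucket/in0.json", "s3://my-bucket/in1.json"],
    batch_url_file_location="s3://my-bucket/batch-input.csv",
    serialization_format="json",
)
results = client.get_batch_async_response(job_id)  # poll later with the returned job id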

launch/constants.py

Lines changed: 3 additions & 0 deletions
@@ -1,8 +1,11 @@
 ENDPOINT_PATH = "endpoints"
 MODEL_BUNDLE_SIGNED_URL_PATH = "model_bundle_upload"
+BATCH_TASK_INPUT_SIGNED_URL_PATH = "batch_task_input_upload"
 ASYNC_TASK_PATH = "task_async"
 ASYNC_TASK_RESULT_PATH = "task/result"
 SYNC_TASK_PATH = "task_sync"
+BATCH_TASK_PATH = "batch_job"
+BATCH_TASK_RESULTS_PATH = "batch_job"
 SCALE_LAUNCH_ENDPOINT = "https://api.scale.com/v1/hosted_inference"

 DEFAULT_NETWORK_TIMEOUT_SEC = 120
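
Note that BATCH_TASK_PATH and BATCH_TASK_RESULTS_PATH currently resolve to the same "batch_job" route; job submission and result polling differ only in HTTP verb and path suffix. A small sketch of the routes the client code above composes (the bundle name and job id here are made up):

from launch.constants import BATCH_TASK_PATH, BATCH_TASK_RESULTS_PATH

# POST here to submit a batch job for a bundle...
submit_route = f"{BATCH_TASK_PATH}/my-bundle"        # -> "batch_job/my-bundle"
# ...and GET here to poll its results by job id.
results_route = f"{BATCH_TASK_RESULTS_PATH}/abc123"  # -> "batch_job/abc123"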

launch/make_batch_file.py

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+import csv
+from typing import IO, List
+
+
+def make_batch_input_file(urls: List[str], file: IO[str]):
+    writer = csv.DictWriter(file, fieldnames=["id", "url"])
+    writer.writeheader()
+    for i, url in enumerate(urls):
+        writer.writerow({"id": i, "url": url})
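
The helper simply emits an id,url CSV with one row per input URL. A quick sketch of its output (the URLs are made up; note that csv.DictWriter terminates rows with \r\n by default):

from io import StringIO

from launch.make_batch_file import make_batch_input_file

f = StringIO()
make_batch_input_file(["s3://bucket/a.json", "s3://bucket/b.json"], f)
print(f.getvalue())
# id,url
# 0,s3://bucket/a.json
# 1,s3://bucket/b.json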

tests/test_make_batch_file.py

Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+import csv
+from io import StringIO
+
+from launch.make_batch_file import make_batch_input_file
+
+
+def test_make_batch_file():
+    f = StringIO()
+    urls = ["one_url.count", "two_urls.count", "three_urls.count"]
+    make_batch_input_file(urls, f)
+    f.seek(0)
+
+    reader = csv.DictReader(f)
+    rows = [row for row in reader]
+    print(f.getvalue())
+    print(rows)
+    for tup in zip(enumerate(urls), rows):
+        print(tup)
+        (i, expected_row), actual_row = tup
+        assert str(i) == actual_row["id"]
+        assert expected_row == actual_row["url"]
