Module scrapfly.scrapy.request

Classes

class ScrapflyScrapyRequest (scrape_config: ScrapeConfig,
meta: Dict = {},
*args,
**kwargs)
Expand source code
class ScrapflyScrapyRequest(Request):
    """Scrapy ``Request`` subclass driven by a Scrapfly :class:`ScrapeConfig`.

    The URL, headers, cookies and body are all derived from the scrape
    config rather than passed directly; the config itself is stored in
    ``meta['scrapfly_scrape_config']`` so middlewares can retrieve it.
    """

    # The ScrapeConfig this request was built from.
    scrape_config: ScrapeConfig

    # See request_from_dict method in scrapy.utils.request.
    # Drop the attributes that are derived from the scrape config and
    # register "scrape_config" itself for (de)serialization.
    attributes = tuple(
        attr for attr in Request.attributes
        if attr not in ["body", "cookies", "headers", "method", "url"]
    ) + ("scrape_config",)

    # url:str inherited
    # method:str inherited
    # body:bytes inherited
    # headers:Dict inherited
    # encoding:Dict inherited

    def __init__(self, scrape_config: ScrapeConfig, meta: Optional[Dict] = None, *args, **kwargs):
        """Build a request from *scrape_config*.

        :param scrape_config: source of url / headers / cookies / body.
        :param meta: optional scrapy meta dict; the scrape config is
            stored under the ``scrapfly_scrape_config`` key.
        """
        # BUG FIX: the previous signature used a mutable default
        # (``meta: Dict = {}``); because ``__init__`` mutates ``meta``
        # below, every default-constructed request shared and mutated
        # the same dict object, leaking state across requests.
        if meta is None:
            meta = {}
        self.scrape_config = scrape_config
        meta['scrapfly_scrape_config'] = self.scrape_config
        super().__init__(
            *args,
            url=self.scrape_config.url,
            headers=self.scrape_config.headers,
            cookies=self.scrape_config.cookies,
            body=self.scrape_config.body,
            meta=meta,
            **kwargs
        )

    def to_dict(self, *, spider: Optional["scrapy.Spider"] = None) -> dict:
        """Return a dictionary containing the Request's data plus its scrape config.

        :raises ValueError: if *spider* is not given.
        """
        if spider is None:
            raise ValueError("The 'spider' argument is required to serialize the request.")
        d = super().to_dict(spider=spider)
        d['scrape_config'] = self.scrape_config
        return d

    @classmethod
    def from_dict(cls, data):
        """Rebuild a request from a dict produced by :meth:`to_dict`.

        NOTE(review): this reads the ScrapeConfig object out of
        ``data['meta']`` and round-trips it through ``to_dict`` /
        ``ScrapeConfig.from_dict``; other serialized request attributes
        (callback, priority, ...) are not restored — confirm intended.
        """
        scrape_config_data = data['meta']['scrapfly_scrape_config'].to_dict()
        scrape_config = ScrapeConfig.from_dict(scrape_config_data)
        request = cls(scrape_config=scrape_config)
        return request

    def replace(self, *args, **kwargs):
        """Create a new Request with the same attributes except for those
        given new values; the scrape config is deep-copied so the clone
        cannot share mutable config state with the original."""
        for x in [
            'meta', 'flags', 'encoding', 'priority', 'dont_filter',
            'callback', 'errback', 'cb_kwargs',
        ]:
            kwargs.setdefault(x, getattr(self, x))
        kwargs['scrape_config'] = deepcopy(self.scrape_config)
        cls = kwargs.pop('cls', self.__class__)
        return cls(*args, **kwargs)

Represents an HTTP request, which is usually generated in a Spider and executed by the Downloader, thus generating a :class:Response.

Ancestors

  • scrapy.http.request.Request
  • scrapy.utils.trackref.object_ref

Class variables

var attributes : tuple[str, ...]

Tuple of serializable attribute names: the inherited scrapy Request attributes minus body, cookies, headers, method and url (which derive from the scrape config), plus scrape_config.

var scrape_config : ScrapeConfig

The ScrapeConfig this request was built from; it is also stored in meta['scrapfly_scrape_config'].

Static methods

def from_dict(data)

Methods

def replace(self, *args, **kwargs)
Expand source code
def replace(self, *args, **kwargs):
    """Create a new Request with the same attributes except for those given new values.

    The scrape config is deep-copied so the clone never shares mutable
    config state with this request. A ``cls`` keyword may be supplied to
    build an instance of a different request class.
    """
    preserved = (
        'meta', 'flags', 'encoding', 'priority',
        'dont_filter', 'callback', 'errback', 'cb_kwargs',
    )
    for attr_name in preserved:
        kwargs.setdefault(attr_name, getattr(self, attr_name))
    kwargs['scrape_config'] = deepcopy(self.scrape_config)
    request_cls = kwargs.pop('cls', self.__class__)
    return request_cls(*args, **kwargs)

Create a new Request with the same attributes except for those given new values

def to_dict(self, *, spider: ForwardRef('scrapy.Spider') | None = None) ‑> dict
Expand source code
def to_dict(self, *, spider: Optional["scrapy.Spider"] = None) -> dict:
    """Serialize this request to a dict, appending its scrape config.

    :param spider: spider used to resolve callback/errback names; required.
    :raises ValueError: if *spider* is not provided.
    """
    if spider is None:
        raise ValueError("The 'spider' argument is required to serialize the request.")
    return {**super().to_dict(spider=spider), 'scrape_config': self.scrape_config}

Return a dictionary containing the Request's data.

Use :func:~scrapy.utils.request.request_from_dict to convert back into a :class:~scrapy.Request object.

If a spider is given, this method will try to find out the name of the spider methods used as callback and errback and include them in the output dict, raising an exception if they cannot be found.