4545from vertexai .preview .rag .utils .resources import (
4646 EmbeddingModelConfig ,
4747 JiraSource ,
48+ LayoutParserConfig ,
4849 Pinecone ,
4950 RagCorpus ,
5051 RagFile ,
@@ -466,6 +467,7 @@ def import_files(
466467 max_embedding_requests_per_min : int = 1000 ,
467468 use_advanced_pdf_parsing : Optional [bool ] = False ,
468469 partial_failures_sink : Optional [str ] = None ,
470+ layout_parser : Optional [LayoutParserConfig ] = None ,
469471) -> ImportRagFilesResponse :
470472 """
471473 Import files to an existing RagCorpus, wait until completion.
@@ -581,13 +583,21 @@ def import_files(
581583 exist - if it does not exist, it will be created. If it does exist,
582584 the schema will be checked and the partial failures will be appended
583585 to the table.
586+ layout_parser: Configuration for the Document AI Layout Parser Processor
587+ to use for document parsing. Optional.
588+ If not None, `use_advanced_pdf_parsing` must be False.
584589 Returns:
585590 ImportRagFilesResponse.
586591 """
587592 if source is not None and paths is not None :
588593 raise ValueError ("Only one of source or paths must be passed in at a time" )
589594 if source is None and paths is None :
590595 raise ValueError ("One of source or paths must be passed in" )
596+ if use_advanced_pdf_parsing and layout_parser is not None :
597+ raise ValueError (
598+ "Only one of use_advanced_pdf_parsing or layout_parser may be "
599+ "passed in at a time"
600+ )
591601 corpus_name = _gapic_utils .get_corpus_name (corpus_name )
592602 request = _gapic_utils .prepare_import_files_request (
593603 corpus_name = corpus_name ,
@@ -599,6 +609,7 @@ def import_files(
599609 max_embedding_requests_per_min = max_embedding_requests_per_min ,
600610 use_advanced_pdf_parsing = use_advanced_pdf_parsing ,
601611 partial_failures_sink = partial_failures_sink ,
612+ layout_parser = layout_parser ,
602613 )
603614 client = _gapic_utils .create_rag_data_service_client ()
604615 try :
@@ -619,6 +630,7 @@ async def import_files_async(
619630 max_embedding_requests_per_min : int = 1000 ,
620631 use_advanced_pdf_parsing : Optional [bool ] = False ,
621632 partial_failures_sink : Optional [str ] = None ,
633+ layout_parser : Optional [LayoutParserConfig ] = None ,
622634) -> operation_async .AsyncOperation :
623635 """
624636 Import files to an existing RagCorpus asynchronously.
@@ -734,13 +746,21 @@ async def import_files_async(
734746 exist - if it does not exist, it will be created. If it does exist,
735747 the schema will be checked and the partial failures will be appended
736748 to the table.
749+ layout_parser: Configuration for the Document AI Layout Parser Processor
750+ to use for document parsing. Optional.
751+ If not None, `use_advanced_pdf_parsing` must be False.
737752 Returns:
738753 operation_async.AsyncOperation.
739754 """
740755 if source is not None and paths is not None :
741756 raise ValueError ("Only one of source or paths must be passed in at a time" )
742757 if source is None and paths is None :
743758 raise ValueError ("One of source or paths must be passed in" )
759+ if use_advanced_pdf_parsing and layout_parser is not None :
760+ raise ValueError (
761+ "Only one of use_advanced_pdf_parsing or layout_parser may be "
762+ "passed in at a time"
763+ )
744764 corpus_name = _gapic_utils .get_corpus_name (corpus_name )
745765 request = _gapic_utils .prepare_import_files_request (
746766 corpus_name = corpus_name ,
@@ -752,6 +772,7 @@ async def import_files_async(
752772 max_embedding_requests_per_min = max_embedding_requests_per_min ,
753773 use_advanced_pdf_parsing = use_advanced_pdf_parsing ,
754774 partial_failures_sink = partial_failures_sink ,
775+ layout_parser = layout_parser ,
755776 )
756777 async_client = _gapic_utils .create_rag_data_service_async_client ()
757778 try :
0 commit comments