Skip to main content
Version: 1.3.8

SparkFilesystemDatasource

Signature

class great_expectations.datasource.fluent.SparkFilesystemDatasource(*, type: Literal['spark_filesystem'] = 'spark_filesystem', name: str, id: Optional[uuid.UUID] = None, assets: List[Union[great_expectations.datasource.fluent.data_asset.path.spark.csv_asset.CSVAsset, great_expectations.datasource.fluent.data_asset.path.spark.csv_asset.DirectoryCSVAsset, great_expectations.datasource.fluent.data_asset.path.spark.parquet_asset.ParquetAsset, great_expectations.datasource.fluent.data_asset.path.spark.parquet_asset.DirectoryParquetAsset, great_expectations.datasource.fluent.data_asset.path.spark.orc_asset.ORCAsset, great_expectations.datasource.fluent.data_asset.path.spark.orc_asset.DirectoryORCAsset, great_expectations.datasource.fluent.data_asset.path.spark.json_asset.JSONAsset, great_expectations.datasource.fluent.data_asset.path.spark.json_asset.DirectoryJSONAsset, great_expectations.datasource.fluent.data_asset.path.spark.text_asset.TextAsset, great_expectations.datasource.fluent.data_asset.path.spark.text_asset.DirectoryTextAsset, great_expectations.datasource.fluent.data_asset.path.spark.delta_asset.DeltaAsset, great_expectations.datasource.fluent.data_asset.path.spark.delta_asset.DirectoryDeltaAsset]] = [], spark_config: Optional[Dict[pydantic.v1.types.StrictStr, Union[pydantic.v1.types.StrictStr, pydantic.v1.types.StrictInt, pydantic.v1.types.StrictFloat, pydantic.v1.types.StrictBool]]] = None, force_reuse_spark_context: bool = True, persist: bool = True, base_directory: pathlib.Path, data_context_root_directory: Optional[pathlib.Path] = None)

SparkFilesystemDatasource is a subclass of SparkDatasource which connects to the filesystem.

Methods

add_csv_asset

Signature

add_csv_asset(name: str, *, id: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f28bc0> = None, order_by: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f28c80> = None, batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f28dd0> = None, batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f28f80> = None, connect_options: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f29040> = None, pathGlobFilter: typing.Optional[typing.Union[bool, str]] = None, recursiveFileLookup: typing.Optional[typing.Union[bool, str]] = None, modifiedBefore: typing.Optional[typing.Union[bool, str]] = None, modifiedAfter: typing.Optional[typing.Union[bool, str]] = None, schema: typing.Optional[typing.Union[great_expectations.datasource.fluent.serializable_types.pyspark.SerializableStructType, str]] = None, sep: typing.Optional[str] = None, encoding: typing.Optional[str] = None, quote: typing.Optional[str] = None, escape: typing.Optional[str] = None, comment: typing.Optional[str] = None, header: typing.Optional[typing.Union[bool, str]] = None, inferSchema: typing.Optional[typing.Union[bool, str]] = None, ignoreLeadingWhiteSpace: typing.Optional[typing.Union[bool, str]] = None, ignoreTrailingWhiteSpace: typing.Optional[typing.Union[bool, str]] = None, nullValue: typing.Optional[str] = None, nanValue: typing.Optional[str] = None, positiveInf: typing.Optional[str] = None, negativeInf: typing.Optional[str] = None, dateFormat: typing.Optional[str] = None, timestampFormat: typing.Optional[str] = None, maxColumns: typing.Optional[typing.Union[int, str]] = None, maxCharsPerColumn: typing.Optional[typing.Union[int, str]] = None, maxMalformedLogPerPartition: typing.Optional[typing.Union[int, str]] = None, mode: typing.Optional[typing.Literal['PERMISSIVE', 'DROPMALFORMED', 'FAILFAST']] = None, columnNameOfCorruptRecord: typing.Optional[str] = None, multiLine: typing.Optional[typing.Union[bool, str]] = None, charToEscapeQuoteEscaping: typing.Optional[str] = None, 
samplingRatio: typing.Optional[typing.Union[float, str]] = None, enforceSchema: typing.Optional[typing.Union[bool, str]] = None, emptyValue: typing.Optional[str] = None, locale: typing.Optional[str] = None, lineSep: typing.Optional[str] = None, unescapedQuoteHandling: typing.Optional[typing.Literal['STOP_AT_CLOSING_QUOTE', 'BACK_TO_DELIMITER', 'STOP_AT_DELIMITER', 'SKIP_VALUE', 'RAISE_ERROR']] = None) → pydantic.BaseModel

Add a csv asset to the datasource.

add_delta_asset

Signature

add_delta_asset(name: str, *, id: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f48b90> = None, order_by: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f48c50> = None, batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f48da0> = None, batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f48f50> = None, connect_options: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f49010> = None, timestampAsOf: typing.Optional[str] = None, versionAsOf: typing.Optional[str] = None) → pydantic.BaseModel

Add a delta asset to the datasource.

add_directory_csv_asset

Signature

add_directory_csv_asset(name: str, *, id: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f2b2f0> = None, order_by: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f2b3b0> = None, batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f2b500> = None, batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f2b6b0> = None, connect_options: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f2b770> = None, pathGlobFilter: typing.Optional[typing.Union[bool, str]] = None, recursiveFileLookup: typing.Optional[typing.Union[bool, str]] = None, modifiedBefore: typing.Optional[typing.Union[bool, str]] = None, modifiedAfter: typing.Optional[typing.Union[bool, str]] = None, schema: typing.Optional[typing.Union[great_expectations.datasource.fluent.serializable_types.pyspark.SerializableStructType, str]] = None, sep: typing.Optional[str] = None, encoding: typing.Optional[str] = None, quote: typing.Optional[str] = None, escape: typing.Optional[str] = None, comment: typing.Optional[str] = None, header: typing.Optional[typing.Union[bool, str]] = None, inferSchema: typing.Optional[typing.Union[bool, str]] = None, ignoreLeadingWhiteSpace: typing.Optional[typing.Union[bool, str]] = None, ignoreTrailingWhiteSpace: typing.Optional[typing.Union[bool, str]] = None, nullValue: typing.Optional[str] = None, nanValue: typing.Optional[str] = None, positiveInf: typing.Optional[str] = None, negativeInf: typing.Optional[str] = None, dateFormat: typing.Optional[str] = None, timestampFormat: typing.Optional[str] = None, maxColumns: typing.Optional[typing.Union[int, str]] = None, maxCharsPerColumn: typing.Optional[typing.Union[int, str]] = None, maxMalformedLogPerPartition: typing.Optional[typing.Union[int, str]] = None, mode: typing.Optional[typing.Literal['PERMISSIVE', 'DROPMALFORMED', 'FAILFAST']] = None, columnNameOfCorruptRecord: typing.Optional[str] = None, multiLine: typing.Optional[typing.Union[bool, str]] = None, charToEscapeQuoteEscaping: 
typing.Optional[str] = None, samplingRatio: typing.Optional[typing.Union[float, str]] = None, enforceSchema: typing.Optional[typing.Union[bool, str]] = None, emptyValue: typing.Optional[str] = None, locale: typing.Optional[str] = None, lineSep: typing.Optional[str] = None, unescapedQuoteHandling: typing.Optional[typing.Literal['STOP_AT_CLOSING_QUOTE', 'BACK_TO_DELIMITER', 'STOP_AT_DELIMITER', 'SKIP_VALUE', 'RAISE_ERROR']] = None, data_directory: pathlib.Path) → pydantic.BaseModel

Add a directory_csv asset to the datasource.

add_directory_delta_asset

Signature

add_directory_delta_asset(name: str, *, id: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f49e20> = None, order_by: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f49ee0> = None, batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f4a030> = None, batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f4a1e0> = None, connect_options: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f4a2a0> = None, timestampAsOf: typing.Optional[str] = None, versionAsOf: typing.Optional[str] = None, data_directory: pathlib.Path) → pydantic.BaseModel

Add a directory_delta asset to the datasource.

add_directory_json_asset

Signature

add_directory_json_asset(name: str, *, id: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f63d40> = None, order_by: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f63e00> = None, batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f63f50> = None, batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7f3cd3d94140> = None, connect_options: <pydantic.v1.fields.DeferredType object at 0x7f3cd3d94200> = None, pathGlobFilter: typing.Optional[typing.Union[bool, str]] = None, recursiveFileLookup: typing.Optional[typing.Union[bool, str]] = None, modifiedBefore: typing.Optional[typing.Union[bool, str]] = None, modifiedAfter: typing.Optional[typing.Union[bool, str]] = None, schema: typing.Optional[typing.Union[great_expectations.datasource.fluent.serializable_types.pyspark.SerializableStructType, str]] = None, primitivesAsString: typing.Optional[typing.Union[bool, str]] = None, prefersDecimal: typing.Optional[typing.Union[bool, str]] = None, allowComments: typing.Optional[typing.Union[bool, str]] = None, allowUnquotedFieldNames: typing.Optional[typing.Union[bool, str]] = None, allowSingleQuotes: typing.Optional[typing.Union[bool, str]] = None, allowNumericLeadingZero: typing.Optional[typing.Union[bool, str]] = None, allowBackslashEscapingAnyCharacter: typing.Optional[typing.Union[bool, str]] = None, mode: typing.Optional[typing.Literal['PERMISSIVE', 'DROPMALFORMED', 'FAILFAST']] = None, columnNameOfCorruptRecord: typing.Optional[str] = None, dateFormat: typing.Optional[str] = None, timestampFormat: typing.Optional[str] = None, multiLine: typing.Optional[typing.Union[bool, str]] = None, allowUnquotedControlChars: typing.Optional[typing.Union[bool, str]] = None, lineSep: typing.Optional[str] = None, samplingRatio: typing.Optional[typing.Union[float, str]] = None, dropFieldIfAllNull: typing.Optional[typing.Union[bool, str]] = None, encoding: typing.Optional[str] = None, locale: typing.Optional[str] = None, allowNonNumericNumbers: 
typing.Optional[typing.Union[bool, str]] = None, data_directory: pathlib.Path) → pydantic.BaseModel

Add a directory_json asset to the datasource.

add_directory_orc_asset

Signature

add_directory_orc_asset(name: str, *, id: <pydantic.v1.fields.DeferredType object at 0x7f3cd3d971a0> = None, order_by: <pydantic.v1.fields.DeferredType object at 0x7f3cd3d97290> = None, batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7f3cd3d96e40> = None, batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7f3cd3d971d0> = None, connect_options: <pydantic.v1.fields.DeferredType object at 0x7f3cd3d972c0> = None, pathGlobFilter: typing.Optional[typing.Union[bool, str]] = None, recursiveFileLookup: typing.Optional[typing.Union[bool, str]] = None, modifiedBefore: typing.Optional[typing.Union[bool, str]] = None, modifiedAfter: typing.Optional[typing.Union[bool, str]] = None, mergeSchema: typing.Optional[typing.Union[bool, str]] = False, data_directory: pathlib.Path) → pydantic.BaseModel

Add a directory_orc asset to the datasource.

add_directory_parquet_asset

Signature

add_directory_parquet_asset(name: str, *, id: <pydantic.v1.fields.DeferredType object at 0x7f3cd3db80b0> = None, order_by: <pydantic.v1.fields.DeferredType object at 0x7f3cd3db8050> = None, batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7f3cd3db8080> = None, batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7f3cd3db81a0> = None, connect_options: <pydantic.v1.fields.DeferredType object at 0x7f3cd3db8200> = None, pathGlobFilter: typing.Optional[typing.Union[bool, str]] = None, recursiveFileLookup: typing.Optional[typing.Union[bool, str]] = None, modifiedBefore: typing.Optional[typing.Union[bool, str]] = None, modifiedAfter: typing.Optional[typing.Union[bool, str]] = None, mergeSchema: typing.Optional[typing.Union[bool, str]] = None, datetimeRebaseMode: typing.Optional[typing.Literal['EXCEPTION', 'CORRECTED', 'LEGACY']] = None, int96RebaseMode: typing.Optional[typing.Literal['EXCEPTION', 'CORRECTED', 'LEGACY']] = None, data_directory: pathlib.Path) → pydantic.BaseModel

Add a directory_parquet asset to the datasource.

add_directory_text_asset

Signature

add_directory_text_asset(name: str, *, id: <pydantic.v1.fields.DeferredType object at 0x7f3cd3db8f20> = None, order_by: <pydantic.v1.fields.DeferredType object at 0x7f3cd3db8f80> = None, batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7f3cd3db8e60> = None, batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7f3cd3db8da0> = None, connect_options: <pydantic.v1.fields.DeferredType object at 0x7f3cd3db8f50> = None, pathGlobFilter: typing.Optional[typing.Union[bool, str]] = None, recursiveFileLookup: typing.Optional[typing.Union[bool, str]] = None, modifiedBefore: typing.Optional[typing.Union[bool, str]] = None, modifiedAfter: typing.Optional[typing.Union[bool, str]] = None, wholetext: bool = False, lineSep: typing.Optional[str] = None, data_directory: pathlib.Path) → pydantic.BaseModel

Add a directory_text asset to the datasource.

add_json_asset

Signature

add_json_asset(name: str, *, id: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f61700> = None, order_by: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f61910> = None, batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f61a60> = None, batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f61c10> = None, connect_options: <pydantic.v1.fields.DeferredType object at 0x7f3cd3f61cd0> = None, pathGlobFilter: typing.Optional[typing.Union[bool, str]] = None, recursiveFileLookup: typing.Optional[typing.Union[bool, str]] = None, modifiedBefore: typing.Optional[typing.Union[bool, str]] = None, modifiedAfter: typing.Optional[typing.Union[bool, str]] = None, schema: typing.Optional[typing.Union[great_expectations.datasource.fluent.serializable_types.pyspark.SerializableStructType, str]] = None, primitivesAsString: typing.Optional[typing.Union[bool, str]] = None, prefersDecimal: typing.Optional[typing.Union[bool, str]] = None, allowComments: typing.Optional[typing.Union[bool, str]] = None, allowUnquotedFieldNames: typing.Optional[typing.Union[bool, str]] = None, allowSingleQuotes: typing.Optional[typing.Union[bool, str]] = None, allowNumericLeadingZero: typing.Optional[typing.Union[bool, str]] = None, allowBackslashEscapingAnyCharacter: typing.Optional[typing.Union[bool, str]] = None, mode: typing.Optional[typing.Literal['PERMISSIVE', 'DROPMALFORMED', 'FAILFAST']] = None, columnNameOfCorruptRecord: typing.Optional[str] = None, dateFormat: typing.Optional[str] = None, timestampFormat: typing.Optional[str] = None, multiLine: typing.Optional[typing.Union[bool, str]] = None, allowUnquotedControlChars: typing.Optional[typing.Union[bool, str]] = None, lineSep: typing.Optional[str] = None, samplingRatio: typing.Optional[typing.Union[float, str]] = None, dropFieldIfAllNull: typing.Optional[typing.Union[bool, str]] = None, encoding: typing.Optional[str] = None, locale: typing.Optional[str] = None, allowNonNumericNumbers: 
typing.Optional[typing.Union[bool, str]] = None) → pydantic.BaseModel

Add a json asset to the datasource.

add_orc_asset

Signature

add_orc_asset(name: str, *, id: <pydantic.v1.fields.DeferredType object at 0x7f3cd3d960f0> = None, order_by: <pydantic.v1.fields.DeferredType object at 0x7f3cd3d961b0> = None, batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7f3cd3d96300> = None, batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7f3cd3d964b0> = None, connect_options: <pydantic.v1.fields.DeferredType object at 0x7f3cd3d96570> = None, pathGlobFilter: typing.Optional[typing.Union[bool, str]] = None, recursiveFileLookup: typing.Optional[typing.Union[bool, str]] = None, modifiedBefore: typing.Optional[typing.Union[bool, str]] = None, modifiedAfter: typing.Optional[typing.Union[bool, str]] = None, mergeSchema: typing.Optional[typing.Union[bool, str]] = False) → pydantic.BaseModel

Add an orc asset to the datasource.

add_parquet_asset

Signature

add_parquet_asset(name: str, *, id: <pydantic.v1.fields.DeferredType object at 0x7f3cd3d97740> = None, order_by: <pydantic.v1.fields.DeferredType object at 0x7f3cd3d97cb0> = None, batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7f3cd3d97c80> = None, batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7f3cd3d97bf0> = None, connect_options: <pydantic.v1.fields.DeferredType object at 0x7f3cd3d97ad0> = None, pathGlobFilter: typing.Optional[typing.Union[bool, str]] = None, recursiveFileLookup: typing.Optional[typing.Union[bool, str]] = None, modifiedBefore: typing.Optional[typing.Union[bool, str]] = None, modifiedAfter: typing.Optional[typing.Union[bool, str]] = None, mergeSchema: typing.Optional[typing.Union[bool, str]] = None, datetimeRebaseMode: typing.Optional[typing.Literal['EXCEPTION', 'CORRECTED', 'LEGACY']] = None, int96RebaseMode: typing.Optional[typing.Literal['EXCEPTION', 'CORRECTED', 'LEGACY']] = None) → pydantic.BaseModel

Add a parquet asset to the datasource.

add_text_asset

Signature

add_text_asset(name: str, *, id: <pydantic.v1.fields.DeferredType object at 0x7f3cd3db88c0> = None, order_by: <pydantic.v1.fields.DeferredType object at 0x7f3cd3db8920> = None, batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7f3cd3db8800> = None, batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7f3cd3db8710> = None, connect_options: <pydantic.v1.fields.DeferredType object at 0x7f3cd3db88f0> = None, pathGlobFilter: typing.Optional[typing.Union[bool, str]] = None, recursiveFileLookup: typing.Optional[typing.Union[bool, str]] = None, modifiedBefore: typing.Optional[typing.Union[bool, str]] = None, modifiedAfter: typing.Optional[typing.Union[bool, str]] = None, wholetext: bool = False, lineSep: typing.Optional[str] = None) → pydantic.BaseModel

Add a text asset to the datasource.

delete_asset

Signature

delete_asset(name: str) → None

Removes the DataAsset referred to by name from the internal list of available DataAsset objects.

Parameters

Name — Description

name

name of DataAsset to be deleted.

get_asset

Signature

get_asset(name: str) → great_expectations.datasource.fluent.interfaces._DataAssetT

Returns the DataAsset referred to by name.

Parameters

Name — Description

name

name of DataAsset sought.

Returns

Type — Description

great_expectations.datasource.fluent.interfaces._DataAssetT

the DataAsset object, if one with the given name exists; otherwise, an exception is raised.