Sanidhya Singh
02/22/2023, 4:18 AM
import pandas as pd
import os
import s3fs
class S3FileSystemPatched(s3fs.S3FileSystem):
    """``s3fs.S3FileSystem`` that takes its connection settings from the environment.

    The access key, secret key and endpoint URL are read from the
    ``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY`` and ``AWS_S3_ENDPOINT``
    environment variables whenever an instance is constructed.
    """

    def __init__(self, *k, **kw):
        """Construct the filesystem, filling in credentials and endpoint.

        Args:
            *k: Positional arguments forwarded unchanged to
                ``s3fs.S3FileSystem.__init__``.
            **kw: Keyword arguments forwarded to
                ``s3fs.S3FileSystem.__init__``. ``key``, ``secret`` and
                ``client_kwargs["endpoint_url"]`` are taken from the
                environment only when the caller did not supply them.

        Raises:
            KeyError: If a value has to come from the environment and the
                corresponding variable is not set.
        """
        # Only inject what the caller left out; passing the same keyword
        # twice would raise "got multiple values for keyword argument".
        if "key" not in kw:
            kw["key"] = os.environ["AWS_ACCESS_KEY_ID"]
        if "secret" not in kw:
            kw["secret"] = os.environ["AWS_SECRET_ACCESS_KEY"]

        # Copy so the caller's dict is never mutated, then add the endpoint
        # unless one was given explicitly.
        client_kwargs = dict(kw.get("client_kwargs") or {})
        if "endpoint_url" not in client_kwargs:
            client_kwargs["endpoint_url"] = os.environ["AWS_S3_ENDPOINT"]
        kw["client_kwargs"] = client_kwargs

        super().__init__(*k, **kw)
        print('S3FileSystem is patched')
# Replace the class on the s3fs module so that any code resolving
# ``s3fs.S3FileSystem`` after this point gets the patched subclass.
s3fs.S3FileSystem = S3FileSystemPatched
# Plain s3:// URL (no surrounding angle brackets). With the endpoint above
# pointing at lakeFS, the path presumably reads <repository>/<branch>/<object>
# -- confirm against the lakeFS S3 gateway documentation.
data = pd.read_csv("s3://example/master/test.csv")
It throws FileNotFoundError: example/master/test.csv
Adi Polak
02/22/2023, 7:13 AM
lakefs://example/master/text.csv
Jonathan Rosenberg
02/22/2023, 7:42 AM
Barak Amar
02/22/2023, 8:53 AM
master or main, which is the new default.
Sanidhya Singh
02/22/2023, 9:00 AM
ValueError: Protocol not known: lakefs
Jonathan Rosenberg
02/22/2023, 9:02 AM
endpoint_url?
Sanidhya Singh
02/22/2023, 9:02 AM
master
Jonathan Rosenberg
02/22/2023, 9:03 AM
AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY should be your lakeFS key and secret.
Sanidhya Singh
02/22/2023, 9:07 AM
FileNotFoundError: The specified bucket does not exist
Jonathan Rosenberg
02/22/2023, 9:11 AM
Sanidhya Singh
02/22/2023, 9:27 AM
Jonathan Rosenberg
02/22/2023, 9:34 AM