Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ def _split_generators(self, dl_manager):
paths = dl_manager.download_and_extract(resources)
return [
tfds.core.SplitGenerator(
name=tfds.Split.TEST, gen_kwargs=dict(split='test', paths=paths)
name=tfds.Split.TEST, gen_kwargs=dict(split='test', paths=paths) # pyrefly: ignore[missing-attribute]
),
tfds.core.SplitGenerator(
name=tfds.Split.TRAIN, gen_kwargs=dict(split='train', paths=paths)
name=tfds.Split.TRAIN, gen_kwargs=dict(split='train', paths=paths) # pyrefly: ignore[missing-attribute]
),
tfds.core.SplitGenerator(
name=tfds.Split.VALIDATION,
name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute]
gen_kwargs=dict(split='valid', paths=paths),
),
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@
_UNLABELED_FINAL: ["train", "dev"],
}
_SPLIT_MAPPINGS = {
"train": tfds.Split.TRAIN,
"dev": tfds.Split.VALIDATION,
"test": tfds.Split.TEST,
"train": tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute]
"dev": tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute]
"test": tfds.Split.TEST, # pyrefly: ignore[missing-attribute]
}

_CLASS_LABELS = ["different_meaning", "paraphrase"]
Expand Down Expand Up @@ -101,10 +101,10 @@ def _info(self):
def _split_generators(self, dl_manager):
"""Returns SplitGenerators."""
dl_paths = dl_manager.download_and_extract(_DOWNLOAD_URLS)
subset = self.builder_config.subset
subset = self.builder_config.subset # pyrefly: ignore[missing-attribute]
labels_path = os.path.join(dl_paths[subset], _EXTRACTED_FOLDERS[subset])

if self.builder_config.tokenized:
if self.builder_config.tokenized: # pyrefly: ignore[missing-attribute]
mappings_path = ""
tags2texts = {}
else:
Expand Down Expand Up @@ -193,7 +193,7 @@ def _generate_examples(
key = row["id"]
label_str = (
"noisy_label"
if self.builder_config.subset == _UNLABELED_FINAL
if self.builder_config.subset == _UNLABELED_FINAL # pyrefly: ignore[missing-attribute]
else "label"
)
example = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,32 +75,32 @@ def _split_generators(self, dl_manager):
base_path = os.path.join(dl_path["x-final"], "x-final")

# Name of file for training for 'en' is different from other languages
if self.builder_config.language == "en":
if self.builder_config.language == "en": # pyrefly: ignore[missing-attribute]
training_path = os.path.join(
base_path, self.builder_config.language, "train.tsv"
)
else:
training_path = os.path.join(
training_path = os.path.join( # pyrefly: ignore[no-matching-overload]
base_path, self.builder_config.language, "translated_train.tsv"
)

return [
tfds.core.SplitGenerator(
name=tfds.Split.TRAIN,
name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute]
gen_kwargs={"path": training_path},
),
tfds.core.SplitGenerator(
name=tfds.Split.TEST,
name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute]
gen_kwargs={
"path": os.path.join(
"path": os.path.join( # pyrefly: ignore[no-matching-overload]
base_path, self.builder_config.language, "test_2k.tsv"
)
},
),
tfds.core.SplitGenerator(
name=tfds.Split.VALIDATION,
name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute]
gen_kwargs={
"path": os.path.join(
"path": os.path.join( # pyrefly: ignore[no-matching-overload]
base_path, self.builder_config.language, "dev_2k.tsv"
)
},
Expand Down
20 changes: 10 additions & 10 deletions tensorflow_datasets/datasets/penguins/penguins_dataset_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,11 +159,11 @@ class Builder(tfds.core.GeneratorBasedBuilder):
def _info(self) -> tfds.core.DatasetInfo:
"""Returns the dataset metadata."""
supervised_keys = None
features = self.builder_config.features
features = self.builder_config.features # pyrefly: ignore[missing-attribute]
supervised_features = features.copy()
label_name = self.builder_config.label
label_name = self.builder_config.label # pyrefly: ignore[missing-attribute]

if self.builder_config.name == 'processed':
if self.builder_config.name == 'processed': # pyrefly: ignore[missing-attribute]
label_feature = supervised_features.pop(label_name, None)
supervised_keys = ('features', label_name)
features = {
Expand All @@ -187,7 +187,7 @@ def _info(self) -> tfds.core.DatasetInfo:

def _split_generators(self, dl_manager: tfds.download.DownloadManager):
"""Returns SplitGenerators."""
path = dl_manager.download(_PENGUINS_PATH + self.builder_config.file_name)
path = dl_manager.download(_PENGUINS_PATH + self.builder_config.file_name) # pyrefly: ignore[missing-attribute]
return {'train': self._generate_examples(path)}

def _generate_examples(self, path):
Expand All @@ -197,8 +197,8 @@ def _generate_examples(self, path):
row = {f: self._clean_up(f, v) for f, v in row.items()}

# Pack features if requested.
if self.builder_config.name == 'processed':
label_name = self.builder_config.label
if self.builder_config.name == 'processed': # pyrefly: ignore[missing-attribute]
label_name = self.builder_config.label # pyrefly: ignore[missing-attribute]
label = row.pop(label_name, None)
row = list(row.values())
yield i, {'features': row, label_name: label}
Expand All @@ -207,21 +207,21 @@ def _generate_examples(self, path):

def _clean_up(self, field, value):
"""Applies field-level pre-processing, if needed."""
if not self.builder_config.cleanup:
if not self.builder_config.cleanup: # pyrefly: ignore[missing-attribute]
return value
if field not in self.builder_config.cleanup:
return value

feature_type = self.builder_config.features[field]
feature_type = self.builder_config.features[field] # pyrefly: ignore[missing-attribute]
if feature_type == np.float32:
# Field is a float. If it won't parse, clean it up.
try:
return float(value)
except ValueError:
return self.builder_config.cleanup[field]
return self.builder_config.cleanup[field] # pyrefly: ignore[bad-index]
elif isinstance(feature_type, tfds.features.ClassLabel):
# Field is a class. If it's OOV, clean it up.
if value not in feature_type.names:
return self.builder_config.cleanup[field]
return self.builder_config.cleanup[field] # pyrefly: ignore[bad-index]

return value
Original file line number Diff line number Diff line change
Expand Up @@ -100,15 +100,15 @@ def _split_generators(self, dl_manager):

return [
tfds.core.SplitGenerator(
name=tfds.Split.TRAIN,
name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute]
gen_kwargs={
"csv_name": "train.csv",
"csv_paths": dl_paths["train"],
"img_paths": dl_paths["train_images"],
},
),
tfds.core.SplitGenerator(
name=tfds.Split.TEST,
name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute]
gen_kwargs={
"csv_name": "test.csv",
"csv_paths": dl_paths["test"],
Expand Down
6 changes: 3 additions & 3 deletions tensorflow_datasets/datasets/pg19/pg19_dataset_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,21 +59,21 @@ def _split_generators(self, dl_manager):

return [
tfds.core.SplitGenerator(
name=tfds.Split.TRAIN,
name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute]
gen_kwargs={
'metadata': metadata_dict,
'filepath': os.path.join(_DATA_DIR, 'train'),
},
),
tfds.core.SplitGenerator(
name=tfds.Split.VALIDATION,
name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute]
gen_kwargs={
'metadata': metadata_dict,
'filepath': os.path.join(_DATA_DIR, 'validation'),
},
),
tfds.core.SplitGenerator(
name=tfds.Split.TEST,
name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute]
gen_kwargs={
'metadata': metadata_dict,
'filepath': os.path.join(_DATA_DIR, 'test'),
Expand Down
4 changes: 2 additions & 2 deletions tensorflow_datasets/datasets/piqa/piqa_dataset_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def _split_generators(self, dl_manager):
# Specify the splits
return [
tfds.core.SplitGenerator(
name=tfds.Split.TRAIN,
name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute]
gen_kwargs={
"data_path": os.path.join(
extracted_path, "physicaliqa-train-dev/train.jsonl"
Expand All @@ -69,7 +69,7 @@ def _split_generators(self, dl_manager):
},
),
tfds.core.SplitGenerator(
name=tfds.Split.VALIDATION,
name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute]
gen_kwargs={
"data_path": os.path.join(
extracted_path, "physicaliqa-train-dev/dev.jsonl"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import os

from etils import epath
import six.moves.urllib as urllib
import six.moves.urllib as urllib # pyrefly: ignore[missing-source-for-stubs]
import tensorflow_datasets.public_api as tfds

_BASE_URL = "http://data.csail.mit.edu/places/places365/"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def _split_generators(self, dl_manager):
archive_path = dl_manager.download(_DOWNLOAD_URL)
return [
tfds.core.SplitGenerator(
name=tfds.Split.TRAIN,
name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute]
gen_kwargs={"archive": dl_manager.iter_archive(archive_path)},
)
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def _split_generators(self, dl_manager):
path = dl_manager.download_and_extract(_URL)
return [
tfds.core.SplitGenerator(
name=tfds.Split.TRAIN, gen_kwargs={"datapath": path}
name=tfds.Split.TRAIN, gen_kwargs={"datapath": path} # pyrefly: ignore[missing-attribute]
)
]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def _split_generators(self, dl_manager):
)
return [
tfds.core.SplitGenerator(
name=tfds.Split.TRAIN, gen_kwargs={"image_files": image_files}
name=tfds.Split.TRAIN, gen_kwargs={"image_files": image_files} # pyrefly: ignore[missing-attribute]
)
]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

def _parse_array(lines: Sequence[str]) -> np.ndarray:
"""Parse lines of tab-separated numbers into an array."""
lines = [x.split('\t') for x in lines]
lines = [x.split('\t') for x in lines] # pyrefly: ignore[bad-assignment]
return np.array(lines, dtype=np.float32)


Expand Down Expand Up @@ -165,16 +165,16 @@ def _split_generators(
self, dl_manager: tfds.download.DownloadManager
) -> Dict[Union[str, tfds.Split], _ExampleIterator]:
"""Returns SplitGenerators."""
name = self.builder_config.name # Configurable dataset (config) name.
name = self.builder_config.name # Configurable dataset (config) name. # pyrefly: ignore[missing-attribute]
path = dl_manager.download_and_extract(
urllib.parse.urljoin(self.URL, self.FILES[name])
)

splits = {
tfds.Split.VALIDATION: self._generate_examples(
tfds.Split.VALIDATION: self._generate_examples( # pyrefly: ignore[missing-attribute]
os.path.join(path, name, 'validation')
),
tfds.Split.TEST: self._generate_examples(
tfds.Split.TEST: self._generate_examples( # pyrefly: ignore[missing-attribute]
os.path.join(path, name, 'testing')
),
}
Expand Down
24 changes: 12 additions & 12 deletions tensorflow_datasets/datasets/qa4mre/qa4mre_dataset_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,10 @@ def _get_question(
'document_id': document_id,
'document_str': document_str,
'question_id': question_id,
'question_str': question_str,
'question_str': question_str, # pyrefly: ignore[unbound-name]
'answer_options': possible_answers,
'correct_answer_id': correct_answer_id,
'correct_answer_str': correct_answer_str,
'correct_answer_id': correct_answer_id, # pyrefly: ignore[unbound-name]
'correct_answer_str': correct_answer_str, # pyrefly: ignore[unbound-name]
}

return id_, feats
Expand Down Expand Up @@ -247,17 +247,17 @@ def _split_generators(self, dl_manager):
cfg = self.builder_config
download_urls = dict()

if cfg.track == 'main':
download_urls['{}.main.{}'.format(cfg.year, cfg.lang)] = os.path.join(
_BASE_URL, PATHS[cfg.year]['_PATH_TMPL_MAIN_GS'].format(cfg.lang)
if cfg.track == 'main': # pyrefly: ignore[missing-attribute]
download_urls['{}.main.{}'.format(cfg.year, cfg.lang)] = os.path.join( # pyrefly: ignore[missing-attribute]
_BASE_URL, PATHS[cfg.year]['_PATH_TMPL_MAIN_GS'].format(cfg.lang) # pyrefly: ignore[missing-attribute]
) # pytype: disable=attribute-error

if cfg.year in ['2012', '2013'] and cfg.track == 'alzheimers':
download_urls['{}.alzheimers.EN'.format(cfg.year)] = os.path.join(
if cfg.year in ['2012', '2013'] and cfg.track == 'alzheimers': # pyrefly: ignore[missing-attribute]
download_urls['{}.alzheimers.EN'.format(cfg.year)] = os.path.join( # pyrefly: ignore[no-matching-overload]
_BASE_URL, PATHS[cfg.year]['_PATH_ALZHEIMER']
)

if cfg.year == '2013' and cfg.track == 'entrance_exam':
if cfg.year == '2013' and cfg.track == 'entrance_exam': # pyrefly: ignore[missing-attribute]
download_urls['2013.entrance_exam.EN'] = os.path.join(
_BASE_URL, PATHS[cfg.year]['_PATH_ENTRANCE_EXAM']
)
Expand All @@ -266,7 +266,7 @@ def _split_generators(self, dl_manager):

return [
tfds.core.SplitGenerator(
name=tfds.Split.TRAIN,
name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute]
gen_kwargs={
'filepath': downloaded_files[
'{}.{}.{}'.format(cfg.year, cfg.track, cfg.lang)
Expand All @@ -293,7 +293,7 @@ def _generate_examples(self, filepath):
topic_id,
topic_name,
test_id,
document_id,
document_str,
document_id, # pyrefly: ignore[unbound-name]
document_str, # pyrefly: ignore[unbound-name]
question,
)
6 changes: 3 additions & 3 deletions tensorflow_datasets/datasets/qasc/qasc_dataset_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,15 @@ def _split_generators(self, dl_manager):
data_dir = os.path.join(dl_dir["QASC_Dataset"], "QASC_Dataset")
return [
tfds.core.SplitGenerator(
name=tfds.Split.TRAIN,
name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute]
gen_kwargs={"filepath": os.path.join(data_dir, "train.jsonl")},
),
tfds.core.SplitGenerator(
name=tfds.Split.TEST,
name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute]
gen_kwargs={"filepath": os.path.join(data_dir, "test.jsonl")},
),
tfds.core.SplitGenerator(
name=tfds.Split.VALIDATION,
name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute]
gen_kwargs={"filepath": os.path.join(data_dir, "dev.jsonl")},
),
]
Expand Down
10 changes: 5 additions & 5 deletions tensorflow_datasets/datasets/qm9/qm9_dataset_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def _process_molecule(
thermo = 0
for z in atoms['Z'].values:
thermo += v[z]
example[f'{k}_atomization'] = example[k] - thermo
example[f'{k}_atomization'] = example[k] - thermo # pyrefly: ignore[unsupported-operation]

return example

Expand Down Expand Up @@ -234,7 +234,7 @@ class Builder(tfds.core.GeneratorBasedBuilder):
def _info(self) -> tfds.core.DatasetInfo:
"""Returns the dataset metadata."""
return self.dataset_info_from_configs(
disable_shuffling=self.builder_config.permutation_seed is None,
disable_shuffling=self.builder_config.permutation_seed is None, # pyrefly: ignore[missing-attribute]
features=tfds.features.FeaturesDict({
'num_atoms': tfds.features.Tensor(shape=(), dtype=np.int64),
'charges': tfds.features.Tensor(shape=(29,), dtype=np.int64),
Expand Down Expand Up @@ -306,9 +306,9 @@ def _split_generators(

split_ids = _get_split_ids(
uncharacterized,
permutation_seed=self.builder_config.permutation_seed,
train_size=self.builder_config.train_size,
validation_size=self.builder_config.validation_size,
permutation_seed=self.builder_config.permutation_seed, # pyrefly: ignore[missing-attribute]
train_size=self.builder_config.train_size, # pyrefly: ignore[missing-attribute]
validation_size=self.builder_config.validation_size, # pyrefly: ignore[missing-attribute]
)

return {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager):
"""Returns SplitGenerators."""
path = dl_manager.download_and_extract(_DOWNLOAD_URL)

if self.builder_config.stripped:
if self.builder_config.stripped: # pyrefly: ignore[missing-attribute]
return {
'train': self._generate_examples(
path / 'QuALITY.v0.9.htmlstripped.train', 'train'
Expand Down
Loading
Loading