diff --git a/tensorflow_datasets/datasets/patch_camelyon/patch_camelyon_dataset_builder.py b/tensorflow_datasets/datasets/patch_camelyon/patch_camelyon_dataset_builder.py index 67ddc43f2b0..9785722db0b 100644 --- a/tensorflow_datasets/datasets/patch_camelyon/patch_camelyon_dataset_builder.py +++ b/tensorflow_datasets/datasets/patch_camelyon/patch_camelyon_dataset_builder.py @@ -54,13 +54,13 @@ def _split_generators(self, dl_manager): paths = dl_manager.download_and_extract(resources) return [ tfds.core.SplitGenerator( - name=tfds.Split.TEST, gen_kwargs=dict(split='test', paths=paths) + name=tfds.Split.TEST, gen_kwargs=dict(split='test', paths=paths) # pyrefly: ignore[missing-attribute] ), tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, gen_kwargs=dict(split='train', paths=paths) + name=tfds.Split.TRAIN, gen_kwargs=dict(split='train', paths=paths) # pyrefly: ignore[missing-attribute] ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs=dict(split='valid', paths=paths), ), ] diff --git a/tensorflow_datasets/datasets/paws_wiki/paws_wiki_dataset_builder.py b/tensorflow_datasets/datasets/paws_wiki/paws_wiki_dataset_builder.py index 10c1958f20f..6d34d6e7fae 100644 --- a/tensorflow_datasets/datasets/paws_wiki/paws_wiki_dataset_builder.py +++ b/tensorflow_datasets/datasets/paws_wiki/paws_wiki_dataset_builder.py @@ -44,9 +44,9 @@ _UNLABELED_FINAL: ["train", "dev"], } _SPLIT_MAPPINGS = { - "train": tfds.Split.TRAIN, - "dev": tfds.Split.VALIDATION, - "test": tfds.Split.TEST, + "train": tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] + "dev": tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] + "test": tfds.Split.TEST, # pyrefly: ignore[missing-attribute] } _CLASS_LABELS = ["different_meaning", "paraphrase"] @@ -101,10 +101,10 @@ def _info(self): def _split_generators(self, dl_manager): """Returns SplitGenerators.""" dl_paths = dl_manager.download_and_extract(_DOWNLOAD_URLS) - subset = self.builder_config.subset + subset = self.builder_config.subset # pyrefly: ignore[missing-attribute] labels_path = os.path.join(dl_paths[subset], _EXTRACTED_FOLDERS[subset]) - if self.builder_config.tokenized: + if self.builder_config.tokenized: # pyrefly: ignore[missing-attribute] mappings_path = "" tags2texts = {} else: @@ -193,7 +193,7 @@ def _generate_examples( key = row["id"] label_str = ( "noisy_label" - if self.builder_config.subset == _UNLABELED_FINAL + if self.builder_config.subset == _UNLABELED_FINAL # pyrefly: ignore[missing-attribute] else "label" ) example = { diff --git a/tensorflow_datasets/datasets/paws_x_wiki/paws_x_wiki_dataset_builder.py b/tensorflow_datasets/datasets/paws_x_wiki/paws_x_wiki_dataset_builder.py index 7ed5aff4a1f..41fb2f5fac6 100644 --- a/tensorflow_datasets/datasets/paws_x_wiki/paws_x_wiki_dataset_builder.py +++ b/tensorflow_datasets/datasets/paws_x_wiki/paws_x_wiki_dataset_builder.py @@ -75,32 +75,32 @@ def _split_generators(self, dl_manager): base_path = os.path.join(dl_path["x-final"], "x-final") # Name of file for training for 'en' is different from other languages - if self.builder_config.language == "en": + if self.builder_config.language == "en": # pyrefly: ignore[missing-attribute] training_path = os.path.join( base_path, self.builder_config.language, "train.tsv" ) else: - training_path = os.path.join( + training_path = os.path.join( # pyrefly: ignore[no-matching-overload] base_path, self.builder_config.language, "translated_train.tsv" ) return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={"path": training_path}, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={ - "path": os.path.join( + "path": os.path.join( # pyrefly: ignore[no-matching-overload] base_path, self.builder_config.language, "test_2k.tsv" ) }, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={ - "path": os.path.join( + "path": os.path.join( # pyrefly: ignore[no-matching-overload] base_path, self.builder_config.language, "dev_2k.tsv" ) }, diff --git a/tensorflow_datasets/datasets/penguins/penguins_dataset_builder.py b/tensorflow_datasets/datasets/penguins/penguins_dataset_builder.py index 2254fcffff9..8bc4dd4683a 100644 --- a/tensorflow_datasets/datasets/penguins/penguins_dataset_builder.py +++ b/tensorflow_datasets/datasets/penguins/penguins_dataset_builder.py @@ -159,11 +159,11 @@ class Builder(tfds.core.GeneratorBasedBuilder): def _info(self) -> tfds.core.DatasetInfo: """Returns the dataset metadata.""" supervised_keys = None - features = self.builder_config.features + features = self.builder_config.features # pyrefly: ignore[missing-attribute] supervised_features = features.copy() - label_name = self.builder_config.label + label_name = self.builder_config.label # pyrefly: ignore[missing-attribute] - if self.builder_config.name == 'processed': + if self.builder_config.name == 'processed': # pyrefly: ignore[missing-attribute] label_feature = supervised_features.pop(label_name, None) supervised_keys = ('features', label_name) features = { @@ -187,7 +187,7 @@ def _info(self) -> tfds.core.DatasetInfo: def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" - path = dl_manager.download(_PENGUINS_PATH + self.builder_config.file_name) + path = dl_manager.download(_PENGUINS_PATH + self.builder_config.file_name) # pyrefly: ignore[missing-attribute] return {'train': self._generate_examples(path)} def _generate_examples(self, path): @@ -197,8 +197,8 @@ def _generate_examples(self, path): row = {f: self._clean_up(f, v) for f, v in row.items()} # Pack features if requested. - if self.builder_config.name == 'processed': - label_name = self.builder_config.label + if self.builder_config.name == 'processed': # pyrefly: ignore[missing-attribute] + label_name = self.builder_config.label # pyrefly: ignore[missing-attribute] label = row.pop(label_name, None) row = list(row.values()) yield i, {'features': row, label_name: label} @@ -207,21 +207,21 @@ def _generate_examples(self, path): def _clean_up(self, field, value): """Applies field-level pre-processing, if needed.""" - if not self.builder_config.cleanup: + if not self.builder_config.cleanup: # pyrefly: ignore[missing-attribute] return value if field not in self.builder_config.cleanup: return value - feature_type = self.builder_config.features[field] + feature_type = self.builder_config.features[field] # pyrefly: ignore[missing-attribute] if feature_type == np.float32: # Field is a float. If it won't parse, clean it up. try: return float(value) except ValueError: - return self.builder_config.cleanup[field] + return self.builder_config.cleanup[field] # pyrefly: ignore[bad-index] elif isinstance(feature_type, tfds.features.ClassLabel): # Field is a class. If it's OOV, clean it up. if value not in feature_type.names: - return self.builder_config.cleanup[field] + return self.builder_config.cleanup[field] # pyrefly: ignore[bad-index] return value diff --git a/tensorflow_datasets/datasets/pet_finder/pet_finder_dataset_builder.py b/tensorflow_datasets/datasets/pet_finder/pet_finder_dataset_builder.py index 18ddf3d1d0b..8a1d751d9ad 100644 --- a/tensorflow_datasets/datasets/pet_finder/pet_finder_dataset_builder.py +++ b/tensorflow_datasets/datasets/pet_finder/pet_finder_dataset_builder.py @@ -100,7 +100,7 @@ def _split_generators(self, dl_manager): return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ "csv_name": "train.csv", "csv_paths": dl_paths["train"], @@ -108,7 +108,7 @@ def _split_generators(self, dl_manager): }, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={ "csv_name": "test.csv", "csv_paths": dl_paths["test"], diff --git a/tensorflow_datasets/datasets/pg19/pg19_dataset_builder.py b/tensorflow_datasets/datasets/pg19/pg19_dataset_builder.py index ee50fb17fd2..d8535977a2e 100644 --- a/tensorflow_datasets/datasets/pg19/pg19_dataset_builder.py +++ b/tensorflow_datasets/datasets/pg19/pg19_dataset_builder.py @@ -59,21 +59,21 @@ def _split_generators(self, dl_manager): return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ 'metadata': metadata_dict, 'filepath': os.path.join(_DATA_DIR, 'train'), }, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={ 'metadata': metadata_dict, 'filepath': os.path.join(_DATA_DIR, 'validation'), }, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={ 'metadata': metadata_dict, 'filepath': os.path.join(_DATA_DIR, 'test'), diff --git a/tensorflow_datasets/datasets/piqa/piqa_dataset_builder.py b/tensorflow_datasets/datasets/piqa/piqa_dataset_builder.py index ecda74f02fc..c72237fd8b8 100644 --- a/tensorflow_datasets/datasets/piqa/piqa_dataset_builder.py +++ b/tensorflow_datasets/datasets/piqa/piqa_dataset_builder.py @@ -58,7 +58,7 @@ def _split_generators(self, dl_manager): # Specify the splits return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ "data_path": os.path.join( extracted_path, "physicaliqa-train-dev/train.jsonl" @@ -69,7 +69,7 @@ def _split_generators(self, dl_manager): }, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={ "data_path": os.path.join( extracted_path, "physicaliqa-train-dev/dev.jsonl" diff --git a/tensorflow_datasets/datasets/places365_small/places365_small_dataset_builder.py b/tensorflow_datasets/datasets/places365_small/places365_small_dataset_builder.py index 04d4326998c..c29e488b475 100644 --- a/tensorflow_datasets/datasets/places365_small/places365_small_dataset_builder.py +++ b/tensorflow_datasets/datasets/places365_small/places365_small_dataset_builder.py @@ -18,7 +18,7 @@ import os from etils import epath -import six.moves.urllib as urllib +import six.moves.urllib as urllib # pyrefly: ignore[missing-source-for-stubs] import tensorflow_datasets.public_api as tfds _BASE_URL = "http://data.csail.mit.edu/places/places365/" diff --git a/tensorflow_datasets/datasets/plant_leaves/plant_leaves_dataset_builder.py b/tensorflow_datasets/datasets/plant_leaves/plant_leaves_dataset_builder.py index 1973f03f86c..4bc493d3bbd 100644 --- a/tensorflow_datasets/datasets/plant_leaves/plant_leaves_dataset_builder.py +++ b/tensorflow_datasets/datasets/plant_leaves/plant_leaves_dataset_builder.py @@ -80,7 +80,7 @@ def _split_generators(self, dl_manager): archive_path = dl_manager.download(_DOWNLOAD_URL) return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={"archive": dl_manager.iter_archive(archive_path)}, ) ] diff --git a/tensorflow_datasets/datasets/plant_village/plant_village_dataset_builder.py b/tensorflow_datasets/datasets/plant_village/plant_village_dataset_builder.py index e412103fe15..a7daaa106bd 100644 --- a/tensorflow_datasets/datasets/plant_village/plant_village_dataset_builder.py +++ b/tensorflow_datasets/datasets/plant_village/plant_village_dataset_builder.py @@ -84,7 +84,7 @@ def _split_generators(self, dl_manager): path = dl_manager.download_and_extract(_URL) return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, gen_kwargs={"datapath": path} + name=tfds.Split.TRAIN, gen_kwargs={"datapath": path} # pyrefly: ignore[missing-attribute] ) ] diff --git a/tensorflow_datasets/datasets/plantae_k/plantae_k_dataset_builder.py b/tensorflow_datasets/datasets/plantae_k/plantae_k_dataset_builder.py index 8d3d438132a..f4420b53f3e 100644 --- a/tensorflow_datasets/datasets/plantae_k/plantae_k_dataset_builder.py +++ b/tensorflow_datasets/datasets/plantae_k/plantae_k_dataset_builder.py @@ -91,7 +91,7 @@ def _split_generators(self, dl_manager): ) return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, gen_kwargs={"image_files": image_files} + name=tfds.Split.TRAIN, gen_kwargs={"image_files": image_files} # pyrefly: ignore[missing-attribute] ) ] diff --git a/tensorflow_datasets/datasets/protein_net/protein_net_dataset_builder.py b/tensorflow_datasets/datasets/protein_net/protein_net_dataset_builder.py index 50bf3d1a1a7..177546fb1e7 100644 --- a/tensorflow_datasets/datasets/protein_net/protein_net_dataset_builder.py +++ b/tensorflow_datasets/datasets/protein_net/protein_net_dataset_builder.py @@ -34,7 +34,7 @@ def _parse_array(lines: Sequence[str]) -> np.ndarray: """Parse lines of tab-separated numbers into an array.""" - lines = [x.split('\t') for x in lines] + lines = [x.split('\t') for x in lines] # pyrefly: ignore[bad-assignment] return np.array(lines, dtype=np.float32) @@ -165,16 +165,16 @@ def _split_generators( self, dl_manager: tfds.download.DownloadManager ) -> Dict[Union[str, tfds.Split], _ExampleIterator]: """Returns SplitGenerators.""" - name = self.builder_config.name # Configurable dataset (config) name. + name = self.builder_config.name # Configurable dataset (config) name. # pyrefly: ignore[missing-attribute] path = dl_manager.download_and_extract( urllib.parse.urljoin(self.URL, self.FILES[name]) ) splits = { - tfds.Split.VALIDATION: self._generate_examples( + tfds.Split.VALIDATION: self._generate_examples( # pyrefly: ignore[missing-attribute] os.path.join(path, name, 'validation') ), - tfds.Split.TEST: self._generate_examples( + tfds.Split.TEST: self._generate_examples( # pyrefly: ignore[missing-attribute] os.path.join(path, name, 'testing') ), } diff --git a/tensorflow_datasets/datasets/qa4mre/qa4mre_dataset_builder.py b/tensorflow_datasets/datasets/qa4mre/qa4mre_dataset_builder.py index 45ad4406f19..8c87d9c80ff 100644 --- a/tensorflow_datasets/datasets/qa4mre/qa4mre_dataset_builder.py +++ b/tensorflow_datasets/datasets/qa4mre/qa4mre_dataset_builder.py @@ -92,10 +92,10 @@ def _get_question( 'document_id': document_id, 'document_str': document_str, 'question_id': question_id, - 'question_str': question_str, + 'question_str': question_str, # pyrefly: ignore[unbound-name] 'answer_options': possible_answers, - 'correct_answer_id': correct_answer_id, - 'correct_answer_str': correct_answer_str, + 'correct_answer_id': correct_answer_id, # pyrefly: ignore[unbound-name] + 'correct_answer_str': correct_answer_str, # pyrefly: ignore[unbound-name] } return id_, feats @@ -247,17 +247,17 @@ def _split_generators(self, dl_manager): cfg = self.builder_config download_urls = dict() - if cfg.track == 'main': - download_urls['{}.main.{}'.format(cfg.year, cfg.lang)] = os.path.join( - _BASE_URL, PATHS[cfg.year]['_PATH_TMPL_MAIN_GS'].format(cfg.lang) + if cfg.track == 'main': # pyrefly: ignore[missing-attribute] + download_urls['{}.main.{}'.format(cfg.year, cfg.lang)] = os.path.join( # pyrefly: ignore[missing-attribute] + _BASE_URL, PATHS[cfg.year]['_PATH_TMPL_MAIN_GS'].format(cfg.lang) # pyrefly: ignore[missing-attribute] ) # pytype: disable=attribute-error - if cfg.year in ['2012', '2013'] and cfg.track == 'alzheimers': - download_urls['{}.alzheimers.EN'.format(cfg.year)] = os.path.join( + if cfg.year in ['2012', '2013'] and cfg.track == 'alzheimers': # pyrefly: ignore[missing-attribute] + download_urls['{}.alzheimers.EN'.format(cfg.year)] = os.path.join( # pyrefly: ignore[no-matching-overload] _BASE_URL, PATHS[cfg.year]['_PATH_ALZHEIMER'] ) - if cfg.year == '2013' and cfg.track == 'entrance_exam': + if cfg.year == '2013' and cfg.track == 'entrance_exam': # pyrefly: ignore[missing-attribute] download_urls['2013.entrance_exam.EN'] = os.path.join( _BASE_URL, PATHS[cfg.year]['_PATH_ENTRANCE_EXAM'] ) @@ -266,7 +266,7 @@ def _split_generators(self, dl_manager): return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ 'filepath': downloaded_files[ '{}.{}.{}'.format(cfg.year, cfg.track, cfg.lang) @@ -293,7 +293,7 @@ def _generate_examples(self, filepath): topic_id, topic_name, test_id, - document_id, - document_str, + document_id, # pyrefly: ignore[unbound-name] + document_str, # pyrefly: ignore[unbound-name] question, ) diff --git a/tensorflow_datasets/datasets/qasc/qasc_dataset_builder.py b/tensorflow_datasets/datasets/qasc/qasc_dataset_builder.py index 7520bb4fbce..9279f0d1049 100644 --- a/tensorflow_datasets/datasets/qasc/qasc_dataset_builder.py +++ b/tensorflow_datasets/datasets/qasc/qasc_dataset_builder.py @@ -55,15 +55,15 @@ def _split_generators(self, dl_manager): data_dir = os.path.join(dl_dir["QASC_Dataset"], "QASC_Dataset") return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={"filepath": os.path.join(data_dir, "train.jsonl")}, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={"filepath": os.path.join(data_dir, "test.jsonl")}, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={"filepath": os.path.join(data_dir, "dev.jsonl")}, ), ] diff --git a/tensorflow_datasets/datasets/qm9/qm9_dataset_builder.py b/tensorflow_datasets/datasets/qm9/qm9_dataset_builder.py index bdf5c5be89d..722d7dec19a 100644 --- a/tensorflow_datasets/datasets/qm9/qm9_dataset_builder.py +++ b/tensorflow_datasets/datasets/qm9/qm9_dataset_builder.py @@ -131,7 +131,7 @@ def _process_molecule( thermo = 0 for z in atoms['Z'].values: thermo += v[z] - example[f'{k}_atomization'] = example[k] - thermo + example[f'{k}_atomization'] = example[k] - thermo # pyrefly: ignore[unsupported-operation] return example @@ -234,7 +234,7 @@ class Builder(tfds.core.GeneratorBasedBuilder): def _info(self) -> tfds.core.DatasetInfo: """Returns the dataset metadata.""" return self.dataset_info_from_configs( - disable_shuffling=self.builder_config.permutation_seed is None, + disable_shuffling=self.builder_config.permutation_seed is None, # pyrefly: ignore[missing-attribute] features=tfds.features.FeaturesDict({ 'num_atoms': tfds.features.Tensor(shape=(), dtype=np.int64), 'charges': tfds.features.Tensor(shape=(29,), dtype=np.int64), @@ -306,9 +306,9 @@ def _split_generators( split_ids = _get_split_ids( uncharacterized, - permutation_seed=self.builder_config.permutation_seed, - train_size=self.builder_config.train_size, - validation_size=self.builder_config.validation_size, + permutation_seed=self.builder_config.permutation_seed, # pyrefly: ignore[missing-attribute] + train_size=self.builder_config.train_size, # pyrefly: ignore[missing-attribute] + validation_size=self.builder_config.validation_size, # pyrefly: ignore[missing-attribute] ) return { diff --git a/tensorflow_datasets/datasets/quality/quality_dataset_builder.py b/tensorflow_datasets/datasets/quality/quality_dataset_builder.py index c5c60088c08..bb01dfa2cb1 100644 --- a/tensorflow_datasets/datasets/quality/quality_dataset_builder.py +++ b/tensorflow_datasets/datasets/quality/quality_dataset_builder.py @@ -88,7 +88,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" path = dl_manager.download_and_extract(_DOWNLOAD_URL) - if self.builder_config.stripped: + if self.builder_config.stripped: # pyrefly: ignore[missing-attribute] return { 'train': self._generate_examples( path / 'QuALITY.v0.9.htmlstripped.train', 'train' diff --git a/tensorflow_datasets/datasets/quickdraw_bitmap/quickdraw_bitmap_dataset_builder.py b/tensorflow_datasets/datasets/quickdraw_bitmap/quickdraw_bitmap_dataset_builder.py index 24240df11d7..82390ceb69e 100644 --- a/tensorflow_datasets/datasets/quickdraw_bitmap/quickdraw_bitmap_dataset_builder.py +++ b/tensorflow_datasets/datasets/quickdraw_bitmap/quickdraw_bitmap_dataset_builder.py @@ -67,7 +67,7 @@ def _split_generators(self, dl_manager): # There is no predefined train/test split for this dataset. return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ "file_paths": file_paths, }, diff --git a/tensorflow_datasets/datasets/race/race_dataset_builder.py b/tensorflow_datasets/datasets/race/race_dataset_builder.py index 26b0b1acf07..bfc9eebd747 100644 --- a/tensorflow_datasets/datasets/race/race_dataset_builder.py +++ b/tensorflow_datasets/datasets/race/race_dataset_builder.py @@ -64,7 +64,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): ) path = path / "RACE" - task = self.builder_config.name + task = self.builder_config.name # pyrefly: ignore[missing-attribute] return { "train": self._generate_examples(path / "train" / task), diff --git a/tensorflow_datasets/datasets/reddit/reddit_dataset_builder.py b/tensorflow_datasets/datasets/reddit/reddit_dataset_builder.py index 207a16df27a..6f4ca8987f6 100644 --- a/tensorflow_datasets/datasets/reddit/reddit_dataset_builder.py +++ b/tensorflow_datasets/datasets/reddit/reddit_dataset_builder.py @@ -57,7 +57,7 @@ def _split_generators(self, dl_manager): dl_path = dl_manager.download_and_extract(_URL) return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ "path": os.path.join(dl_path, "corpus-webis-tldr-17.json") }, diff --git a/tensorflow_datasets/datasets/reddit_disentanglement/reddit_disentanglement_dataset_builder.py b/tensorflow_datasets/datasets/reddit_disentanglement/reddit_disentanglement_dataset_builder.py index 144afeedd81..68ffac3981d 100644 --- a/tensorflow_datasets/datasets/reddit_disentanglement/reddit_disentanglement_dataset_builder.py +++ b/tensorflow_datasets/datasets/reddit_disentanglement/reddit_disentanglement_dataset_builder.py @@ -104,17 +104,17 @@ def _split_generators(self, dl_manager): """Returns SplitGenerators.""" return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ "path": os.path.join(dl_manager.manual_dir, "train.csv") }, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={"path": os.path.join(dl_manager.manual_dir, "val.csv")}, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={ "path": os.path.join(dl_manager.manual_dir, "test.csv") }, diff --git a/tensorflow_datasets/datasets/reddit_tifu/reddit_tifu_dataset_builder.py b/tensorflow_datasets/datasets/reddit_tifu/reddit_tifu_dataset_builder.py index 1ae65eecb5e..32eb0e784d1 100644 --- a/tensorflow_datasets/datasets/reddit_tifu/reddit_tifu_dataset_builder.py +++ b/tensorflow_datasets/datasets/reddit_tifu/reddit_tifu_dataset_builder.py @@ -90,13 +90,13 @@ def _info(self): ) return self.dataset_info_from_configs( features=tfds.features.FeaturesDict(features), - supervised_keys=(_DOCUMENT, self.builder_config.summary_key), + supervised_keys=(_DOCUMENT, self.builder_config.summary_key), # pyrefly: ignore[missing-attribute] homepage="https://github.com/ctr4si/MMN", ) def _split_generators(self, dl_manager): """Returns SplitGenerators.""" - if self.builder_config.name == "long_split": + if self.builder_config.name == "long_split": # pyrefly: ignore[missing-attribute] paths = dl_manager.download_and_extract( {"data": _URL, "split": _LONG_SPLIT} ) @@ -104,7 +104,7 @@ def _split_generators(self, dl_manager): split_path = paths["split"] return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ "path": dl_path, "split_path": split_path, @@ -112,7 +112,7 @@ def _split_generators(self, dl_manager): }, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={ "path": dl_path, "split_path": split_path, @@ -120,7 +120,7 @@ def _split_generators(self, dl_manager): }, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={ "path": dl_path, "split_path": split_path, @@ -132,7 +132,7 @@ def _split_generators(self, dl_manager): dl_path = dl_manager.download_and_extract(_URL) return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={"path": dl_path}, ) ] @@ -149,7 +149,7 @@ def _generate_examples(self, path=None, split_path=None, split=None): # 'selftext','trimmed_title','selftext_without_tldr_tokenized', # 'id','selftext_without_tldr' d = json.loads(line) - if not split or (split and d["id"] in split_data[split]): + if not split or (split and d["id"] in split_data[split]): # pyrefly: ignore[unbound-name] r = { _DOCUMENT: d["selftext_without_tldr"].strip(), _TITLE: d["trimmed_title"].strip(), @@ -158,5 +158,5 @@ def _generate_examples(self, path=None, split_path=None, split=None): } r.update({k: d[k] for k in _ADDITIONAL_FEATURES}) # skip if document or summary is empty - if r[_DOCUMENT] and r[self.builder_config.summary_key]: + if r[_DOCUMENT] and r[self.builder_config.summary_key]: # pyrefly: ignore[missing-attribute] yield i, r diff --git a/tensorflow_datasets/datasets/ref_coco/ref_coco_dataset_builder.py b/tensorflow_datasets/datasets/ref_coco/ref_coco_dataset_builder.py index 00be8eac326..3f514ee9ac2 100644 --- a/tensorflow_datasets/datasets/ref_coco/ref_coco_dataset_builder.py +++ b/tensorflow_datasets/datasets/ref_coco/ref_coco_dataset_builder.py @@ -218,35 +218,35 @@ def _info(self): def _split_generators(self, dl_manager): allowed_splits = { ('refcoco', 'google'): [ - tfds.Split.TRAIN, - tfds.Split.VALIDATION, - tfds.Split.TEST, + tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] + tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] + tfds.Split.TEST, # pyrefly: ignore[missing-attribute] ], ('refcoco', 'unc'): [ - tfds.Split.TRAIN, - tfds.Split.VALIDATION, + tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] + tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] 'testA', 'testB', ], ('refcocoplus', 'unc'): [ - tfds.Split.TRAIN, - tfds.Split.VALIDATION, + tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] + tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] 'testA', 'testB', ], - ('refcocog', 'google'): [tfds.Split.TRAIN, tfds.Split.VALIDATION], + ('refcocog', 'google'): [tfds.Split.TRAIN, tfds.Split.VALIDATION], # pyrefly: ignore[missing-attribute] ('refcocog', 'umd'): [ - tfds.Split.TRAIN, - tfds.Split.VALIDATION, - tfds.Split.TEST, + tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] + tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] + tfds.Split.TEST, # pyrefly: ignore[missing-attribute] ], } bc = self.builder_config - splits = allowed_splits[(bc.dataset, bc.dataset_partition)] + splits = allowed_splits[(bc.dataset, bc.dataset_partition)] # pyrefly: ignore[missing-attribute] return { split: self._generate_examples( - bc.dataset, bc.dataset_partition, split, dl_manager + bc.dataset, bc.dataset_partition, split, dl_manager # pyrefly: ignore[missing-attribute] ) for split in splits } @@ -259,7 +259,7 @@ def _generate_examples(self, dataset, dataset_partition, split, dl_manager): if dataset == 'refcocoplus': dataset = 'refcoco+' - if split == tfds.Split.VALIDATION: + if split == tfds.Split.VALIDATION: # pyrefly: ignore[missing-attribute] split = 'val' for image_id, example in _generate_examples( diff --git a/tensorflow_datasets/datasets/resisc45/resisc45_dataset_builder.py b/tensorflow_datasets/datasets/resisc45/resisc45_dataset_builder.py index 3df92d1f80c..7eb4fbef7ce 100644 --- a/tensorflow_datasets/datasets/resisc45/resisc45_dataset_builder.py +++ b/tensorflow_datasets/datasets/resisc45/resisc45_dataset_builder.py @@ -103,7 +103,7 @@ def _split_generators(self, dl_manager): ) return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={'path': path}, ), ] diff --git a/tensorflow_datasets/datasets/robomimic_mh/robomimic_mh_dataset_builder.py b/tensorflow_datasets/datasets/robomimic_mh/robomimic_mh_dataset_builder.py index 1ce49b41099..c17c950cb8f 100644 --- a/tensorflow_datasets/datasets/robomimic_mh/robomimic_mh_dataset_builder.py +++ b/tensorflow_datasets/datasets/robomimic_mh/robomimic_mh_dataset_builder.py @@ -35,7 +35,7 @@ class Builder(utils.RobomimicBuilder): DATASET_NAME = 'robomimic_mh' def _get_metadata(self) -> Dict[Any, Any]: - if self.builder_config.task == utils.Task.TRANSPORT: + if self.builder_config.task == utils.Task.TRANSPORT: # pyrefly: ignore[missing-attribute] episode_metadata = { '20_percent': np.bool_, '20_percent_train': np.bool_, diff --git a/tensorflow_datasets/datasets/robomimic_ph/robomimic_ph_dataset_builder.py b/tensorflow_datasets/datasets/robomimic_ph/robomimic_ph_dataset_builder.py index baabb7053c4..2bc223cb7e8 100644 --- a/tensorflow_datasets/datasets/robomimic_ph/robomimic_ph_dataset_builder.py +++ b/tensorflow_datasets/datasets/robomimic_ph/robomimic_ph_dataset_builder.py @@ -36,7 +36,7 @@ class Builder(utils.RobomimicBuilder): DATASET_NAME = 'robomimic_ph' def _get_metadata(self) -> Dict[Any, Any]: - if self.builder_config.task != utils.Task.TOOL_HANG: + if self.builder_config.task != utils.Task.TOOL_HANG: # pyrefly: ignore[missing-attribute] episode_metadata = { '20_percent': np.bool_, '20_percent_train': np.bool_, diff --git a/tensorflow_datasets/datasets/robonet/robonet_dataset_builder.py b/tensorflow_datasets/datasets/robonet/robonet_dataset_builder.py index 067a1dc5fe9..406ac1e7de9 100644 --- a/tensorflow_datasets/datasets/robonet/robonet_dataset_builder.py +++ b/tensorflow_datasets/datasets/robonet/robonet_dataset_builder.py @@ -137,8 +137,8 @@ class Builder(tfds.core.BeamBasedBuilder): ] def _info(self): - if self.builder_config.width is not None: - if self.builder_config.height is None: + if self.builder_config.width is not None: # pyrefly: ignore[missing-attribute] + if self.builder_config.height is None: # pyrefly: ignore[missing-attribute] raise ValueError('Provide either both height and width or none.') ffmpeg_extra_args = ( '-vf', @@ -151,7 +151,7 @@ def _info(self): video_shape = ( None, - self.builder_config.height, + self.builder_config.height, # pyrefly: ignore[missing-attribute] self.builder_config.width, 3, ) @@ -159,7 +159,7 @@ def _info(self): features = tfds.features.FeaturesDict({ # Video frames: uint8 [None, Time, Width, Height, Channels] 'video': tfds.features.Video( - video_shape, + video_shape, # pyrefly: ignore[bad-argument-type] ffmpeg_extra_args=ffmpeg_extra_args, encoding_format='png', ), # pytype: disable=wrong-arg-types # gen-stub-imports @@ -182,11 +182,11 @@ def _info(self): def _split_generators(self, dl_manager): files = dl_manager.download_and_extract( - DATA_URL_SAMPLE if self.builder_config.sample_dataset else DATA_URL + DATA_URL_SAMPLE if self.builder_config.sample_dataset else DATA_URL # pyrefly: ignore[missing-attribute] ) return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ 'filedir': os.path.join(files, 'hdf5'), }, diff --git a/tensorflow_datasets/datasets/rock_paper_scissors/rock_paper_scissors_dataset_builder.py b/tensorflow_datasets/datasets/rock_paper_scissors/rock_paper_scissors_dataset_builder.py index 0dce7245acd..08ffdb64fa6 100644 --- a/tensorflow_datasets/datasets/rock_paper_scissors/rock_paper_scissors_dataset_builder.py +++ b/tensorflow_datasets/datasets/rock_paper_scissors/rock_paper_scissors_dataset_builder.py @@ -56,13 +56,13 @@ def _split_generators(self, dl_manager): return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ "archive": dl_manager.iter_archive(train_path), }, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={ "archive": dl_manager.iter_archive(test_path), }, diff --git a/tensorflow_datasets/datasets/s3o4d/s3o4d_dataset_builder.py b/tensorflow_datasets/datasets/s3o4d/s3o4d_dataset_builder.py index 18e6cef9a13..380ccb0102a 100644 --- a/tensorflow_datasets/datasets/s3o4d/s3o4d_dataset_builder.py +++ b/tensorflow_datasets/datasets/s3o4d/s3o4d_dataset_builder.py @@ -68,8 +68,8 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): '_'.join([a, b]), self._generate_examples( dl_manager, - paths['_'.join([a, b, 'img'])], - paths['_'.join([a, b, 'latent'])], + paths['_'.join([a, b, 'img'])], # pyrefly: ignore[bad-index] + paths['_'.join([a, b, 'latent'])], # pyrefly: ignore[bad-index] a, ), ) diff --git a/tensorflow_datasets/datasets/salient_span_wikipedia/salient_span_wikipedia_dataset_builder.py b/tensorflow_datasets/datasets/salient_span_wikipedia/salient_span_wikipedia_dataset_builder.py index e94a557ab1f..fc11a5f26bd 100644 --- a/tensorflow_datasets/datasets/salient_span_wikipedia/salient_span_wikipedia_dataset_builder.py +++ b/tensorflow_datasets/datasets/salient_span_wikipedia/salient_span_wikipedia_dataset_builder.py @@ -66,7 +66,7 @@ def _info(self): "type": np.str_, }), } - if not self.builder_config.split_sentences: + if not self.builder_config.split_sentences: # pyrefly: ignore[missing-attribute] feature_dict["sentences"] = tfds.features.Sequence({ "start": np.int32, "limit": np.int32, @@ -79,8 +79,8 @@ def _info(self): def _split_generators(self, dl_manager): return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, - gen_kwargs={"split_sentences": self.builder_config.split_sentences}, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] + gen_kwargs={"split_sentences": self.builder_config.split_sentences}, # pyrefly: ignore[missing-attribute] ), ] diff --git a/tensorflow_datasets/datasets/samsum/samsum_dataset_builder.py b/tensorflow_datasets/datasets/samsum/samsum_dataset_builder.py index a53739cd988..7b2b1eb45d7 100644 --- a/tensorflow_datasets/datasets/samsum/samsum_dataset_builder.py +++ b/tensorflow_datasets/datasets/samsum/samsum_dataset_builder.py @@ -53,19 +53,19 @@ def _split_generators( """Returns SplitGenerators.""" return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ "path": os.path.join(dl_manager.manual_dir, "train.json") }, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={ "path": os.path.join(dl_manager.manual_dir, "val.json") }, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={ "path": os.path.join(dl_manager.manual_dir, "test.json") }, diff --git a/tensorflow_datasets/datasets/savee/savee_dataset_builder.py b/tensorflow_datasets/datasets/savee/savee_dataset_builder.py index f362192010d..96cae330253 100644 --- a/tensorflow_datasets/datasets/savee/savee_dataset_builder.py +++ b/tensorflow_datasets/datasets/savee/savee_dataset_builder.py @@ -158,15 +158,15 @@ def _split_generators(self, dl_manager): return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={'file_names': splits['train']}, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={'file_names': splits['validation']}, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={'file_names': splits['test']}, ), ] diff --git a/tensorflow_datasets/datasets/scan/scan_dataset_builder.py b/tensorflow_datasets/datasets/scan/scan_dataset_builder.py index bfcd7d9656b..ad1ac8fb125 100644 --- a/tensorflow_datasets/datasets/scan/scan_dataset_builder.py +++ b/tensorflow_datasets/datasets/scan/scan_dataset_builder.py @@ -102,10 +102,10 @@ def _split_generators(self, dl_manager): """Returns SplitGenerators.""" data_dir = dl_manager.download_and_extract(_DATA_URL) data_dir = os.path.join( - data_dir, 'SCAN-master', self.builder_config.directory + data_dir, 'SCAN-master', self.builder_config.directory # pyrefly: ignore[missing-attribute] ) - split = self.builder_config.name - splitfile = self.builder_config.splitfile + split = self.builder_config.name # pyrefly: ignore[missing-attribute] + splitfile = self.builder_config.splitfile # pyrefly: ignore[missing-attribute] if 'mcd' in split: split_dir = dl_manager.download_and_extract(_MCD_SPLITS_URL) split_dir = os.path.join(split_dir, 'scan-splits') @@ -128,9 +128,9 @@ def _split_generators(self, dl_manager): } return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, gen_kwargs=train_kwargs + name=tfds.Split.TRAIN, gen_kwargs=train_kwargs # pyrefly: ignore[missing-attribute] ), - tfds.core.SplitGenerator(name=tfds.Split.TEST, gen_kwargs=test_kwargs), + tfds.core.SplitGenerator(name=tfds.Split.TEST, gen_kwargs=test_kwargs), # pyrefly: ignore[missing-attribute] ] def _read_examples(self, datapath): diff --git a/tensorflow_datasets/datasets/scene_parse150/scene_parse150_dataset_builder.py b/tensorflow_datasets/datasets/scene_parse150/scene_parse150_dataset_builder.py index dfe90633df2..8b43291e0ba 100644 --- a/tensorflow_datasets/datasets/scene_parse150/scene_parse150_dataset_builder.py +++ b/tensorflow_datasets/datasets/scene_parse150/scene_parse150_dataset_builder.py @@ -51,7 +51,7 @@ def _split_generators(self, dl_manager): return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ "images_dir_path": os.path.join( dl_paths["images"], "images/training" @@ -62,7 +62,7 @@ def _split_generators(self, dl_manager): }, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={ "images_dir_path": os.path.join( dl_paths["images"], "images/validation" diff --git a/tensorflow_datasets/datasets/sci_tail/sci_tail_dataset_builder.py b/tensorflow_datasets/datasets/sci_tail/sci_tail_dataset_builder.py index 8f777cab1d1..6fce065cf4a 100644 --- a/tensorflow_datasets/datasets/sci_tail/sci_tail_dataset_builder.py +++ b/tensorflow_datasets/datasets/sci_tail/sci_tail_dataset_builder.py @@ -58,13 +58,13 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): # Returns the Dict[split names, Iterator[Key, Example]] return { - tfds.Split.TRAIN: self._generate_examples( + tfds.Split.TRAIN: self._generate_examples( # pyrefly: ignore[missing-attribute] path=data_dir / _SCITAIL_DIR / _TSV_DIR / 'scitail_1.0_train.tsv' ), - tfds.Split.VALIDATION: self._generate_examples( + tfds.Split.VALIDATION: self._generate_examples( # pyrefly: ignore[missing-attribute] path=data_dir / _SCITAIL_DIR / _TSV_DIR / 'scitail_1.0_dev.tsv' ), - tfds.Split.TEST: self._generate_examples( + tfds.Split.TEST: self._generate_examples( # pyrefly: ignore[missing-attribute] path=data_dir / _SCITAIL_DIR / _TSV_DIR / 'scitail_1.0_test.tsv' ), } diff --git a/tensorflow_datasets/datasets/scicite/scicite_dataset_builder.py b/tensorflow_datasets/datasets/scicite/scicite_dataset_builder.py index 5a747844bbe..0383eafcd2c 100644 --- a/tensorflow_datasets/datasets/scicite/scicite_dataset_builder.py +++ b/tensorflow_datasets/datasets/scicite/scicite_dataset_builder.py @@ -78,22 +78,22 @@ def _split_generators(self, dl_manager): path = os.path.join(dl_paths["scicite"], "scicite") return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={"path": os.path.join(path, "train.jsonl")}, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={"path": os.path.join(path, "dev.jsonl")}, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={"path": os.path.join(path, "test.jsonl")}, ), ] def _generate_examples(self, path=None): """Yields examples.""" - with epath.Path(path).open() as f: + with epath.Path(path).open() as f: # pyrefly: ignore[bad-argument-type] unique_ids = {} for line in f: d = json.loads(line) diff --git a/tensorflow_datasets/datasets/scientific_papers/scientific_papers_dataset_builder.py b/tensorflow_datasets/datasets/scientific_papers/scientific_papers_dataset_builder.py index 4c43403c97d..f0c755b5685 100644 --- a/tensorflow_datasets/datasets/scientific_papers/scientific_papers_dataset_builder.py +++ b/tensorflow_datasets/datasets/scientific_papers/scientific_papers_dataset_builder.py @@ -77,26 +77,26 @@ def _split_generators(self, dl_manager): dl_paths = dl_manager.download_and_extract(_URLS) path = os.path.join( dl_paths[self.builder_config.name], - self.builder_config.name + "-dataset", + self.builder_config.name + "-dataset", # pyrefly: ignore[missing-attribute] ) return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={"path": os.path.join(path, "train.txt")}, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={"path": os.path.join(path, "val.txt")}, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={"path": os.path.join(path, "test.txt")}, ), ] def _generate_examples(self, path=None): """Yields examples.""" - with epath.Path(path).open() as f: + with epath.Path(path).open() as f: # pyrefly: ignore[bad-argument-type] for line in f: # Possible keys are: # "article_id": str diff --git a/tensorflow_datasets/datasets/segment_anything/segment_anything_dataset_builder.py b/tensorflow_datasets/datasets/segment_anything/segment_anything_dataset_builder.py index f4c6030da09..a3767225c83 100644 --- a/tensorflow_datasets/datasets/segment_anything/segment_anything_dataset_builder.py +++ b/tensorflow_datasets/datasets/segment_anything/segment_anything_dataset_builder.py @@ -151,7 +151,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager): dict_of_urls[name] = url paths = dl_manager.download_and_extract(dict_of_urls) return { - 'train': self._generate_examples(paths), + 'train': self._generate_examples(paths), # pyrefly: ignore[bad-argument-type] } def _generate_examples(self, paths: Dict[str, epath.Path]): diff --git a/tensorflow_datasets/datasets/sentiment140/sentiment140_dataset_builder.py b/tensorflow_datasets/datasets/sentiment140/sentiment140_dataset_builder.py index b6592e2334d..1386b0ad34a 100644 --- a/tensorflow_datasets/datasets/sentiment140/sentiment140_dataset_builder.py +++ b/tensorflow_datasets/datasets/sentiment140/sentiment140_dataset_builder.py @@ -53,7 +53,7 @@ def _split_generators(self, dl_manager): return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ "path": os.path.join( dl_paths, "training.1600000.processed.noemoticon.csv" @@ -61,7 +61,7 @@ def _split_generators(self, dl_manager): }, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={ "path": os.path.join(dl_paths, "testdata.manual.2009.06.14.csv") }, diff --git a/tensorflow_datasets/datasets/shapes3d/shapes3d_dataset_builder.py b/tensorflow_datasets/datasets/shapes3d/shapes3d_dataset_builder.py index a2dc9729fec..3afc272a741 100644 --- a/tensorflow_datasets/datasets/shapes3d/shapes3d_dataset_builder.py +++ b/tensorflow_datasets/datasets/shapes3d/shapes3d_dataset_builder.py @@ -59,7 +59,7 @@ def _split_generators(self, dl_manager): # There is no predefined train/val/test split for this dataset. return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, gen_kwargs=dict(filepath=filepath) + name=tfds.Split.TRAIN, gen_kwargs=dict(filepath=filepath) # pyrefly: ignore[missing-attribute] ), ] diff --git a/tensorflow_datasets/datasets/siscore/siscore_dataset_builder.py b/tensorflow_datasets/datasets/siscore/siscore_dataset_builder.py index 477de68328f..86c54037974 100644 --- a/tensorflow_datasets/datasets/siscore/siscore_dataset_builder.py +++ b/tensorflow_datasets/datasets/siscore/siscore_dataset_builder.py @@ -81,7 +81,7 @@ def _info(self): def _split_generators(self, dl_manager): """Returns SplitGenerator.""" # using rotation link only for now - variant = self.builder_config.variant + variant = self.builder_config.variant # pyrefly: ignore[missing-attribute] dataset_url = "/".join((_BASE_URL, f"{variant}.zip")) path = dl_manager.download_and_extract(dataset_url) path = os.path.join(path, _VARIANT_EXPANDED_DIR_NAMES[variant]) diff --git a/tensorflow_datasets/datasets/smallnorb/smallnorb_dataset_builder.py b/tensorflow_datasets/datasets/smallnorb/smallnorb_dataset_builder.py index 8a861df15f6..22006062e21 100644 --- a/tensorflow_datasets/datasets/smallnorb/smallnorb_dataset_builder.py +++ b/tensorflow_datasets/datasets/smallnorb/smallnorb_dataset_builder.py @@ -59,7 +59,7 @@ def _info(self): "label_lighting": tfds.features.ClassLabel(num_classes=6), } if self.version > "2.0.0": - features_dict["id"] = tfds.features.Text() + features_dict["id"] = tfds.features.Text() # pyrefly: ignore[bad-assignment] return self.dataset_info_from_configs( features=tfds.features.FeaturesDict(features_dict), homepage="https://cs.nyu.edu/~ylclab/data/norb-v1.0-small/", @@ -81,7 +81,7 @@ def _split_generators(self, dl_manager): return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs=dict( split_prefix="train_", dat_path=files["training_dat"], @@ -90,7 +90,7 @@ def _split_generators(self, dl_manager): ), ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs=dict( split_prefix="test_", dat_path=files["testing_dat"], diff --git a/tensorflow_datasets/datasets/snli/snli_dataset_builder.py b/tensorflow_datasets/datasets/snli/snli_dataset_builder.py index fe8b0035398..2e20c615d93 100644 --- a/tensorflow_datasets/datasets/snli/snli_dataset_builder.py +++ b/tensorflow_datasets/datasets/snli/snli_dataset_builder.py @@ -49,17 +49,17 @@ def _split_generators(self, dl_manager): data_dir = os.path.join(dl_dir, 'snli_1.0') return [ tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={ 'filepath': os.path.join(data_dir, 'snli_1.0_test.txt') }, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={'filepath': os.path.join(data_dir, 'snli_1.0_dev.txt')}, ), tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ 'filepath': os.path.join(data_dir, 'snli_1.0_train.txt') }, diff --git a/tensorflow_datasets/datasets/so2sat/so2sat_dataset_builder.py b/tensorflow_datasets/datasets/so2sat/so2sat_dataset_builder.py index 2198dce2a5b..2a180981fae 100644 --- a/tensorflow_datasets/datasets/so2sat/so2sat_dataset_builder.py +++ b/tensorflow_datasets/datasets/so2sat/so2sat_dataset_builder.py @@ -87,7 +87,7 @@ class Builder(tfds.core.GeneratorBasedBuilder): ] def _info(self): - if self.builder_config.selection == 'rgb': + if self.builder_config.selection == 'rgb': # pyrefly: ignore[missing-attribute] features = tfds.features.FeaturesDict({ 'image': tfds.features.Image(shape=[32, 32, 3]), 'label': tfds.features.ClassLabel(names=_LABELS), @@ -107,8 +107,8 @@ def _info(self): }) supervised_keys = None return self.dataset_info_from_configs( - features=features, - supervised_keys=supervised_keys, + features=features, # pyrefly: ignore[unbound-name] + supervised_keys=supervised_keys, # pyrefly: ignore[unbound-name] homepage='http://doi.org/10.14459/2018MP1454690', ) @@ -120,17 +120,17 @@ def _split_generators(self, dl_manager): }) return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ 'path': paths['train'], - 'selection': self.builder_config.selection, + 'selection': self.builder_config.selection, # pyrefly: ignore[missing-attribute] }, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={ 'path': paths['val'], - 'selection': self.builder_config.selection, + 'selection': self.builder_config.selection, # pyrefly: ignore[missing-attribute] }, ), ] @@ -155,7 +155,7 @@ def _generate_examples(self, path, selection): 'label': np.argmax(label[i]).astype(int), 'sample_id': i, } - yield i, record + yield i, record # pyrefly: ignore[unbound-name] def _create_rgb(sen2_bands): diff --git a/tensorflow_datasets/datasets/speech_commands/speech_commands_dataset_builder.py b/tensorflow_datasets/datasets/speech_commands/speech_commands_dataset_builder.py index 4b1989b6b24..ec3f31a29f4 100644 --- a/tensorflow_datasets/datasets/speech_commands/speech_commands_dataset_builder.py +++ b/tensorflow_datasets/datasets/speech_commands/speech_commands_dataset_builder.py @@ -71,21 +71,21 @@ def _split_generators(self, dl_manager): return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ 'archive': dl_manager.iter_archive(dl_path), 'file_list': train_paths, }, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={ 'archive': dl_manager.iter_archive(dl_path), 'file_list': validation_paths, }, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={ 'archive': dl_manager.iter_archive(dl_test_path), 'file_list': None, @@ -160,12 +160,12 @@ def _split_archive(self, train_archive): train_paths.append(path) # Original validation files did include silence - we add them manually here - validation_paths.append(os.path.join(BACKGROUND_NOISE, 'running_tap.wav')) + validation_paths.append(os.path.join(BACKGROUND_NOISE, 'running_tap.wav')) # pyrefly: ignore[unbound-name] # The paths for the train set is just whichever paths that do not exist in # either the test or validation splits. train_paths = ( - set(train_paths) - set(validation_paths) - set(train_test_paths) + set(train_paths) - set(validation_paths) - set(train_test_paths) # pyrefly: ignore[unbound-name] ) return train_paths, validation_paths diff --git a/tensorflow_datasets/datasets/spoken_digit/spoken_digit_dataset_builder.py b/tensorflow_datasets/datasets/spoken_digit/spoken_digit_dataset_builder.py index 33fbc7c6151..a0973a5b54a 100644 --- a/tensorflow_datasets/datasets/spoken_digit/spoken_digit_dataset_builder.py +++ b/tensorflow_datasets/datasets/spoken_digit/spoken_digit_dataset_builder.py @@ -50,7 +50,7 @@ def _split_generators(self, dl_manager): # There is no predefined train/val/test split for this dataset. return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, gen_kwargs={"path": path} + name=tfds.Split.TRAIN, gen_kwargs={"path": path} # pyrefly: ignore[missing-attribute] ) ] diff --git a/tensorflow_datasets/datasets/squad/squad_dataset_builder.py b/tensorflow_datasets/datasets/squad/squad_dataset_builder.py index 7cffc5bfb6b..cebdeb301f2 100644 --- a/tensorflow_datasets/datasets/squad/squad_dataset_builder.py +++ b/tensorflow_datasets/datasets/squad/squad_dataset_builder.py @@ -130,7 +130,7 @@ class Builder(tfds.core.GeneratorBasedBuilder): } def _info(self): - if self.builder_config.name == "v1.1": + if self.builder_config.name == "v1.1": # pyrefly: ignore[missing-attribute] features_dict = qa_utils.squadlike_features() elif self.builder_config.name == "v2.0": features_dict = _v2_features() @@ -148,23 +148,23 @@ def _info(self): def _split_generators(self, dl_manager): """Returns SplitGenerators.""" urls_to_download = { - "train": os.path.join(_URL, self.builder_config.train_file), - "dev": os.path.join(_URL, self.builder_config.dev_file), + "train": os.path.join(_URL, self.builder_config.train_file), # pyrefly: ignore[missing-attribute] + "dev": os.path.join(_URL, self.builder_config.dev_file), # pyrefly: ignore[missing-attribute] } downloaded_files = dl_manager.download_and_extract(urls_to_download) return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={"filepath": downloaded_files["train"]}, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={"filepath": downloaded_files["dev"]}, ), ] def _generate_examples(self, filepath): - if self.builder_config.name == "v1.1": + if self.builder_config.name == "v1.1": # pyrefly: ignore[missing-attribute] return qa_utils.generate_squadlike_examples(filepath) return _generate_v2_examples(filepath) diff --git a/tensorflow_datasets/datasets/stanford_dogs/stanford_dogs_dataset_builder.py b/tensorflow_datasets/datasets/stanford_dogs/stanford_dogs_dataset_builder.py index 1dcd2a1e6d0..c271d5c5945 100644 --- a/tensorflow_datasets/datasets/stanford_dogs/stanford_dogs_dataset_builder.py +++ b/tensorflow_datasets/datasets/stanford_dogs/stanford_dogs_dataset_builder.py @@ -91,29 +91,29 @@ def parse_mat_file(file_name): elif "test" in fname: test_list, _ = parse_mat_file(full_file_name) - self.info.features["label"].names = sorted(label_names) + self.info.features["label"].names = sorted(label_names) # pyrefly: ignore[unbound-name] for root, _, files in tf.io.gfile.walk(annotation_path): # Parsing the XML file which have the image annotations for fname in files: annotation_file_name = os.path.join(root, fname) with tf.io.gfile.GFile(annotation_file_name, "rb") as f: - xml_file_list[fname] = ET.parse(f) + xml_file_list[fname] = ET.parse(f) # pyrefly: ignore[unsupported-operation] return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ "archive": dl_manager.iter_archive(images_path), - "file_names": train_list, + "file_names": train_list, # pyrefly: ignore[unbound-name] "annotation_files": xml_file_list, }, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={ "archive": dl_manager.iter_archive(images_path), - "file_names": test_list, + "file_names": test_list, # pyrefly: ignore[unbound-name] "annotation_files": xml_file_list, }, ), diff --git a/tensorflow_datasets/datasets/stanford_online_products/stanford_online_products_dataset_builder.py b/tensorflow_datasets/datasets/stanford_online_products/stanford_online_products_dataset_builder.py index 8d3f53767dd..1c99b1f36af 100644 --- a/tensorflow_datasets/datasets/stanford_online_products/stanford_online_products_dataset_builder.py +++ b/tensorflow_datasets/datasets/stanford_online_products/stanford_online_products_dataset_builder.py @@ -22,7 +22,7 @@ import tensorflow_datasets.public_api as tfds _DOWNLOAD_LINK = "ftp://cs.stanford.edu/cs/cvgl/Stanford_Online_Products.zip" -_SPLITS = {tfds.Split.TRAIN: "Ebay_train", tfds.Split.TEST: "Ebay_test"} +_SPLITS = {tfds.Split.TRAIN: "Ebay_train", tfds.Split.TEST: "Ebay_test"} # pyrefly: ignore[missing-attribute] _SUPER_CLASSES = [ "bicycle", diff --git a/tensorflow_datasets/datasets/star_cfq/star_cfq_dataset_builder.py b/tensorflow_datasets/datasets/star_cfq/star_cfq_dataset_builder.py index 249dedd09fa..0b007ccdf6b 100644 --- a/tensorflow_datasets/datasets/star_cfq/star_cfq_dataset_builder.py +++ b/tensorflow_datasets/datasets/star_cfq/star_cfq_dataset_builder.py @@ -314,12 +314,12 @@ def _info(self) -> tfds.core.DatasetInfo: def _split_generators(self, dl_manager): """Returns SplitGenerators.""" split_dir = dl_manager.download_and_extract( - '%s/%s' % (_DATA_URL, self.builder_config.split_archive_path) + '%s/%s' % (_DATA_URL, self.builder_config.split_archive_path) # pyrefly: ignore[missing-attribute] ) - split_path = os.path.join(split_dir, self.builder_config.split_path) + split_path = os.path.join(split_dir, self.builder_config.split_path) # pyrefly: ignore[missing-attribute] dataset_paths = {} - if self.builder_config.compound_divergence: + if self.builder_config.compound_divergence: # pyrefly: ignore[missing-attribute] extracted_dataset_path = dl_manager.download_and_extract( _DATA_URL + '/datasets/u-cfq-for-divergence-splits-1.0-compact-combined.tar.gz' @@ -364,7 +364,7 @@ def _split_generators(self, dl_manager): def _generate_examples(self, dataset_paths, split_path, split_id): """Yields examples.""" - if self.builder_config.compound_divergence: + if self.builder_config.compound_divergence: # pyrefly: ignore[missing-attribute] samples_path = os.path.join(dataset_paths[_UCFQ_POOL], 'dataset.json') with epath.Path(samples_path).open() as samples_file: logging.info('Reading json from %s into memory...', samples_path) @@ -404,7 +404,7 @@ def _generate_examples(self, dataset_paths, split_path, split_id): # be unique for each instance. key = f'{dataset}-{absolute_index}-{slice_index}' yield key, { - _QUESTION: pool[idx][_QUESTION_FIELD], - _QUERY: pool[idx][_QUERY_FIELD], + _QUESTION: pool[idx][_QUESTION_FIELD], # pyrefly: ignore[unsupported-operation] + _QUERY: pool[idx][_QUERY_FIELD], # pyrefly: ignore[unsupported-operation] } slice_index += 1 diff --git a/tensorflow_datasets/datasets/starcraft_video/starcraft_video_dataset_builder.py b/tensorflow_datasets/datasets/starcraft_video/starcraft_video_dataset_builder.py index 8341f269fcd..5c376f4d62a 100644 --- a/tensorflow_datasets/datasets/starcraft_video/starcraft_video_dataset_builder.py +++ b/tensorflow_datasets/datasets/starcraft_video/starcraft_video_dataset_builder.py @@ -109,8 +109,8 @@ def _info(self): "rgb_screen": tfds.features.Video( shape=( None, - self.builder_config.resolution, - self.builder_config.resolution, + self.builder_config.resolution, # pyrefly: ignore[missing-attribute] + self.builder_config.resolution, # pyrefly: ignore[missing-attribute] 3, ) ), # pytype: disable=wrong-arg-types # gen-stub-imports @@ -123,9 +123,9 @@ def _info(self): def _split_generators(self, dl_manager): url = DATA_URL_DIR + "%s_%dx%d_png/" % ( - self.builder_config.map_name, - self.builder_config.resolution, - self.builder_config.resolution, + self.builder_config.map_name, # pyrefly: ignore[missing-attribute] + self.builder_config.resolution, # pyrefly: ignore[missing-attribute] + self.builder_config.resolution, # pyrefly: ignore[missing-attribute] ) urls_to_download = { @@ -139,7 +139,7 @@ def _split_generators(self, dl_manager): return [ tfds.core.SplitGenerator( - name=tfds.Split.TRAIN, + name=tfds.Split.TRAIN, # pyrefly: ignore[missing-attribute] gen_kwargs={ "files": [ download @@ -149,11 +149,11 @@ def _split_generators(self, dl_manager): }, ), tfds.core.SplitGenerator( - name=tfds.Split.TEST, + name=tfds.Split.TEST, # pyrefly: ignore[missing-attribute] gen_kwargs={"files": [downloaded_urls["test"]]}, ), tfds.core.SplitGenerator( - name=tfds.Split.VALIDATION, + name=tfds.Split.VALIDATION, # pyrefly: ignore[missing-attribute] gen_kwargs={"files": [downloaded_urls["valid"]]}, ), ]