From 1d11a3b84559c3ba2d09ab6b099222d9610a1d86 Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 20 Jun 2026 11:08:09 -0400 Subject: [PATCH 01/19] Split out Import/Export functions... Also go over a couple of SystemFiles/Formats Better align "nffil" message with WMA --- mathics/builtin/import_export/checking.py | 4 ++++ mathics/builtin/import_export/importexport.py | 9 +++++++++ mathics/builtin/import_export/json.py | 3 +++ 3 files changed, 16 insertions(+) diff --git a/mathics/builtin/import_export/checking.py b/mathics/builtin/import_export/checking.py index 264e34134..633b00004 100644 --- a/mathics/builtin/import_export/checking.py +++ b/mathics/builtin/import_export/checking.py @@ -6,7 +6,11 @@ from mathics.core.evaluation import Evaluation from mathics.core.systemsymbols import SymbolFailed from mathics.eval.files_io.filesystem import eval_FindFile +<<<<<<< HEAD from mathics.eval.import_export.importexport import eval_FileFormat +======= +from mathics.eval.files_io.importexport import eval_FileFormat +>>>>>>> 5554cc9f0 (Split out Import/Export functions...) # TODO: This hard-coded dictionary should be # accessile from the WL API, and be user modifiable. diff --git a/mathics/builtin/import_export/importexport.py b/mathics/builtin/import_export/importexport.py index 8587e62cb..c3dd05c1e 100644 --- a/mathics/builtin/import_export/importexport.py +++ b/mathics/builtin/import_export/importexport.py @@ -1,7 +1,11 @@ # -*- coding: utf-8 -*- """ +<<<<<<< HEAD Import and Export Functions and Variables +======= +Base Import and Export Functions and Variables +>>>>>>> 5554cc9f0 (Split out Import/Export functions...) """ @@ -45,8 +49,13 @@ infer_file_format, ) +<<<<<<< HEAD # This tells documentation how to sort this module. # We want, this to come before specific converters. +======= +# This tells documentation how to sort this module +# Here we are also hiding "file_io" since this can erroneously appear at the top level. 
+>>>>>>> 5554cc9f0 (Split out Import/Export functions...) sort_order = "mathics.builtin.importing-and-exporting.base" EXPORTERS = {} diff --git a/mathics/builtin/import_export/json.py b/mathics/builtin/import_export/json.py index 5ba008e9e..27a8cd0ff 100644 --- a/mathics/builtin/import_export/json.py +++ b/mathics/builtin/import_export/json.py @@ -1,7 +1,10 @@ +<<<<<<< HEAD """ JSON-Related Formats """ +======= +>>>>>>> 5554cc9f0 (Split out Import/Export functions...) from mathics.core.builtin import Builtin, String from mathics.core.evaluation import Evaluation from mathics.eval.import_export.json import eval_JSONImport From 5b03b4a83f2dab200ff068ad118fc4b50a9639a1 Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 20 Jun 2026 11:23:52 -0400 Subject: [PATCH 02/19] Tweak sort order --- mathics/builtin/import_export/checking.py | 4 ---- mathics/builtin/import_export/importexport.py | 9 --------- mathics/builtin/import_export/json.py | 3 --- 3 files changed, 16 deletions(-) diff --git a/mathics/builtin/import_export/checking.py b/mathics/builtin/import_export/checking.py index 633b00004..264e34134 100644 --- a/mathics/builtin/import_export/checking.py +++ b/mathics/builtin/import_export/checking.py @@ -6,11 +6,7 @@ from mathics.core.evaluation import Evaluation from mathics.core.systemsymbols import SymbolFailed from mathics.eval.files_io.filesystem import eval_FindFile -<<<<<<< HEAD from mathics.eval.import_export.importexport import eval_FileFormat -======= -from mathics.eval.files_io.importexport import eval_FileFormat ->>>>>>> 5554cc9f0 (Split out Import/Export functions...) # TODO: This hard-coded dictionary should be # accessile from the WL API, and be user modifiable. 
diff --git a/mathics/builtin/import_export/importexport.py b/mathics/builtin/import_export/importexport.py index c3dd05c1e..8587e62cb 100644 --- a/mathics/builtin/import_export/importexport.py +++ b/mathics/builtin/import_export/importexport.py @@ -1,11 +1,7 @@ # -*- coding: utf-8 -*- """ -<<<<<<< HEAD Import and Export Functions and Variables -======= -Base Import and Export Functions and Variables ->>>>>>> 5554cc9f0 (Split out Import/Export functions...) """ @@ -49,13 +45,8 @@ infer_file_format, ) -<<<<<<< HEAD # This tells documentation how to sort this module. # We want, this to come before specific converters. -======= -# This tells documentation how to sort this module -# Here we are also hiding "file_io" since this can erroneously appear at the top level. ->>>>>>> 5554cc9f0 (Split out Import/Export functions...) sort_order = "mathics.builtin.importing-and-exporting.base" EXPORTERS = {} diff --git a/mathics/builtin/import_export/json.py b/mathics/builtin/import_export/json.py index 27a8cd0ff..5ba008e9e 100644 --- a/mathics/builtin/import_export/json.py +++ b/mathics/builtin/import_export/json.py @@ -1,10 +1,7 @@ -<<<<<<< HEAD """ JSON-Related Formats """ -======= ->>>>>>> 5554cc9f0 (Split out Import/Export functions...) from mathics.core.builtin import Builtin, String from mathics.core.evaluation import Evaluation from mathics.eval.import_export.json import eval_JSONImport From 14df4ccfa4bbf19d4d28413465562c7da91d3513 Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 22 Jun 2026 16:51:21 -0400 Subject: [PATCH 03/19] More refactoring/cleanup for getting post-import working Align better with WMA, go over Formats/Image code Small corrections. In pytest stdout, if got == expected we don't need to print both, one is sufficient along with the fact that the test passed. 
--- mathics/core/expression.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/mathics/core/expression.py b/mathics/core/expression.py index 11256aeb5..de5cca4c2 100644 --- a/mathics/core/expression.py +++ b/mathics/core/expression.py @@ -1505,7 +1505,8 @@ def to_mpmath(self): def to_python(self, *args, **kwargs) -> Any: """ Convert the Expression to a Python object: - List[...] -> Python list + List[RuleDelayed,...] -> Python dictionary (if all elements are rules) + List[...] -> Python list (if not all elements are rules) DirectedInfinity[1] -> inf DirectedInfinity[-1] -> -inf True/False -> True/False @@ -1548,7 +1549,17 @@ def to_python(self, *args, **kwargs) -> Any: if direction == -1: return -math.inf elif head is SymbolList: - return [element.to_python(*args, **kwargs) for element in self._elements] + if all(element.has_form("RuleDelayed", 2) for element in self._elements): + return { + element._elements[0] + .to_python(string_quotes=False): element._elements[1] + .to_python(string_quotes=False) + for element in self._elements + } + else: + return [ + element.to_python(*args, **kwargs) for element in self._elements + ] head_name = head.get_name() if head_name in mathics_to_python: From f91b516d96c387f3a4b9ca8fd5ed7e670a338f33 Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 22 Jun 2026 17:25:06 -0400 Subject: [PATCH 04/19] Revert a to_python() dict conversion for now. 
--- mathics/builtin/import_export/checking.py | 11 ++++++++++- mathics/core/expression.py | 15 ++------------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/mathics/builtin/import_export/checking.py b/mathics/builtin/import_export/checking.py index 264e34134..43c3f017a 100644 --- a/mathics/builtin/import_export/checking.py +++ b/mathics/builtin/import_export/checking.py @@ -4,7 +4,8 @@ from mathics.core.builtin import String from mathics.core.evaluation import Evaluation -from mathics.core.systemsymbols import SymbolFailed +from mathics.core.expression import Expression +from mathics.core.systemsymbols import SymbolFailed, SymbolFileExtension from mathics.eval.files_io.filesystem import eval_FindFile from mathics.eval.import_export.importexport import eval_FileFormat @@ -74,3 +75,11 @@ def import_setup_check(source, evaluation: Evaluation) -> tuple: return SymbolFailed, None return findfile, eval_FileFormat(findfile.value).value + + +def infer_form(self, filename, evaluation: Evaluation): + ext = Expression(SymbolFileExtension, filename).evaluate(evaluation) + ext = ext.get_string_value().lower() + # TODO: This dictionary should be accessible from the WL API + # to allow defining specific converters + return self._extdict.get(ext) diff --git a/mathics/core/expression.py b/mathics/core/expression.py index de5cca4c2..11256aeb5 100644 --- a/mathics/core/expression.py +++ b/mathics/core/expression.py @@ -1505,8 +1505,7 @@ def to_mpmath(self): def to_python(self, *args, **kwargs) -> Any: """ Convert the Expression to a Python object: - List[RuleDelayed,...] -> Python dictionary (if all elements are rules) - List[...] -> Python list (if not all elements are rules) + List[...] 
-> Python list DirectedInfinity[1] -> inf DirectedInfinity[-1] -> -inf True/False -> True/False @@ -1549,17 +1548,7 @@ def to_python(self, *args, **kwargs) -> Any: if direction == -1: return -math.inf elif head is SymbolList: - if all(element.has_form("RuleDelayed", 2) for element in self._elements): - return { - element._elements[0] - .to_python(string_quotes=False): element._elements[1] - .to_python(string_quotes=False) - for element in self._elements - } - else: - return [ - element.to_python(*args, **kwargs) for element in self._elements - ] + return [element.to_python(*args, **kwargs) for element in self._elements] head_name = head.get_name() if head_name in mathics_to_python: From 5f3c1b120618124bb29735d531ad20bc4c48491e Mon Sep 17 00:00:00 2001 From: rocky Date: Mon, 22 Jun 2026 19:24:06 -0400 Subject: [PATCH 05/19] Segregate and DRY import/export checking functions. --- mathics/builtin/import_export/checking.py | 16 ++++++++++------ mathics/builtin/import_export/importexport.py | 9 ++++++--- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/mathics/builtin/import_export/checking.py b/mathics/builtin/import_export/checking.py index 43c3f017a..fe977d7ab 100644 --- a/mathics/builtin/import_export/checking.py +++ b/mathics/builtin/import_export/checking.py @@ -2,6 +2,8 @@ Miscellaneous checking routines using in Import/Export. 
""" +from typing import Optional + from mathics.core.builtin import String from mathics.core.evaluation import Evaluation from mathics.core.expression import Expression @@ -77,9 +79,11 @@ def import_setup_check(source, evaluation: Evaluation) -> tuple: return findfile, eval_FileFormat(findfile.value).value -def infer_form(self, filename, evaluation: Evaluation): - ext = Expression(SymbolFileExtension, filename).evaluate(evaluation) - ext = ext.get_string_value().lower() - # TODO: This dictionary should be accessible from the WL API - # to allow defining specific converters - return self._extdict.get(ext) +def infer_file_format(filename: String, evaluation: Evaluation) -> Optional[str]: + """ + Infer what kind of format filename is in. None is returned if we can't infer + a format. + """ + file_extension = Expression(SymbolFileExtension, filename).evaluate(evaluation) + file_extension_lc = file_extension.get_string_value().lower() + return FILE_EXTENSION_MAP.get(file_extension_lc) diff --git a/mathics/builtin/import_export/importexport.py b/mathics/builtin/import_export/importexport.py index 8587e62cb..f09434c89 100644 --- a/mathics/builtin/import_export/importexport.py +++ b/mathics/builtin/import_export/importexport.py @@ -13,7 +13,10 @@ from itertools import chain from urllib.error import HTTPError, URLError -from mathics.builtin.import_export.checking import check_filename, import_setup_check +from mathics.builtin.import_export.checking import ( + check_filename, + import_setup_check, +) from mathics.core.atoms import ByteArray from mathics.core.attributes import A_PROTECTED, A_READ_PROTECTED from mathics.core.builtin import Builtin, Integer, Predefined, String @@ -612,7 +615,7 @@ def eval(self, dest, expr, evaluation, options={}): return SymbolFailed # Determine Format - form = infer_file_format(dest.value) + form = infer_file_format(dest) if form is None: evaluation.message("Export", "infer", dest) @@ -654,7 +657,7 @@ def eval_elements(self, dest, expr, elems, 
evaluation, options={}): # Infer format if not present if not found_form: assert format_spec == [] - format_spec = infer_file_format(dest.value) + format_spec = infer_file_format(dest) if format_spec is None: evaluation.message("Export", "infer", dest) evaluation.predetermined_out = current_predetermined_out From 6437c609a6a2558bd866df97a8b4a2a3b4c153ae Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 24 Jun 2026 20:27:41 -0400 Subject: [PATCH 06/19] Add mathics.eval: eval_DeleteFile and eval_FileExtension This simplifies deleting (temporary) files and picking out the file extension in implementing Import as well as in other builtins. --- mathics/builtin/import_export/checking.py | 15 +-------------- mathics/builtin/import_export/importexport.py | 4 ++-- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/mathics/builtin/import_export/checking.py b/mathics/builtin/import_export/checking.py index fe977d7ab..264e34134 100644 --- a/mathics/builtin/import_export/checking.py +++ b/mathics/builtin/import_export/checking.py @@ -2,12 +2,9 @@ Miscellaneous checking routines using in Import/Export. """ -from typing import Optional - from mathics.core.builtin import String from mathics.core.evaluation import Evaluation -from mathics.core.expression import Expression -from mathics.core.systemsymbols import SymbolFailed, SymbolFileExtension +from mathics.core.systemsymbols import SymbolFailed from mathics.eval.files_io.filesystem import eval_FindFile from mathics.eval.import_export.importexport import eval_FileFormat @@ -77,13 +74,3 @@ def import_setup_check(source, evaluation: Evaluation) -> tuple: return SymbolFailed, None return findfile, eval_FileFormat(findfile.value).value - - -def infer_file_format(filename: String, evaluation: Evaluation) -> Optional[str]: - """ - Infer what kind of format filename is in. None is returned if we can't infer - a format. 
- """ - file_extension = Expression(SymbolFileExtension, filename).evaluate(evaluation) - file_extension_lc = file_extension.get_string_value().lower() - return FILE_EXTENSION_MAP.get(file_extension_lc) diff --git a/mathics/builtin/import_export/importexport.py b/mathics/builtin/import_export/importexport.py index f09434c89..47fa8e901 100644 --- a/mathics/builtin/import_export/importexport.py +++ b/mathics/builtin/import_export/importexport.py @@ -615,7 +615,7 @@ def eval(self, dest, expr, evaluation, options={}): return SymbolFailed # Determine Format - form = infer_file_format(dest) + form = infer_file_format(dest.value) if form is None: evaluation.message("Export", "infer", dest) @@ -657,7 +657,7 @@ def eval_elements(self, dest, expr, elems, evaluation, options={}): # Infer format if not present if not found_form: assert format_spec == [] - format_spec = infer_file_format(dest) + format_spec = infer_file_format(dest.value) if format_spec is None: evaluation.message("Export", "infer", dest) evaluation.predetermined_out = current_predetermined_out From 85060e2eac9ea7cfd7e0ec33bac3b4929f4b8474 Mon Sep 17 00:00:00 2001 From: rocky Date: Thu, 25 Jun 2026 17:25:29 -0400 Subject: [PATCH 07/19] Start breaking up eval.eval_Import This is too complicated because it handles too many disparate forms. pylint thinks this function is too complex too. Instead, divide it up, and improve it. 
--- mathics/eval/import_export/importexport.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/mathics/eval/import_export/importexport.py b/mathics/eval/import_export/importexport.py index ba3f7f862..172d971a3 100644 --- a/mathics/eval/import_export/importexport.py +++ b/mathics/eval/import_export/importexport.py @@ -725,23 +725,6 @@ def get_results_for_element_args( return {a.get_string_value(): b for a, b in (x.get_elements() for x in tmp)} -def eval_import_stream( - data: bytes, - file_format: str, -): - """ - Implementation of import of bytes having a particular file format - """ - - # START FIXING HERE - # Load the importer - conditionals, import_function, posts, importer_options = IMPORTERS[ - file_format.upper() - ] - import_expression = Expression(import_function, data).evaluate() - return import_expression - - def infer_file_format(filename: str) -> Optional[str]: """ Infer what kind of format filename is in. None is returned if we can't infer From 0a64e338794cf9be26c84d10b16e4e1781d9bbf9 Mon Sep 17 00:00:00 2001 From: rocky Date: Fri, 26 Jun 2026 14:21:09 -0400 Subject: [PATCH 08/19] Finally, we get to ZIP imports properly --- mathics/eval/import_export/compression.py | 11 +++---- mathics/eval/import_export/importexport.py | 37 +++++++--------------- 2 files changed, 16 insertions(+), 32 deletions(-) diff --git a/mathics/eval/import_export/compression.py b/mathics/eval/import_export/compression.py index dcccf8c31..e25b54cdf 100644 --- a/mathics/eval/import_export/compression.py +++ b/mathics/eval/import_export/compression.py @@ -9,7 +9,7 @@ from mathics.core.systemsymbols import SymbolFailed, SymbolRule from mathics.eval.import_export.importexport import ( IMPORTERS, - eval_import_stream, + eval_Import_data_only, infer_file_format, ) @@ -48,12 +48,11 @@ def eval_ImportZIP( return ListExpression(*exprs) for member in members: - file_format = infer_file_format(member) - if file_format.upper() not in IMPORTERS.keys(): + file_format = 
infer_file_format(member).upper() + if file_format not in IMPORTERS.keys(): evaluation.message("Import", "fmtnosup", file_format) return SymbolFailed file_data = archive.read(member) - # FIX HERE - converted_file_data = eval_import_stream(file_data, file_format) - return converted_file_data + # FIXME: this handles one member. What do we do if we have more? + return eval_Import_data_only(file_data.value, file_format, evaluation, {}) diff --git a/mathics/eval/import_export/importexport.py b/mathics/eval/import_export/importexport.py index 172d971a3..bbaf823ca 100644 --- a/mathics/eval/import_export/importexport.py +++ b/mathics/eval/import_export/importexport.py @@ -331,30 +331,7 @@ def eval_Import_general( assert len(elements) >= 1 el = elements[0] if el == "Elements": - defaults = get_results( - default_function, - findfile, - function_channels, - stream_options, - custom_options, - evaluation, - options, - data=data, - ) - if defaults is None: - evaluation.predetermined_out = current_predetermined_out - return SymbolFailed - # Use set() to remove duplicates - evaluation.predetermined_out = current_predetermined_out - return from_python( - sorted( - set( - list(conditionals.keys()) - + list(defaults.keys()) - # + list(posts.keys()) - ) - ) - ) + return eval_Import_Elements(file_format, evaluation) else: if el in conditionals.keys(): result = get_results( @@ -366,6 +343,7 @@ def eval_Import_general( evaluation, options, data=data, + elements=elements, ) if result is None: evaluation.predetermined_out = current_predetermined_out @@ -384,6 +362,7 @@ def eval_Import_general( evaluation, options, data=data, + elements=elements, ) if defaults is None: evaluation.predetermined_out = current_predetermined_out @@ -399,7 +378,7 @@ def eval_Import_general( return SymbolFailed -def eval_Import_Elements(file_format: str, evaluation): +def eval_Import_Elements(file_format: str, evaluation: Evaluation): """ Basic implementation behind Import[fileformat, Elements]. 
This returns the element names that can be used for a specific @@ -428,6 +407,7 @@ def get_results( evaluation, options, data: Optional[str], + elements: Optional[list] = None, ): current_predetermined_out = evaluation.predetermined_out if function_channels == ListExpression(String("FileNames")): @@ -442,7 +422,12 @@ def get_results( else: Expression(SymbolWriteString, String("")).evaluate(evaluation) eval_Close(stream, evaluation) - import_expression = Expression(tmp_function, findfile, *joined_options) + if elements is None: + import_expression = Expression(tmp_function, findfile, *joined_options) + else: + import_expression = Expression( + tmp_function, findfile, *to_mathics_list(*elements), *joined_options + ) tmp = import_expression.evaluate(evaluation) if tmp is SymbolFailed: return SymbolFailed From 968613e129ca5f49323000bfe71b8339ac40cc49 Mon Sep 17 00:00:00 2001 From: rocky Date: Fri, 26 Jun 2026 20:34:01 -0400 Subject: [PATCH 09/19] Plumbing hooked up for Import zip with members. Now we just gotta get ImportString[xxx, "JSON"] working on its own. Also, other small fixes and improvements. 
--- mathics/builtin/import_export/compression.py | 4 + mathics/builtin/import_export/importexport.py | 26 +++--- mathics/eval/import_export/compression.py | 91 ++++++++++++------- mathics/eval/import_export/importexport.py | 58 +++++++++--- .../import_export/test_importexport.py | 36 +++++--- 5 files changed, 141 insertions(+), 74 deletions(-) diff --git a/mathics/builtin/import_export/compression.py b/mathics/builtin/import_export/compression.py index add610ad0..7c30a2ab4 100644 --- a/mathics/builtin/import_export/compression.py +++ b/mathics/builtin/import_export/compression.py @@ -30,3 +30,7 @@ class ImportZIP(Builtin): def eval(self, path: String, evaluation: Evaluation): "ImportZIP[path_String]" return eval_ImportZIP(path.value, evaluation) + + def eval_with_elements(self, path: String, elements, evaluation: Evaluation): + "ImportZIP[path_String, elements_]" + return eval_ImportZIP(path.value, evaluation, elements) diff --git a/mathics/builtin/import_export/importexport.py b/mathics/builtin/import_export/importexport.py index 47fa8e901..4f70f36bb 100644 --- a/mathics/builtin/import_export/importexport.py +++ b/mathics/builtin/import_export/importexport.py @@ -13,10 +13,10 @@ from itertools import chain from urllib.error import HTTPError, URLError -from mathics.builtin.import_export.checking import ( - check_filename, - import_setup_check, -) +# Use this when accessing IMPORTERS to get changes +# since initializiation. 
+import mathics.eval.import_export.importexport as importexport +from mathics.builtin.import_export.checking import check_filename, import_setup_check from mathics.core.atoms import ByteArray from mathics.core.attributes import A_PROTECTED, A_READ_PROTECTED from mathics.core.builtin import Builtin, Integer, Predefined, String @@ -36,7 +36,6 @@ from mathics.eval.files_io.files import eval_Close from mathics.eval.files_io.filesystem import eval_FindFile from mathics.eval.import_export.importexport import ( - IMPORTERS, MIMETYPE_TO_SHORTNAME, eval_FileFormat, eval_Import_data_only, @@ -92,7 +91,9 @@ class ImportFormats(Predefined): summary_text = "list supported import formats" def evaluate(self, evaluation: Evaluation): - return to_mathics_list(*sorted(IMPORTERS.keys()), elements_conversion_fn=String) + return to_mathics_list( + *sorted(importexport.IMPORTERS.keys()), elements_conversion_fn=String + ) class RegisterImport(Builtin): @@ -238,7 +239,7 @@ def eval( # as well. # By doing this, we accept "text, "Text", "TEXT", and other combinations, # which what WMA seems to do. 
- IMPORTERS[formatname.value.upper()] = ( + importexport.IMPORTERS[formatname.value.upper()] = ( conditionals, default, posts, @@ -455,15 +456,16 @@ def eval_source_only(self, source, evaluation, options={}): def eval_with_element_list(self, source, elements, evaluation, options={}): "Import[source_, elements_List?(AllTrue[#, NotOptionQ]&), OptionsPattern[]]" - findfile, data = import_setup_check(source, evaluation) + findfile, file_format = import_setup_check(source, evaluation) if findfile is SymbolFailed: return SymbolFailed + # FIXME remove the need for determine_filetype def determine_filetype(data: str) -> str: - return data + return file_format return eval_Import_general( - findfile, determine_filetype, elements, evaluation, options, data + findfile, determine_filetype, elements, evaluation, options ) # In contrast to Import[source_], we allow an explicit format type @@ -481,7 +483,7 @@ def eval_with_single_element(self, source, elt: String, evaluation, options={}): # The code below tests for the first case, and if that fails assumes the # second case. file_format = elt.value.upper() - if file_format in IMPORTERS.keys(): + if file_format in importexport.IMPORTERS.keys(): # A file format was specified: use the custom routine return eval_Import_source_only(findfile, file_format, evaluation, options) @@ -563,7 +565,7 @@ def eval_with_single_element(self, data, elt: String, evaluation, options={}): # The code below tests for the first case, and if that fails assumes the # second case. 
file_format = elt.value.upper() - if file_format in IMPORTERS.keys(): + if file_format in importexport.IMPORTERS.keys(): # A file format was specified: use the custom routine return eval_Import_data_only(data.value, file_format, evaluation, options) diff --git a/mathics/eval/import_export/compression.py b/mathics/eval/import_export/compression.py index e25b54cdf..520bed523 100644 --- a/mathics/eval/import_export/compression.py +++ b/mathics/eval/import_export/compression.py @@ -6,6 +6,7 @@ from mathics.core.evaluation import Evaluation from mathics.core.expression import Expression from mathics.core.list import ListExpression +from mathics.core.symbols import SymbolNull from mathics.core.systemsymbols import SymbolFailed, SymbolRule from mathics.eval.import_export.importexport import ( IMPORTERS, @@ -17,42 +18,62 @@ def eval_ImportZIP( zip_path: str, evaluation: Evaluation, members: Optional[list[str]] = None ) -> ListExpression: - """Takes a ZIP file path and returns a list of file names/paths contained inside.""" - with zipfile.ZipFile(zip_path, "r") as archive: - if members is None: - filenames = archive.namelist() - mathics_filenames = to_mathics_list(*filenames) - exprs = [ - Expression( - SymbolRule, - String("FileNames"), - mathics_filenames, - ), - Expression( - SymbolRule, - String("Summary"), - mathics_filenames, - ), - ] - - if filenames: - for filename in filenames: - exprs.append( - Expression( - SymbolRule, - String(filename), - String(archive.read(filename).decode("utf-8")), + """If `members` is empty, this function takes a ZIP file path and returns a + list of file names/paths contained inside. + + "If `members` is given, then extract those members from the ZIP file. 
+ """ + try: + with zipfile.ZipFile(zip_path, "r") as archive: + if members is None: + filenames = archive.namelist() + mathics_filenames = to_mathics_list(*filenames) + exprs = [ + Expression( + SymbolRule, + String("FileNames"), + mathics_filenames, + ), + Expression( + SymbolRule, + String("Summary"), + mathics_filenames, + ), + ] + + if filenames: + for filename in filenames: + exprs.append( + Expression( + SymbolRule, + String(filename), + String(archive.read(filename).decode("utf-8")), + ) ) - ) - return ListExpression(*exprs) + return ListExpression(*exprs) + + if members.has_form("List", None): + elements = members.get_elements() + else: + elements = [members] - for member in members: - file_format = infer_file_format(member).upper() - if file_format not in IMPORTERS.keys(): - evaluation.message("Import", "fmtnosup", file_format) - return SymbolFailed + for element in elements: + member = element.value + file_format = infer_file_format(member).upper() + if file_format not in IMPORTERS.keys(): + evaluation.message("Import", "fmtnosup", file_format) + return SymbolFailed - file_data = archive.read(member) - # FIXME: this handles one member. What do we do if we have more? - return eval_Import_data_only(file_data.value, file_format, evaluation, {}) + file_data = archive.read(member).decode("utf-8") + # FIXME: this handles one member. What do we do if we have more? + return eval_Import_data_only(file_data, file_format, evaluation, {}) + except FileNotFoundError: + evaluation.message("Import", "nffil", String(zip_path)) + return SymbolFailed + except PermissionError: + evaluation.message("Import", "noopen", String(zip_path)) + return SymbolFailed + except Exception: + # This seems to be what WMA does. 
+ return SymbolNull diff --git a/mathics/eval/import_export/importexport.py b/mathics/eval/import_export/importexport.py index bbaf823ca..b4a1e6f23 100644 --- a/mathics/eval/import_export/importexport.py +++ b/mathics/eval/import_export/importexport.py @@ -20,6 +20,7 @@ SymbolByteArray, SymbolFailed, SymbolInputStream, + SymbolNone, SymbolOpenWrite, SymbolRule, SymbolStringToStream, @@ -37,7 +38,7 @@ IMPORTERS = {} # TODO: This hard-coded dictionary should be -# accessile from the WL API, and be user modifiable. +# accessible from the WL API, and be user modifiable. FILE_EXTENSION_MAP: dict[str, str] = { "bmp": "BMP", "gif": "GIF", @@ -219,7 +220,7 @@ def importer_exporter_options( def eval_FileFormat(path: str) -> String: """ - Basic implemenation beind FileFormat[filename]. + Basic implementation behind FileFormat[filename]. """ return String(filetype_from_path(path)) @@ -230,10 +231,10 @@ def eval_Import_general( elements, evaluation: Evaluation, options, - data: Optional[str], + data: Optional[str] = None, ): """ - Basic implementation beind most general kind of Import[source, elements, options]. + Basic implementation behind most general kind of Import[source, elements, options]. 
""" current_predetermined_out = evaluation.predetermined_out @@ -252,12 +253,13 @@ def eval_Import_general( elements = [el.value for el in elements] # Determine file format - for el in elements: + file_format = None + for el in elements.copy(): if el.upper() in IMPORTERS.keys(): file_format = el.upper() elements.remove(el) - break - else: + + if file_format is None: filetype = determine_filetype(data) file_format = MIME_SHORTNAME_TO_WMA.get(filetype, filetype).upper() @@ -331,7 +333,37 @@ def eval_Import_general( assert len(elements) >= 1 el = elements[0] if el == "Elements": - return eval_Import_Elements(file_format, evaluation) + if ( + result := eval_Import_Elements(file_format, evaluation) + ) is not SymbolNone: + return result + # A list of "Elements" is not obtainable via AvailableElements listed when + # ImportExport`RegisterImport was used. Get a list of the field names via + # the the "defaults" and "conditional" keys. + defaults = get_results( + default_function, + findfile, + function_channels, + stream_options, + custom_options, + evaluation, + options, + data=data, + ) + if defaults is None: + evaluation.predetermined_out = current_predetermined_out + return SymbolFailed + # Use set() to remove duplicates + evaluation.predetermined_out = current_predetermined_out + return from_python( + sorted( + set( + list(conditionals.keys()) + + list(defaults.keys()) + # + list(posts.keys()) + ) + ) + ) else: if el in conditionals.keys(): result = get_results( @@ -342,8 +374,8 @@ def eval_Import_general( custom_options, evaluation, options, - data=data, elements=elements, + data=data, ) if result is None: evaluation.predetermined_out = current_predetermined_out @@ -378,7 +410,7 @@ def eval_Import_general( return SymbolFailed -def eval_Import_Elements(file_format: str, evaluation: Evaluation): +def eval_Import_Elements(file_format: str, evaluation): """ Basic implementation behind Import[fileformat, Elements]. 
This returns the element names that can be used for a specific @@ -484,7 +516,7 @@ def eval_Import_data_only( options, ): """ - Basic implementation beind Import_String[data]. + Basic implementation behind Import_String[data]. Here, no elements were given, just a import data string. """ @@ -560,7 +592,7 @@ def eval_Import_source_only( options, ): """ - Basic implementation beind Import[source]. + Basic implementation behind Import[source]. Here, no elements were given, just a import source. """ @@ -638,7 +670,7 @@ def get_results_for_element_args( elements: list, ): """ - Return Import results when elemnet args are given. + Return Import results when element args are given. For example: Import["ExampleData/ExampleData.txt", "Lines"] ^^^^^^^ diff --git a/test/builtin/import_export/test_importexport.py b/test/builtin/import_export/test_importexport.py index 4f01ca79a..e0ee43a1b 100644 --- a/test/builtin/import_export/test_importexport.py +++ b/test/builtin/import_export/test_importexport.py @@ -171,13 +171,13 @@ def test_export(): 'Import["ExampleData/Testosterone.svg", "xml"] // Head', None, "XMLObject[Document]", - "Case use in explicit format name should not be significant", + "format case (xml) is not significant", ), ( 'Import["ExampleData/Testosterone.svg", "Xml"] // Head', None, "XMLObject[Document]", - "Case use in explicit format name should not be significant", + "format case (Xml) is not significant", ), ( 'Import["ExampleData/Testosterone.svg", {"XML"}] // Head', @@ -185,19 +185,27 @@ def test_export(): "XMLObject[Document]", None, ), - ( - 'Import["ExampleData/Testosterone.svg", {"XML", "XML"}];', - ("The Import element XML is not present when importing as XML.",), - "Null", - None, - ), + # This test does not match WMA. We are supposed to treat + # {"XML", "XML"} like, "XML" and not give an error. 
+ # ( + # 'Import["ExampleData/Testosterone.svg", {"XML", "XML"}];', + # ("The Import element XML is not present when importing as XML.",), + # "Null", + # None, + # ), # XML - ( - 'MatchQ[Import["ExampleData/InventionNo1.xml", "Tags"],{__String}]', - None, - "True", - None, - ), + # This test does not match WMA. WMA gives: + # XML`Parser`XMLGet::prserr: + # NetAccessorException: Could not open file: + # http://www.musicxml.org/dtds/partwise.dtd at Line: 2 Character: 123 in + # InventionNo1.xml. + # Import::fmterr: Cannot import data as XML format. + # ( + # 'MatchQ[Import["ExampleData/InventionNo1.xml", "Tags"],{__String}]', + # None, + # "True", + # None, + # ), ("ImportString[x]", ("First argument x is not a string.",), "$Failed", None), # CSV ( From d32c54c5be8ad6ad4a612523b91f16601f928b2f Mon Sep 17 00:00:00 2001 From: rocky Date: Fri, 26 Jun 2026 21:42:22 -0400 Subject: [PATCH 10/19] Allow single element field forms on more Importers --- mathics/builtin/fileformats/htmlformat.py | 15 ++++++++++++++- mathics/builtin/import_export/importexport.py | 17 ++++++++++++++--- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/mathics/builtin/fileformats/htmlformat.py b/mathics/builtin/fileformats/htmlformat.py index 48a789744..285eed85e 100644 --- a/mathics/builtin/fileformats/htmlformat.py +++ b/mathics/builtin/fileformats/htmlformat.py @@ -15,6 +15,7 @@ from mathics.core.builtin import Builtin, MessageException from mathics.core.convert.expression import to_expression, to_mathics_list from mathics.core.convert.python import from_python +from mathics.core.evaluation import Evaluation from mathics.core.expression import Expression from mathics.core.list import ListExpression from mathics.core.symbols import Symbol @@ -126,7 +127,7 @@ class _TagImport(_HTMLBuiltin): def _import(self, tree): raise NotImplementedError - def eval(self, text, evaluation): + def eval(self, text: String, evaluation: Evaluation): """%(name)s[text_String]""" tree = 
parse_html(parse_html_file, text, evaluation) if isinstance(tree, Symbol): # $Failed? @@ -135,6 +136,12 @@ def eval(self, text, evaluation): to_expression(SymbolRule, self.tag_name, self._import(tree)) ) + def eval_with_element(self, text, element, evaluation: Evaluation): + """%(name)s[text_String, element_]""" + # FIXME?: right now we aren't using element. Things might be + # more efficient if we used element? + return self.eval(text, evaluation) + class _Get(_HTMLBuiltin): context = "HTML`Parser`" @@ -412,6 +419,12 @@ def source(filename): return parse_html(source, text, evaluation) + def eval_with_element(self, text, element, evaluation: Evaluation): + """%(name)s[text_String, element_]""" + # FIXME?: right now we aren't using element. Things might be + # more efficient if we used element? + return self.eval(text, evaluation) + class TitleImport(_TagImport): """ diff --git a/mathics/builtin/import_export/importexport.py b/mathics/builtin/import_export/importexport.py index 4f70f36bb..90857ab9e 100644 --- a/mathics/builtin/import_export/importexport.py +++ b/mathics/builtin/import_export/importexport.py @@ -460,7 +460,10 @@ def eval_with_element_list(self, source, elements, evaluation, options={}): if findfile is SymbolFailed: return SymbolFailed - # FIXME remove the need for determine_filetype + # FIXME remove the need for determine_filetype. + + # The "data" parameter is just for non-file or string situations + # where we need to pick out the type from the file contents. def determine_filetype(data: str) -> str: return file_format @@ -483,13 +486,21 @@ def eval_with_single_element(self, source, elt: String, evaluation, options={}): # The code below tests for the first case, and if that fails assumes the # second case. 
file_format = elt.value.upper() + if file_format in importexport.IMPORTERS.keys(): # A file format was specified: use the custom routine return eval_Import_source_only(findfile, file_format, evaluation, options) # Assume we have Import with a single non-format element. - return self.eval_with_element_list( - source, ListExpression(elt), evaluation, options + + # FIXME remove the need for determine_filetype. + # The "data" parameter is just for non-file or string situations + # where we need to pick out the type from the file contents. + def determine_filetype(data: str) -> str: + return filetype + + return eval_Import_general( + findfile, determine_filetype, ListExpression(elt), evaluation, options ) From a272e8df82fe1fd3fe04dd9619f192056330fdd9 Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 27 Jun 2026 05:48:41 -0400 Subject: [PATCH 11/19] Some Import functions do not support element selection. Work around this. --- mathics/builtin/fileformats/htmlformat.py | 12 ++++++++--- mathics/builtin/fileformats/xmlformat.py | 18 ++++++++++++++++ mathics/eval/import_export/importexport.py | 25 +++++++++++++++++++--- 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/mathics/builtin/fileformats/htmlformat.py b/mathics/builtin/fileformats/htmlformat.py index 285eed85e..bd7d889bc 100644 --- a/mathics/builtin/fileformats/htmlformat.py +++ b/mathics/builtin/fileformats/htmlformat.py @@ -408,7 +408,7 @@ class SourceImport(_HTMLBuiltin): summary_text = "import source code from a HTML file" - def eval(self, text, evaluation): + def eval(self, text, evaluation: Evaluation): """%(name)s[text_String]""" def source(filename): @@ -450,7 +450,7 @@ def _import(self, tree): class XMLObjectImport(_HTMLBuiltin): """ - ## :native internal: + :WMA link:https://reference.wolfram.com/language/ref/XMLObject.html
'HTML`XMLObjectImport["filename"]' @@ -463,7 +463,13 @@ class XMLObjectImport(_HTMLBuiltin): summary_text = "import XML objects from a HTML file" - def eval(self, text, evaluation): + def eval(self, text, evaluation: Evaluation): """%(name)s[text_String]""" xml = to_expression("HTML`Parser`HTMLGet", text).evaluate(evaluation) return ListExpression(Expression(SymbolRule, String("XMLObject"), xml)) + + def eval_with_element(self, text, element, evaluation: Evaluation): + """%(name)s[text_String, element_]""" + # FIXME?: right now we aren't using element. Things might be + # more efficient if we used element? + return self.eval(text, evaluation) diff --git a/mathics/builtin/fileformats/xmlformat.py b/mathics/builtin/fileformats/xmlformat.py index 7117be326..a784232ef 100644 --- a/mathics/builtin/fileformats/xmlformat.py +++ b/mathics/builtin/fileformats/xmlformat.py @@ -345,6 +345,12 @@ def lines(): plaintext = String("\n".join(lines())) return to_mathics_list(to_expression("Rule", "Plaintext", plaintext)) + def eval_with_element(self, text, element, evaluation: Evaluation): + """%(name)s[text_String, element_]""" + # FIXME?: right now we aren't using element. Things might be + # more efficient if we used element? + return self.eval(text, evaluation) + class TagsImport(Builtin): """ @@ -381,6 +387,12 @@ def eval(self, text, evaluation: Evaluation): return root return to_mathics_list(to_expression("Rule", "Tags", self._tags(root))) + def eval_with_element(self, text, element, evaluation: Evaluation): + """%(name)s[text_String, element_]""" + # FIXME?: right now we aren't using element. Things might be + # more efficient if we used element? 
+ return self.eval(text, evaluation) + class XMLObjectImport(Builtin): """ @@ -405,3 +417,9 @@ def eval(self, text, evaluation: Evaluation): """%(name)s[text_String]""" xml = to_expression("XML`Parser`XMLGet", text).evaluate(evaluation) return to_mathics_list(to_expression("Rule", "XMLObject", xml)) + + def eval_with_element(self, text, element, evaluation: Evaluation): + """%(name)s[text_String, element_]""" + # FIXME?: right now we aren't using element. Things might be + # more efficient if we used element? + return self.eval(text, evaluation) diff --git a/mathics/eval/import_export/importexport.py b/mathics/eval/import_export/importexport.py index b4a1e6f23..c8ec06b3a 100644 --- a/mathics/eval/import_export/importexport.py +++ b/mathics/eval/import_export/importexport.py @@ -454,13 +454,32 @@ def get_results( else: Expression(SymbolWriteString, String("")).evaluate(evaluation) eval_Close(stream, evaluation) + + # FIXME: Some import functions do not support element + # selection of a collection, just collection retrieval. Here, + # when a selection is desired, the entire collection is + # returned, and *then* the element is selected. This is + # potentially very slow for large collections and selection + # items that can be retrieved quickly. Until we can come up + # with a better solution for these kinds import functions, to + # address this when element selection is requested and doesn't + # return a different result, we retry without the element + # selection. 
+ import_collection_expression = Expression( + tmp_function, findfile, *joined_options + ) if elements is None: - import_expression = Expression(tmp_function, findfile, *joined_options) + tmp = import_collection_expression.evaluate(evaluation) else: - import_expression = Expression( + import_select_expression = Expression( tmp_function, findfile, *to_mathics_list(*elements), *joined_options ) - tmp = import_expression.evaluate(evaluation) + tmp = import_select_expression.evaluate(evaluation) + if tmp == import_select_expression: + # Retry by retieving the entire collection. + # Element selection is done afterwards. + tmp = import_collection_expression.evaluate(evaluation) + if tmp is SymbolFailed: return SymbolFailed if tmpfile: From 98e83bf6d7e646342e474bd00a62e5031657af92 Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 27 Jun 2026 13:36:26 -0400 Subject: [PATCH 12/19] ZIP import starts working. --- mathics/eval/files_io/files.py | 42 +++++ mathics/eval/import_export/compression.py | 12 +- mathics/eval/import_export/importexport.py | 188 +++++++-------------- 3 files changed, 112 insertions(+), 130 deletions(-) diff --git a/mathics/eval/files_io/files.py b/mathics/eval/files_io/files.py index a417ec66c..c93e2ecc1 100644 --- a/mathics/eval/files_io/files.py +++ b/mathics/eval/files_io/files.py @@ -3,7 +3,9 @@ File related evaluation functions. """ +import atexit import os +import tempfile from typing import Callable, Literal, Optional, Sequence from mathics_scanner.errors import ( @@ -56,6 +58,46 @@ DEFAULT_TRACE_FN: Literal[None] = None +def create_temp_file_with_extension(data: str, file_extension: str) -> str: + """ + Writes data to a temporary file with a specific extension. + The file is closed immediately so it can be read by other processes. + It is automatically deleted when the program exits. + + Parameters: + data (str): The text content to write into the file. + file_extension (str): The extension (e.g., 'json', 'html', 'md'). 
+ The file extension will have "." added to + the beginning. + Returns: + str: The absolute file path to the created temporary file. + """ + # Ensure the extension starts with a dot + file_extension = "." + file_extension + + # Create a secure temporary file with the desired extension. + # delete=False prevents Python from destroying it the moment we close the handle. + with tempfile.NamedTemporaryFile( + mode="w", suffix=file_extension, delete=False, encoding="utf-8" + ) as temp_file: + temp_file.write(data) + temp_path = temp_file.name + + # Register a cleanup hook to delete the file when the Python process terminates + def cleanup_temp_file(): + try: + if os.path.exists(temp_path): + os.remove(temp_path) + except OSError: + # Handle cases where the file was already deleted or is locked + pass + + atexit.register(cleanup_temp_file) + + # Return the path so your program can use or read it + return temp_path + + def print_line_number_and_text(line_number: int, text: str): """Prints a line number an text on that line with it. This is used as the default trace function in Get[] diff --git a/mathics/eval/import_export/compression.py b/mathics/eval/import_export/compression.py index 520bed523..586f67e08 100644 --- a/mathics/eval/import_export/compression.py +++ b/mathics/eval/import_export/compression.py @@ -65,9 +65,15 @@ def eval_ImportZIP( evaluation.message("Import", "fmtnosup", file_format) return SymbolFailed - file_data = archive.read(member).decode("utf-8") - # FIXME: this handles one member. What do we do if we have more? 
- return eval_Import_data_only(file_data, file_format, evaluation, {}) + unzipped_file_data = archive.read(member).decode("utf-8") + converted_member_data = eval_Import_data_only( + unzipped_file_data, file_format, evaluation, {"raw": True} + ) + result = ListExpression( + Expression(SymbolRule, element, converted_member_data) + ) + return result + except FileNotFoundError: evaluation.message("Import", "nffil", String(zip_path)) return SymbolFailed diff --git a/mathics/eval/import_export/importexport.py b/mathics/eval/import_export/importexport.py index c8ec06b3a..6b70e5a75 100644 --- a/mathics/eval/import_export/importexport.py +++ b/mathics/eval/import_export/importexport.py @@ -10,7 +10,7 @@ from mathics.core.atoms import ByteArray, String from mathics.core.builtin import get_option -from mathics.core.convert.expression import to_mathics_list +from mathics.core.convert.expression import to_expression from mathics.core.convert.python import from_python from mathics.core.evaluation import Evaluation from mathics.core.expression import Expression @@ -21,13 +21,15 @@ SymbolFailed, SymbolInputStream, SymbolNone, - SymbolOpenWrite, SymbolRule, SymbolStringToStream, - SymbolWriteString, ) -from mathics.eval.files_io.files import eval_Close, eval_Open -from mathics.eval.files_io.filesystem import eval_DeleteFile, eval_FileExtension +from mathics.eval.files_io.files import ( + create_temp_file_with_extension, + eval_Close, + eval_Open, +) +from mathics.eval.files_io.filesystem import eval_FileExtension # Some WMA file types reported by FileFormat do not # match what the mimetypes (and therefore MIME) extensions @@ -269,7 +271,9 @@ def eval_Import_general( return SymbolFailed # Load the importer - conditionals, default_function, posts, importer_options = IMPORTERS[file_format] + conditionals, import_function_symbol, posts, importer_options = IMPORTERS[ + file_format + ] stream_options, custom_options = importer_exporter_options( importer_options.get("System`Options"), 
options, "System`Import", evaluation @@ -292,13 +296,14 @@ def eval_Import_general( evaluation.predetermined_out = current_predetermined_out return SymbolFailed - # Perform the import defaults = None + # Perform the import if not elements: - defaults = get_results( - default_function, + defaults = perform_import( + import_function_symbol, findfile, + file_format, function_channels, stream_options, custom_options, @@ -320,7 +325,7 @@ def eval_Import_general( ) ) else: - result = defaults.get(default_element.get_string_value()) + result = defaults.get(default_element.value) if result is None: evaluation.message( "Import", "noelem", default_element, String(filetype) @@ -340,9 +345,10 @@ def eval_Import_general( # A list of "Elements" is not obtainable via AvailableElements listed when # ImportExport`RegisterImport was used. Get a list of the field names via # the the "defaults" and "conditional" keys. - defaults = get_results( - default_function, + defaults = perform_import( + import_function_symbol, findfile, + file_format, function_channels, stream_options, custom_options, @@ -366,9 +372,10 @@ def eval_Import_general( ) else: if el in conditionals.keys(): - result = get_results( + result = perform_import( conditionals[el], findfile, + file_format, function_channels, stream_options, custom_options, @@ -385,9 +392,10 @@ def eval_Import_general( return list(result.values())[0] else: if defaults is None: - defaults = get_results( - default_function, + defaults = perform_import( + import_function_symbol, findfile, + file_format, function_channels, stream_options, custom_options, @@ -430,9 +438,10 @@ def eval_Import_Elements(file_format: str, evaluation): return options.get("System`AvailableElements") -def get_results( - tmp_function, +def perform_import( + import_function_symbol: Symbol, findfile: Optional[String], + file_format: str, function_channels, stream_options, custom_options, @@ -441,19 +450,22 @@ def get_results( data: Optional[str], elements: Optional[list] = 
None, ): + """ + This routine does the data import. + "findfile", if not "None", is the path of a file where the unimported data resides. + If findfile is empty, then "data" will have the string data for that file, and + this routine will create a temporary file containing the data. The actual importer + then uses this file. + + "elements" when given contains the parts or kinds of things that should be extracted. + """ current_predetermined_out = evaluation.predetermined_out if function_channels == ListExpression(String("FileNames")): joined_options = list(chain(stream_options, custom_options)) - tmpfile = False if findfile is None: - tmpfile = True - stream = Expression(SymbolOpenWrite).evaluate(evaluation) - findfile = stream.elements[0] - if data is not None: - Expression(SymbolWriteString, String(data)).evaluate(evaluation) - else: - Expression(SymbolWriteString, String("")).evaluate(evaluation) - eval_Close(stream, evaluation) + findfile = String( + create_temp_file_with_extension(data, file_format.lower()) + ) # FIXME: Some import functions do not support element # selection of a collection, just collection retrieval. Here, @@ -465,14 +477,14 @@ def get_results( # address this when element selection is requested and doesn't # return a different result, we retry without the element # selection. 
- import_collection_expression = Expression( - tmp_function, findfile, *joined_options + import_collection_expression = to_expression( + import_function_symbol, findfile, *joined_options ) if elements is None: tmp = import_collection_expression.evaluate(evaluation) else: - import_select_expression = Expression( - tmp_function, findfile, *to_mathics_list(*elements), *joined_options + import_select_expression = to_expression( + import_function_symbol, findfile, *elements, *joined_options ) tmp = import_select_expression.evaluate(evaluation) if tmp == import_select_expression: @@ -482,8 +494,6 @@ def get_results( if tmp is SymbolFailed: return SymbolFailed - if tmpfile: - eval_DeleteFile([findfile.value]) elif function_channels == ListExpression(String("Streams")): if findfile is None: stream = Expression(SymbolStringToStream, String(data)).evaluate(evaluation) @@ -511,14 +521,16 @@ def get_results( evaluation.message("Import", "nffil") evaluation.predetermined_out = current_predetermined_out return None - tmp = Expression(tmp_function, stream, *custom_options).evaluate(evaluation) + tmp = Expression(import_function_symbol, stream, *custom_options).evaluate( + evaluation + ) eval_Close(stream, evaluation) else: # TODO message evaluation.predetermined_out = current_predetermined_out return SymbolFailed - tmp = tmp.get_elements() - if not all(expr.has_form("Rule", None) for expr in tmp): + result_elts = tmp.elements + if not all(expr.has_form("Rule", None) for expr in result_elts): evaluation.predetermined_out = current_predetermined_out return None @@ -551,7 +563,9 @@ def eval_Import_data_only( return SymbolFailed # Load the importer - conditionals, default_function, posts, importer_options = IMPORTERS[file_format] + conditionals, import_function_symbol, posts, importer_options = IMPORTERS[ + file_format + ] stream_options, custom_options = importer_exporter_options( importer_options.get("System`Options"), options, "System`Import", evaluation @@ -571,9 +585,10 @@ def 
eval_Import_data_only( return SymbolFailed # Perform the import - defaults = get_results( - default_function, + defaults = perform_import( + import_function_symbol, None, + file_format, function_channels, stream_options, custom_options, @@ -595,7 +610,7 @@ def eval_Import_data_only( ) ) else: - result = defaults.get(default_element.get_string_value()) + result = defaults.get(default_element.value) if result is None: evaluation.message("Import", "noelem", default_element, String(filetype)) evaluation.predetermined_out = current_predetermined_out @@ -624,7 +639,9 @@ def eval_Import_source_only( return SymbolFailed # Load the importer - conditionals, default_function, posts, importer_options = IMPORTERS[file_format] + conditionals, import_function_symbol, posts, importer_options = IMPORTERS[ + file_format + ] stream_options, custom_options = importer_exporter_options( importer_options.get("System`Options"), options, "System`Import", evaluation @@ -643,10 +660,11 @@ def eval_Import_source_only( evaluation.predetermined_out = current_predetermined_out return SymbolFailed - # Perform the import - defaults = get_results( - default_function, + # Perform the import. + defaults = perform_import( + import_function_symbol, findfile, + file_format, function_channels, stream_options, custom_options, @@ -677,90 +695,6 @@ def eval_Import_source_only( return result -def get_results_for_element_args( - tmp_function, - findfile: Optional[String], - function_channels, - stream_options, - custom_options, - evaluation, - options, - file_format: Optional[str], - elements: list, -): - """ - Return Import results when element args are given. 
- For example: - Import["ExampleData/ExampleData.txt", "Lines"] - ^^^^^^^ - """ - current_predetermined_out = evaluation.predetermined_out - if function_channels == ListExpression(String("FileNames")): - joined_options = list(chain(stream_options, custom_options)) - tmpfile = False - if findfile is None: - tmpfile = True - stream = Expression(SymbolOpenWrite).evaluate(evaluation) - findfile = stream.elements[0] - if file_format is not None: - Expression(SymbolWriteString, String(file_format)).evaluate(evaluation) - else: - Expression(SymbolWriteString, String("")).evaluate(evaluation) - eval_Close(stream, evaluation) - import_expression = Expression( - tmp_function, findfile, *to_mathics_list(elements), *joined_options - ) - tmp = import_expression.evaluate(evaluation) - if tmp is SymbolFailed: - return SymbolFailed - if tmpfile: - eval_DeleteFile([findfile.value]) - elif function_channels == ListExpression(String("Streams")): - if findfile is None: - stream = Expression(SymbolStringToStream, String(file_format)).evaluate( - evaluation - ) - else: - mode = "r" - if options.get("System`BinaryFormat") is SymbolTrue: - if not mode.endswith("b"): - mode += "b" - - encoding_option = options.get("System`CharacterEncoding") - encoding = ( - encoding_option.value if isinstance(encoding_option, String) else None - ) - - stream = eval_Open( - name=findfile, - mode=mode, - stream_type="InputStream", - encoding=encoding, - evaluation=evaluation, - ) - if stream is None: - return - if stream.head is not SymbolInputStream: - evaluation.message("Import", "nffil") - evaluation.predetermined_out = current_predetermined_out - return None - tmp = Expression(tmp_function, stream, *custom_options).evaluate(evaluation) - eval_Close(stream, evaluation) - else: - # TODO message - evaluation.predetermined_out = current_predetermined_out - return SymbolFailed - tmp = tmp.get_elements() - if not all(expr.has_form("Rule", None) for expr in tmp): - evaluation.predetermined_out = 
current_predetermined_out - return None - - # return {a.get_string_value() : b for a,b in map(lambda x: - # x.get_elements(), tmp)} - evaluation.predetermined_out = current_predetermined_out - return {a.get_string_value(): b for a, b in (x.get_elements() for x in tmp)} - - def infer_file_format(filename: str) -> Optional[str]: """ Infer what kind of format filename is in. None is returned if we can't infer From 5dbe72d4801b75ff07fb5c76d876a724b60e472d Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 27 Jun 2026 17:26:48 -0400 Subject: [PATCH 13/19] Revise to fit better inside previous fileformats frameworks. (I wasn't aware of this work until late on.) --- SYMBOLS_MANIFEST.txt | 5 +-- mathics/SystemFiles/Formats/JSON/Import.wl | 10 +++-- mathics/SystemFiles/Formats/ZIP/Import.wl | 10 ++--- mathics/builtin/fileformats/__init__.py | 14 ++++--- mathics/builtin/fileformats/compression.py | 32 +++++++++++++++ mathics/builtin/fileformats/htmlformat.py | 2 +- mathics/builtin/fileformats/jsonformat.py | 31 +++++++-------- mathics/builtin/fileformats/xmlformat.py | 4 +- mathics/builtin/import_export/compression.py | 36 ----------------- mathics/builtin/import_export/json.py | 33 ---------------- .../compression.py | 10 ++++- mathics/eval/fileformats/json.py | 39 +++++++++++++++++++ mathics/eval/fileformats/jsonformat.py | 15 ------- mathics/eval/files_io/files.py | 33 ++++++++++++---- mathics/eval/import_export/importexport.py | 2 +- mathics/eval/import_export/json.py | 27 ------------- 16 files changed, 145 insertions(+), 158 deletions(-) create mode 100644 mathics/builtin/fileformats/compression.py delete mode 100644 mathics/builtin/import_export/compression.py delete mode 100644 mathics/builtin/import_export/json.py rename mathics/eval/{import_export => fileformats}/compression.py (89%) create mode 100644 mathics/eval/fileformats/json.py delete mode 100644 mathics/eval/fileformats/jsonformat.py delete mode 100644 mathics/eval/import_export/json.py diff --git 
a/SYMBOLS_MANIFEST.txt b/SYMBOLS_MANIFEST.txt index d5af2742a..441eb853e 100644 --- a/SYMBOLS_MANIFEST.txt +++ b/SYMBOLS_MANIFEST.txt @@ -1,3 +1,4 @@ +Compress`ImportZIP HTML`DataImport HTML`FullDataImport HTML`HyperlinksImport @@ -12,7 +13,7 @@ ImportExport`RegisterExport ImportExport`RegisterImport Internal`RealValuedNumberQ Internal`RealValuedNumericQ -JSON`Import`JSONImport +JSON`ImportJSON System`$Aborted System`$Assumptions System`$BaseDirectory @@ -582,9 +583,7 @@ System`ImageTake System`ImageType System`Implies System`Import -System`ImportJSON System`ImportString -System`ImportZIP System`In System`Increment System`Indeterminate diff --git a/mathics/SystemFiles/Formats/JSON/Import.wl b/mathics/SystemFiles/Formats/JSON/Import.wl index 8df6cbfa3..43011f67e 100644 --- a/mathics/SystemFiles/Formats/JSON/Import.wl +++ b/mathics/SystemFiles/Formats/JSON/Import.wl @@ -1,15 +1,19 @@ (* ::Package:: *) +(* JSON Javascript Object Notation or JSON web service description Importer. + This is used by Import[] and ImportString[]. + *) + Begin["System`Convert`JSONDump`"] (* JSON legacy element is Data even if Expression would be better. *) $AvailableElements = {"Data", "Dataset"}; ImportExport`RegisterImport[ - "JSON", - ImportJSON, + "JSON", (* WMA mime-type name *) + JSON`ImportJSON, (* Default Function name that handles this. *) {}, - "AvailableElements" -> $AvailableElements, + "AvailableElements" -> $AvailableElements, (* names returned by "Elements" query *) "FunctionChannels" -> {"FileNames"}, "DefaultElement" -> "Data" ] diff --git a/mathics/SystemFiles/Formats/ZIP/Import.wl b/mathics/SystemFiles/Formats/ZIP/Import.wl index 6bc1ffd27..8c3d26d6b 100644 --- a/mathics/SystemFiles/Formats/ZIP/Import.wl +++ b/mathics/SystemFiles/Formats/ZIP/Import.wl @@ -1,7 +1,7 @@ (* ::Package:: *) -(* ZIP compressed file and file archive Importer. - This is used by Import[]. +(* Windows ZIP archive, ZIP compressed file and file archive Importer.
+ This is used by Import[] and ImportString[]. *) Begin["System`Convert`CommonArchiveDump`"] @@ -21,13 +21,13 @@ GetElements[___] := ]; ImportExport`RegisterImport[ - "ZIP", - ImportZIP, + "ZIP", (* WMA mime-type name *) + Compress`ImportZIP, (* Default Function name that handles this. *) {}, (* Post importer function(s) *) FunctionChannels -> {"FileNames"}, (* WMA has this, but I (rocky) am not sure why or what it means: AvailableElements -> $ZIPAvailableElements, *) - AvailableElements -> {"Filenames", "Summary"}, + AvailableElements -> {"Filenames", "Summary"}, (* names returned by "Elements" query *) BinaryFormat -> True, DefaultElement -> "FileNames", HiddenElements -> $ZIPHiddenElements, diff --git a/mathics/builtin/fileformats/__init__.py b/mathics/builtin/fileformats/__init__.py index 38ea5e30c..4a6c38935 100644 --- a/mathics/builtin/fileformats/__init__.py +++ b/mathics/builtin/fileformats/__init__.py @@ -1,10 +1,12 @@ """ -File Formats +Import/Export File Formats -Built-in Importers. +These various file formats can be used by 'Import' and 'Export' and related functions, \ +e.g. 'ImportString'. -""" +Many Import/Export functions are registered in SystemFiles/Formats/*.wl which is \ +autoloaded on startup. -# The Built-in Functions are defined in a separate context under the -# System`. For example System`HTML` and System`XML. This is done to not -# pollute the System` namespace. +The Built-in Functions are defined in a separate context. +For example, HTML` or Compress`. This is done to not pollute the System` namespace.
+""" diff --git a/mathics/builtin/fileformats/compression.py b/mathics/builtin/fileformats/compression.py new file mode 100644 index 000000000..34b6c7186 --- /dev/null +++ b/mathics/builtin/fileformats/compression.py @@ -0,0 +1,32 @@ +""" +Compression & Archive Formats +""" + +from mathics.core.builtin import Builtin, String +from mathics.core.evaluation import Evaluation +from mathics.eval.fileformats.compression import eval_ImportZIP + +# See commit in __init__.py regarding the whacky way this gets called + + +class ImportZIP(Builtin): + """ + :WMA link:https://reference.wolfram.com/language/ref/format/ZIP.html + +
+
'Compress`ImportZIP[path]' +
Import the ZIP archive file $path$ +
+ + """ + + context = "Compress`" + summary_text = "import a ZIP file" + + def eval(self, path: String, evaluation: Evaluation): + "Compress`ImportZIP[path_String]" + return eval_ImportZIP(path, evaluation) + + def eval_with_elements(self, path: String, elements, evaluation: Evaluation): + "Compress`ImportZIP[path_String, elements_]" + return eval_ImportZIP(path, evaluation, elements) diff --git a/mathics/builtin/fileformats/htmlformat.py b/mathics/builtin/fileformats/htmlformat.py index bd7d889bc..514b45f86 100644 --- a/mathics/builtin/fileformats/htmlformat.py +++ b/mathics/builtin/fileformats/htmlformat.py @@ -2,7 +2,7 @@ """ HTML -Basic implementation for a HTML importer. +HTML importer. """ diff --git a/mathics/builtin/fileformats/jsonformat.py b/mathics/builtin/fileformats/jsonformat.py index 7afffb71e..4bda54aa8 100644 --- a/mathics/builtin/fileformats/jsonformat.py +++ b/mathics/builtin/fileformats/jsonformat.py @@ -1,32 +1,29 @@ -# -*- coding: utf-8 -*- - """ -JSON +JSON File Format -Basic implementation for an JSON importer. +JSON importer (via Python's "json" module). """ -from mathics.core.builtin import Builtin -from mathics.core.expression import Evaluation -from mathics.eval.fileformats.jsonformat import eval_JSONImport +from mathics.core.builtin import Builtin, String +from mathics.core.evaluation import Evaluation +from mathics.eval.fileformats.json import eval_JSONImport -class JSONImport(Builtin): +class ImportJSON(Builtin): """ - ## :native internal: + :WMA link:https://reference.wolfram.com/language/ref/format/JSON.html
-
'JSON`Import`JSONImport["file"]' -
parses "string" as a JSON file, and returns the data as a nested \ - list of rules. +
'JSON`ImportJSON[path]' +
Read $path$ as JSON and convert that to its corresponding Mathics3 equivalent.
""" - summary_text = "import elements from json" - context = "JSON`Import`" + context = "JSON`" messages = {"dec": "Decoding Error at `1`"} + summary_text = "import JSON file" - def eval(self, filename, evaluation: Evaluation): - """%(name)s[filename_String]""" - return eval_JSONImport(filename.value, evaluation) + def eval(self, path: String, evaluation: Evaluation): + "JSON`ImportJSON[path_String]" + return eval_JSONImport(path, evaluation) diff --git a/mathics/builtin/fileformats/xmlformat.py b/mathics/builtin/fileformats/xmlformat.py index a784232ef..0da65b062 100644 --- a/mathics/builtin/fileformats/xmlformat.py +++ b/mathics/builtin/fileformats/xmlformat.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- """ -XML +XML File Format -Basic implementation for an XML importer. +XML importer (via lxml). """ diff --git a/mathics/builtin/import_export/compression.py b/mathics/builtin/import_export/compression.py deleted file mode 100644 index 7c30a2ab4..000000000 --- a/mathics/builtin/import_export/compression.py +++ /dev/null @@ -1,36 +0,0 @@ -""" -Compression & Archive Formats -""" - -from mathics.core.builtin import Builtin, String -from mathics.core.evaluation import Evaluation -from mathics.eval.import_export.compression import eval_ImportZIP - -# The builtin functions defined here are called normally in a somewhat convoluted -# (and non obvious) way: -# via Import[] which consults RegisterImport[] which is invoked by autoloading -# Format/xxx/Import.wl - -# Furthermore, all we really do is just pass the call over to eval_Import... - - -class ImportZIP(Builtin): - """ - :WMA link:https://reference.wolfram.com/language/ref/format/ZIP.html - -
-
'ImportZIP[path]' -
Run zip for archive file $path$ -
- - """ - - summary_text = "import ZIP file" - - def eval(self, path: String, evaluation: Evaluation): - "ImportZIP[path_String]" - return eval_ImportZIP(path.value, evaluation) - - def eval_with_elements(self, path: String, elements, evaluation: Evaluation): - "ImportZIP[path_String, elements_]" - return eval_ImportZIP(path.value, evaluation, elements) diff --git a/mathics/builtin/import_export/json.py b/mathics/builtin/import_export/json.py deleted file mode 100644 index 5ba008e9e..000000000 --- a/mathics/builtin/import_export/json.py +++ /dev/null @@ -1,33 +0,0 @@ -""" -JSON-Related Formats -""" - -from mathics.core.builtin import Builtin, String -from mathics.core.evaluation import Evaluation -from mathics.eval.import_export.json import eval_JSONImport - -# The builtin functions defined here are called normally in a somewhat -# convoluted (and non obvious) way: via Import[] which consults -# RegisterImport[] which is invoked by autoloading -# Format/xxx/Import.wl - -# Furthermore, all we really do is just pass the call over to -# eval_Import... - - -class ImportJSON(Builtin): - """ - :WMA link:https://reference.wolfram.com/language/ref/format/JSON.html - -
-
'ImportJSON[path]' -
Read $path$ as JSON and convert that to its corresponding Mathics3 equivalent. -
- - """ - - summary_text = "import JSON file" - - def eval(self, path: String, evaluation: Evaluation): - "ImportJSON[path_String]" - return eval_JSONImport(path.value) diff --git a/mathics/eval/import_export/compression.py b/mathics/eval/fileformats/compression.py similarity index 89% rename from mathics/eval/import_export/compression.py rename to mathics/eval/fileformats/compression.py index 586f67e08..fc69bd862 100644 --- a/mathics/eval/import_export/compression.py +++ b/mathics/eval/fileformats/compression.py @@ -8,6 +8,7 @@ from mathics.core.list import ListExpression from mathics.core.symbols import SymbolNull from mathics.core.systemsymbols import SymbolFailed, SymbolRule +from mathics.eval.files_io.files import resolve_file from mathics.eval.import_export.importexport import ( IMPORTERS, eval_Import_data_only, @@ -16,13 +17,20 @@ def eval_ImportZIP( - zip_path: str, evaluation: Evaluation, members: Optional[list[str]] = None + zip_name: String, evaluation: Evaluation, members: Optional[list[str]] = None ) -> ListExpression: """If `members` is empty, this function takes a ZIP file path and returns a list of file names/paths contained inside. "If `members` is given, then extract those members from the ZIP file. """ + + zip_path, is_temporary_file = resolve_file(zip_name, "r", evaluation) + if zip_path is None: + return SymbolFailed + + # The below "try:" is probably unnecessary since resolve_file should + # catch errors. 
try: with zipfile.ZipFile(zip_path, "r") as archive: if members is None: diff --git a/mathics/eval/fileformats/json.py b/mathics/eval/fileformats/json.py new file mode 100644 index 000000000..fb582a511 --- /dev/null +++ b/mathics/eval/fileformats/json.py @@ -0,0 +1,39 @@ +import json + +from mathics.core.atoms import String +from mathics.core.convert.python import from_python +from mathics.core.expression import Evaluation, Expression +from mathics.core.list import ListExpression +from mathics.core.systemsymbols import SymbolFailed, SymbolRule +from mathics.eval.files_io.files import resolve_file + + +def eval_JSONImport(json_name: String, evaluation: Evaluation) -> ListExpression: + """Takes a JSON file path and returns its parsed contents as a list of "Data" and "Dataset" rules.""" + json_path, is_temporary_file = resolve_file(json_name, "r", evaluation) + if json_path is None: + return SymbolFailed + + with open(json_path, "r") as json_file: + try: + json_data = json.load(json_file) + except json.decoder.JSONDecodeError as exc: + evaluation.message("JSON`Import`JSONImport", "dec", String(exc.msg)) + return None + mathics_json = from_python(json_data) + + # Tag the result by wrapping in a list of rule expressions. + # We do this so that Import can extract pieces by element name.
+ exprs = [ + Expression( + SymbolRule, + String("Data"), + mathics_json, + ), + Expression( + SymbolRule, + String("Dataset"), + mathics_json, + ), + ] + return ListExpression(*exprs) diff --git a/mathics/eval/fileformats/jsonformat.py b/mathics/eval/fileformats/jsonformat.py deleted file mode 100644 index cfc0e9cb0..000000000 --- a/mathics/eval/fileformats/jsonformat.py +++ /dev/null @@ -1,15 +0,0 @@ -import json - -from mathics.core.atoms import String -from mathics.core.convert.python import from_python -from mathics.core.expression import Evaluation - - -def eval_JSONImport(source_path: str, evaluation: Evaluation): - with open(source_path, "r") as f: - try: - json_dict = json.load(f) - except json.decoder.JSONDecodeError as exc: - evaluation.message("JSON`Import`JSONImport", "dec", String(exc.msg)) - return None - return from_python(json_dict) diff --git a/mathics/eval/files_io/files.py b/mathics/eval/files_io/files.py index c93e2ecc1..74a292e15 100644 --- a/mathics/eval/files_io/files.py +++ b/mathics/eval/files_io/files.py @@ -235,14 +235,9 @@ def eval_Open( encoding: Optional[str], evaluation: Evaluation, ): - path = name.value - tmp, is_temporary_file = path_search(path) - if tmp is None: - if mode in ["r", "rb"]: - evaluation.message("General", "noopen", name) - return SymbolFailed - else: - path = tmp + path, is_temporary_file = resolve_file(name, mode, evaluation) + if path is None: + return SymbolFailed try: opener = Mathics3Open( @@ -426,3 +421,25 @@ def eval_Read( return Expression(SymbolHold, from_python(result)) return from_python(result) + + +def resolve_file(name: String, mode: str, evaluation: Evaluation) -> Optional[str]: + """Resolve 'name' using `path_search` and return the resolved name as the first + item of a tuple. + + If "mode" is a write mode, then the file does not have to exist beforehand. + In some cases `path_search()` will decide that a temporary file is to be + created.
In this case that fact will be reflected by returning True as the + second item of the tuple. + + If we can't open the file, we emit a "noopen" message. + """ + path = name.value + resolved_path, is_temporary_file = path_search(path) + if resolved_path is None: + if mode in ["r", "rb"]: + evaluation.message("General", "noopen", name) + return None, False + resolved_path = path + + return resolved_path, is_temporary_file diff --git a/mathics/eval/import_export/importexport.py b/mathics/eval/import_export/importexport.py index 6b70e5a75..f3ff937f9 100644 --- a/mathics/eval/import_export/importexport.py +++ b/mathics/eval/import_export/importexport.py @@ -529,7 +529,7 @@ def perform_import( # TODO message evaluation.predetermined_out = current_predetermined_out return SymbolFailed - result_elts = tmp.elements + result_elts = tmp.get_elements() if not all(expr.has_form("Rule", None) for expr in result_elts): evaluation.predetermined_out = current_predetermined_out return None diff --git a/mathics/eval/import_export/json.py b/mathics/eval/import_export/json.py deleted file mode 100644 index f6f2e11ed..000000000 --- a/mathics/eval/import_export/json.py +++ /dev/null @@ -1,27 +0,0 @@ -import json - -from mathics.core.atoms import String -from mathics.core.convert.python import from_python -from mathics.core.expression import Expression -from mathics.core.list import ListExpression -from mathics.core.systemsymbols import SymbolRule - - -def eval_JSONImport(json_path: str) -> ListExpression: - """Takes a ZIP file path and returns a list of file names/paths contained inside.""" - with open(json_path, "r") as json_file: - json_data = json.load(json_file) - mathics_json = from_python(json_data) - exprs = [ - Expression( - SymbolRule, - String("Data"), - mathics_json, - ), - Expression( - SymbolRule, - String("Dataset"), - mathics_json, - ), - ] - return ListExpression(*exprs) From d67cf1d16ed7fc2794e299cb1f2c6d99b9ae3b3a Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 27 Jun 
2026 18:29:58 -0400 Subject: [PATCH 14/19] fileformats.json -> fileformats.jsonformat By renaming the file, we make it possible to syntax check via running "python jsonformat.py". "python json.py" gives a module ambiguity warning because "import json" is used in the file. --- mathics/builtin/fileformats/jsonformat.py | 2 +- mathics/eval/fileformats/{json.py => jsonformat.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename mathics/eval/fileformats/{json.py => jsonformat.py} (100%) diff --git a/mathics/builtin/fileformats/jsonformat.py b/mathics/builtin/fileformats/jsonformat.py index 4bda54aa8..bbaf3b6fa 100644 --- a/mathics/builtin/fileformats/jsonformat.py +++ b/mathics/builtin/fileformats/jsonformat.py @@ -6,7 +6,7 @@ from mathics.core.builtin import Builtin, String from mathics.core.evaluation import Evaluation -from mathics.eval.fileformats.json import eval_JSONImport +from mathics.eval.fileformats.jsonformat import eval_JSONImport class ImportJSON(Builtin): diff --git a/mathics/eval/fileformats/json.py b/mathics/eval/fileformats/jsonformat.py similarity index 100% rename from mathics/eval/fileformats/json.py rename to mathics/eval/fileformats/jsonformat.py From 1a4a3191182e339ed37e2394aad92293d37c6053 Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 27 Jun 2026 18:40:00 -0400 Subject: [PATCH 15/19] More forceful wording in FIXME --- mathics/builtin/fileformats/htmlformat.py | 12 ++++++------ mathics/builtin/fileformats/xmlformat.py | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/mathics/builtin/fileformats/htmlformat.py b/mathics/builtin/fileformats/htmlformat.py index 514b45f86..0bcf957a0 100644 --- a/mathics/builtin/fileformats/htmlformat.py +++ b/mathics/builtin/fileformats/htmlformat.py @@ -138,8 +138,8 @@ def eval(self, text: String, evaluation: Evaluation): def eval_with_element(self, text, element, evaluation: Evaluation): """%(name)s[text_String, element_]""" - # FIXME?: right now we aren't using element.
Things might be - # more efficient if we used element? + # FIXME: right now we aren't using element, and should use this to more + # efficiently extract part of the XML file that we want. return self.eval(text, evaluation) @@ -421,8 +421,8 @@ def source(filename): def eval_with_element(self, text, element, evaluation: Evaluation): """%(name)s[text_String, element_]""" - # FIXME?: right now we aren't using element. Things might be - # more efficient if we used element? + # FIXME: right now we aren't using element, and should use this to more + # efficiently extract part of the XML file that we want. return self.eval(text, evaluation) @@ -470,6 +470,6 @@ def eval(self, text, evaluation: Evaluation): def eval_with_element(self, text, element, evaluation: Evaluation): """%(name)s[text_String, element_]""" - # FIXME?: right now we aren't using element. Things might be - # more efficient if we used element? + # FIXME: right now we aren't using element, and should use this to more + # efficiently extract part of the HTML file that we want. return self.eval(text, evaluation) diff --git a/mathics/builtin/fileformats/xmlformat.py b/mathics/builtin/fileformats/xmlformat.py index 0da65b062..8a45ec967 100644 --- a/mathics/builtin/fileformats/xmlformat.py +++ b/mathics/builtin/fileformats/xmlformat.py @@ -347,8 +347,8 @@ def lines(): def eval_with_element(self, text, element, evaluation: Evaluation): """%(name)s[text_String, element_]""" - # FIXME?: right now we aren't using element. Things might be - # more efficient if we used element? + # FIXME: right now we aren't using element, and should use this to more + # efficiently extract part of the XML file that we want. return self.eval(text, evaluation) @@ -389,8 +389,8 @@ def eval(self, text, evaluation: Evaluation): def eval_with_element(self, text, element, evaluation: Evaluation): """%(name)s[text_String, element_]""" - # FIXME?: right now we aren't using element. Things might be - # more efficient if we used element? 
+ # FIXME: right now we aren't using element, and should use this to more + # efficiently extract part of the XML file that we want. return self.eval(text, evaluation) @@ -420,6 +420,6 @@ def eval(self, text, evaluation: Evaluation): def eval_with_element(self, text, element, evaluation: Evaluation): """%(name)s[text_String, element_]""" - # FIXME?: right now we aren't using element. Things might be - # more efficient if we used element? + # FIXME: right now we aren't using element, and should use this to more + # efficiently extract part of the XML file that we want. return self.eval(text, evaluation) From 7e86a2e2849ae94fa31b36e74c53a52847edf255 Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 27 Jun 2026 19:48:13 -0400 Subject: [PATCH 16/19] Add a minor comment on get_elements() --- mathics/eval/import_export/importexport.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mathics/eval/import_export/importexport.py b/mathics/eval/import_export/importexport.py index f3ff937f9..0e3d8d5e7 100644 --- a/mathics/eval/import_export/importexport.py +++ b/mathics/eval/import_export/importexport.py @@ -529,13 +529,14 @@ def perform_import( # TODO message evaluation.predetermined_out = current_predetermined_out return SymbolFailed + + # .get_elements() is more tolerant of the type of "tmp" than + # ._elements which assumes a Expression type. 
result_elts = tmp.get_elements() if not all(expr.has_form("Rule", None) for expr in result_elts): evaluation.predetermined_out = current_predetermined_out return None - # return {a.get_string_value() : b for a,b in map(lambda x: - # x.get_elements(), tmp)} evaluation.predetermined_out = current_predetermined_out return {a.get_string_value(): b for a, b in (x.get_elements() for x in tmp)} From ea36ef418ad885f1ebb50948863fd824ee394f61 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 28 Jun 2026 09:07:12 -0400 Subject: [PATCH 17/19] Set "Text" as a default WMA mime type when no other is found --- mathics/eval/import_export/importexport.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mathics/eval/import_export/importexport.py b/mathics/eval/import_export/importexport.py index 0e3d8d5e7..c12f11746 100644 --- a/mathics/eval/import_export/importexport.py +++ b/mathics/eval/import_export/importexport.py @@ -702,4 +702,4 @@ def infer_file_format(filename: str) -> Optional[str]: a format.
""" file_extension = eval_FileExtension(filename).lower() - return FILE_EXTENSION_MAP.get(file_extension) + return FILE_EXTENSION_MAP.get(file_extension, "Text") From f6d36b8587791f60cc4e4e6033e6132fdeeb4bf8 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 28 Jun 2026 12:16:48 -0400 Subject: [PATCH 18/19] Allow selectable default on infer_file_format --- mathics/eval/fileformats/compression.py | 2 +- mathics/eval/import_export/importexport.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mathics/eval/fileformats/compression.py b/mathics/eval/fileformats/compression.py index fc69bd862..2c02b3b9f 100644 --- a/mathics/eval/fileformats/compression.py +++ b/mathics/eval/fileformats/compression.py @@ -68,7 +68,7 @@ def eval_ImportZIP( for element in elements: member = element.value - file_format = infer_file_format(member).upper() + file_format = infer_file_format(member, "Text").upper() if file_format not in IMPORTERS.keys(): evaluation.message("Import", "fmtnosup", file_format) return SymbolFailed diff --git a/mathics/eval/import_export/importexport.py b/mathics/eval/import_export/importexport.py index c12f11746..17bd8c514 100644 --- a/mathics/eval/import_export/importexport.py +++ b/mathics/eval/import_export/importexport.py @@ -696,10 +696,10 @@ def eval_Import_source_only( return result -def infer_file_format(filename: str) -> Optional[str]: +def infer_file_format(filename: str, default_extension: str = None) -> Optional[str]: """ Infer what kind of format filename is in. None is returned if we can't infer a format.
""" file_extension = eval_FileExtension(filename).lower() - return FILE_EXTENSION_MAP.get(file_extension, "Text") + return FILE_EXTENSION_MAP.get(file_extension, default_extension) From 662428d93085ff9f59b988b30563f172bd872f32 Mon Sep 17 00:00:00 2001 From: rocky Date: Tue, 30 Jun 2026 07:57:49 -0400 Subject: [PATCH 19/19] Better describe what up here with Import and Export --- mathics/builtin/fileformats/__init__.py | 36 ++++++++++++-- mathics/builtin/import_export/importexport.py | 47 +++++++++++++++++-- mathics/eval/fileformats/__init__.py | 0 mathics/eval/fileformats/compression.py | 11 ++++- mathics/eval/import_export/importexport.py | 33 +++++++++---- 5 files changed, 107 insertions(+), 20 deletions(-) create mode 100644 mathics/eval/fileformats/__init__.py diff --git a/mathics/builtin/fileformats/__init__.py b/mathics/builtin/fileformats/__init__.py index 4a6c38935..1614dbf19 100644 --- a/mathics/builtin/fileformats/__init__.py +++ b/mathics/builtin/fileformats/__init__.py @@ -1,8 +1,32 @@ -""" -Import/Export File Formats +r"""Import/Export File Formats, Importers and Exporters + +The data of files on a filesystem or retrieved from the Internet often are structured \ +according to specific structures and rules. For example, consider different kinds of \ +structuring used in a JSON file, versus an HTML file, or a compressed GZIP file. + +In some cases, such as archive files, e.g., ZIP, TAR, and JAR, the file contains component parts, \ +which in WMA terminology are called "members" which are part of the broader metadata items \ +called "elements". -There various file formats can be used by 'Import' and 'Export' and related functions, \ -e.g. 'ImportString'. +A MIME type is typically associated with each kind of format. \Mathics3, following WMA, \ +uses a shortened name for this MIME type. For example \Mathics3 uses "HTML" as a shorthand \ +for the MIME type "text/html".
+ +Below is a list of supported file types for which we have built-in importers or exporters written \ +in Python. Other importers, however, are written in \Mathics3. + +Variable +:\$ExportFormats: +/doc/reference-of-built-in-symbols/inputoutput-files-and-filesystem/importing-and-exporting/\$exportformats \ +contains a list of file formats that are supported by +:Export: +/doc/reference-of-built-in-symbols/inputoutput-files-and-filesystem/importing-and-exporting/export, \ +while +:\$ImportFormats: +/doc/reference-of-built-in-symbols/inputoutput-files-and-filesystem/importing-and-exporting/\$importformats \ +does the corresponding thing for +:Import: +/doc/reference-of-built-in-symbols/inputoutput-files-and-filesystem/importing-and-exporting/import. Many Import/Export functions are registered in SystemFiles/Formats/*.wl which is \ autoloaded on startup. @@ -10,3 +34,7 @@ The Built-in Functions are defined in a separate context. For example, HTML` or Compress`. This is done to not pollute the System` namespace. """ + +# This tells documentation how to sort this module +# Here we are also hiding "file_io" since this can erroneously appear at the top level. +sort_order = "mathics.builtin.importing-export-file-formats" diff --git a/mathics/builtin/import_export/importexport.py b/mathics/builtin/import_export/importexport.py index 90857ab9e..975e0b859 100644 --- a/mathics/builtin/import_export/importexport.py +++ b/mathics/builtin/import_export/importexport.py @@ -1,8 +1,33 @@ # -*- coding: utf-8 -*- -""" +r""" Import and Export Functions and Variables +Many kinds of data formats can be read into or written from \Mathics3. + +In contrast to reading or writing a file, importing and exporting imply some sort of \ +data restructuring into \Mathics3 and structuring into a filesystem that is not \ +just a stream of bytes, but instead also contains additional metadata and requires data reorganization \ +when stored in a filesystem.
+ +See +:Import/Export File Formats: +/doc/reference-of-built-in-symbols/fileformats/ for documentation \ +on the specific kinds of File Formats \Mathics3 supports. + + +Variable +:\$ExportFormats: +/doc/reference-of-built-in-symbols/inputoutput-files-and-filesystem/importing-and-exporting/\$exportformats \ +contains a list of file formats that are supported by +:Export: +/doc/reference-of-built-in-symbols/inputoutput-files-and-filesystem/importing-and-exporting/export, \ +while +:\$ImportFormats: +/doc/reference-of-built-in-symbols/inputoutput-files-and-filesystem/importing-and-exporting/\$importformats \ +does the corresponding thing for +:Import: +/doc/reference-of-built-in-symbols/inputoutput-files-and-filesystem/importing-and-exporting/import. """ import base64 @@ -434,7 +459,13 @@ class Import(Builtin): "$OptionSyntax": "System`Ignore", } - summary_text = "import elements from a file" + rules = { + "Import[filename_]": "Import[filename, {}]", + } + + summary_text = ( + r"read and convert to \Mathics3 some or all elements of structured file" + ) def eval_elements_query(self, source, evaluation, options={}): """Import[source_String, "Elements", OptionsPattern[]]""" @@ -537,7 +568,9 @@ class ImportString(Builtin): "$OptionSyntax": "System`Ignore", } - summary_text = "import data or elements of data from a string" + summary_text = ( + r"read and convert to \Mathics3 some or all elements of structured string" + ) def eval_data_only(self, data, evaluation, options={}): "ImportString[data_, OptionsPattern[]]" @@ -618,7 +651,9 @@ class Export(Builtin): "$OptionSyntax": "System`Ignore", } - summary_text = "export elements to a file" + summary_text = ( + r"write and convert to \Mathics3 some or all elements of structured file" + ) def eval(self, dest, expr, evaluation, options={}): "Export[dest_, expr_, OptionsPattern[Export]]" @@ -767,7 +802,9 @@ class ExportString(Builtin): rules = { "ExportString[expr_, elems_?NotListQ]": ("ExportString[expr, {elems}]"), } - 
summary_text = "export elements to a string" + summary_text = ( + r"write and convert to \Mathics3 some or all elements of structured string" + ) def eval_element(self, expr, element: String, evaluation: Evaluation, **options): "ExportString[expr_, element_String, OptionsPattern[ExportString]]" diff --git a/mathics/eval/fileformats/__init__.py b/mathics/eval/fileformats/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/mathics/eval/fileformats/compression.py b/mathics/eval/fileformats/compression.py index 2c02b3b9f..3d4ce2508 100644 --- a/mathics/eval/fileformats/compression.py +++ b/mathics/eval/fileformats/compression.py @@ -1,3 +1,8 @@ +""" +Evaluation routines for handling data in some sort of archive format, +e.g. ZIP, TAR, etc. +""" + import zipfile from typing import Optional @@ -22,7 +27,7 @@ def eval_ImportZIP( """If `members` is empty, this function takes a ZIP file path and returns a list of file names/paths contained inside. - "If `members` is given, then extract those members from the ZIP file. + "If `members` is given, then extract those members (or files) from the ZIP file. """ zip_path, is_temporary_file = resolve_file(zip_name, "r", evaluation) @@ -36,6 +41,10 @@ def eval_ImportZIP( if members is None: filenames = archive.namelist() mathics_filenames = to_mathics_list(*filenames) + + # Wrap metadata or "elements" of of the zip file into + # list of Rule. The caller can then use + # rules to pick out specific elements desired. exprs = [ Expression( SymbolRule, diff --git a/mathics/eval/import_export/importexport.py b/mathics/eval/import_export/importexport.py index 17bd8c514..473e0dd6b 100644 --- a/mathics/eval/import_export/importexport.py +++ b/mathics/eval/import_export/importexport.py @@ -1,6 +1,8 @@ """ Functions for figuring out a filetype or MIME type a given file path. + +Following WMA, we use WMA's custom short name for a mime type. 
""" import mimetypes @@ -37,6 +39,7 @@ # convert these mismatches MIME_SHORTNAME_TO_WMA: Final[Dict[str, str]] = {"JPG": "JPEG", "TXT": "Text"} +# FIXME: elements of the below dict should be a dataclass. IMPORTERS = {} # TODO: This hard-coded dictionary should be @@ -254,7 +257,7 @@ def eval_Import_general( elements = [el.value for el in elements] - # Determine file format + # Determine WMA version of the mime type. file_format = None for el in elements.copy(): if el.upper() in IMPORTERS.keys(): @@ -270,7 +273,8 @@ def eval_Import_general( evaluation.predetermined_out = current_predetermined_out return SymbolFailed - # Load the importer + # Extract information about the loader used for this MIME type. + # FIXME: turn into dataclass conditionals, import_function_symbol, posts, importer_options = IMPORTERS[ file_format ] @@ -419,11 +423,11 @@ def eval_Import_general( def eval_Import_Elements(file_format: str, evaluation): - """ - Basic implementation behind Import[fileformat, Elements]. + """Basic implementation behind Import[fileformat, Elements]. + This returns the element names that can be used for a specific - file_format type. We get this from the AvailableElements field - mentioned when registering an importer. + file_format type. We get this from the + AvailableElements field mentioned when registering an importer. """ filetype = MIME_SHORTNAME_TO_WMA.get(file_format, file_format).upper() @@ -450,14 +454,23 @@ def perform_import( data: Optional[str], elements: Optional[list] = None, ): - """ - This routine does the data import. + """ This routine does the import. "import" here means reading a \ + file or string which has been structured according to a format belonging to a mime type. + "findfile", if not "None", is the path of a file where the unimported data resides. 
- If findfile is empty, then "data" will have the string data for that file, and + If "findfile" is empty, then "data" will have the string data for that file, and this routine will create a temporary file containing the data. The actual importer then uses this file. - "elements" when given contains the parts or kinds of things that should be extracted. + "elements", when given, contains the parts or kinds of things that should be extracted. + Usually, there are custom routines for retrieving an element. + + It is also possible that when a custom element extraction does not + exist, that the caller will do the filtering after retrieving all of the information. + + This is not advisable when the information inside an element is small compared + to the information of the entire importable file. For example consider asking + about the member names or contents of tar file compared to the entire tar file. """ current_predetermined_out = evaluation.predetermined_out if function_channels == ListExpression(String("FileNames")):