diff --git a/SYMBOLS_MANIFEST.txt b/SYMBOLS_MANIFEST.txt index d5af2742a..441eb853e 100644 --- a/SYMBOLS_MANIFEST.txt +++ b/SYMBOLS_MANIFEST.txt @@ -1,3 +1,4 @@ +Compress`ImportZIP HTML`DataImport HTML`FullDataImport HTML`HyperlinksImport @@ -12,7 +13,7 @@ ImportExport`RegisterExport ImportExport`RegisterImport Internal`RealValuedNumberQ Internal`RealValuedNumericQ -JSON`Import`JSONImport +JSON`ImportJSON System`$Aborted System`$Assumptions System`$BaseDirectory @@ -582,9 +583,7 @@ System`ImageTake System`ImageType System`Implies System`Import -System`ImportJSON System`ImportString -System`ImportZIP System`In System`Increment System`Indeterminate diff --git a/mathics/SystemFiles/Formats/JSON/Import.wl b/mathics/SystemFiles/Formats/JSON/Import.wl index 8df6cbfa3..43011f67e 100644 --- a/mathics/SystemFiles/Formats/JSON/Import.wl +++ b/mathics/SystemFiles/Formats/JSON/Import.wl @@ -1,15 +1,19 @@ (* ::Package:: *) +(* JSON JavaScript Object Notation or JSON web service description Importer. + This is used by Import[] and ImportString[]. + *) + Begin["System`Convert`JSONDump`"] (* JSON legacy element is Data even if Expression would be better. *) $AvailableElements = {"Data", "Dataset"}; ImportExport`RegisterImport[ - "JSON", - ImportJSON, + "JSON", (* WMA mime-type name *) + JSON`ImportJSON, (* Default Function name that handles this. *) {}, - "AvailableElements" -> $AvailableElements, + "AvailableElements" -> $AvailableElements, (* names returned by "Elements" query *) "FunctionChannels" -> {"FileNames"}, "DefaultElement" -> "Data" ] diff --git a/mathics/SystemFiles/Formats/ZIP/Import.wl b/mathics/SystemFiles/Formats/ZIP/Import.wl index 6bc1ffd27..8c3d26d6b 100644 --- a/mathics/SystemFiles/Formats/ZIP/Import.wl +++ b/mathics/SystemFiles/Formats/ZIP/Import.wl @@ -1,7 +1,7 @@ (* ::Package:: *) -(* ZIP compressed file and file archive Importer. - This is used by Import[]. +(* Windows ZIP archive, ZIP compressed file and file archive Importer. 
+ This is used by Import[] and ImportString[]. *) Begin["System`Convert`CommonArchiveDump`"] @@ -21,13 +21,13 @@ GetElements[___] := ]; ImportExport`RegisterImport[ - "ZIP", - ImportZIP, + "ZIP", (* WMA mime-type name *) + Compress`ImportZIP, (* Default Function name that handles this. *) {}, (* Post importer function(s) *) FunctionChannels -> {"FileNames"}, (* WMA has this, but I (rocky) am not sure why or what it means: AvailableElements -> $ZIPAvailableElements, *) - AvailableElements -> {"Filenames", "Summary"}, + AvailableElements -> {"Filenames", "Summary"}, (* names returned by "Elements" query *) BinaryFormat -> True, DefaultElement -> "FileNames", HiddenElements -> $ZIPHiddenElements, diff --git a/mathics/builtin/fileformats/__init__.py b/mathics/builtin/fileformats/__init__.py index 38ea5e30c..1614dbf19 100644 --- a/mathics/builtin/fileformats/__init__.py +++ b/mathics/builtin/fileformats/__init__.py @@ -1,10 +1,40 @@ -""" -File Formats +r"""Import/Export File Formats, Importers and Exporters + +The data of files on a filesystem or retrieved from the Internet often are structured \ +according to specific structures and rules. For example, consider different kinds of \ +structuring used in a JSON file, versus an HTML file, or a compressed GZIP file. + +In some cases, such as archive files, e.g., ZIP, TAR, and JAR, the file contains component parts, \ +which in WMA terminology are called "members"; these are part of the broader metadata items \ +called "elements". + +A MIME type is typically associated with each kind of format. \Mathics3, following WMA, \ +uses a shortened name for this MIME type. For example \Mathics3 uses "HTML" as a shorthand \ +for the MIME type "text/html". + +Below is a list of supported file types for which we have built-in importers or exporters written \ +in Python. Other importers, however, are written in \Mathics3. 
+ +Variable +:\$ExportFormats: +/doc/reference-of-built-in-symbols/inputoutput-files-and-filesystem/importing-and-exporting/\$exportformats \ +contains a list of file formats that are supported by +:Export: +/doc/reference-of-built-in-symbols/inputoutput-files-and-filesystem/importing-and-exporting/export, \ +while +:\$ImportFormats: +/doc/reference-of-built-in-symbols/inputoutput-files-and-filesystem/importing-and-exporting/\$importformats \ +does the corresponding thing for +:Import: +/doc/reference-of-built-in-symbols/inputoutput-files-and-filesystem/importing-and-exporting/import. -Built-in Importers. +Many Import/Export functions are registered in SystemFiles/Formats/*.wl which is \ +autoloaded on startup. +The Built-in Functions are defined in a separate context. +For example, HTML` or Compress`. This is done to not pollute the System` namespace. """ -# The Built-in Functions are defined in a separate context under the -# System`. For example System`HTML` and System`XML. This is done to not -# pollute the System` namespace. +# This tells documentation how to sort this module +# Here we are also hiding "file_io" since this can erroneously appear at the top level. +sort_order = "mathics.builtin.importing-export-file-formats" diff --git a/mathics/builtin/fileformats/compression.py b/mathics/builtin/fileformats/compression.py new file mode 100644 index 000000000..34b6c7186 --- /dev/null +++ b/mathics/builtin/fileformats/compression.py @@ -0,0 +1,32 @@ +""" +Compression & Archive Formats +""" + +from mathics.core.builtin import Builtin, String +from mathics.core.evaluation import Evaluation +from mathics.eval.fileformats.compression import eval_ImportZIP + +# See commit in __init__.py regarding the whacky way this gets called + + +class ImportZIP(Builtin): + """ + :WMA link:https://reference.wolfram.com/language/ref/format/ZIP.html + +
+
'Compress`ImportZIP[path]' +
Import ZIP archive file $path$, returning its file names and member contents. +
+ + """ + + context = "Compress`" + summary_text = "import a ZIP file" + + def eval(self, path: String, evaluation: Evaluation): + "Compress`ImportZIP[path_String]" + return eval_ImportZIP(path, evaluation) + + def eval_with_elements(self, path: String, elements, evaluation: Evaluation): + "Compress`ImportZIP[path_String, elements_]" + return eval_ImportZIP(path, evaluation, elements) diff --git a/mathics/builtin/fileformats/htmlformat.py b/mathics/builtin/fileformats/htmlformat.py index 48a789744..0bcf957a0 100644 --- a/mathics/builtin/fileformats/htmlformat.py +++ b/mathics/builtin/fileformats/htmlformat.py @@ -2,7 +2,7 @@ """ HTML -Basic implementation for a HTML importer. +HTML importer. """ @@ -15,6 +15,7 @@ from mathics.core.builtin import Builtin, MessageException from mathics.core.convert.expression import to_expression, to_mathics_list from mathics.core.convert.python import from_python +from mathics.core.evaluation import Evaluation from mathics.core.expression import Expression from mathics.core.list import ListExpression from mathics.core.symbols import Symbol @@ -126,7 +127,7 @@ class _TagImport(_HTMLBuiltin): def _import(self, tree): raise NotImplementedError - def eval(self, text, evaluation): + def eval(self, text: String, evaluation: Evaluation): """%(name)s[text_String]""" tree = parse_html(parse_html_file, text, evaluation) if isinstance(tree, Symbol): # $Failed? @@ -135,6 +136,12 @@ def eval(self, text, evaluation): to_expression(SymbolRule, self.tag_name, self._import(tree)) ) + def eval_with_element(self, text, element, evaluation: Evaluation): + """%(name)s[text_String, element_]""" + # FIXME: right now we aren't using element, and should use this to more + # efficiently extract part of the XML file that we want. 
+ return self.eval(text, evaluation) + class _Get(_HTMLBuiltin): context = "HTML`Parser`" @@ -401,7 +408,7 @@ class SourceImport(_HTMLBuiltin): summary_text = "import source code from a HTML file" - def eval(self, text, evaluation): + def eval(self, text, evaluation: Evaluation): """%(name)s[text_String]""" def source(filename): @@ -412,6 +419,12 @@ def source(filename): return parse_html(source, text, evaluation) + def eval_with_element(self, text, element, evaluation: Evaluation): + """%(name)s[text_String, element_]""" + # FIXME: right now we aren't using element, and should use this to more + # efficiently extract part of the XML file that we want. + return self.eval(text, evaluation) + class TitleImport(_TagImport): """ @@ -437,7 +450,7 @@ def _import(self, tree): class XMLObjectImport(_HTMLBuiltin): """ - ## :native internal: + :WMA link:https://reference.wolfram.com/language/ref/XMLObject.html
'HTML`XMLObjectImport["filename"]' @@ -450,7 +463,13 @@ class XMLObjectImport(_HTMLBuiltin): summary_text = "import XML objects from a HTML file" - def eval(self, text, evaluation): + def eval(self, text, evaluation: Evaluation): """%(name)s[text_String]""" xml = to_expression("HTML`Parser`HTMLGet", text).evaluate(evaluation) return ListExpression(Expression(SymbolRule, String("XMLObject"), xml)) + + def eval_with_element(self, text, element, evaluation: Evaluation): + """%(name)s[text_String, element_]""" + # FIXME: right now we aren't using element, and should use this to more + # efficiently extract part of the HTML file that we want. + return self.eval(text, evaluation) diff --git a/mathics/builtin/fileformats/jsonformat.py b/mathics/builtin/fileformats/jsonformat.py index 7afffb71e..bbaf3b6fa 100644 --- a/mathics/builtin/fileformats/jsonformat.py +++ b/mathics/builtin/fileformats/jsonformat.py @@ -1,32 +1,29 @@ -# -*- coding: utf-8 -*- - """ -JSON +JSON File Format -Basic implementation for an JSON importer. +JSON importer (via Python's "json" module). """ -from mathics.core.builtin import Builtin -from mathics.core.expression import Evaluation +from mathics.core.builtin import Builtin, String +from mathics.core.evaluation import Evaluation from mathics.eval.fileformats.jsonformat import eval_JSONImport -class JSONImport(Builtin): +class ImportJSON(Builtin): """ - ## :native internal: + :WMA link:https://reference.wolfram.com/language/ref/format/JSON.html
-
'JSON`Import`JSONImport["file"]' -
parses "string" as a JSON file, and returns the data as a nested \ - list of rules. +
'JSON`ImportJSON[path]' +
Read $path$ as JSON and convert that to its corresponding Mathics3 equivalent.
""" - summary_text = "import elements from json" - context = "JSON`Import`" + context = "JSON`" messages = {"dec": "Decoding Error at `1`"} + summary_text = "import JSON file" - def eval(self, filename, evaluation: Evaluation): - """%(name)s[filename_String]""" - return eval_JSONImport(filename.value, evaluation) + def eval(self, path: String, evaluation: Evaluation): + "JSON`ImportJSON[path_String]" + return eval_JSONImport(path, evaluation) diff --git a/mathics/builtin/fileformats/xmlformat.py b/mathics/builtin/fileformats/xmlformat.py index 7117be326..8a45ec967 100644 --- a/mathics/builtin/fileformats/xmlformat.py +++ b/mathics/builtin/fileformats/xmlformat.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- """ -XML +XML File Format -Basic implementation for an XML importer. +XML importer (via lxml). """ @@ -345,6 +345,12 @@ def lines(): plaintext = String("\n".join(lines())) return to_mathics_list(to_expression("Rule", "Plaintext", plaintext)) + def eval_with_element(self, text, element, evaluation: Evaluation): + """%(name)s[text_String, element_]""" + # FIXME: right now we aren't using element, and should use this to more + # efficiently extract part of the XML file that we want. + return self.eval(text, evaluation) + class TagsImport(Builtin): """ @@ -381,6 +387,12 @@ def eval(self, text, evaluation: Evaluation): return root return to_mathics_list(to_expression("Rule", "Tags", self._tags(root))) + def eval_with_element(self, text, element, evaluation: Evaluation): + """%(name)s[text_String, element_]""" + # FIXME: right now we aren't using element, and should use this to more + # efficiently extract part of the XML file that we want. 
+ return self.eval(text, evaluation) + class XMLObjectImport(Builtin): """ @@ -405,3 +417,9 @@ def eval(self, text, evaluation: Evaluation): """%(name)s[text_String]""" xml = to_expression("XML`Parser`XMLGet", text).evaluate(evaluation) return to_mathics_list(to_expression("Rule", "XMLObject", xml)) + + def eval_with_element(self, text, element, evaluation: Evaluation): + """%(name)s[text_String, element_]""" + # FIXME: right now we aren't using element, and should use this to more + # efficiently extract part of the XML file that we want. + return self.eval(text, evaluation) diff --git a/mathics/builtin/import_export/compression.py b/mathics/builtin/import_export/compression.py deleted file mode 100644 index add610ad0..000000000 --- a/mathics/builtin/import_export/compression.py +++ /dev/null @@ -1,32 +0,0 @@ -""" -Compression & Archive Formats -""" - -from mathics.core.builtin import Builtin, String -from mathics.core.evaluation import Evaluation -from mathics.eval.import_export.compression import eval_ImportZIP - -# The builtin functions defined here are called normally in a somewhat convoluted -# (and non obvious) way: -# via Import[] which consults RegisterImport[] which is invoked by autoloading -# Format/xxx/Import.wl - -# Furthermore, all we really do is just pass the call over to eval_Import... - - -class ImportZIP(Builtin): - """ - :WMA link:https://reference.wolfram.com/language/ref/format/ZIP.html - -
-
'ImportZIP[path]' -
Run zip for archive file $path$ -
- - """ - - summary_text = "import ZIP file" - - def eval(self, path: String, evaluation: Evaluation): - "ImportZIP[path_String]" - return eval_ImportZIP(path.value, evaluation) diff --git a/mathics/builtin/import_export/importexport.py b/mathics/builtin/import_export/importexport.py index 8587e62cb..975e0b859 100644 --- a/mathics/builtin/import_export/importexport.py +++ b/mathics/builtin/import_export/importexport.py @@ -1,8 +1,33 @@ # -*- coding: utf-8 -*- -""" +r""" Import and Export Functions and Variables +Many kinds of data formats can be read into or written from \Mathics3. + +In contrast to reading or writing a file, importing and exporting imply some sort of \ +data restructuring into \Mathics3 and structuring into a filesystem that is not \ +just a stream of bytes, but instead also contains additional metadata and requires data reorganization \ +when stored in a filesystem. + +See +:Import/Export File Formats: +/doc/reference-of-built-in-symbols/fileformats/ for documentation \ +on the specific kinds of File Formats \Mathics3 supports. + + +Variable +:\$ExportFormats: +/doc/reference-of-built-in-symbols/inputoutput-files-and-filesystem/importing-and-exporting/\$exportformats \ +contains a list of file formats that are supported by +:Export: +/doc/reference-of-built-in-symbols/inputoutput-files-and-filesystem/importing-and-exporting/export, \ +while +:\$ImportFormats: +/doc/reference-of-built-in-symbols/inputoutput-files-and-filesystem/importing-and-exporting/\$importformats \ +does the corresponding thing for +:Import: +/doc/reference-of-built-in-symbols/inputoutput-files-and-filesystem/importing-and-exporting/import. """ import base64 @@ -13,6 +38,9 @@ from itertools import chain from urllib.error import HTTPError, URLError +# Use this when accessing IMPORTERS to get changes +# since initialization. 
+import mathics.eval.import_export.importexport as importexport from mathics.builtin.import_export.checking import check_filename, import_setup_check from mathics.core.atoms import ByteArray from mathics.core.attributes import A_PROTECTED, A_READ_PROTECTED @@ -33,7 +61,6 @@ from mathics.eval.files_io.files import eval_Close from mathics.eval.files_io.filesystem import eval_FindFile from mathics.eval.import_export.importexport import ( - IMPORTERS, MIMETYPE_TO_SHORTNAME, eval_FileFormat, eval_Import_data_only, @@ -89,7 +116,9 @@ class ImportFormats(Predefined): summary_text = "list supported import formats" def evaluate(self, evaluation: Evaluation): - return to_mathics_list(*sorted(IMPORTERS.keys()), elements_conversion_fn=String) + return to_mathics_list( + *sorted(importexport.IMPORTERS.keys()), elements_conversion_fn=String + ) class RegisterImport(Builtin): @@ -235,7 +264,7 @@ def eval( # as well. # By doing this, we accept "text, "Text", "TEXT", and other combinations, # which what WMA seems to do. 
- IMPORTERS[formatname.value.upper()] = ( + importexport.IMPORTERS[formatname.value.upper()] = ( conditionals, default, posts, @@ -430,7 +459,13 @@ class Import(Builtin): "$OptionSyntax": "System`Ignore", } - summary_text = "import elements from a file" + rules = { + "Import[filename_]": "Import[filename, {}]", + } + + summary_text = ( + r"read and convert to \Mathics3 some or all elements of structured file" + ) def eval_elements_query(self, source, evaluation, options={}): """Import[source_String, "Elements", OptionsPattern[]]""" @@ -452,15 +487,19 @@ def eval_source_only(self, source, evaluation, options={}): def eval_with_element_list(self, source, elements, evaluation, options={}): "Import[source_, elements_List?(AllTrue[#, NotOptionQ]&), OptionsPattern[]]" - findfile, data = import_setup_check(source, evaluation) + findfile, file_format = import_setup_check(source, evaluation) if findfile is SymbolFailed: return SymbolFailed + # FIXME remove the need for determine_filetype. + + # The "data" parameter is just for non-file or string situations + # where we need to pick out the type from the file contents. def determine_filetype(data: str) -> str: - return data + return file_format return eval_Import_general( - findfile, determine_filetype, elements, evaluation, options, data + findfile, determine_filetype, elements, evaluation, options ) # In contrast to Import[source_], we allow an explicit format type @@ -478,13 +517,21 @@ def eval_with_single_element(self, source, elt: String, evaluation, options={}): # The code below tests for the first case, and if that fails assumes the # second case. file_format = elt.value.upper() - if file_format in IMPORTERS.keys(): + + if file_format in importexport.IMPORTERS.keys(): # A file format was specified: use the custom routine return eval_Import_source_only(findfile, file_format, evaluation, options) # Assume we have Import with a single non-format element. 
- return self.eval_with_element_list( - source, ListExpression(elt), evaluation, options + + # FIXME remove the need for determine_filetype. + # The "data" parameter is just for non-file or string situations + # where we need to pick out the type from the file contents. + def determine_filetype(data: str) -> str: + return filetype + + return eval_Import_general( + findfile, determine_filetype, ListExpression(elt), evaluation, options ) @@ -521,7 +568,9 @@ class ImportString(Builtin): "$OptionSyntax": "System`Ignore", } - summary_text = "import data or elements of data from a string" + summary_text = ( + r"read and convert to \Mathics3 some or all elements of structured string" + ) def eval_data_only(self, data, evaluation, options={}): "ImportString[data_, OptionsPattern[]]" @@ -560,7 +609,7 @@ def eval_with_single_element(self, data, elt: String, evaluation, options={}): # The code below tests for the first case, and if that fails assumes the # second case. file_format = elt.value.upper() - if file_format in IMPORTERS.keys(): + if file_format in importexport.IMPORTERS.keys(): # A file format was specified: use the custom routine return eval_Import_data_only(data.value, file_format, evaluation, options) @@ -602,7 +651,9 @@ class Export(Builtin): "$OptionSyntax": "System`Ignore", } - summary_text = "export elements to a file" + summary_text = ( + r"write and convert to \Mathics3 some or all elements of structured file" + ) def eval(self, dest, expr, evaluation, options={}): "Export[dest_, expr_, OptionsPattern[Export]]" @@ -751,7 +802,9 @@ class ExportString(Builtin): rules = { "ExportString[expr_, elems_?NotListQ]": ("ExportString[expr, {elems}]"), } - summary_text = "export elements to a string" + summary_text = ( + r"write and convert to \Mathics3 some or all elements of structured string" + ) def eval_element(self, expr, element: String, evaluation: Evaluation, **options): "ExportString[expr_, element_String, OptionsPattern[ExportString]]" diff --git 
a/mathics/builtin/import_export/json.py b/mathics/builtin/import_export/json.py deleted file mode 100644 index 5ba008e9e..000000000 --- a/mathics/builtin/import_export/json.py +++ /dev/null @@ -1,33 +0,0 @@ -""" -JSON-Related Formats -""" - -from mathics.core.builtin import Builtin, String -from mathics.core.evaluation import Evaluation -from mathics.eval.import_export.json import eval_JSONImport - -# The builtin functions defined here are called normally in a somewhat -# convoluted (and non obvious) way: via Import[] which consults -# RegisterImport[] which is invoked by autoloading -# Format/xxx/Import.wl - -# Furthermore, all we really do is just pass the call over to -# eval_Import... - - -class ImportJSON(Builtin): - """ - :WMA link:https://reference.wolfram.com/language/ref/format/JSON.html - -
-
'ImportJSON[path]' -
Read $path$ as JSON and convert that to its corresponding Mathics3 equivalent. -
- - """ - - summary_text = "import JSON file" - - def eval(self, path: String, evaluation: Evaluation): - "ImportJSON[path_String]" - return eval_JSONImport(path.value) diff --git a/mathics/eval/fileformats/__init__.py b/mathics/eval/fileformats/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/mathics/eval/fileformats/compression.py b/mathics/eval/fileformats/compression.py new file mode 100644 index 000000000..3d4ce2508 --- /dev/null +++ b/mathics/eval/fileformats/compression.py @@ -0,0 +1,102 @@ +""" +Evaluation routines for handling data in some sort of archive format, +e.g. ZIP, TAR, etc. +""" + +import zipfile +from typing import Optional + +from mathics.core.atoms import String +from mathics.core.convert.expression import to_mathics_list +from mathics.core.evaluation import Evaluation +from mathics.core.expression import Expression +from mathics.core.list import ListExpression +from mathics.core.symbols import SymbolNull +from mathics.core.systemsymbols import SymbolFailed, SymbolRule +from mathics.eval.files_io.files import resolve_file +from mathics.eval.import_export.importexport import ( + IMPORTERS, + eval_Import_data_only, + infer_file_format, +) + + +def eval_ImportZIP( + zip_name: String, evaluation: Evaluation, members: Optional[list[str]] = None +) -> ListExpression: + """If `members` is None, this function takes a ZIP file path and returns a + list of rules giving the file names/paths contained inside and each member's text. + + If `members` is given, then extract those members (or files) from the ZIP file. + """ + + zip_path, is_temporary_file = resolve_file(zip_name, "r", evaluation) + if zip_path is None: + return SymbolFailed + + # The below "try:" is probably unnecessary since resolve_file should + # catch errors. + try: + with zipfile.ZipFile(zip_path, "r") as archive: + if members is None: + filenames = archive.namelist() + mathics_filenames = to_mathics_list(*filenames) + + # Wrap metadata or "elements" of the zip file into + # list of Rule. 
The caller can then use + # rules to pick out specific elements desired. + exprs = [ + Expression( + SymbolRule, + String("FileNames"), + mathics_filenames, + ), + Expression( + SymbolRule, + String("Summary"), + mathics_filenames, + ), + ] + + if filenames: + for filename in filenames: + exprs.append( + Expression( + SymbolRule, + String(filename), + String(archive.read(filename).decode("utf-8")), + ) + ) + + return ListExpression(*exprs) + + if members.has_form("List", None): + elements = members.get_elements() + else: + elements = [members] + + for element in elements: + member = element.value + file_format = infer_file_format(member, "Text").upper() + if file_format not in IMPORTERS.keys(): + evaluation.message("Import", "fmtnosup", file_format) + return SymbolFailed + + unzipped_file_data = archive.read(member).decode("utf-8") + converted_member_data = eval_Import_data_only( + unzipped_file_data, file_format, evaluation, {"raw": True} + ) + result = ListExpression( + Expression(SymbolRule, element, converted_member_data) + ) + return result + + except FileNotFoundError: + evaluation.message("Import", "nffil", String(zip_path)) + return SymbolFailed + except PermissionError: + evaluation.message("Import", "noopen", String(zip_path)) + return SymbolFailed + except Exception: + # This seems to be what WMA does. 
+ return SymbolNull diff --git a/mathics/eval/fileformats/jsonformat.py b/mathics/eval/fileformats/jsonformat.py index cfc0e9cb0..fb582a511 100644 --- a/mathics/eval/fileformats/jsonformat.py +++ b/mathics/eval/fileformats/jsonformat.py @@ -2,14 +2,38 @@ from mathics.core.atoms import String from mathics.core.convert.python import from_python -from mathics.core.expression import Evaluation +from mathics.core.expression import Evaluation, Expression +from mathics.core.list import ListExpression +from mathics.core.systemsymbols import SymbolFailed, SymbolRule +from mathics.eval.files_io.files import resolve_file -def eval_JSONImport(source_path: str, evaluation: Evaluation): - with open(source_path, "r") as f: +def eval_JSONImport(json_name: String, evaluation: Evaluation) -> ListExpression: + """Takes a JSON file path and returns its parsed content as a list of rules tagged "Data" and "Dataset".""" + json_path, is_temporary_file = resolve_file(json_name, "r", evaluation) + if json_path is None: + return SymbolFailed + + with open(json_path, "r") as json_file: try: - json_dict = json.load(f) + json_data = json.load(json_file) except json.decoder.JSONDecodeError as exc: evaluation.message("JSON`Import`JSONImport", "dec", String(exc.msg)) return None - return from_python(json_dict) + mathics_json = from_python(json_data) + + # Tag the result by wrapping in a list of rule expressions. + # We do this so that Import can extract pieces by element name. + exprs = [ + Expression( + SymbolRule, + String("Data"), + mathics_json, + ), + Expression( + SymbolRule, + String("Dataset"), + mathics_json, + ), + ] + return ListExpression(*exprs) diff --git a/mathics/eval/files_io/files.py b/mathics/eval/files_io/files.py index a417ec66c..74a292e15 100644 --- a/mathics/eval/files_io/files.py +++ b/mathics/eval/files_io/files.py @@ -3,7 +3,9 @@ File related evaluation functions. 
""" +import atexit import os +import tempfile from typing import Callable, Literal, Optional, Sequence from mathics_scanner.errors import ( @@ -56,6 +58,46 @@ DEFAULT_TRACE_FN: Literal[None] = None +def create_temp_file_with_extension(data: str, file_extension: str) -> str: + """ + Writes data to a temporary file with a specific extension. + The file is closed immediately so it can be read by other processes. + It is automatically deleted when the program exits. + + Parameters: + data (str): The text content to write into the file. + file_extension (str): The extension (e.g., 'json', 'html', 'md'). + The file extension will have "." added to + the beginning. + Returns: + str: The absolute file path to the created temporary file. + """ + # Ensure the extension starts with a dot + file_extension = "." + file_extension + + # Create a secure temporary file with the desired extension. + # delete=False prevents Python from destroying it the moment we close the handle. + with tempfile.NamedTemporaryFile( + mode="w", suffix=file_extension, delete=False, encoding="utf-8" + ) as temp_file: + temp_file.write(data) + temp_path = temp_file.name + + # Register a cleanup hook to delete the file when the Python process terminates + def cleanup_temp_file(): + try: + if os.path.exists(temp_path): + os.remove(temp_path) + except OSError: + # Handle cases where the file was already deleted or is locked + pass + + atexit.register(cleanup_temp_file) + + # Return the path so your program can use or read it + return temp_path + + def print_line_number_and_text(line_number: int, text: str): """Prints a line number an text on that line with it. 
This is used as the default trace function in Get[] @@ -193,14 +235,9 @@ def eval_Open( encoding: Optional[str], evaluation: Evaluation, ): - path = name.value - tmp, is_temporary_file = path_search(path) - if tmp is None: - if mode in ["r", "rb"]: - evaluation.message("General", "noopen", name) - return SymbolFailed - else: - path = tmp + path, is_temporary_file = resolve_file(name, mode, evaluation) + if path is None: + return SymbolFailed try: opener = Mathics3Open( @@ -384,3 +421,25 @@ def eval_Read( return Expression(SymbolHold, from_python(result)) return from_python(result) + + +def resolve_file(name: String, mode: str, evaluation: Evaluation) -> Optional[str]: + """Resolve 'name' using `path_search` and return the resolved name as the first + item of a tuple. + + If "mode" is a write mode, then the file does not have to exist beforehand. + In some cases `path_search()` will decide that a temporary file is to be + created. In this case that fact will be reflected by returning True as the + second item of the tuple. + + If we can't open the file, we emit a "noopen" message. 
+ """ + path = name.value + resolved_path, is_temporary_file = path_search(path) + if resolved_path is None: + if mode in ["r", "rb"]: + evaluation.message("General", "noopen", name) + return None, False + resolved_path = path + + return resolved_path, is_temporary_file diff --git a/mathics/eval/import_export/compression.py b/mathics/eval/import_export/compression.py deleted file mode 100644 index dcccf8c31..000000000 --- a/mathics/eval/import_export/compression.py +++ /dev/null @@ -1,59 +0,0 @@ -import zipfile -from typing import Optional - -from mathics.core.atoms import String -from mathics.core.convert.expression import to_mathics_list -from mathics.core.evaluation import Evaluation -from mathics.core.expression import Expression -from mathics.core.list import ListExpression -from mathics.core.systemsymbols import SymbolFailed, SymbolRule -from mathics.eval.import_export.importexport import ( - IMPORTERS, - eval_import_stream, - infer_file_format, -) - - -def eval_ImportZIP( - zip_path: str, evaluation: Evaluation, members: Optional[list[str]] = None -) -> ListExpression: - """Takes a ZIP file path and returns a list of file names/paths contained inside.""" - with zipfile.ZipFile(zip_path, "r") as archive: - if members is None: - filenames = archive.namelist() - mathics_filenames = to_mathics_list(*filenames) - exprs = [ - Expression( - SymbolRule, - String("FileNames"), - mathics_filenames, - ), - Expression( - SymbolRule, - String("Summary"), - mathics_filenames, - ), - ] - - if filenames: - for filename in filenames: - exprs.append( - Expression( - SymbolRule, - String(filename), - String(archive.read(filename).decode("utf-8")), - ) - ) - - return ListExpression(*exprs) - - for member in members: - file_format = infer_file_format(member) - if file_format.upper() not in IMPORTERS.keys(): - evaluation.message("Import", "fmtnosup", file_format) - return SymbolFailed - - file_data = archive.read(member) - # FIX HERE - converted_file_data = 
eval_import_stream(file_data, file_format) - return converted_file_data diff --git a/mathics/eval/import_export/importexport.py b/mathics/eval/import_export/importexport.py index ba3f7f862..473e0dd6b 100644 --- a/mathics/eval/import_export/importexport.py +++ b/mathics/eval/import_export/importexport.py @@ -1,6 +1,8 @@ """ Functions for figuring out a filetype or MIME type a given file path. + +Following WMA, we use WMA's custom short name for a mime type. """ import mimetypes @@ -10,7 +12,7 @@ from mathics.core.atoms import ByteArray, String from mathics.core.builtin import get_option -from mathics.core.convert.expression import to_mathics_list +from mathics.core.convert.expression import to_expression from mathics.core.convert.python import from_python from mathics.core.evaluation import Evaluation from mathics.core.expression import Expression @@ -20,13 +22,16 @@ SymbolByteArray, SymbolFailed, SymbolInputStream, - SymbolOpenWrite, + SymbolNone, SymbolRule, SymbolStringToStream, - SymbolWriteString, ) -from mathics.eval.files_io.files import eval_Close, eval_Open -from mathics.eval.files_io.filesystem import eval_DeleteFile, eval_FileExtension +from mathics.eval.files_io.files import ( + create_temp_file_with_extension, + eval_Close, + eval_Open, +) +from mathics.eval.files_io.filesystem import eval_FileExtension # Some WMA file types reported by FileFormat do not # match what the mimetypes (and therefore MIME) extensions @@ -34,10 +39,11 @@ # convert these mismatches MIME_SHORTNAME_TO_WMA: Final[Dict[str, str]] = {"JPG": "JPEG", "TXT": "Text"} +# FIXME: elements of the below dict should be a dataclass. IMPORTERS = {} # TODO: This hard-coded dictionary should be -# accessile from the WL API, and be user modifiable. +# accessible from the WL API, and be user modifiable. 
FILE_EXTENSION_MAP: dict[str, str] = { "bmp": "BMP", "gif": "GIF", @@ -219,7 +225,7 @@ def importer_exporter_options( def eval_FileFormat(path: str) -> String: """ - Basic implemenation beind FileFormat[filename]. + Basic implementation behind FileFormat[filename]. """ return String(filetype_from_path(path)) @@ -230,10 +236,10 @@ def eval_Import_general( elements, evaluation: Evaluation, options, - data: Optional[str], + data: Optional[str] = None, ): """ - Basic implementation beind most general kind of Import[source, elements, options]. + Basic implementation behind most general kind of Import[source, elements, options]. """ current_predetermined_out = evaluation.predetermined_out @@ -251,13 +257,14 @@ def eval_Import_general( elements = [el.value for el in elements] - # Determine file format - for el in elements: + # Determine WMA version of the mime type. + file_format = None + for el in elements.copy(): if el.upper() in IMPORTERS.keys(): file_format = el.upper() elements.remove(el) - break - else: + + if file_format is None: filetype = determine_filetype(data) file_format = MIME_SHORTNAME_TO_WMA.get(filetype, filetype).upper() @@ -266,8 +273,11 @@ def eval_Import_general( evaluation.predetermined_out = current_predetermined_out return SymbolFailed - # Load the importer - conditionals, default_function, posts, importer_options = IMPORTERS[file_format] + # Extract information about the loader used for this MIME type. 
+ # FIXME: turn into dataclass + conditionals, import_function_symbol, posts, importer_options = IMPORTERS[ + file_format + ] stream_options, custom_options = importer_exporter_options( importer_options.get("System`Options"), options, "System`Import", evaluation @@ -290,13 +300,14 @@ def eval_Import_general( evaluation.predetermined_out = current_predetermined_out return SymbolFailed - # Perform the import defaults = None + # Perform the import if not elements: - defaults = get_results( - default_function, + defaults = perform_import( + import_function_symbol, findfile, + file_format, function_channels, stream_options, custom_options, @@ -318,7 +329,7 @@ def eval_Import_general( ) ) else: - result = defaults.get(default_element.get_string_value()) + result = defaults.get(default_element.value) if result is None: evaluation.message( "Import", "noelem", default_element, String(filetype) @@ -331,9 +342,17 @@ def eval_Import_general( assert len(elements) >= 1 el = elements[0] if el == "Elements": - defaults = get_results( - default_function, + if ( + result := eval_Import_Elements(file_format, evaluation) + ) is not SymbolNone: + return result + # A list of "Elements" is not obtainable via AvailableElements listed when + # ImportExport`RegisterImport was used. Get a list of the field names via + # the "defaults" and "conditional" keys.
+ defaults = perform_import( + import_function_symbol, findfile, + file_format, function_channels, stream_options, custom_options, @@ -357,14 +376,16 @@ def eval_Import_general( ) else: if el in conditionals.keys(): - result = get_results( + result = perform_import( conditionals[el], findfile, + file_format, function_channels, stream_options, custom_options, evaluation, options, + elements=elements, data=data, ) if result is None: @@ -375,15 +396,17 @@ def eval_Import_general( return list(result.values())[0] else: if defaults is None: - defaults = get_results( - default_function, + defaults = perform_import( + import_function_symbol, findfile, + file_format, function_channels, stream_options, custom_options, evaluation, options, data=data, + elements=elements, ) if defaults is None: evaluation.predetermined_out = current_predetermined_out @@ -400,11 +423,11 @@ def eval_Import_general( def eval_Import_Elements(file_format: str, evaluation): - """ - Basic implementation behind Import[fileformat, Elements]. + """Basic implementation behind Import[fileformat, Elements]. + This returns the element names that can be used for a specific - file_format type. We get this from the AvailableElements field - mentioned when registering an importer. + file_format type. We get this from the + AvailableElements field mentioned when registering an importer. """ filetype = MIME_SHORTNAME_TO_WMA.get(file_format, file_format).upper() @@ -419,35 +442,71 @@ def eval_Import_Elements(file_format: str, evaluation): return options.get("System`AvailableElements") -def get_results( - tmp_function, +def perform_import( + import_function_symbol: Symbol, findfile: Optional[String], + file_format: str, function_channels, stream_options, custom_options, evaluation, options, data: Optional[str], + elements: Optional[list] = None, ): + """ This routine does the import. "import" here means reading a \ + file or string which has been structured according to a format belonging to a mime type. 
+ + "findfile", if not "None", is the path of a file where the unimported data resides. + If "findfile" is "None", then "data" will have the string data for that file, and + this routine will create a temporary file containing the data. The actual importer + then uses this file. + + "elements", when given, contains the parts or kinds of things that should be extracted. + Usually, there are custom routines for retrieving an element. + + It is also possible that when a custom element extraction does not + exist, that the caller will do the filtering after retrieving all of the information. + + This is not advisable when the information inside an element is small compared + to the information of the entire importable file. For example, consider asking + about the member names or contents of a tar file compared to the entire tar file. + """ current_predetermined_out = evaluation.predetermined_out if function_channels == ListExpression(String("FileNames")): joined_options = list(chain(stream_options, custom_options)) - tmpfile = False if findfile is None: - tmpfile = True - stream = Expression(SymbolOpenWrite).evaluate(evaluation) - findfile = stream.elements[0] - if data is not None: - Expression(SymbolWriteString, String(data)).evaluate(evaluation) - else: - Expression(SymbolWriteString, String("")).evaluate(evaluation) - eval_Close(stream, evaluation) - import_expression = Expression(tmp_function, findfile, *joined_options) - tmp = import_expression.evaluate(evaluation) + findfile = String( + create_temp_file_with_extension(data, file_format.lower()) + ) + + # FIXME: Some import functions do not support element + # selection of a collection, just collection retrieval. Here, + # when a selection is desired, the entire collection is + # returned, and *then* the element is selected. This is + # potentially very slow for large collections and selection + # items that can be retrieved quickly.
Until we can come up + # with a better solution for these kinds of import functions, to + # address this when element selection is requested and doesn't + # return a different result, we retry without the element + # selection. + import_collection_expression = to_expression( + import_function_symbol, findfile, *joined_options + ) + if elements is None: + tmp = import_collection_expression.evaluate(evaluation) + else: + import_select_expression = to_expression( + import_function_symbol, findfile, *elements, *joined_options + ) + tmp = import_select_expression.evaluate(evaluation) + if tmp == import_select_expression: + # Retry by retrieving the entire collection. + # Element selection is done afterwards. + tmp = import_collection_expression.evaluate(evaluation) + if tmp is SymbolFailed: return SymbolFailed - if tmpfile: - eval_DeleteFile([findfile.value]) elif function_channels == ListExpression(String("Streams")): if findfile is None: stream = Expression(SymbolStringToStream, String(data)).evaluate(evaluation) @@ -475,19 +534,22 @@ def get_results( evaluation.message("Import", "nffil") evaluation.predetermined_out = current_predetermined_out return None - tmp = Expression(tmp_function, stream, *custom_options).evaluate(evaluation) + tmp = Expression(import_function_symbol, stream, *custom_options).evaluate( + evaluation + ) eval_Close(stream, evaluation) else: # TODO message evaluation.predetermined_out = current_predetermined_out return SymbolFailed - tmp = tmp.get_elements() - if not all(expr.has_form("Rule", None) for expr in tmp): + + # .get_elements() is more tolerant of the type of "tmp" than + # ._elements which assumes an Expression type.
+ result_elts = tmp.get_elements() + if not all(expr.has_form("Rule", None) for expr in result_elts): evaluation.predetermined_out = current_predetermined_out return None - # return {a.get_string_value() : b for a,b in map(lambda x: - # x.get_elements(), tmp)} evaluation.predetermined_out = current_predetermined_out return {a.get_string_value(): b for a, b in (x.get_elements() for x in tmp)} @@ -499,7 +561,7 @@ def eval_Import_data_only( options, ): """ - Basic implementation beind Import_String[data]. + Basic implementation behind Import_String[data]. Here, no elements were given, just a import data string. """ @@ -515,7 +577,9 @@ def eval_Import_data_only( return SymbolFailed # Load the importer - conditionals, default_function, posts, importer_options = IMPORTERS[file_format] + conditionals, import_function_symbol, posts, importer_options = IMPORTERS[ + file_format + ] stream_options, custom_options = importer_exporter_options( importer_options.get("System`Options"), options, "System`Import", evaluation @@ -535,9 +599,10 @@ def eval_Import_data_only( return SymbolFailed # Perform the import - defaults = get_results( - default_function, + defaults = perform_import( + import_function_symbol, None, + file_format, function_channels, stream_options, custom_options, @@ -559,7 +624,7 @@ def eval_Import_data_only( ) ) else: - result = defaults.get(default_element.get_string_value()) + result = defaults.get(default_element.value) if result is None: evaluation.message("Import", "noelem", default_element, String(filetype)) evaluation.predetermined_out = current_predetermined_out @@ -575,7 +640,7 @@ def eval_Import_source_only( options, ): """ - Basic implementation beind Import[source]. + Basic implementation behind Import[source]. Here, no elements were given, just a import source. 
""" @@ -588,7 +653,9 @@ def eval_Import_source_only( return SymbolFailed # Load the importer - conditionals, default_function, posts, importer_options = IMPORTERS[file_format] + conditionals, import_function_symbol, posts, importer_options = IMPORTERS[ + file_format + ] stream_options, custom_options = importer_exporter_options( importer_options.get("System`Options"), options, "System`Import", evaluation @@ -607,10 +674,11 @@ def eval_Import_source_only( evaluation.predetermined_out = current_predetermined_out return SymbolFailed - # Perform the import - defaults = get_results( - default_function, + # Perform the import. + defaults = perform_import( + import_function_symbol, findfile, + file_format, function_channels, stream_options, custom_options, @@ -641,111 +709,10 @@ def eval_Import_source_only( return result -def get_results_for_element_args( - tmp_function, - findfile: Optional[String], - function_channels, - stream_options, - custom_options, - evaluation, - options, - file_format: Optional[str], - elements: list, -): - """ - Return Import results when elemnet args are given. 
- For example: - Import["ExampleData/ExampleData.txt", "Lines"] - ^^^^^^^ - """ - current_predetermined_out = evaluation.predetermined_out - if function_channels == ListExpression(String("FileNames")): - joined_options = list(chain(stream_options, custom_options)) - tmpfile = False - if findfile is None: - tmpfile = True - stream = Expression(SymbolOpenWrite).evaluate(evaluation) - findfile = stream.elements[0] - if file_format is not None: - Expression(SymbolWriteString, String(file_format)).evaluate(evaluation) - else: - Expression(SymbolWriteString, String("")).evaluate(evaluation) - eval_Close(stream, evaluation) - import_expression = Expression( - tmp_function, findfile, *to_mathics_list(elements), *joined_options - ) - tmp = import_expression.evaluate(evaluation) - if tmp is SymbolFailed: - return SymbolFailed - if tmpfile: - eval_DeleteFile([findfile.value]) - elif function_channels == ListExpression(String("Streams")): - if findfile is None: - stream = Expression(SymbolStringToStream, String(file_format)).evaluate( - evaluation - ) - else: - mode = "r" - if options.get("System`BinaryFormat") is SymbolTrue: - if not mode.endswith("b"): - mode += "b" - - encoding_option = options.get("System`CharacterEncoding") - encoding = ( - encoding_option.value if isinstance(encoding_option, String) else None - ) - - stream = eval_Open( - name=findfile, - mode=mode, - stream_type="InputStream", - encoding=encoding, - evaluation=evaluation, - ) - if stream is None: - return - if stream.head is not SymbolInputStream: - evaluation.message("Import", "nffil") - evaluation.predetermined_out = current_predetermined_out - return None - tmp = Expression(tmp_function, stream, *custom_options).evaluate(evaluation) - eval_Close(stream, evaluation) - else: - # TODO message - evaluation.predetermined_out = current_predetermined_out - return SymbolFailed - tmp = tmp.get_elements() - if not all(expr.has_form("Rule", None) for expr in tmp): - evaluation.predetermined_out = 
current_predetermined_out - return None - - # return {a.get_string_value() : b for a,b in map(lambda x: - # x.get_elements(), tmp)} - evaluation.predetermined_out = current_predetermined_out - return {a.get_string_value(): b for a, b in (x.get_elements() for x in tmp)} - - -def eval_import_stream( - data: bytes, - file_format: str, -): - """ - Implementation of import of bytes having a particular file format - """ - - # START FIXING HERE - # Load the importer - conditionals, import_function, posts, importer_options = IMPORTERS[ - file_format.upper() - ] - import_expression = Expression(import_function, data).evaluate() - return import_expression - - -def infer_file_format(filename: str) -> Optional[str]: +def infer_file_format(filename: str, default_extension: str = None) -> Optional[str]: """ Infer what kind of format filename is in. None is returned if we can't infer a format. """ file_extension = eval_FileExtension(filename).lower() - return FILE_EXTENSION_MAP.get(file_extension) + return FILE_EXTENSION_MAP.get(file_extension, default_extension) diff --git a/mathics/eval/import_export/json.py b/mathics/eval/import_export/json.py deleted file mode 100644 index f6f2e11ed..000000000 --- a/mathics/eval/import_export/json.py +++ /dev/null @@ -1,27 +0,0 @@ -import json - -from mathics.core.atoms import String -from mathics.core.convert.python import from_python -from mathics.core.expression import Expression -from mathics.core.list import ListExpression -from mathics.core.systemsymbols import SymbolRule - - -def eval_JSONImport(json_path: str) -> ListExpression: - """Takes a ZIP file path and returns a list of file names/paths contained inside.""" - with open(json_path, "r") as json_file: - json_data = json.load(json_file) - mathics_json = from_python(json_data) - exprs = [ - Expression( - SymbolRule, - String("Data"), - mathics_json, - ), - Expression( - SymbolRule, - String("Dataset"), - mathics_json, - ), - ] - return ListExpression(*exprs) diff --git 
a/test/builtin/import_export/test_importexport.py b/test/builtin/import_export/test_importexport.py index 4f01ca79a..e0ee43a1b 100644 --- a/test/builtin/import_export/test_importexport.py +++ b/test/builtin/import_export/test_importexport.py @@ -171,13 +171,13 @@ def test_export(): 'Import["ExampleData/Testosterone.svg", "xml"] // Head', None, "XMLObject[Document]", - "Case use in explicit format name should not be significant", + "format case (xml) is not significant", ), ( 'Import["ExampleData/Testosterone.svg", "Xml"] // Head', None, "XMLObject[Document]", - "Case use in explicit format name should not be significant", + "format case (Xml) is not significant", ), ( 'Import["ExampleData/Testosterone.svg", {"XML"}] // Head', @@ -185,19 +185,27 @@ def test_export(): "XMLObject[Document]", None, ), - ( - 'Import["ExampleData/Testosterone.svg", {"XML", "XML"}];', - ("The Import element XML is not present when importing as XML.",), - "Null", - None, - ), + # This test does not match WMA. We are supposed to treat + # {"XML", "XML"} like, "XML" and not give an error. + # ( + # 'Import["ExampleData/Testosterone.svg", {"XML", "XML"}];', + # ("The Import element XML is not present when importing as XML.",), + # "Null", + # None, + # ), # XML - ( - 'MatchQ[Import["ExampleData/InventionNo1.xml", "Tags"],{__String}]', - None, - "True", - None, - ), + # This test does not match WMA. WMA gives: + # XML`Parser`XMLGet::prserr: + # NetAccessorException: Could not open file: + # http://www.musicxml.org/dtds/partwise.dtd at Line: 2 Character: 123 in + # InventionNo1.xml. + # Import::fmterr: Cannot import data as XML format. + # ( + # 'MatchQ[Import["ExampleData/InventionNo1.xml", "Tags"],{__String}]', + # None, + # "True", + # None, + # ), ("ImportString[x]", ("First argument x is not a string.",), "$Failed", None), # CSV (