diff --git a/etc/cf-standard-name-table.xml b/etc/cf-standard-name-table.xml index 3b145ae86e..e40141043e 100644 --- a/etc/cf-standard-name-table.xml +++ b/etc/cf-standard-name-table.xml @@ -24594,8 +24594,7 @@ kg m-2 s-1 - Methane emitted from the surface, generated by biomass burning (fires). Positive direction upwards. -The surface called "surface" means the lower boundary of the atmosphere. "Upward" indicates a vector component which is positive when directed upward (negative downward). In accordance with common usage in geophysical disciplines, "flux" implies per unit area, called "flux density" in physics. The chemical formula for methane is CH4. The mass is the total mass of the molecules. The specification of a physical process by the phrase "due_to_" process means that the quantity named is a single term in a sum of terms which together compose the general quantity named by omitting the phrase. "Emission" means emission from a primary source located anywhere within the atmosphere, including at the lower boundary (i.e. the surface of the earth). "Emission" is a process entirely distinct from "re-emission" which is used in some standard names. The term "fires" means all biomass fires, whether naturally occurring or ignited by humans. The precise conditions under which fires produce and consume methane can vary between models. + Methane emitted from the surface, generated by biomass burning (fires). Positive direction upwards. The surface called "surface" means the lower boundary of the atmosphere. "Upward" indicates a vector component which is positive when directed upward (negative downward). In accordance with common usage in geophysical disciplines, "flux" implies per unit area, called "flux density" in physics. The chemical formula for methane is CH4. The mass is the total mass of the molecules. 
The specification of a physical process by the phrase "due_to_" process means that the quantity named is a single term in a sum of terms which together compose the general quantity named by omitting the phrase. "Emission" means emission from a primary source located anywhere within the atmosphere, including at the lower boundary (i.e. the surface of the earth). "Emission" is a process entirely distinct from "re-emission" which is used in some standard names. The term "fires" means all biomass fires, whether naturally occurring or ignited by humans. The precise conditions under which fires produce and consume methane can vary between models. diff --git a/lib/iris/__init__.py b/lib/iris/__init__.py index 38465472ee..0644a83a0b 100644 --- a/lib/iris/__init__.py +++ b/lib/iris/__init__.py @@ -100,6 +100,7 @@ def callback(cube, field, filename): import iris._constraints import iris.config import iris.io +import iris.std_name_table from ._deprecation import IrisDeprecation, warn_deprecated diff --git a/lib/iris/common/mixin.py b/lib/iris/common/mixin.py index 4c19dd756b..029a9a9294 100644 --- a/lib/iris/common/mixin.py +++ b/lib/iris/common/mixin.py @@ -13,7 +13,7 @@ import cf_units -import iris.std_names +from iris.std_name_table import check_valid_std_name from .metadata import BaseMetadata @@ -36,19 +36,22 @@ def _get_valid_standard_name(name): name_groups = name.split(maxsplit=1) if name_groups: std_name = name_groups[0] - name_is_valid = std_name in iris.std_names.STD_NAMES + try: + new_std_name = check_valid_std_name(name_groups[0]) + name = name.replace(std_name, new_std_name) + except ValueError: + raise ValueError( + "{!r} is not a valid standard_name".format(name) + ) try: std_name_modifier = name_groups[1] except IndexError: pass # No modifier else: - name_is_valid &= std_name_modifier in valid_std_name_modifiers - - if not name_is_valid: - raise ValueError( - "{!r} is not a valid standard_name".format(name) - ) - + if std_name_modifier not in 
valid_std_name_modifiers: + raise ValueError( + "{!r} is not a valid standard_name".format(name) + ) return name diff --git a/lib/iris/std_name_table.py b/lib/iris/std_name_table.py new file mode 100644 index 0000000000..5a1a0ee13f --- /dev/null +++ b/lib/iris/std_name_table.py @@ -0,0 +1,118 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +""" +Handling of standard names and standard name aliases. +""" + +import warnings + +import iris.std_names + + +def get_convention(): + """Return the 'Conventions' string of the CF standard name table.""" + try: + convention = iris.std_names.CONVENTIONS_STRING + except AttributeError: + convention = None + return convention + + +def set_alias_processing(mode): + """ + Set how standard name aliases are handled. + + Arg: + + * mode `string` specifying handling: + 'accept' - aliases are handled as any other standard name, + 'warn' - as above, but a warning is issued, + 'replace' - aliased standard names are replaced with the current one. + """ + if not hasattr(iris.std_names, "ALIASES"): + raise ValueError("The standard name table has no aliases defined.") + if mode == "default": + iris.std_names._MODE = iris.std_names._DEFAULT + elif mode in iris.std_names._ALTERNATIVE_MODES: + iris.std_names._MODE = mode + else: + raise ValueError( + "{!r} is not a valid alternative for processing " + "of standard name aliases.".format(mode) + ) + + +def get_description(name): + """ + Return the standard name description as a `string`. + + Arg: + + * name `string` containing the standard name. 
+ """ + if not hasattr(iris.std_names, "DESCRIPTIONS"): + return None + + error = False + if name in iris.std_names.STD_NAMES: + descr = iris.std_names.DESCRIPTIONS[name] + elif hasattr(iris.std_names, "ALIASES"): + if name in iris.std_names.ALIASES: + descr = iris.std_names.DESCRIPTIONS[iris.std_names.ALIASES[name]] + if iris.std_names._MODE == iris.std_names._REPLACE: + msg = ( + "\nStandard name {!r} is aliased and is \nreplaced by {!r}.\n" + "The description for the latter will be used." + ) + warnings.warn(msg.format(name, iris.std_names.ALIASES[name])) + else: + error = True + else: + error = True + + if error: + raise ValueError("{!r} is not a valid standard name.".format(name)) + return descr + + +def check_valid_std_name(name): + """ + Check and return if argument is a valid standard name or alias. + + Arg: + + * name `string` containing the prospective standard name. + + Depending on the setting of the alias processing the following will + happen if 'name' is an aliased standard name: + "accept" - the aliased standard name is accepted as valid and returned, + "warn" - a warning is issued, otherwise the same as "accept", + "replace" - the valid standard name is returned without warning. + + When 'name' is neither a standard name nor an alias an error results. + """ + error = False + if name in iris.std_names.STD_NAMES: + std_name = name + elif hasattr(iris.std_names, "ALIASES"): + if name in iris.std_names.ALIASES: + if iris.std_names._MODE == iris.std_names._REPLACE: + std_name = iris.std_names.ALIASES[name] + else: + std_name = name + if iris.std_names._MODE == iris.std_names._WARN: + msg = "\nThe standard name {!r} is aliased should be \nreplaced by {!r}." 
+ warnings.warn( + msg.format(name, iris.std_names.ALIASES[name]) + ) + else: + error = True + else: + error = True + + if error: + raise ValueError("{!r} is not a valid standard_name.".format(name)) + return std_name diff --git a/tools/generate_std_names.py b/tools/generate_std_names.py index 08bacbe1e0..fb3aad7a7a 100644 --- a/tools/generate_std_names.py +++ b/tools/generate_std_names.py @@ -7,8 +7,9 @@ A script to convert the standard names information from the provided XML file into a Python dictionary format. -Takes two arguments: the first is the XML file to process and the second -is the name of the file to write the Python dictionary file into. +Takes two or three arguments: the first is the XML file to process and the second +is the name of the file to write the Python dictionary file into. The optional +third argument, '--descr', includes the standard name descriptions in the file. By default, Iris will use the source XML file: etc/cf-standard-name-table.xml @@ -20,23 +21,30 @@ """ import argparse -import pprint import xml.etree.ElementTree as ET -STD_VALUES_FILE_TEMPLATE = ''' +STD_NAME_TABLE_FILE_TEMPLATE = ''' # Copyright Iris contributors # # This file is part of Iris and is released under the LGPL license. # See COPYING and COPYING.LESSER in the root of the repository for full # licensing details. """ -This file contains a dictionary of standard value names that are mapped -to another dictionary of other standard name attributes. Currently only -the `canonical_unit` exists in these attribute dictionaries. - This file is automatically generated. Do not edit this file by hand. +The file contains the following elements, formatted as python code: + * A few variables used internally in the standard name processing. + These begin with an underscore. + * Information on the source standard name table version. + * A dictionary of standard value names that are mapped + to another dictionary of other standard name attributes. 
+ Currently only the `canonical_units` exists in these attribute + dictionaries. + * A dictionary of aliased standard names that are mapped to the + current standard name. + * Optionally, a dictionary of standard names mapped to their descriptions. + The file will be generated during a standard build/installation:: python setup.py build @@ -49,10 +57,17 @@ Or for more control (e.g. to use an alternative XML file) via:: python tools/generate_std_names.py XML_FILE MODULE_FILE - """ +'''.lstrip() + + +def found_or_none(elem): + return elem.text if elem is not None else None + -STD_NAMES = '''.lstrip() +# Take care of inconsistent quotes in standard name descriptions. +def replace_quote(txt): + return txt.replace('"', "'") if txt is not None else None def process_name_table(tree, element_name, *child_elements): @@ -62,32 +77,72 @@ def process_name_table(tree, element_name, *child_elements): """ for elem in tree.iterfind(element_name): sub_section = {} - for child_elem in child_elements: - found_elem = elem.find(child_elem) - sub_section[child_elem] = found_elem.text if found_elem is not None else None - + sub_section[child_elem] = found_or_none(elem.find(child_elem)) yield {elem.get("id") : sub_section} -def to_dict(infile, outfile): - values = {} - aliases = {} - +def prettydict(outfile, varname, data): + """Pretty formatted output of the data (dict) assigned to the variable 'varname'.""" + outfile.write(f'{varname} = {{\n') + for k, v in dict(sorted(data.items())).items(): + outfile.write(f' "{k}": "{v}",\n') + outfile.write("}\n\n") + + +def decode_version(outfile, tree): + """Decode the version information in the xml header information.""" + version = {} + for elem in ["table_name", "version_number", "last_modified", "institution", "contact"]: + version[elem] = found_or_none(tree.find(elem)) + if version["table_name"] is None: + if (version["institution"] == "Centre for Environmental Data Analysis" + and version["contact"] == "support@ceda.ac.uk"): + 
version["table_name"] = "CF-StdNameTable" + else: + version["table_name"] = "USER-StdNameTable" + prettydict(outfile, "VERSION", version) + version_string = "-".join(version[k] for k in ["table_name", "version_number"]) + outfile.write(f'CONVENTIONS_STRING = "{version_string}"\n\n') + + +def write_useful_variables(outfile): + outfile.write( + '\n# The following variables are used for processing the standard names information below\n' + '_ACCEPT = "accept"\n' + '_WARN = "warn"\n' + '_REPLACE ="replace"\n' + '_ALTERNATIVE_MODES = [_ACCEPT, _WARN, _REPLACE]\n' + '_DEFAULT = "warn"\n' + '_MODE = _DEFAULT\n\n' + ) + + +def decode_standard_name_table(infile, outfile, description=False): + """Process the different parts of the xml file.""" tree = ET.parse(infile) + outfile.write(STD_NAME_TABLE_FILE_TEMPLATE) + write_useful_variables(outfile) + decode_version(outfile, tree) + + data = {} for section in process_name_table(tree, 'entry', 'canonical_units'): - values.update(section) + data.update(section) + prettydict(outfile, "STD_NAMES", data) + data = {} for section in process_name_table(tree, 'alias', 'entry_id'): - aliases.update(section) - - for key, valued in aliases.items(): - values.update({ - key : {'canonical_units' : values.get(valued['entry_id']).get('canonical_units')} - }) + for k, v in section.items(): + data.update({k: v["entry_id"]}) + prettydict(outfile, "ALIASES", data) - outfile.write(STD_VALUES_FILE_TEMPLATE + pprint.pformat(values)) + if description: + data = {} + for section in process_name_table(tree, 'entry', 'description'): + for k, v in section.items(): + data.update({k: replace_quote(v["description"])}) + prettydict(outfile, "DESCRIPTIONS", data) if __name__ == "__main__": @@ -97,10 +152,13 @@ def to_dict(infile, outfile): help='Path to CF standard name XML') parser.add_argument('output', metavar='OUTPUT', help='Path to resulting Python code') + parser.add_argument('-d', '--descr', action="store_true", + help="Include standard name descriptions") 
args = parser.parse_args() encoding = {'encoding': 'utf-8'} with open(args.input, 'r', **encoding) as in_fh: with open(args.output, 'w', **encoding) as out_fh: - to_dict(in_fh, out_fh) + decode_standard_name_table(in_fh, out_fh, args.descr) + pass