# Source code for fparser.common.splitline

#!/usr/bin/env python

# Modified work Copyright (c) 2017-2022 Science and Technology
# Facilities Council.
# Modified work Copyright (c) 2017 by J. Henrichs, Bureau of Meteorology
# Original work Copyright (c) 1999-2008 Pearu Peterson

# All rights reserved.

# Modifications made as part of the fparser project are distributed
# under the following license:

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:

# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.

# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.

# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# --------------------------------------------------------------------

# The original software (in the f2py project) was distributed under
# the following license:

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:

#   a. Redistributions of source code must retain the above copyright notice,
#      this list of conditions and the following disclaimer.
#   b. Redistributions in binary form must reproduce the above copyright
#      notice, this list of conditions and the following disclaimer in the
#      documentation and/or other materials provided with the distribution.
#   c. Neither the name of the F2PY project nor the names of its
#      contributors may be used to endorse or promote products derived from
#      this software without specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.

"""
Defines LineSplitter and helper functions.

Original Author: Pearu Peterson <pearu@cens.ioc.ee>
First version created: May 2006

-----
"""


import re



[docs]
class String(str):
    """Marker subclass of :class:`str` that tags a quoted-string part.

    It adds no behaviour of its own. Code in this module wraps the quoted
    parts of a line in this class so that they can later be told apart
    from ordinary text with ``isinstance(item, String)``.
    """



class ParenString(str):
    """Marker subclass of :class:`str` that tags a parenthesised part.

    It adds no behaviour of its own. Code in this module wraps each
    top-level bracketed region of a line in this class so that it can be
    recognised later with ``isinstance(item, ParenString)``.
    """


# Names exported by a star-import of this module.
__all__ = ["String", "string_replace_map", "splitquote", "splitparen"]

# Returns every string-constant placeholder key found in a piece of text.
_f2py_str_findall = re.compile(r"_F2PY_STRING_CONSTANT_\d+_").findall
# Both of these matchers succeed only if the *whole* text consists of
# 'word' characters (letters, digits and underscore). Note that the empty
# string also matches.
_is_name = re.compile(r"\w*\Z", re.I).match
_is_simple_str = re.compile(r"\w*\Z", re.I).match
# Returns every placeholder key of any kind (string constant, real
# constant or expression tuple) found in a piece of text, in the order in
# which they appear. Only the string-constant key has a leading underscore.
_f2py_findall = re.compile(
    r"(_F2PY_STRING_CONSTANT_\d+_|F2PY_REAL_CONSTANT_\d+_|" r"F2PY_EXPR_TUPLE_\d+)"
).findall
# A valid exponential constant must begin with a digit or a '.' (and be
# preceded by a non-'word' character or the start of the string).
# We have to exclude '.' from the match for a non-word character as
# otherwise, in a string such as ".5d0", it would be matched by the
# non-capturing group. Since the first group is non-capturing (?:),
# the matched literal is in group 1.
# R417 for real-literal-constant does not permit whitespace.
exponential_constant = re.compile(
    r"(?:[^\w.]|^)((\d+[.]\d*|\d*[.]\d+|\d+)[edED][+-]?\d+(_\w+)?)"
)


class StringReplaceDict(dict):
    """
    Dictionary mapping placeholder keys to the text they stand for, as
    returned by the string_replace_map() function. Instances are callable:
    calling one with a line of text substitutes the placeholders found in
    that line with the text stored for them.
    """

    def __call__(self, line):
        """
        Put back the text that the placeholders in `line` stand for.

        :param str line: text that may contain placeholder keys.

        :returns: `line` with every placeholder that is a key of this \
            dictionary replaced by the associated value.
        :rtype: str

        """
        restored = line
        for placeholder in _f2py_findall(line):
            if placeholder not in self:
                # Looks like a placeholder but is not one of ours.
                continue
            # Substitute a single occurrence per match so that a key which
            # is a prefix of a later one (e.g. 'F2PY_EXPR_TUPLE_1' and
            # 'F2PY_EXPR_TUPLE_10') cannot clobber that later match.
            restored = restored.replace(placeholder, self[placeholder], 1)
        return restored


def memoize(function):
    """Simple memoization decorator.

    Results are stored in an unbounded dictionary keyed on the positional
    and keyword arguments of the call, all of which must therefore be
    hashable. The object stored in the cache is the one handed back on
    every subsequent hit (it is not copied), so callers should not modify
    a returned mutable result.

    :param function: The function to memoize.
    :type function: Callable

    :returns: a wrapper around `function` that caches its results. The \
        wrapper keeps the name and docstring of `function`.
    :rtype: Callable

    Note: :func:`functools.lru_cache` from the standard library is a
    thread-safe alternative that can also be bounded (e.g.
    ``@functools.lru_cache(maxsize=8)``), which suits lines that have
    temporal locality.

    """
    # Imported locally so that this function does not rely on a
    # module-level import being present.
    import functools

    memo = {}
    # Unique marker for "not cached yet". Using None for this purpose
    # would prevent a legitimate None result from ever being cached.
    missing = object()

    @functools.wraps(function)
    def wrapper(*args, **kwargs):
        # Keep positional and keyword arguments in separate parts of the
        # key so that e.g. f("a", "lower", True) and f("a", lower=True)
        # cannot collide. Keyword names are unique strings, so sorting the
        # items never needs to compare the (arbitrary) values.
        key = (args, tuple(sorted(kwargs.items())))
        result = memo.get(key, missing)
        if result is missing:
            result = function(*args, **kwargs)
            memo[key] = result
        return result

    return wrapper


@memoize
def string_replace_map(line, lower=False):
    """
    #. Replaces string constants with symbol `'_F2PY_STRING_CONSTANT_<index>_'`
    #. Replaces (`expression`) with symbol `(F2PY_EXPR_TUPLE_<index>)`
    #. Replaces real numerical constants containing an exponent with symbol
       `F2PY_REAL_CONSTANT_<index>_`

    The substitutions are performed in the order: string constants, real
    constants and then parenthesised expressions. Identical pieces of text
    of the same kind share a single symbol. Applying the returned map to
    the returned line (the map is callable) restores the original text,
    except that whitespace immediately inside a replaced pair of
    parentheses is not preserved.

    Results are cached by the `memoize` decorator, so repeated calls with
    the same arguments return the very same map object: it should be
    treated as read-only.

    :param str line: the line of text in which to perform substitutions.
    :param bool lower: whether or not the call to splitquote() should return \
        items as lowercase (default is to leave the case unchanged).

    :returns: a new line and the replacement map.
    :rtype: 2-tuple of str and \
            :py:class:`fparser.common.splitline.StringReplaceDict`

    """
    string_map = StringReplaceDict()
    # Reverse maps (original text -> symbol), one per kind of symbol, so
    # that text of one kind can never be handed the symbol of another
    # kind. Each is keyed on exactly the text stored in `string_map`.
    string_keys = {}
    const_keys_by_text = {}
    expr_keys_by_text = {}

    # 1. String constants.
    items = []
    for item in splitquote(line, lower=lower)[0]:
        if isinstance(item, String) and not _is_simple_str(item[1:-1]):
            # The delimiting quotes are kept in the line; only the content
            # between them is replaced.
            trimmed = item[1:-1]
            key = string_keys.get(trimmed)
            if key is None:
                key = "_F2PY_STRING_CONSTANT_{0}_".format(len(string_keys) + 1)
                string_map[key] = trimmed
                string_keys[trimmed] = key
            items.append(item[0] + key + item[-1])
        else:
            items.append(item)
    newline = "".join(items)

    # 2. Real constants containing an exponent. Only the exact spans that
    # were matched are replaced: a blanket str.replace() would also alter
    # any other text that happens to contain the same characters (e.g. the
    # name 'a1e5' when the literal '1e5' also occurs in the line).
    const_keys = []
    pieces = []
    last_end = 0
    for match in exponential_constant.finditer(newline):
        # Get the first captured group as that corresponds to the literal
        # *without* any preceding non-word character.
        found = match.group(1)
        key = const_keys_by_text.get(found)
        if key is None:
            key = "F2PY_REAL_CONSTANT_{0}_".format(len(const_keys_by_text) + 1)
            string_map[key] = found
            const_keys_by_text[found] = key
            const_keys.append(key)
        pieces.append(newline[last_end : match.start(1)])
        pieces.append(key)
        last_end = match.end(1)
    pieces.append(newline[last_end:])
    newline = "".join(pieces)

    # 3. Top-level parenthesised expressions (anything other than a
    # simple name between the brackets).
    items = []
    expr_keys = []
    for item in splitparen(newline):
        if isinstance(item, ParenString) and not _is_name(item[1:-1].strip()):
            trimmed = item[1:-1].strip()
            key = expr_keys_by_text.get(trimmed)
            if key is None:
                key = "F2PY_EXPR_TUPLE_{0}".format(len(expr_keys_by_text) + 1)
                string_map[key] = trimmed
                expr_keys_by_text[trimmed] = key
                expr_keys.append(key)
            items.append(item[0] + key + item[-1])
        else:
            items.append(item)

    # Ensure that any entries in the map do not themselves contain
    # substitutions. The map is callable and expands the symbols it knows
    # about, leaving any other symbol-like text untouched.
    for key in expr_keys + const_keys:
        string_map[key] = string_map(string_map[key])

    return "".join(items), string_map




[docs]
def splitquote(line, stopchar=None, lower=False, quotechars="\"'"):
    """
    Breaks a line of text up into alternating unquoted and quoted parts.

    A line may continue a quoted string that was opened on a previous line
    and not yet closed: in that case the quote character that will close
    it is supplied via `stopchar`. A quote character preceded by an odd
    number of consecutive backslashes is treated as escaped. Quoted parts
    include their delimiting quote characters and are returned as
    :py:class:`String` objects; they are never converted to lowercase.

    :param str line: the line to split.
    :param Optional[str] stopchar: the quote character that will terminate an \
                                   existing quoted string or None otherwise.
    :param bool lower: whether or not to convert the unquoted parts of the \
                       line to lowercase.
    :param str quotechars: the characters that are considered to delimit \
                           quoted strings.

    :returns: tuple containing the list of parts (in the order in which \
              they occur in the line) and the quote character of any quoted \
              string still open at the end of the line (None if there is \
              no such string).
    :rtype: Tuple[List[str], Optional[str]]

    """
    pieces = []
    length = len(line)
    pos = 0

    while pos < length:
        begin = pos
        backslashes = 0

        if stopchar is None:
            # Outside a quoted string: consume text up to, but not
            # including, the next un-escaped quote character.
            while pos < length:
                current = line[pos]
                if current in quotechars and backslashes % 2 == 0:
                    stopchar = current
                    break
                backslashes = backslashes + 1 if current == "\\" else 0
                pos += 1
            if pos > begin:
                text = line[begin:pos]
                pieces.append(text.lower() if lower else text)
            continue

        # Inside a quoted string, or at the quote character opening one.
        if line[pos] == stopchar:
            pos += 1
            if pos >= length:
                # The line ends straight after this quote character; the
                # string is left open.
                pieces.append(String(line[begin:pos]))
                break

        # Consume up to and including the closing quote, or to the end of
        # the line if the string is not closed on this line.
        while pos < length:
            current = line[pos]
            pos += 1
            if current == stopchar and backslashes % 2 == 0:
                stopchar = None
                break
            backslashes = backslashes + 1 if current == "\\" else 0
        pieces.append(String(line[begin:pos]))

    return pieces, stopchar




[docs]
def splitparen(line, paren_open="([", paren_close=")]"):
    """
    Breaks a line up into its top-level bracketed regions and the text
    in between them. E.g.: "a( (1+2)*3) = b(x)" becomes:
    ["a", "( (1+2)*3)", " = b", "(x)"]

    Brackets inside single- or double-quoted text are ignored, as is any
    character that follows an odd number of consecutive backslashes. The
    text preceding an opening top-level bracket is always emitted, even if
    it is empty. Completed bracketed regions are returned as
    :py:class:`ParenString` objects.

    :param str line: the string to split.
    :param str paren_open: The characters that define an open parentheses.
    :param str paren_close: The characters that define a closing parentheses.
    :return: List of parenthesised and not-parenthesised parts
    :rtype: list of str

    The paren_open and paren_close strings must be matched in order:
    paren_open[x] is closed by paren_close[x].
    """

    assert len(paren_open) == len(paren_close)

    parts = []
    # Closing brackets still awaited, innermost last.
    pending_close = []
    # Index at which the part currently being read begins.
    part_begin = 0
    # True while an odd number of consecutive backslashes has been read.
    escaped = False
    # The quote character that opened the quoted text being read, or
    # empty when not inside quotes.
    quote = ""

    for pos, current in enumerate(line):
        if current == "\\":
            escaped = not escaped
        elif escaped:
            # An escaped character is neither a real quote nor a real
            # bracket: skip over it.
            escaped = False
        elif quote:
            if current == quote:
                quote = ""
        elif current in ("'", '"'):
            quote = current
        else:
            open_idx = paren_open.find(current)
            if open_idx != -1:
                if not pending_close:
                    # A top-level bracket opens: close off the text
                    # that precedes it.
                    parts.append(line[part_begin:pos])
                    part_begin = pos
                pending_close.append(paren_close[open_idx])
            elif pending_close and current == pending_close[-1]:
                pending_close.pop()
                if not pending_close:
                    # The top-level bracket has just been closed.
                    parts.append(ParenString(line[part_begin : pos + 1]))
                    part_begin = pos + 1

    # Whatever remains (including any unclosed bracketed region) forms
    # the final part.
    if part_begin != len(line):
        parts.append(line[part_begin:])
    return parts