Source code for catalyst.compiler

# Copyright 2022-2023 Xanadu Quantum Technologies Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This module contains functions for lowering, compiling, and linking
MLIR/LLVM representations.
"""
import glob
import importlib
import logging
import os
import pathlib
import platform
import shutil
import subprocess
import sys
import warnings
from copy import deepcopy
from dataclasses import dataclass
from io import TextIOWrapper
from os import path
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

from mlir_quantum.compiler_driver import run_compiler_driver

from catalyst.logging import debug_logger, debug_logger_init
from catalyst.utils.exceptions import CompileError
from catalyst.utils.filesystem import Directory
from catalyst.utils.runtime_environment import get_lib_path

logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

package_root = os.path.dirname(__file__)

DEFAULT_CUSTOM_CALLS_LIB_PATH = path.join(package_root, "utils")


# pylint: disable=too-many-instance-attributes
@dataclass
class CompileOptions:
    """Generic compilation options, for which reasonable default values exist.

    Args:
        verbose (Optional[bool]): flag indicating whether to enable verbose output.
            Default is ``False``
        logfile (Optional[TextIOWrapper]): the logfile to write output to.
            Default is ``sys.stderr``
        target (Optional[str]): the compilation target. Default is ``"binary"``.
        keep_intermediate (Optional[bool]): flag indicating whether to keep intermediate results.
            Default is ``False``
        pipelines (Optional[List[Tuple[str,List[str]]]]): A list of tuples. The first entry of the
            tuple corresponds to the name of a pipeline. The second entry of the tuple corresponds
            to a list of MLIR passes.
        autograph (Optional[bool]): flag indicating whether experimental autograph support is to
            be enabled.
        autograph_include (Optional[Iterable[str]]): A list of (sub)modules to be allow-listed
            for autograph conversion.
        async_qnodes (Optional[bool]): flag indicating whether experimental asynchronous execution
            of QNodes support is to be enabled.
        lower_to_llvm (Optional[bool]): flag indicating whether to attempt the LLVM lowering after
            the main compilation pipeline is complete. Default is ``True``.
        static_argnums (Optional[Union[int, Iterable[int]]]): indices of static arguments.
            Default is ``None``. After initialization the value is always stored as a tuple.
        abstracted_axes (Optional[Any]): store the abstracted_axes value. Defaults to ``None``.
        disable_assertions (Optional[bool]): disables all assertions. Default is ``False``.
        seed (Optional[int]) : the seed for random operations in a qjit call.
            Default is None.

    Raises:
        CompileError: if a seed is given together with ``async_qnodes``.
        ValueError: if the seed does not fit in an unsigned 32-bit integer.
    """

    verbose: Optional[bool] = False
    logfile: Optional[TextIOWrapper] = sys.stderr
    target: Optional[str] = "binary"
    keep_intermediate: Optional[bool] = False
    pipelines: Optional[List[Any]] = None
    autograph: Optional[bool] = False
    autograph_include: Optional[Iterable[str]] = ()
    async_qnodes: Optional[bool] = False
    static_argnums: Optional[Union[int, Iterable[int]]] = None
    abstracted_axes: Optional[Union[Iterable[Iterable[str]], Dict[int, str]]] = None
    lower_to_llvm: Optional[bool] = True
    disable_assertions: Optional[bool] = False
    seed: Optional[int] = None

    def __post_init__(self):
        """Validate the seed-related options and normalize ``static_argnums`` to a tuple."""
        # Check that async runs must not be seeded
        if self.async_qnodes and self.seed is not None:
            raise CompileError(
                """
                Seeding has no effect on asyncronous qnodes,
                as the execution order of parallel runs is not guaranteed.
                As such, seeding an asynchronous run is not supported.
                """
            )

        # Check that seed is 32-bit unsigned int
        if (self.seed is not None) and (self.seed < 0 or self.seed > 2**32 - 1):
            raise ValueError(
                """
                Seed must be an unsigned 32-bit integer!
                """
            )

        # Make the format of static_argnums easier to handle.
        static_argnums = self.static_argnums
        if static_argnums is None:
            self.static_argnums = ()
        elif isinstance(static_argnums, int):
            self.static_argnums = (static_argnums,)
        elif isinstance(static_argnums, Iterable):
            self.static_argnums = tuple(static_argnums)

    def __deepcopy__(self, memo):
        """Make a deep copy of all fields of a CompileOptions object except the logfile, which is
        copied directly (file handles such as ``sys.stderr`` cannot be deep-copied)."""
        return CompileOptions(
            **{
                # The logfile handle is shared with the original; every other field is
                # deep-copied, forwarding ``memo`` so shared sub-objects are copied only once.
                k: (self.logfile if k == "logfile" else deepcopy(v, memo))
                for k, v in self.__dict__.items()
            }
        )

    def get_pipelines(self) -> List[Tuple[str, List[str]]]:
        """Get effective pipelines.

        User-supplied ``pipelines`` take precedence, then the asynchronous defaults when
        ``async_qnodes`` is set, and finally the regular default pipelines.

        Returns:
            List[Tuple[str, List[str]]]: list of ``(pipeline name, list of passes)`` tuples.
        """
        if self.pipelines:
            return self.pipelines
        if self.async_qnodes:
            return DEFAULT_ASYNC_PIPELINES  # pragma: nocover

        # NOTE: this toggles the "disable-assertion" pass on the module-level
        # QUANTUM_COMPILATION_PASS list in place, so the change is visible through every
        # default pipeline list that references that tuple.
        quantum_passes = QUANTUM_COMPILATION_PASS[1]
        if self.disable_assertions:
            if "disable-assertion" not in quantum_passes:
                quantum_passes.append("disable-assertion")
        elif "disable-assertion" in quantum_passes:
            quantum_passes.remove("disable-assertion")
        return DEFAULT_PIPELINES


@debug_logger
def run_writing_command(command: List[str], compile_options: Optional[CompileOptions]) -> None:
    """Run the command after optionally announcing this fact to the user.

    Args:
        command (List[str]): command to be sent to a subprocess.
        compile_options (Optional[CompileOptions]): compile options. When ``None`` or when
            ``verbose`` is off, the command is run without being announced.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero status.
    """

    # The parameter is declared Optional, so guard against None before reading ``verbose``.
    if compile_options is not None and compile_options.verbose:
        print(f"[SYSTEM] {' '.join(command)}", file=compile_options.logfile)
    subprocess.run(command, check=True)


# Named pipeline in the ``(pipeline name, list of passes)`` format used by
# ``CompileOptions.pipelines``. It is the first entry of both default pipeline lists.
HLO_LOWERING_PASS = (
    "HLOLoweringPass",
    [
        "canonicalize",
        "func.func(chlo-legalize-to-hlo)",
        "stablehlo-legalize-to-hlo",
        "func.func(mhlo-legalize-control-flow)",
        "func.func(hlo-legalize-shapeops-to-standard)",
        "func.func(hlo-legalize-to-linalg)",
        "func.func(mhlo-legalize-to-std)",
        "func.func(hlo-legalize-sort)",
        "convert-to-signless",
        "canonicalize",
        "scatter-lowering",
        "hlo-custom-call-lowering",
        "cse",
    ],
)

# Named pipeline in the ``(pipeline name, list of passes)`` format.
# NOTE: the pass list is mutated in place by ``CompileOptions.get_pipelines``, which appends
# or removes "disable-assertion" according to the ``disable_assertions`` option. The literal
# below is therefore only the initial state of the list.
QUANTUM_COMPILATION_PASS = (
    "QuantumCompilationPass",
    [
        "annotate-function",
        "lower-mitigation",
        "lower-gradients",
        "adjoint-lowering",
        "disable-assertion",
    ],
)

# Named pipeline in the ``(pipeline name, list of passes)`` format. The order of the entries
# matters; the inline comments record the known ordering constraints.
BUFFERIZATION_PASS = (
    "BufferizationPass",
    [
        "one-shot-bufferize{dialect-filter=memref}",
        "inline",
        "gradient-bufferize",
        "scf-bufferize",
        "convert-tensor-to-linalg",  # tensor.pad
        "convert-elementwise-to-linalg",  # Must be run before --arith-bufferize
        "arith-bufferize",
        "empty-tensor-to-alloc-tensor",
        "func.func(bufferization-bufferize)",
        "func.func(tensor-bufferize)",
        "catalyst-bufferize",  # Must be run before -- func.func(linalg-bufferize)
        "func.func(linalg-bufferize)",
        "func.func(tensor-bufferize)",
        "quantum-bufferize",
        "func-bufferize",
        "func.func(finalizing-bufferize)",
        "canonicalize",  # Remove dead memrefToTensorOp's
        # introduced during gradient-bufferize of callbacks
        "func.func(buffer-hoisting)",
        "func.func(buffer-loop-hoisting)",
        "func.func(buffer-deallocation)",
        "convert-arraylist-to-memref",
        "convert-bufferization-to-memref",
        "canonicalize",  # Must be after convert-bufferization-to-memref
        # otherwise there are issues in lowering of dynamic tensors.
        # "cse",
        "cp-global-memref",
    ],
)


# Named pipeline in the ``(pipeline name, list of passes)`` format. It is the last entry of
# DEFAULT_PIPELINES and the template from which MLIR_TO_LLVM_ASYNC_PASS is deep-copied.
MLIR_TO_LLVM_PASS = (
    "MLIRToLLVMDialect",
    [
        "expand-realloc",
        "convert-gradient-to-llvm",
        "func.func(convert-linalg-to-loops)",
        "convert-scf-to-cf",
        # This pass expands memref ops that modify the metadata of a memref (sizes, offsets,
        # strides) into a sequence of easier to analyze constructs. In particular, this pass
        # transforms ops into explicit sequence of operations that model the effect of this
        # operation on the different metadata. This pass uses affine constructs to materialize
        # these effects. Concretely, expanded-strided-metadata is used to decompose
        # memref.subview as it has no lowering in -finalize-memref-to-llvm.
        "expand-strided-metadata",
        "lower-affine",
        "arith-expand",  # some arith ops (ceildivsi) require expansion to be lowered to llvm
        "convert-complex-to-standard",  # added for complex.exp lowering
        "convert-complex-to-llvm",
        "convert-math-to-llvm",
        # Run after -convert-math-to-llvm as it marks math::powf illegal without converting it.
        "convert-math-to-libm",
        "convert-arith-to-llvm",
        "finalize-memref-to-llvm{use-generic-functions}",
        "convert-index-to-llvm",
        "convert-catalyst-to-llvm",
        "convert-quantum-to-llvm",
        "emit-catalyst-py-interface",
        # Remove any dead casts as the final pass expects to remove all existing casts,
        # but only those that form a loop back to the original type.
        "canonicalize",
        "reconcile-unrealized-casts",
        "gep-inbounds",
        "add-exception-handling",
        "register-inactive-callback",
    ],
)


# Pipelines returned by ``CompileOptions.get_pipelines`` when no custom pipelines are given
# and ``async_qnodes`` is off. The list holds references to the tuples defined above, not
# copies.
DEFAULT_PIPELINES = [
    HLO_LOWERING_PASS,
    QUANTUM_COMPILATION_PASS,
    BUFFERIZATION_PASS,
    MLIR_TO_LLVM_PASS,
]

# Independent copy of the LLVM lowering pipeline with the async-related passes inserted at
# the front of its pass list (slice assignment at index 0). The deep copy keeps
# MLIR_TO_LLVM_PASS itself unchanged.
MLIR_TO_LLVM_ASYNC_PASS = deepcopy(MLIR_TO_LLVM_PASS)
MLIR_TO_LLVM_ASYNC_PASS[1][:0] = [
    "qnode-to-async-lowering",
    "async-func-to-async-runtime",
    "async-to-async-runtime",
    "convert-async-to-llvm",
]

# Pipelines returned by ``CompileOptions.get_pipelines`` when ``async_qnodes`` is set and no
# custom pipelines are given. The first three entries are the same objects used in
# DEFAULT_PIPELINES; only the final LLVM lowering pipeline differs.
DEFAULT_ASYNC_PIPELINES = [
    HLO_LOWERING_PASS,
    QUANTUM_COMPILATION_PASS,
    BUFFERIZATION_PASS,
    MLIR_TO_LLVM_ASYNC_PASS,
]


class LinkerDriver:
    """Compiler used to drive the linking stage.
    In order to avoid relying on a single linker at run time and allow the user some flexibility,
    this class defines a compiler resolution order where multiple known compilers are attempted.
    The order is defined as follows:
    1. A user specified compiler via the environment variable CATALYST_CC. It is expected that the
        user provided compiler is flag compatible with GCC/Clang.
    2. clang: Priority is given to clang to maintain an LLVM toolchain through most of the process.
    3. gcc: Usually configured to link with LD.
    4. c99: Usually defaults to gcc, but no linker interface is specified.
    5. c89: Usually defaults to gcc, but no linker interface is specified.
    6. cc: Usually defaults to gcc, however POSIX states that it is deprecated.
    """

    _default_fallback_compilers = ["clang", "gcc", "c99", "c89", "cc"]

    @staticmethod
    @debug_logger
    def get_default_flags(options):
        """Re-compute the path where the libraries exist.

        The use case for this is if someone is in a python jupyter notebook and
        needs to change the environment mid computation.

        Args:
            options (CompileOptions): compile options; only ``async_qnodes`` is read here.

        Returns
            (List[str]): The default flag list.

        Raises:
            CompileError: if SciPy or its bundled OpenBLAS library cannot be located.
        """
        # ``import importlib`` alone does not guarantee the ``util`` submodule is loaded.
        import importlib.util  # pylint: disable=import-outside-toplevel

        mlir_lib_path = get_lib_path("llvm", "MLIR_LIB_DIR")
        rt_lib_path = get_lib_path("runtime", "RUNTIME_LIB_DIR")

        # Adds RUNTIME_LIB_DIR to the Python system path to allow the catalyst_callback_registry
        # to be importable. Guarded so repeated calls do not pile up duplicate entries.
        if rt_lib_path not in sys.path:
            sys.path.append(rt_lib_path)
        import catalyst_callback_registry as registry  # pylint: disable=import-outside-toplevel

        # We use MLIR's C runner utils library in the registry.
        # In order to be able to dlopen that library we need to know the path
        # So we set the path here.
        registry.set_mlir_lib_path(mlir_lib_path)

        lib_path_flags = [
            f"-Wl,-rpath,{mlir_lib_path}",
            f"-L{mlir_lib_path}",
        ]

        if rt_lib_path != mlir_lib_path:
            lib_path_flags += [
                f"-Wl,-rpath,{rt_lib_path}",
                f"-L{rt_lib_path}",
            ]
        else:
            pass  # pragma: nocover

        # Discover the custom call library provided by the frontend & add it to the rpath and -L.
        lib_path_flags += [
            f"-Wl,-rpath,{DEFAULT_CUSTOM_CALLS_LIB_PATH}",
            f"-L{DEFAULT_CUSTOM_CALLS_LIB_PATH}",
        ]

        # Discover the LAPACK library provided by scipy & add link against it.
        # Doing this here ensures we will always have the correct library name.

        if platform.system() == "Linux":
            file_path_within_package = "../scipy.libs/"
            file_extension = ".so"
        else:  # pragma: nocover
            msg = "Attempting to use catalyst on an unsupported system"
            assert platform.system() == "Darwin", msg
            file_path_within_package = ".dylibs/"
            file_extension = ".dylib"

        package_name = "scipy"
        scipy_package = importlib.util.find_spec(package_name)
        # find_spec returns None when the package is not installed; report that explicitly
        # instead of failing with an AttributeError on ``.origin``.
        if scipy_package is None or scipy_package.origin is None:
            raise CompileError(
                "Unable to find the SciPy package. "
                "Please ensure that SciPy is installed and available via pip."
            )
        package_directory = path.dirname(scipy_package.origin)
        scipy_lib_path = path.join(package_directory, file_path_within_package)

        file_prefix = "libopenblas"
        search_pattern = path.join(scipy_lib_path, f"{file_prefix}*{file_extension}")
        search_result = glob.glob(search_pattern)
        if not search_result:
            raise CompileError(
                f'Unable to find OpenBLAS library at "{search_pattern}". '
                "Please ensure that SciPy is installed and available via pip."
            )
        openblas_so_file = search_result[0]
        # Strip the leading "lib" and the trailing extension to get the name usable with ``-l``.
        openblas_lib_name = path.basename(openblas_so_file)[3 : -len(file_extension)]

        lib_path_flags += [
            f"-Wl,-rpath,{scipy_lib_path}",
            f"-L{scipy_lib_path}",
        ]

        system_flags = []
        if platform.system() == "Linux":
            system_flags += ["-Wl,-no-as-needed"]
        elif platform.system() == "Darwin":  # pragma: nocover
            system_flags += ["-Wl,-arch_errors_fatal"]

        # The exception handling mechanism requires linking against
        # __gxx_personality_v0 which is either on -lstdc++ in
        # or -lc++. We choose based on the operating system.
        if options.async_qnodes and platform.system() == "Linux":  # pragma: nocover
            system_flags += ["-lstdc++"]
        elif options.async_qnodes and platform.system() == "Darwin":  # pragma: nocover
            system_flags += ["-lc++"]

        default_flags = [
            "-shared",
            "-rdynamic",
            *system_flags,
            *lib_path_flags,
            "-lrt_capi",
            "-lpthread",
            "-lmlir_c_runner_utils",  # required for memref.copy
            f"-l{openblas_lib_name}",  # required for custom_calls lib
            "-lcustom_calls",
            "-lmlir_async_runtime",
        ]
        return default_flags

    @staticmethod
    def _get_compiler_fallback_order(fallback_compilers):
        """Compiler fallback order.

        The compiler named by ``CATALYST_CC`` is placed first when it can be found in PATH. If
        it is set but cannot be found, a ``UserWarning`` is emitted and only the fallback
        compilers are used.

        Args:
            fallback_compilers (List[str]): compilers to try after the preferred one.

        Returns:
            (List[str]): compiler names in the order they should be attempted.
        """
        preferred_compiler = os.environ.get("CATALYST_CC", None)
        if not preferred_compiler:
            # Nothing requested: do not put a ``None`` placeholder into the candidate list.
            return list(fallback_compilers)
        if LinkerDriver._exists(preferred_compiler):
            return [preferred_compiler, *fallback_compilers]
        msg = f"User defined compiler {preferred_compiler} is not in PATH. Using fallback ..."
        warnings.warn(msg, UserWarning)
        return list(fallback_compilers)

    @staticmethod
    def _exists(compiler):
        """Return the resolved path of ``compiler`` as found by ``shutil.which``, or ``None``
        when no compiler name is given or it is not found."""
        if compiler is None:
            return None
        return shutil.which(compiler)

    @staticmethod
    def _available_compilers(fallback_compilers):
        """Yield, in resolution order, the candidate compilers that can be found in PATH."""
        for compiler in LinkerDriver._get_compiler_fallback_order(fallback_compilers):
            if LinkerDriver._exists(compiler):
                yield compiler

    @staticmethod
    def _attempt_link(compiler, flags, infile, outfile, options):
        """Try to link ``infile`` into ``outfile`` with the given compiler.

        Returns:
            (bool): ``True`` if the compiler exited successfully, ``False`` if it returned a
            non-zero exit code.
        """
        command = [compiler, *flags, infile, "-o", outfile]
        try:
            run_writing_command(command, options)
        except subprocess.CalledProcessError as e:
            # Only warn in verbose mode, as users might see it otherwise in regular use.
            if options.verbose:
                msg = f"Compiler {compiler} failed to link executable and returned with exit code "
                msg += f"{e.returncode}. Output was: {e.output}.\nCommand: {command}"
                warnings.warn(msg, UserWarning)
            return False
        return True

    @staticmethod
    @debug_logger
    def get_output_filename(infile):
        """Derive the shared object file name from the object file name.

        Args:
            infile (str): input file name

        Returns:
            (str): the input path with its suffix replaced by ``.so``.

        Raises:
            FileNotFoundError: if ``infile`` does not exist.
        """
        infile_path = pathlib.Path(infile)
        if not infile_path.exists():
            raise FileNotFoundError(f"Cannot find {infile}.")
        return str(infile_path.with_suffix(".so"))

    @staticmethod
    @debug_logger
    def run(infile, outfile=None, flags=None, fallback_compilers=None, options=None):
        """
        Link the infile against the necessary libraries and produce the outfile.

        Args:
            infile (str): input file
            outfile (Optional[str]): output file. Defaults to ``infile`` with a ``.so`` suffix.
            flags (Optional[List[str]]): flags to be passed down to the compiler
            fallback_compilers (Optional[List[str]]): name of executables to be looked for in PATH
            options (Optional[CompileOptions]): generic compilation options.

        Returns:
            (str): name of the linked output file.

        Raises:
            CompileError: The exception is raised when no compiler succeeded.
        """
        if outfile is None:
            outfile = LinkerDriver.get_output_filename(infile)
        if options is None:
            options = CompileOptions()
        if flags is None:
            flags = LinkerDriver.get_default_flags(options)
        if fallback_compilers is None:
            fallback_compilers = LinkerDriver._default_fallback_compilers
        for compiler in LinkerDriver._available_compilers(fallback_compilers):
            success = LinkerDriver._attempt_link(compiler, flags, infile, outfile, options)
            if success:
                return outfile
        msg = f"Unable to link {infile}. Please check the output for any error messages. If no "
        msg += "compiler was found by Catalyst, please specify a compatible one via $CATALYST_CC."
        raise CompileError(msg)


class Compiler:
    """Compiles MLIR modules to shared objects by executing the Catalyst compiler driver library."""

    @debug_logger_init
    def __init__(self, options: Optional[CompileOptions] = None):
        """Create a compiler.

        Args:
            options (Optional[CompileOptions]): compilation options. Default-constructed
                options are used when omitted.
        """
        self.options = options if options is not None else CompileOptions()
        # Driver output of the most recent successful ``run_from_ir`` call; ``None`` until then.
        self.last_compiler_output = None

    @debug_logger
    def run_from_ir(self, ir: str, module_name: str, workspace: Directory):
        """Compile a shared object from a textual IR (MLIR or LLVM).

        Args:
            ir (str): Textual IR to be compiled
            module_name (str): Module name to use for naming
            workspace (Directory): directory that holds output files and/or debug dumps.

        Returns:
            output_filename (str): Output file name. For the default pipeline this would be the
                                   shared object library path.
            out_IR (str): Output IR in textual form. For the default pipeline this would be the
                          LLVM IR.
            A list of:
               func_name (str) Inferred name of the main function
               ret_type_name (str) Inferred main function result type name

        Raises:
            CompileError: if the compiler driver raises a ``RuntimeError`` or linking fails.
        """
        assert isinstance(
            workspace, Directory
        ), f"Compiler expects a Directory type, got {type(workspace)}."
        assert workspace.is_dir(), f"Compiler expects an existing directory, got {workspace}."

        # Treat an explicit ``None`` as "do not lower".
        lower_to_llvm = (
            self.options.lower_to_llvm if self.options.lower_to_llvm is not None else False
        )

        if self.options.verbose:
            print(f"[LIB] Running compiler driver in {workspace}", file=self.options.logfile)

        try:
            compiler_output = run_compiler_driver(
                ir,
                str(workspace),
                module_name,
                keep_intermediate=self.options.keep_intermediate,
                verbose=self.options.verbose,
                pipelines=self.options.get_pipelines(),
                lower_to_llvm=lower_to_llvm,
            )
        except RuntimeError as e:
            raise CompileError(*e.args) from e

        if self.options.verbose:
            for line in compiler_output.get_diagnostic_messages().strip().split("\n"):
                print(f"[LIB] {line}", file=self.options.logfile)

        filename = compiler_output.get_object_filename()
        out_IR = compiler_output.get_output_ir()
        func_name = compiler_output.get_function_attributes().get_function_name()
        ret_type_name = compiler_output.get_function_attributes().get_return_type()

        # Linking only happens when the driver was asked to lower to LLVM; otherwise the
        # driver's own output file name is returned unchanged.
        if lower_to_llvm:
            output = LinkerDriver.run(filename, options=self.options)
            output_filename = str(pathlib.Path(output).absolute())
        else:
            output_filename = filename

        self.last_compiler_output = compiler_output
        return output_filename, out_IR, [func_name, ret_type_name]

    @debug_logger
    def run(self, mlir_module, *args, **kwargs):
        """Compile an MLIR module to a shared object.

        .. note::

            For compilation of hybrid quantum-classical PennyLane programs,
            please see the :func:`~.qjit` decorator.

        Args:
            mlir_module: The MLIR module to be compiled
            *args: forwarded to :meth:`run_from_ir` after the IR and module name.
            **kwargs: forwarded to :meth:`run_from_ir`.

        Returns:
            The return value of :meth:`run_from_ir`: the output file name, the output IR and
            the ``[func_name, ret_type_name]`` list.
        """

        return self.run_from_ir(
            mlir_module.operation.get_asm(
                binary=False, print_generic_op_form=False, assume_verified=True
            ),
            # The symbol name attribute prints with surrounding quotes; strip them.
            str(mlir_module.operation.attributes["sym_name"]).replace('"', ""),
            *args,
            **kwargs,
        )

    @debug_logger
    def get_output_of(self, pipeline) -> Optional[str]:
        """Get the output IR of a pipeline.
        Args:
            pipeline (str): name of pass class

        Returns
            (Optional[str]): output IR

        Raises:
            CompileError: if the pipeline is unknown or has no passes, or if nothing has been
                compiled yet.
        """
        if len(dict(self.options.get_pipelines()).get(pipeline, [])) == 0:
            msg = f"Attempting to get output for pipeline: {pipeline},"
            msg += " but no file was found.\n"
            msg += "Are you sure the file exists?"
            raise CompileError(msg)

        # ``last_compiler_output`` stays None until ``run_from_ir`` has succeeded once; fail
        # with a descriptive error instead of an AttributeError on None.
        if self.last_compiler_output is None:
            msg = f"Attempting to get output for pipeline: {pipeline},"
            msg += " but no compilation has been run yet."
            raise CompileError(msg)

        return self.last_compiler_output.get_pipeline_output(pipeline)
_modules/catalyst/compiler
Download Python script
Download Notebook
View on GitHub
Source code for catalyst.compiler

Contents

Downloads

Source code for catalyst.compiler

Contents

Downloads

Related