Chapter 6: MLIR Backend¶
This chapter introduces the MLIR backend, which replaces the LLVM backend from previous chapters. MLIR provides a more flexible and extensible framework for compiler optimization and code generation. We show how to lower RVSDG-IR to MLIR dialects and use MLIR's pass infrastructure for optimization.
The chapter covers:
- How to implement an MLIR backend for RVSDG-IR
- How to use MLIR dialects for different abstraction levels (see the sketch after this list)
- How to apply MLIR passes for optimization
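To make the idea of abstraction levels concrete, the sketch below (a hedged illustration assuming the upstream MLIR Python bindings this chapter imports) parses the same conditional twice: once with the structured scf dialect that the backend emits, and once with the unstructured cf dialect that the pass pipeline later lowers it to.
import mlir.ir as ir

HIGH_LEVEL = """
func.func @select(%a: i64, %b: i64, %p: i1) -> i64 {
  %r = scf.if %p -> (i64) {
    scf.yield %a : i64
  } else {
    scf.yield %b : i64
  }
  return %r : i64
}
"""

LOW_LEVEL = """
func.func @select(%a: i64, %b: i64, %p: i1) -> i64 {
  cf.cond_br %p, ^bb1, ^bb2
^bb1:
  cf.br ^bb3(%a : i64)
^bb2:
  cf.br ^bb3(%b : i64)
^bb3(%r: i64):
  return %r : i64
}
"""

with ir.Context():
    # Both modules verify; they express the same program at different
    # abstraction levels (structured scf vs. branch-based cf).
    for asm in (HIGH_LEVEL, LOW_LEVEL):
        print(ir.Module.parse(asm))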
Imports and Setup¶
from __future__ import annotations
import ctypes
from contextlib import contextmanager
from dataclasses import dataclass
from typing import Any, Callable, TypedDict
import mlir.dialects.arith as arith
import mlir.dialects.cf as cf
import mlir.dialects.func as func
import mlir.dialects.scf as scf
import mlir.execution_engine as execution_engine
import mlir.ir as ir
import mlir.passmanager as passmanager
import mlir.runtime as runtime
import numpy as np
from sealir import ase
from sealir.rvsdg import grammar as rg
from sealir.rvsdg import internal_prefix
from ch03_egraph_program_rewrites import (
run_test,
)
from ch04_1_typeinfer_ifelse import (
Attributes,
)
from ch04_1_typeinfer_ifelse import (
ExtendEGraphToRVSDG as ConditionalExtendGraphtoRVSDG,
)
from ch04_1_typeinfer_ifelse import (
Int64,
MyCostModel,
NbOp_Add_Float64,
NbOp_Add_Int64,
NbOp_CastI64ToF64,
NbOp_Div_Int64,
NbOp_Gt_Int64,
NbOp_Lt_Int64,
NbOp_Sub_Float64,
NbOp_Sub_Int64,
NbOp_Type,
SExpr,
TypeInt64,
)
from ch04_1_typeinfer_ifelse import base_ruleset as if_else_ruleset
from ch04_1_typeinfer_ifelse import jit_compiler as _ch04_1_jit_compiler
from ch04_1_typeinfer_ifelse import (
ruleset_type_infer_float,
setup_argtypes,
)
from ch04_2_typeinfer_loops import (
ExtendEGraphToRVSDG as LoopExtendEGraphToRVSDG,
)
from ch04_2_typeinfer_loops import (
NbOp_Not_Int64,
)
from ch04_2_typeinfer_loops import base_ruleset as loop_ruleset
from utils import IN_NOTEBOOK, Report, display
MLIR Backend Implementation¶
Define the core MLIR backend class that handles type lowering and expression compilation.
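Before reading the class, it helps to see the shape of the MLIR it produces. The hand-written module below is a hedged sketch (parsed with the same mlir bindings imported above) of the two-block function skeleton that the lower method constructs: an entry block reserved for constants that branches into the block holding the actual body, so every constant dominates its uses.
import mlir.ir as ir

with ir.Context():
    print(
        ir.Module.parse(
            """
            func.func @func(%arg0: i64) -> i64 attributes {llvm.emit_c_interface} {
              // block 0: constants only (filled in during lowering)
              cf.br ^bb1
            ^bb1:  // block 1: the actual function body
              return %arg0 : i64
            }
            """
        )
    )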
_DEBUG = False
@dataclass(frozen=True)
class LowerStates(ase.TraverseState):
push: Callable
get_region_args: Callable
function_block: func.FuncOp
constant_block: ir.Block
function_name = "func"
class Backend:
def __init__(self):
self.context = context = ir.Context()
self.f32 = ir.F32Type.get(context=context)
self.f64 = ir.F64Type.get(context=context)
self.i32 = ir.IntegerType.get_signless(32, context=context)
self.i64 = ir.IntegerType.get_signless(64, context=context)
self.boo = ir.IntegerType.get_signless(1, context=context)
def lower_type(self, ty: NbOp_Type):
"""Type Lowering
Convert SealIR types to MLIR types for compilation.
"""
match ty:
case NbOp_Type("Int64"):
return self.i64
case NbOp_Type("Float64"):
return self.f64
case NbOp_Type("Float32"):
return self.f32
raise NotImplementedError(f"unknown type: {ty}")
def lower(self, root: rg.Func, argtypes):
"""Expression Lowering
Lower RVSDG expressions to MLIR operations, handling control flow
and data flow constructs.
"""
context = self.context
self.loc = loc = ir.Location.unknown(context=context)
self.module = module = ir.Module.create(loc=loc)
        # Get an insertion point for the module body so we can add content
        # to the module.
self.module_body = module_body = ir.InsertionPoint(module.body)
# Convert SealIR types to MLIR types.
input_types = tuple([self.lower_type(x) for x in argtypes])
output_types = (
self.lower_type(
Attributes(root.body.begin.attrs).get_return_type(root.body)
),
)
with context, loc, module_body:
            # Construct a function that exposes a C-callable interface
            # (llvm.emit_c_interface).
fun = func.FuncOp(function_name, (input_types, output_types))
fun.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
            # Define two blocks within the function: a constant block that
            # holds all constant definitions, and a function block for the
            # actual body. Keeping constants in the entry block guarantees
            # that every constant dominates its uses, even uses inside
            # regions that may not execute.
            const_block = fun.add_entry_block()
            # Append a second, argument-less block to hold the function body.
            fun.body.blocks.append(*[], arg_locs=None)
            func_block = fun.body.blocks[1]
            # Create insertion points for both blocks.
constant_entry = ir.InsertionPoint(const_block)
function_entry = ir.InsertionPoint(func_block)
region_args = []
@contextmanager
def push(arg_values):
region_args.append(tuple(arg_values))
try:
yield
finally:
region_args.pop()
def get_region_args():
return region_args[-1]
with context, loc, function_entry:
memo = ase.traverse(
root,
self.lower_expr,
LowerStates(
push=push,
get_region_args=get_region_args,
function_block=fun,
constant_block=constant_entry,
),
)
        # Use an unconditional branch to jump from the constant block to the
        # function block. Note that this is inserted at the end of the
        # constant block only after the function body has been built and all
        # constants have been initialized.
with context, loc, constant_entry:
cf.br([], fun.body.blocks[1])
return module
def run_passes(self, module):
"""MLIR Pass Pipeline
Apply MLIR passes for optimization and lowering to LLVM IR.
"""
        if _DEBUG:
            module.dump()
            module.context.enable_multithreading(False)
        pass_man = passmanager.PassManager(context=module.context)
        if _DEBUG and not IN_NOTEBOOK:
            # The notebook may hang if IR printing is enabled and MLIR fails.
            pass_man.enable_ir_printing()
pass_man.add("convert-linalg-to-loops")
pass_man.add("convert-scf-to-cf")
pass_man.add("finalize-memref-to-llvm")
pass_man.add("convert-math-to-libm")
pass_man.add("convert-func-to-llvm")
pass_man.add("convert-index-to-llvm")
pass_man.add("reconcile-unrealized-casts")
pass_man.enable_verifier(True)
pass_man.run(module.operation)
# Output LLVM-dialect MLIR
if _DEBUG:
module.dump()
return module
def lower_expr(self, expr: SExpr, state: LowerStates):
"""Expression Lowering Implementation
Implement the core expression lowering logic for various RVSDG
constructs including functions, regions, control flow, and operations.
"""
match expr:
case rg.Func(args=args, body=body):
names = {
argspec.name: state.function_block.arguments[i]
for i, argspec in enumerate(args.arguments)
}
argvalues = []
                for k in body.begin.inports:
                    if k == internal_prefix("io"):
                        # The IO state has no runtime value; model it with a
                        # dummy i32 constant.
                        v = arith.constant(self.i32, 0)
                    else:
                        v = names[k]
                    argvalues.append(v)
with state.push(argvalues):
outs = yield body
portnames = [p.name for p in body.ports]
retval = outs[portnames.index(internal_prefix("ret"))]
func.ReturnOp([retval])
case rg.RegionBegin(inports=ins):
portvalues = []
for i, k in enumerate(ins):
pv = state.get_region_args()[i]
portvalues.append(pv)
return tuple(portvalues)
case rg.RegionEnd(
begin=rg.RegionBegin() as begin,
ports=ports,
):
yield begin
portvalues = []
for p in ports:
pv = yield p.value
portvalues.append(pv)
return tuple(portvalues)
case rg.ArgRef(idx=int(idx), name=str(name)):
return state.function_block.arguments[idx]
case rg.Unpack(val=source, idx=int(idx)):
ports = yield source
return ports[idx]
case rg.DbgValue(value=value):
val = yield value
return val
case rg.PyInt(int(ival)):
with state.constant_block:
const = arith.constant(self.i64, ival)
return const
case rg.PyBool(int(ival)):
with state.constant_block:
const = arith.constant(self.boo, ival)
return const
case rg.PyFloat(float(fval)):
with state.constant_block:
const = arith.constant(self.f64, fval)
return const
            case NbOp_Gt_Int64(lhs, rhs):
                lhs = yield lhs
                rhs = yield rhs
                # predicate 4 == "sgt" (signed greater-than)
                return arith.cmpi(4, lhs, rhs)
case NbOp_Add_Int64(lhs, rhs):
lhs = yield lhs
rhs = yield rhs
return arith.addi(lhs, rhs)
case NbOp_Sub_Int64(lhs, rhs):
lhs = yield lhs
rhs = yield rhs
return arith.subi(lhs, rhs)
case NbOp_Add_Float64(lhs, rhs):
lhs = yield lhs
rhs = yield rhs
return arith.addf(lhs, rhs)
case NbOp_Sub_Float64(lhs, rhs):
lhs = yield lhs
rhs = yield rhs
return arith.subf(lhs, rhs)
            case NbOp_Lt_Int64(lhs, rhs):
                lhs = yield lhs
                rhs = yield rhs
                # predicate 2 == "slt" (signed less-than)
                return arith.cmpi(2, lhs, rhs)
case NbOp_CastI64ToF64(operand):
val = yield operand
return arith.sitofp(self.f64, val)
            case NbOp_Div_Int64(lhs, rhs):
                lhs = yield lhs
                rhs = yield rhs
                # Python's `/` is true division, so lower to float division.
                return arith.divf(
                    arith.sitofp(self.f64, lhs), arith.sitofp(self.f64, rhs)
                )
            # Operations below were introduced in ch04_2 for loop support.
            case NbOp_Not_Int64(operand):
                # Implement logical not as `operand == 0`
                # (predicate 0 == "eq").
                opval = yield operand
                return arith.cmpi(0, opval, arith.constant(self.i64, 0))
case rg.IfElse(
cond=cond, body=body, orelse=orelse, operands=operands
):
condval = yield cond
                # Determine the result types from the region's port
                # attributes.
rettys = Attributes(body.begin.attrs)
result_tys = []
for i in range(0, rettys.num_output_types() + 1):
out_ty = rettys.get_output_type(i)
if out_ty is not None:
match out_ty.name:
case "Int64":
result_tys.append(self.i64)
case "Float64":
result_tys.append(self.f64)
case "Bool":
result_tys.append(self.boo)
                    else:
                        # Untyped ports (e.g. the IO state) are modelled as a
                        # placeholder i32.
                        result_tys.append(self.i32)
if_op = scf.IfOp(
cond=condval, results_=result_tys, hasElse=bool(orelse)
)
                with ir.InsertionPoint(if_op.then_block):
                    value_then = yield body
                    scf.YieldOp([x for x in value_then])
                with ir.InsertionPoint(if_op.else_block):
                    value_else = yield orelse
                    scf.YieldOp([x for x in value_else])
return if_op.results
case rg.Loop(body=rg.RegionEnd() as body, operands=operands):
rettys = Attributes(body.begin.attrs)
# process operands
ops = []
for op in operands:
ops.append((yield op))
result_tys = []
for i in range(1, rettys.num_output_types() + 1):
out_ty = rettys.get_output_type(i)
if out_ty is not None:
match out_ty.name:
case "Int64":
result_tys.append(self.i64)
case "Float64":
result_tys.append(self.f64)
case "Bool":
result_tys.append(self.boo)
                    else:
                        # Untyped ports (e.g. the IO state) are modelled as a
                        # placeholder i32.
                        result_tys.append(self.i32)
while_op = scf.WhileOp(
results_=result_tys, inits=[op for op in ops]
)
before_block = while_op.before.blocks.append(*result_tys)
after_block = while_op.after.blocks.append(*result_tys)
new_ops = before_block.arguments
# Before Region
with ir.InsertionPoint(before_block), state.push(new_ops):
values = yield body
scf.ConditionOp(
args=[val for val in values[1:]], condition=values[0]
)
                # After Region: simply forwards the loop-carried values back
                # to the before region.
                with ir.InsertionPoint(after_block):
                    scf.YieldOp(after_block.arguments)
while_op_res = scf._get_op_results_or_values(while_op)
return while_op_res
case _:
raise NotImplementedError(expr, type(expr))
def jit_compile(self, llmod, func_node: rg.Func, func_name="func"):
"""JIT Compilation
Convert the MLIR module into a JIT-callable function using the MLIR
execution engine.
"""
attributes = Attributes(func_node.body.begin.attrs)
# Convert SealIR types into MLIR types
with self.loc:
input_types = tuple(
[self.lower_type(x) for x in attributes.input_types()]
)
        output_types = (
            self.lower_type(attributes.get_return_type(func_node.body)),
        )
        return self.jit_compile_extra(
            llmod, input_types, output_types, function_name=func_name
        )
def jit_compile_extra(
self,
llmod,
input_types,
output_types,
function_name="func",
exec_engine=None,
is_ufunc=False,
**execution_engine_params,
):
# Converts the MLIR module into a JIT-callable function.
# Use MLIR's own internal execution engine
if exec_engine is None:
engine = execution_engine.ExecutionEngine(
llmod, **execution_engine_params
)
else:
engine = exec_engine
assert (
len(output_types) == 1
), "Execution of functions with output arguments > 1 not supported"
nout = len(output_types)
# Build a wrapper function
def jit_func(*args):
if is_ufunc:
input_args = args[:-nout]
output_args = args[-nout:]
else:
input_args = args
output_args = [None]
            assert len(input_args) == len(input_types)
            # TODO: check each runtime argument against its declared MLIR
            # type; for now only the arity is checked above.
# Transform the input arguments into C-types
# with their respective values. All inputs to
# the internal execution engine should
# be C-Type pointers.
input_exec_ptrs = [
self.get_exec_ptr(ty, val)[0]
for ty, val in zip(input_types, input_args)
]
            # Invoke the compiled function. Internally this calls
            # _mlir_ciface_<function_name> with the given input pointers;
            # the single result pointer must be appended after all of the
            # input pointers in the invoke call.
res_ptr, res_val = self.get_exec_ptr(
output_types[0], output_args[0]
)
engine.invoke(function_name, *input_exec_ptrs, res_ptr)
return self.get_out_val(res_ptr, res_val)
return jit_func
    @classmethod
    def get_exec_ptr(cls, mlir_ty, val):
"""Get Execution Pointer
Convert MLIR types to C-types and allocate memory for the value.
"""
        if isinstance(mlir_ty, ir.IntegerType):
            # All integer widths are marshalled through a 64-bit slot.
            val = 0 if val is None else val
            ptr = ctypes.pointer(ctypes.c_int64(val))
elif isinstance(mlir_ty, ir.F32Type):
val = 0.0 if val is None else val
ptr = ctypes.pointer(ctypes.c_float(val))
elif isinstance(mlir_ty, ir.F64Type):
val = 0.0 if val is None else val
ptr = ctypes.pointer(ctypes.c_double(val))
elif isinstance(mlir_ty, ir.MemRefType):
if isinstance(mlir_ty.element_type, ir.F64Type):
np_dtype = np.float64
elif isinstance(mlir_ty.element_type, ir.F32Type):
np_dtype = np.float32
else:
raise TypeError(
"The current array element type is not supported"
)
if val is None:
if not mlir_ty.has_static_shape:
raise ValueError(f"{mlir_ty} does not have static shape")
val = np.zeros(mlir_ty.shape, dtype=np_dtype)
ptr = ctypes.pointer(
ctypes.pointer(runtime.get_ranked_memref_descriptor(val))
)
return ptr, val
@classmethod
def get_out_val(cls, res_ptr, res_val):
if isinstance(res_val, np.ndarray):
return res_val
else:
return res_ptr.contents.value
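A minimal usage sketch of the helpers above, runnable without a full RVSDG function (it assumes NbOp_Type can be constructed positionally with its type name, as the match patterns in lower_type suggest): type lowering maps SealIR types onto the MLIR types cached on the backend, and get_exec_ptr wraps Python scalars in the ctypes pointers that ExecutionEngine.invoke expects. The end-to-end path lower, run_passes, jit_compile is exercised by the examples below.
backend = Backend()

# SealIR types map onto the MLIR types cached on the backend.
assert backend.lower_type(NbOp_Type("Int64")) == backend.i64
assert backend.lower_type(NbOp_Type("Float64")) == backend.f64

# Scalars are marshalled through ctypes pointers for the execution engine.
int_ptr, _ = Backend.get_exec_ptr(backend.i64, 42)
assert int_ptr.contents.value == 42
float_ptr, _ = Backend.get_exec_ptr(backend.f64, 1.5)
assert float_ptr.contents.value == 1.5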
Example 1: Simple If-Else¶
Demonstrate the MLIR backend with a simple conditional function. With arguments (10, 33) the else branch runs: z = 33 - 10 = 23, and the function returns 23 + 10 = 33.
def example_1(a, b):
if a > b:
z = a - b
else:
z = b - a
return z + a
compiler_config = dict(
converter_class=LoopExtendEGraphToRVSDG,
backend=Backend(),
cost_model=MyCostModel(),
verbose=True,
)
class RunBEPassOutput(TypedDict):
module: Any
def pipeline_run_be_passes(
backend, module, pipeline_report=Report.Sink()
) -> RunBEPassOutput:
with pipeline_report.nest("MLIR passes") as report:
backend.run_passes(module)
report.append("MLIR optimized", module)
return dict(module=module)
jit_compiler = _ch04_1_jit_compiler.insert(-1, pipeline_run_be_passes)
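Stages in this pipeline are plain functions whose keyword arguments are supplied by earlier stages and whose returned dict feeds the later ones; pipeline_run_be_passes above follows that shape. As a hedged illustration, a hypothetical stage that dumps the module before the backend passes run could be inserted the same way (pipeline_dump_module and its exact injection behavior are assumptions, not part of the original pipeline):
def pipeline_dump_module(
    module, pipeline_report=Report.Sink()
) -> RunBEPassOutput:
    # Hypothetical debugging stage: print the MLIR module and pass it
    # through unchanged.
    print(module)
    return dict(module=module)

# Inserted just like pipeline_run_be_passes:
# debug_jit_compiler = jit_compiler.insert(-1, pipeline_dump_module)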
if __name__ == "__main__":
display(jit_compiler.visualize())
report = Report("Pipeline execution report", enable_nested_metadata=True)
jit_func = jit_compiler(
fn=example_1,
argtypes=(Int64, Int64),
ruleset=(if_else_ruleset | setup_argtypes(TypeInt64, TypeInt64)),
pipeline_report=report,
**compiler_config,
).jit_func
report.display()
args = (10, 33)
run_test(example_1, jit_func, args, verbose=True)
args = (7, 3)
run_test(example_1, jit_func, args, verbose=True)
Pipeline execution report
time elapsed 0.00ms timing breakdown:
module {
  func.func @func(%arg0: i64, %arg1: i64) -> i64 attributes {llvm.emit_c_interface} {
    cf.br ^bb1
  ^bb1:  // pred: ^bb0
    %c0_i32 = arith.constant 0 : i32
    %0 = arith.cmpi sgt, %arg0, %arg1 : i64
    %1:4 = scf.if %0 -> (i32, i64, i64, i64) {
      %3 = arith.subi %arg0, %arg1 : i64
      scf.yield %c0_i32, %arg0, %arg1, %3 : i32, i64, i64, i64
    } else {
      %3 = arith.subi %arg1, %arg0 : i64
      scf.yield %c0_i32, %arg0, %arg1, %3 : i32, i64, i64, i64
    }
    %2 = arith.addi %1#3, %1#1 : i64
    return %2 : i64
  }
}
time elapsed 2.36ms timing breakdown: 2.36ms: Lowered module
module {
  llvm.func @func(%arg0: i64, %arg1: i64) -> i64 attributes {llvm.emit_c_interface} {
    %0 = llvm.mlir.constant(0 : i32) : i32
    llvm.br ^bb1
  ^bb1:  // pred: ^bb0
    %1 = llvm.icmp "sgt" %arg0, %arg1 : i64
    llvm.cond_br %1, ^bb2, ^bb3
  ^bb2:  // pred: ^bb1
    %2 = llvm.sub %arg0, %arg1 : i64
    llvm.br ^bb4(%0, %arg0, %arg1, %2 : i32, i64, i64, i64)
  ^bb3:  // pred: ^bb1
    %3 = llvm.sub %arg1, %arg0 : i64
    llvm.br ^bb4(%0, %arg0, %arg1, %3 : i32, i64, i64, i64)
  ^bb4(%4: i32, %5: i64, %6: i64, %7: i64):  // 2 preds: ^bb2, ^bb3
    llvm.br ^bb5
  ^bb5:  // pred: ^bb4
    %8 = llvm.add %7, %5 : i64
    llvm.return %8 : i64
  }
  llvm.func @_mlir_ciface_func(%arg0: i64, %arg1: i64) -> i64 attributes {llvm.emit_c_interface} {
    %0 = llvm.call @func(%arg0, %arg1) : (i64, i64) -> i64
    llvm.return %0 : i64
  }
}
time elapsed 1.41ms timing breakdown: 1.41ms: MLIR optimized
Testing report
(10, 33)
33
33
Testing report
(7, 3)
11
11
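The IfElse case in lower_expr builds scf.if imperatively through the Python bindings. The standalone sketch below repeats that construction pattern outside the Backend class, building a max(a, b) function; the function name and module are illustrative only.
import mlir.dialects.arith as arith
import mlir.dialects.func as func
import mlir.dialects.scf as scf
import mlir.ir as ir

with ir.Context(), ir.Location.unknown():
    module = ir.Module.create()
    i64 = ir.IntegerType.get_signless(64)
    with ir.InsertionPoint(module.body):
        fun = func.FuncOp("maxval", ((i64, i64), (i64,)))
        entry = fun.add_entry_block()
        with ir.InsertionPoint(entry):
            a, b = entry.arguments
            cond = arith.cmpi(4, a, b)  # predicate 4 == "sgt"
            if_op = scf.IfOp(cond=cond, results_=[i64], hasElse=True)
            with ir.InsertionPoint(if_op.then_block):
                scf.YieldOp([a])
            with ir.InsertionPoint(if_op.else_block):
                scf.YieldOp([b])
            func.ReturnOp([if_op.results[0]])
    print(module)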
Example 2: Float Operations¶
Test the MLIR backend with float operations and type conversion. With arguments (10, 33) the else branch runs: z = 33.0 - 1/10 = 32.9, and the function returns 32.9 - 10.0 = 22.9.
def example_2(a, b):
if a > b:
z = float(a - b)
else:
z = float(b) - 1 / a
return z - float(a)
Add the type-inference rules for float() by including ruleset_type_infer_float in the ruleset.
if __name__ == "__main__":
report = Report("Pipeline execution report", enable_nested_metadata=True)
jit_func = jit_compiler(
fn=example_2,
argtypes=(Int64, Int64),
ruleset=(
if_else_ruleset
| setup_argtypes(TypeInt64, TypeInt64)
| ruleset_type_infer_float # < --- added for float()
),
pipeline_report=report,
**compiler_config,
).jit_func
report.display()
args = (10, 33)
run_test(example_2, jit_func, args, verbose=True)
args = (7, 3)
run_test(example_2, jit_func, args, verbose=True)
Pipeline execution report
time elapsed 0.00ms timing breakdown:
module {
  func.func @func(%arg0: i64, %arg1: i64) -> f64 attributes {llvm.emit_c_interface} {
    %c1_i64 = arith.constant 1 : i64
    cf.br ^bb1
  ^bb1:  // pred: ^bb0
    %c0_i32 = arith.constant 0 : i32
    %0 = arith.cmpi sgt, %arg0, %arg1 : i64
    %1:4 = scf.if %0 -> (i32, i64, i64, f64) {
      %4 = arith.subi %arg0, %arg1 : i64
      %5 = arith.sitofp %4 : i64 to f64
      scf.yield %c0_i32, %arg0, %arg1, %5 : i32, i64, i64, f64
    } else {
      %4 = arith.sitofp %arg1 : i64 to f64
      %5 = arith.sitofp %c1_i64 : i64 to f64
      %6 = arith.sitofp %arg0 : i64 to f64
      %7 = arith.divf %5, %6 : f64
      %8 = arith.subf %4, %7 : f64
      scf.yield %c0_i32, %arg0, %arg1, %8 : i32, i64, i64, f64
    }
    %2 = arith.sitofp %1#1 : i64 to f64
    %3 = arith.subf %1#3, %2 : f64
    return %3 : f64
  }
}
time elapsed 1.46ms timing breakdown: 1.46ms: Lowered module
module {
  llvm.func @func(%arg0: i64, %arg1: i64) -> f64 attributes {llvm.emit_c_interface} {
    %0 = llvm.mlir.constant(1.000000e+00 : f64) : f64
    %1 = llvm.mlir.constant(0 : i32) : i32
    llvm.br ^bb1
  ^bb1:  // pred: ^bb0
    %2 = llvm.icmp "sgt" %arg0, %arg1 : i64
    llvm.cond_br %2, ^bb2, ^bb3
  ^bb2:  // pred: ^bb1
    %3 = llvm.sub %arg0, %arg1 : i64
    %4 = llvm.sitofp %3 : i64 to f64
    llvm.br ^bb4(%1, %arg0, %arg1, %4 : i32, i64, i64, f64)
  ^bb3:  // pred: ^bb1
    %5 = llvm.sitofp %arg1 : i64 to f64
    %6 = llvm.sitofp %arg0 : i64 to f64
    %7 = llvm.fdiv %0, %6 : f64
    %8 = llvm.fsub %5, %7 : f64
    llvm.br ^bb4(%1, %arg0, %arg1, %8 : i32, i64, i64, f64)
  ^bb4(%9: i32, %10: i64, %11: i64, %12: f64):  // 2 preds: ^bb2, ^bb3
    llvm.br ^bb5
  ^bb5:  // pred: ^bb4
    %13 = llvm.sitofp %10 : i64 to f64
    %14 = llvm.fsub %12, %13 : f64
    llvm.return %14 : f64
  }
  llvm.func @_mlir_ciface_func(%arg0: i64, %arg1: i64) -> f64 attributes {llvm.emit_c_interface} {
    %0 = llvm.call @func(%arg0, %arg1) : (i64, i64) -> f64
    llvm.return %0 : f64
  }
}
time elapsed 1.23ms timing breakdown: 1.23ms: MLIR optimized
Testing report
(10, 33)
22.9
22.9
Testing report
(7, 3)
-3.0
-3.0
Example 3: Simple While Loop¶
Demonstrate loop compilation with the MLIR backend. example_3(10, 7) computes 10 + (0 + 1 + ... + 6) = 31.0.
def example_3(init, n):
c = float(init)
i = 0
while i < n:
c = c + float(i)
i = i + 1
return c
if __name__ == "__main__":
report = Report("Pipeline execution report", enable_nested_metadata=True)
jit_func = jit_compiler(
fn=example_3,
argtypes=(Int64, Int64),
ruleset=(loop_ruleset | setup_argtypes(TypeInt64, TypeInt64)),
pipeline_report=report,
**compiler_config,
).jit_func
report.display()
run_test(example_3, jit_func, (10, 7), verbose=True)
Pipeline execution report
time elapsed 0.00ms timing breakdown:
module {
  func.func @func(%arg0: i64, %arg1: i64) -> f64 attributes {llvm.emit_c_interface} {
    %c0_i64 = arith.constant 0 : i64
    %true = arith.constant true
    %c1_i64 = arith.constant 1 : i64
    cf.br ^bb1
  ^bb1:  // pred: ^bb0
    %c0_i32 = arith.constant 0 : i32
    %0 = arith.sitofp %arg0 : i64 to f64
    %1:7 = scf.while (%arg2 = %c0_i32, %arg3 = %c0_i64, %arg4 = %true, %arg5 = %0, %arg6 = %c0_i64, %arg7 = %arg0, %arg8 = %arg1) : (i32, i64, i1, f64, i64, i64, i64) -> (i32, i64, i1, f64, i64, i64, i64) {
      %2 = arith.cmpi slt, %arg6, %arg8 : i64
      %3:7 = scf.if %2 -> (i32, i64, i1, f64, i64, i64, i64) {
        %5 = arith.sitofp %arg6 : i64 to f64
        %6 = arith.addf %arg5, %5 : f64
        %7 = arith.addi %arg6, %c1_i64 : i64
        scf.yield %arg2, %c0_i64, %arg4, %6, %7, %arg7, %arg8 : i32, i64, i1, f64, i64, i64, i64
      } else {
        scf.yield %arg2, %c1_i64, %arg4, %arg5, %arg6, %arg7, %arg8 : i32, i64, i1, f64, i64, i64, i64
      }
      %c0_i64_0 = arith.constant 0 : i64
      %4 = arith.cmpi eq, %3#1, %c0_i64_0 : i64
      scf.condition(%4) %3#0, %3#1, %4, %3#3, %3#4, %3#5, %3#6 : i32, i64, i1, f64, i64, i64, i64
    } do {
    ^bb0(%arg2: i32, %arg3: i64, %arg4: i1, %arg5: f64, %arg6: i64, %arg7: i64, %arg8: i64):
      scf.yield %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8 : i32, i64, i1, f64, i64, i64, i64
    }
    return %1#3 : f64
  }
}
time elapsed 1.93ms timing breakdown: 1.93ms: Lowered module
module {
  llvm.func @func(%arg0: i64, %arg1: i64) -> f64 attributes {llvm.emit_c_interface} {
    %0 = llvm.mlir.constant(0 : i32) : i32
    %1 = llvm.mlir.constant(0 : i64) : i64
    %2 = llvm.mlir.constant(true) : i1
    %3 = llvm.mlir.constant(1 : i64) : i64
    llvm.br ^bb1
  ^bb1:  // pred: ^bb0
    %4 = llvm.sitofp %arg0 : i64 to f64
    llvm.br ^bb2(%0, %1, %2, %4, %1, %arg0, %arg1 : i32, i64, i1, f64, i64, i64, i64)
  ^bb2(%5: i32, %6: i64, %7: i1, %8: f64, %9: i64, %10: i64, %11: i64):  // 2 preds: ^bb1, ^bb6
    %12 = llvm.icmp "slt" %9, %11 : i64
    llvm.cond_br %12, ^bb3, ^bb4
  ^bb3:  // pred: ^bb2
    %13 = llvm.sitofp %9 : i64 to f64
    %14 = llvm.fadd %8, %13 : f64
    %15 = llvm.add %9, %3 : i64
    llvm.br ^bb5(%5, %1, %7, %14, %15, %10, %11 : i32, i64, i1, f64, i64, i64, i64)
  ^bb4:  // pred: ^bb2
    llvm.br ^bb5(%5, %3, %7, %8, %9, %10, %11 : i32, i64, i1, f64, i64, i64, i64)
  ^bb5(%16: i32, %17: i64, %18: i1, %19: f64, %20: i64, %21: i64, %22: i64):  // 2 preds: ^bb3, ^bb4
    llvm.br ^bb6
  ^bb6:  // pred: ^bb5
    %23 = llvm.icmp "eq" %17, %1 : i64
    llvm.cond_br %23, ^bb2(%16, %17, %23, %19, %20, %21, %22 : i32, i64, i1, f64, i64, i64, i64), ^bb7
  ^bb7:  // pred: ^bb6
    llvm.return %19 : f64
  }
  llvm.func @_mlir_ciface_func(%arg0: i64, %arg1: i64) -> f64 attributes {llvm.emit_c_interface} {
    %0 = llvm.call @func(%arg0, %arg1) : (i64, i64) -> f64
    llvm.return %0 : f64
  }
}
time elapsed 1.35ms timing breakdown: 1.35ms: MLIR optimized
Testing report
(10, 7)
31.0
31.0
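The Loop case in lower_expr packs the entire RVSDG loop body into the before region of scf.while, with the after region merely forwarding values. The hedged sketch below uses the same construction calls (WhileOp, ConditionOp, YieldOp) in their more conventional arrangement, a counting loop that returns n; the function name and module are illustrative only.
import mlir.dialects.arith as arith
import mlir.dialects.func as func
import mlir.dialects.scf as scf
import mlir.ir as ir

with ir.Context(), ir.Location.unknown():
    module = ir.Module.create()
    i64 = ir.IntegerType.get_signless(64)
    with ir.InsertionPoint(module.body):
        fun = func.FuncOp("count_to", ((i64,), (i64,)))
        entry = fun.add_entry_block()
        with ir.InsertionPoint(entry):
            (n,) = entry.arguments
            zero = arith.constant(i64, 0)
            one = arith.constant(i64, 1)
            while_op = scf.WhileOp(results_=[i64], inits=[zero])
            before = while_op.before.blocks.append(i64)
            after = while_op.after.blocks.append(i64)
            # Before region: test the condition, forward the carried value.
            with ir.InsertionPoint(before):
                (i,) = before.arguments
                cond = arith.cmpi(2, i, n)  # predicate 2 == "slt"
                scf.ConditionOp(args=[i], condition=cond)
            # After region: advance the induction variable.
            with ir.InsertionPoint(after):
                (i,) = after.arguments
                scf.YieldOp([arith.addi(i, one)])
            func.ReturnOp([while_op.results[0]])
    print(module)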
Example 4: Nested Loop¶
Test nested loop compilation with the MLIR backend. example_4(10, 7) adds, for each i < 7, the sum of j for j < i, i.e. 0 + 0 + 1 + 3 + 6 + 10 + 15 = 35, onto the initial 10.0, giving 45.0.
def example_4(init, n):
c = float(init)
i = 0
while i < n:
j = 0
while j < i:
c = c + float(j)
j = j + 1
i = i + 1
return c
if __name__ == "__main__":
report = Report("Pipeline execution report", enable_nested_metadata=True)
jit_func = jit_compiler(
fn=example_4,
argtypes=(Int64, Int64),
ruleset=(loop_ruleset | setup_argtypes(TypeInt64, TypeInt64)),
pipeline_report=report,
**compiler_config,
).jit_func
report.display()
run_test(example_4, jit_func, (10, 7), verbose=True)
Pipeline execution report
time elapsed 0.00ms timing breakdown:
module {
  func.func @func(%arg0: i64, %arg1: i64) -> f64 attributes {llvm.emit_c_interface} {
    %c0_i64 = arith.constant 0 : i64
    %true = arith.constant true
    %false = arith.constant false
    %c1_i64 = arith.constant 1 : i64
    cf.br ^bb1
  ^bb1:  // pred: ^bb0
    %c0_i32 = arith.constant 0 : i32
    %0 = arith.sitofp %arg0 : i64 to f64
    %1:10 = scf.while (%arg2 = %c0_i32, %arg3 = %c0_i64, %arg4 = %c0_i64, %arg5 = %true, %arg6 = %false, %arg7 = %0, %arg8 = %c0_i64, %arg9 = %arg0, %arg10 = %c0_i64, %arg11 = %arg1) : (i32, i64, i64, i1, i1, f64, i64, i64, i64, i64) -> (i32, i64, i64, i1, i1, f64, i64, i64, i64, i64) {
      %2 = arith.cmpi slt, %arg8, %arg11 : i64
      %3:10 = scf.if %2 -> (i32, i64, i64, i1, i1, f64, i64, i64, i64, i64) {
        %5:10 = scf.while (%arg12 = %arg2, %arg13 = %arg3, %arg14 = %arg4, %arg15 = %arg5, %arg16 = %true, %arg17 = %arg7, %arg18 = %arg8, %arg19 = %arg9, %arg20 = %c0_i64, %arg21 = %arg11) : (i32, i64, i64, i1, i1, f64, i64, i64, i64, i64) -> (i32, i64, i64, i1, i1, f64, i64, i64, i64, i64) {
          %7 = arith.cmpi slt, %arg20, %arg18 : i64
          %8:10 = scf.if %7 -> (i32, i64, i64, i1, i1, f64, i64, i64, i64, i64) {
            %10 = arith.sitofp %arg20 : i64 to f64
            %11 = arith.addf %arg17, %10 : f64
            %12 = arith.addi %arg20, %c1_i64 : i64
            scf.yield %arg12, %arg13, %c0_i64, %arg15, %arg16, %11, %arg18, %arg19, %12, %arg21 : i32, i64, i64, i1, i1, f64, i64, i64, i64, i64
          } else {
            scf.yield %arg12, %arg13, %c1_i64, %arg15, %arg16, %arg17, %arg18, %arg19, %arg20, %arg21 : i32, i64, i64, i1, i1, f64, i64, i64, i64, i64
          }
          %c0_i64_1 = arith.constant 0 : i64
          %9 = arith.cmpi eq, %8#2, %c0_i64_1 : i64
          scf.condition(%9) %8#0, %8#1, %8#2, %8#3, %9, %8#5, %8#6, %8#7, %8#8, %8#9 : i32, i64, i64, i1, i1, f64, i64, i64, i64, i64
        } do {
        ^bb0(%arg12: i32, %arg13: i64, %arg14: i64, %arg15: i1, %arg16: i1, %arg17: f64, %arg18: i64, %arg19: i64, %arg20: i64, %arg21: i64):
          scf.yield %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19, %arg20, %arg21 : i32, i64, i64, i1, i1, f64, i64, i64, i64, i64
        }
        %6 = arith.addi %5#6, %c1_i64 : i64
        scf.yield %5#0, %c0_i64, %5#2, %5#3, %5#4, %5#5, %6, %5#7, %5#8, %5#9 : i32, i64, i64, i1, i1, f64, i64, i64, i64, i64
      } else {
        scf.yield %arg2, %c1_i64, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11 : i32, i64, i64, i1, i1, f64, i64, i64, i64, i64
      }
      %c0_i64_0 = arith.constant 0 : i64
      %4 = arith.cmpi eq, %3#1, %c0_i64_0 : i64
      scf.condition(%4) %3#0, %3#1, %3#2, %4, %3#4, %3#5, %3#6, %3#7, %3#8, %3#9 : i32, i64, i64, i1, i1, f64, i64, i64, i64, i64
    } do {
    ^bb0(%arg2: i32, %arg3: i64, %arg4: i64, %arg5: i1, %arg6: i1, %arg7: f64, %arg8: i64, %arg9: i64, %arg10: i64, %arg11: i64):
      scf.yield %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11 : i32, i64, i64, i1, i1, f64, i64, i64, i64, i64
    }
    return %1#5 : f64
  }
}
time elapsed 3.26ms timing breakdown: 3.26ms: Lowered module
module {
  llvm.func @func(%arg0: i64, %arg1: i64) -> f64 attributes {llvm.emit_c_interface} {
    %0 = llvm.mlir.constant(0 : i32) : i32
    %1 = llvm.mlir.constant(0 : i64) : i64
    %2 = llvm.mlir.constant(true) : i1
    %3 = llvm.mlir.constant(false) : i1
    %4 = llvm.mlir.constant(1 : i64) : i64
    llvm.br ^bb1
  ^bb1:  // pred: ^bb0
    %5 = llvm.sitofp %arg0 : i64 to f64
    llvm.br ^bb2(%0, %1, %1, %2, %3, %5, %1, %arg0, %1, %arg1 : i32, i64, i64, i1, i1, f64, i64, i64, i64, i64)
  ^bb2(%6: i32, %7: i64, %8: i64, %9: i1, %10: i1, %11: f64, %12: i64, %13: i64, %14: i64, %15: i64):  // 2 preds: ^bb1, ^bb12
    %16 = llvm.icmp "slt" %12, %15 : i64
    llvm.cond_br %16, ^bb3, ^bb10
  ^bb3:  // pred: ^bb2
    llvm.br ^bb4(%6, %7, %8, %9, %2, %11, %12, %13, %1, %15 : i32, i64, i64, i1, i1, f64, i64, i64, i64, i64)
  ^bb4(%17: i32, %18: i64, %19: i64, %20: i1, %21: i1, %22: f64, %23: i64, %24: i64, %25: i64, %26: i64):  // 2 preds: ^bb3, ^bb8
    %27 = llvm.icmp "slt" %25, %23 : i64
    llvm.cond_br %27, ^bb5, ^bb6
  ^bb5:  // pred: ^bb4
    %28 = llvm.sitofp %25 : i64 to f64
    %29 = llvm.fadd %22, %28 : f64
    %30 = llvm.add %25, %4 : i64
    llvm.br ^bb7(%17, %18, %1, %20, %21, %29, %23, %24, %30, %26 : i32, i64, i64, i1, i1, f64, i64, i64, i64, i64)
  ^bb6:  // pred: ^bb4
    llvm.br ^bb7(%17, %18, %4, %20, %21, %22, %23, %24, %25, %26 : i32, i64, i64, i1, i1, f64, i64, i64, i64, i64)
  ^bb7(%31: i32, %32: i64, %33: i64, %34: i1, %35: i1, %36: f64, %37: i64, %38: i64, %39: i64, %40: i64):  // 2 preds: ^bb5, ^bb6
    llvm.br ^bb8
  ^bb8:  // pred: ^bb7
    %41 = llvm.icmp "eq" %33, %1 : i64
    llvm.cond_br %41, ^bb4(%31, %32, %33, %34, %41, %36, %37, %38, %39, %40 : i32, i64, i64, i1, i1, f64, i64, i64, i64, i64), ^bb9
  ^bb9:  // pred: ^bb8
    %42 = llvm.add %37, %4 : i64
    llvm.br ^bb11(%31, %1, %33, %34, %41, %36, %42, %38, %39, %40 : i32, i64, i64, i1, i1, f64, i64, i64, i64, i64)
  ^bb10:  // pred: ^bb2
    llvm.br ^bb11(%6, %4, %8, %9, %10, %11, %12, %13, %14, %15 : i32, i64, i64, i1, i1, f64, i64, i64, i64, i64)
  ^bb11(%43: i32, %44: i64, %45: i64, %46: i1, %47: i1, %48: f64, %49: i64, %50: i64, %51: i64, %52: i64):  // 2 preds: ^bb9, ^bb10
    llvm.br ^bb12
  ^bb12:  // pred: ^bb11
    %53 = llvm.icmp "eq" %44, %1 : i64
    llvm.cond_br %53, ^bb2(%43, %44, %45, %53, %47, %48, %49, %50, %51, %52 : i32, i64, i64, i1, i1, f64, i64, i64, i64, i64), ^bb13
  ^bb13:  // pred: ^bb12
    llvm.return %48 : f64
  }
  llvm.func @_mlir_ciface_func(%arg0: i64, %arg1: i64) -> f64 attributes {llvm.emit_c_interface} {
    %0 = llvm.call @func(%arg0, %arg1) : (i64, i64) -> f64
    llvm.return %0 : f64
  }
}
time elapsed 1.74ms timing breakdown: 1.74ms: MLIR optimized
Testing report
(10, 7)
45.0
45.0