from typing import Any, TypedDict

from egglog import EGraph
from sealir import rvsdg
from sealir.eqsat.rvsdg_convert import egraph_conversion
from sealir.eqsat.rvsdg_eqsat import GraphRoot
from sealir.eqsat.rvsdg_extract import egraph_extraction

# We'll be extending from chapter 1.
from ch01_basic_compiler import (
    backend,
)
from ch01_basic_compiler import compiler_pipeline as pipeline_jit_compile
from ch01_basic_compiler import (
    jit_compile,
    pipeline_backend,
    pipeline_frontend,
    run_test,
)
from utils import IN_NOTEBOOK, Report, display

class EGraphOutput(TypedDict):
    """Values handed on by the EGraph conversion and EGraph action stages."""

    # The EGraph instance into which the program root has been registered.
    egraph: EGraph
    # The ``GraphRoot`` term wrapping the converted RVSDG expression.
    egraph_root: GraphRoot

@pipeline_frontend.extend
def pipeline_egraph_conversion(
    rvsdg_expr, pipeline_report=Report.Sink()
) -> EGraphOutput:
    """Pipeline stage: encode an RVSDG expression into a new EGraph.

    Parameters
    ----------
    rvsdg_expr
        The RVSDG expression to convert. It is also the key used to look
        up the converted term in the mapping returned by
        ``egraph_conversion``.
    pipeline_report
        Report that receives a nested "EGraph Conversion" section.

    Returns
    -------
    EGraphOutput
        The new EGraph under ``"egraph"`` and the ``GraphRoot`` wrapping
        the converted expression under ``"egraph_root"``.
    """
    with pipeline_report.nest(
        "EGraph Conversion", default_expanded=True
    ) as section:
        converted_terms = egraph_conversion(rvsdg_expr)
        graph = EGraph()
        # Wrap the converted top-level term and register it in the graph
        # under the name "root".
        graph_root = GraphRoot(converted_terms[rvsdg_expr])
        graph.let("root", graph_root)
        section.append("EGraph", graph)
        return EGraphOutput(egraph=graph, egraph_root=graph_root)

if __name__ == "__main__":

    # Show the pipeline as it stands after adding the EGraph conversion stage.
    display(pipeline_egraph_conversion.visualize())

    # Sample input with a two-way branch. It is deliberately left without a
    # docstring or inner comments: the function itself is what gets handed
    # to the pipeline as ``fn``.
    def max_if_else(x, y):
        if x > y:
            return x
        else:
            return y

    # Run the pipeline through the EGraph conversion stage on the sample
    # function and display the collected report.
    report = Report("EGraph Conversion", default_expanded=True)
    cres = pipeline_egraph_conversion(fn=max_if_else, pipeline_report=report)
    report.display()

--------------------------------original source---------------------------------
   5|    def max_if_else(x, y):
   6|        if x > y:
   7|            return x
   8|        else:
   9|            return y
----------------------------------inter source----------------------------------
   1|def transformed_max_if_else(x, y):
   2|    """#file: /tmp/ipykernel_3431/2769577028.py"""
   3|    '#loc: 6:8-9:20'
   4|    if x > y:
   5|        '#loc: 7:12-7:20'
   6|        __scfg_return_value__ = x
   7|    else:
   8|        __scfg_return_value__ = y
   9|    return __scfg_return_value__

transformed_max_if_else = Func (Args (ArgSpec 'x' (PyNone)) (ArgSpec 'y' (PyNone)))
$0 = Region[392] <- !io x y
{
  $1 = PyBinOp > $0[0] $0[1], $0[2]
  $2 = If $1[1] <- $0[0] $0[1] $0[2]
    $3 = Region[442] <- !io x y
    {
      $4 = DbgValue '__scfg_return_value__' $3[1]
    } [615] -> !io=$3[0] __scfg_return_value__=$4 x=$3[1] y=$3[2]
    Else
    $5 = Region[522] <- !io x y
    {
      $6 = DbgValue '__scfg_return_value__' $5[2]
    } [643] -> !io=$5[0] __scfg_return_value__=$6 x=$5[1] y=$5[2]
  Endif
} [714] -> !io=$2[0] !ret=$2[1]

if __name__ == "__main__":
    # Print the built-in help for the extraction entry point that the next
    # pipeline stage calls.
    help(egraph_extraction)

Help on function egraph_extraction in module sealir.eqsat.rvsdg_extract:

egraph_extraction(egraph: 'EGraph', rvsdg_sexpr, *, cost_model=None, converter_class=<class 'sealir.eqsat.rvsdg_extract_details.EGraphToRVSDG'>)

class EGraphExtractionOutput(TypedDict):
    """Values handed on by the EGraph extraction stage."""

    # First element of the pair returned by ``egraph_extraction``.
    cost: float
    # Second element of that pair: the extracted expression, which is later
    # formatted with ``rvsdg.format_rvsdg`` and passed to the backend.
    extracted: Any

@pipeline_egraph_conversion.extend
def pipeline_egraph_extraction(
    egraph, rvsdg_expr, pipeline_report=Report.Sink()
) -> EGraphExtractionOutput:
    """Pipeline stage: extract an expression back out of the EGraph.

    Parameters
    ----------
    egraph
        The EGraph to extract from.
    rvsdg_expr
        The RVSDG expression forwarded to ``egraph_extraction`` alongside
        the EGraph.
    pipeline_report
        Report that receives a nested "EGraph Extraction" section holding
        the cost and the formatted extracted expression.

    Returns
    -------
    EGraphExtractionOutput
        The extraction cost under ``"cost"`` and the extracted expression
        under ``"extracted"``.
    """
    with pipeline_report.nest(
        "EGraph Extraction", default_expanded=True
    ) as section:
        best_cost, best_expr = egraph_extraction(egraph, rvsdg_expr)
        section.append("Cost", best_cost)
        # Log the textual rendering; the expression object itself is what
        # gets returned to later stages.
        rendered = rvsdg.format_rvsdg(best_expr)
        section.append("Extracted", rendered)
        return EGraphExtractionOutput(cost=best_cost, extracted=best_expr)

if __name__ == "__main__":
    # Round-trip the ``max_if_else`` sample (defined in the earlier demo
    # block) through conversion and extraction, then display the report.
    report = Report("EGraph Extraction", default_expanded=True)
    cres = pipeline_egraph_extraction(fn=max_if_else, pipeline_report=report)
    report.display()

--------------------------------original source---------------------------------
   5|    def max_if_else(x, y):
   6|        if x > y:
   7|            return x
   8|        else:
   9|            return y
----------------------------------inter source----------------------------------
   1|def transformed_max_if_else(x, y):
   2|    """#file: /tmp/ipykernel_3431/2769577028.py"""
   3|    '#loc: 6:8-9:20'
   4|    if x > y:
   5|        '#loc: 7:12-7:20'
   6|        __scfg_return_value__ = x
   7|    else:
   8|        __scfg_return_value__ = y
   9|    return __scfg_return_value__

transformed_max_if_else = Func (Args (ArgSpec 'x' (PyNone)) (ArgSpec 'y' (PyNone)))
$0 = Region[392] <- !io x y
{
  $1 = PyBinOp > $0[0] $0[1], $0[2]
  $2 = If $1[1] <- $0[0] $0[1] $0[2]
    $3 = Region[442] <- !io x y
    {
      $4 = DbgValue '__scfg_return_value__' $3[1]
    } [615] -> !io=$3[0] __scfg_return_value__=$4 x=$3[1] y=$3[2]
    Else
    $5 = Region[522] <- !io x y
    {
      $6 = DbgValue '__scfg_return_value__' $5[2]
    } [643] -> !io=$5[0] __scfg_return_value__=$6 x=$5[1] y=$5[2]
  Endif
} [714] -> !io=$2[0] !ret=$2[1]

5556347.0

transformed_max_if_else = Func (Args (ArgSpec 'x' (PyNone)) (ArgSpec 'y' (PyNone)))
$0 = Region[837] <- !io x y
{
  $1 = PyBinOp > $0[0] $0[1], $0[2]
  $2 = If $1[1] <- $0[0] $0[1] $0[2]
    $3 = Region[874] <- !io x y
    {
      $4 = DbgValue '__scfg_return_value__' $3[1]
    } [939] -> !io=$3[0] __scfg_return_value__=$4 x=$3[1] y=$3[2]
    Else
    $5 = Region[950] <- !io x y
    {
      $6 = DbgValue '__scfg_return_value__' $5[2]
    } [1015] -> !io=$5[0] __scfg_return_value__=$6 x=$5[1] y=$5[2]
  Endif
} [1052] -> !io=$2[0] !ret=$2[1]

def egraph_action(
    egraph: EGraph,
    egraph_root: GraphRoot,
    pipeline_report=Report.Sink(),
) -> EGraphOutput:
    """Placeholder middle-end stage that passes the EGraph through unchanged.

    For now the middle end is just an identity function; it exists to
    exercise the encoding into, and back out of, the EGraph.

    Parameters
    ----------
    egraph
        The EGraph produced by the conversion stage.
    egraph_root
        The ``GraphRoot`` term registered in ``egraph``.
    pipeline_report
        Report that receives a nested "EGraph Action" section containing
        the EGraph.

    Returns
    -------
    EGraphOutput
        The same ``egraph`` and ``egraph_root`` that were passed in.
    """
    passthrough = EGraphOutput(egraph=egraph, egraph_root=egraph_root)
    with pipeline_report.nest("EGraph Action") as section:
        section.append("EGraph", egraph)
    return passthrough

# Build the middle-end pipeline by adding ``egraph_action`` to the
# extraction pipeline.
# NOTE(review): index -1 presumably places the new stage just before the
# final (extraction) stage — confirm against the pipeline's ``insert``
# semantics.
pipeline_middle_end = pipeline_egraph_extraction.insert(-1, egraph_action)

if __name__ == "__main__":
    # Show the pipeline with the new stage in place.
    display(pipeline_middle_end.visualize())

class BackendOutput(TypedDict):
    """Values handed on by the backend stage."""

    # Result of ``jit_compile`` for the generated module.
    jit_func: Any
    # Result of ``backend`` for the extracted expression; it is logged in
    # the report under the label "LLVM".
    llmod: Any

@pipeline_middle_end.extend
def pipeline_backend(
    extracted, pipeline_report=Report.Sink()
) -> BackendOutput:
    """Pipeline stage: lower the extracted expression and JIT-compile it.

    This definition rebinds the name ``pipeline_backend`` that is also
    imported from ``ch01_basic_compiler`` at the top of the file.

    Parameters
    ----------
    extracted
        The expression produced by the EGraph extraction stage.
    pipeline_report
        Report that receives a nested "Backend" section containing the
        generated module.

    Returns
    -------
    BackendOutput
        The compiled callable under ``"jit_func"`` and the generated
        module under ``"llmod"``.
    """
    with pipeline_report.nest("Backend", default_expanded=True) as section:
        llvm_module = backend(extracted)
        section.append("LLVM", llvm_module)
        compiled = jit_compile(llvm_module, extracted)
        return BackendOutput(jit_func=compiled, llmod=llvm_module)

# Name under which this chapter exposes its complete pipeline
# (frontend, EGraph middle end, backend).
compiler_pipeline = pipeline_backend

if __name__ == "__main__":
    # Show the complete pipeline.
    display(compiler_pipeline.visualize())

if __name__ == "__main__":

    # Sample input containing a ``for`` loop. It is deliberately left
    # without a docstring or inner comments: the function itself is what
    # gets handed to the pipeline as ``fn``.
    def sum_ints(n):
        c = 1 + n
        for i in range(n):
            c += i
        return c

    # Run the complete pipeline on the sample and keep the compiled callable.
    report = Report("Compiler Pipeline", default_expanded=True)
    jt = compiler_pipeline(fn=sum_ints, pipeline_report=report).jit_func
    report.display()
    # NOTE(review): ``run_test`` comes from chapter 1; presumably it calls
    # both the original and the compiled function with the argument tuple
    # (12,) and compares the results — confirm against its definition.
    run_test(sum_ints, jt, (12,), verbose=True)

--------------------------------original source---------------------------------
   3|    def sum_ints(n):
   4|        c = 1 + n
   5|        for i in range(n):
   6|            c += i
   7|        return c
----------------------------------inter source----------------------------------
   1|def transformed_sum_ints(n):
   2|    """#file: /tmp/ipykernel_3431/6310283.py"""
   3|    '#loc: 4:8-4:17'
   4|    c = 1 + n
   5|    '#loc: 5:8-6:18'
   6|    __scfg_iterator_1__ = iter(range(n))
   7|    i = None
   8|    __scfg_loop_cont_1__ = True
   9|    while __scfg_loop_cont_1__:
  10|        __scfg_iter_last_1__ = i
  11|        i = next(__scfg_iterator_1__, '__scfg_sentinel__')
  12|        if i != '__scfg_sentinel__':
  13|            '#loc: 6:12-6:18'
  14|            c += i
  15|            __scfg_backedge_var_0__ = 0
  16|        else:
  17|            __scfg_backedge_var_0__ = 1
  18|        __scfg_loop_cont_1__ = not __scfg_backedge_var_0__
  19|    i = __scfg_iter_last_1__
  20|    '#loc: 7:8-7:16'
  21|    return c

transformed_sum_ints = Func (Args (ArgSpec 'n' (PyNone)))
$0 = Region[958] <- !io n
{
  $1 = PyInt 1
  $2 = PyBinOp + $0[0] $1, $0[1]
  $3 = PyLoadGlobal $2[0] 'range'
  $4 = PyCall $3 $2[0] $0[1]
  $5 = PyLoadGlobal $4[0] 'iter'
  $6 = PyCall $5 $4[0] $4[1]
  $7 = Undef __scfg_backedge_var_0__
  $8 = Undef __scfg_iter_last_1__
  $9 = DbgValue '__scfg_iterator_1__' $6[1]
  $10 = PyBool True
  $11 = DbgValue '__scfg_loop_cont_1__' $10
  $12 = DbgValue 'c' $2[1]
  $13 = PyNone
  $14 = DbgValue 'i' $13
  $15 = Loop [1860] <- $6[0] $7 $8 $9 $11 $12 $14 $0[1]
    $16 = Region[1159] <- !io __scfg_backedge_var_0__ __scfg_iter_last_1__ __scfg_iterator_1__ __scfg_loop_cont_1__ c i n
    {
      $17 = PyLoadGlobal $16[0] 'next'
      $18 = PyStr '__scfg_sentinel__'
      $19 = PyCall $17 $16[0] $16[3], $18
      $20 = DbgValue '__scfg_iter_last_1__' $16[6]
      $21 = DbgValue 'i' $19[1]
      $22 = PyStr '__scfg_sentinel__'
      $23 = PyBinOp != $19[0] $21, $22
      $24 = If $23[1] <- $19[0] $16[1] $20 $16[3] $16[4] $16[5] $21 $16[7]
        $25 = Region[1307] <- !io __scfg_backedge_var_0__ __scfg_iter_last_1__ __scfg_iterator_1__ __scfg_loop_cont_1__ c i n
        {
          $26 = PyInplaceBinOp + $25[0], $25[5], $25[6]
          $27 = PyInt 0
          $28 = DbgValue '__scfg_backedge_var_0__' $27
        } [1633] -> !io=$26[0] __scfg_backedge_var_0__=$28 __scfg_iter_last_1__=$25[2] __scfg_iterator_1__=$25[3] __scfg_loop_cont_1__=$25[4] c=$26[1] i=$25[6] n=$25[7]
        Else
        $29 = Region[1462] <- !io __scfg_backedge_var_0__ __scfg_iter_last_1__ __scfg_iterator_1__ __scfg_loop_cont_1__ c i n
        {
          $30 = PyInt 1
          $31 = DbgValue '__scfg_backedge_var_0__' $30
        } [1685] -> !io=$29[0] __scfg_backedge_var_0__=$31 __scfg_iter_last_1__=$29[2] __scfg_iterator_1__=$29[3] __scfg_loop_cont_1__=$29[4] c=$29[5] i=$29[6] n=$29[7]
      Endif
      $32 = PyUnaryOp not $24[0] $24[1]
      $33 = DbgValue '__scfg_loop_cont_1__' $32[1]
    } [1847] -> !_loopcond_0002=$33 !io=$32[0] __scfg_backedge_var_0__=$24[1] __scfg_iter_last_1__=$24[2] __scfg_iterator_1__=$24[3] __scfg_loop_cont_1__=$33 c=$24[5] i=$24[6] n=$24[7]
  EndLoop
} [1997] -> !io=$15[0] !ret=$15[5]

168527825595.0

transformed_sum_ints = Func (Args (ArgSpec 'n' (PyNone)))
$0 = Region[2240] <- !io n
{
  $1 = PyInt 1
  $2 = PyBinOp + $0[0] $1, $0[1]
  $3 = PyLoadGlobal $2[0] 'range'
  $4 = PyCall $3 $2[0] $0[1]
  $5 = PyLoadGlobal $4[0] 'iter'
  $6 = PyCall $5 $4[0] $4[1]
  $7 = Undef __scfg_backedge_var_0__
  $8 = Undef __scfg_iter_last_1__
  $9 = DbgValue '__scfg_iterator_1__' $6[1]
  $10 = PyBool True
  $11 = DbgValue '__scfg_loop_cont_1__' $10
  $12 = DbgValue 'c' $2[1]
  $13 = PyNone
  $14 = DbgValue 'i' $13
  $15 = Loop [2977] <- $6[0] $7 $8 $9 $11 $12 $14 $0[1]
    $16 = Region[2249] <- !io __scfg_backedge_var_0__ __scfg_iter_last_1__ __scfg_iterator_1__ __scfg_loop_cont_1__ c i n
    {
      $17 = PyLoadGlobal $16[0] 'next'
      $18 = PyStr '__scfg_sentinel__'
      $19 = PyCall $17 $16[0] $16[3], $18
      $20 = DbgValue '__scfg_iter_last_1__' $16[6]
      $21 = DbgValue 'i' $19[1]
      $22 = PyBinOp != $19[0] $21, $18
      $23 = If $22[1] <- $19[0] $16[1] $20 $16[3] $16[4] $16[5] $21 $16[7]
        $24 = Region[2335] <- !io __scfg_backedge_var_0__ __scfg_iter_last_1__ __scfg_iterator_1__ __scfg_loop_cont_1__ c i n
        {
          $25 = PyInplaceBinOp + $24[0], $24[5], $24[6]
          $26 = PyInt 0
          $27 = DbgValue '__scfg_backedge_var_0__' $26
        } [2466] -> !io=$25[0] __scfg_backedge_var_0__=$27 __scfg_iter_last_1__=$24[2] __scfg_iterator_1__=$24[3] __scfg_loop_cont_1__=$24[4] c=$25[1] i=$24[6] n=$24[7]
        Else
        $28 = Region[2481] <- !io __scfg_backedge_var_0__ __scfg_iter_last_1__ __scfg_iterator_1__ __scfg_loop_cont_1__ c i n
        {
          $29 = DbgValue '__scfg_backedge_var_0__' $1
        } [2595] -> !io=$28[0] __scfg_backedge_var_0__=$29 __scfg_iter_last_1__=$28[2] __scfg_iterator_1__=$28[3] __scfg_loop_cont_1__=$28[4] c=$28[5] i=$28[6] n=$28[7]
      Endif
      $30 = PyUnaryOp not $23[0] $23[1]
      $31 = DbgValue '__scfg_loop_cont_1__' $30[1]
    } [2788] -> !_loopcond_0002=$31 !io=$30[0] __scfg_backedge_var_0__=$23[1] __scfg_iter_last_1__=$23[2] __scfg_iterator_1__=$23[3] __scfg_loop_cont_1__=$31 c=$23[5] i=$23[6] n=$23[7]
  EndLoop
} [3009] -> !io=$15[0] !ret=$15[5]

; ModuleID = ""
target triple = "unknown-unknown-unknown"
target datalayout = ""

define ptr @"foo"(ptr %".1")
{
.3:
  %".4" = alloca ptr
  store ptr null, ptr %".4"
  br label %".6"
.6:
  br label %".8"
.8:
  %".10" = call ptr @"PyLong_FromSsize_t"(i64 1)
  %".11" = call ptr @"PyNumber_Add"(ptr %".10", ptr %".1")
  %"global.range" = inttoptr i64 94131196160960 to ptr
  %".12" = call ptr (ptr, ...) @"PyObject_CallFunctionObjArgs"(ptr %"global.range", ptr %".1", ptr null)
  %"global.iter" = inttoptr i64 140272991505120 to ptr
  %".13" = call ptr (ptr, ...) @"PyObject_CallFunctionObjArgs"(ptr %"global.iter", ptr %".12", ptr null)
  %".14" = call ptr @"PyLong_FromSsize_t"(i64 1)
  call void @"Py_IncRef"(ptr @"_Py_NoneStruct")
  br label %"loopbody"
loopbody:
  %".17" = phi  ptr [null, %".8"], [%".36", %"endif"]
  %".18" = phi  ptr [null, %".8"], [%".37", %"endif"]
  %".19" = phi  ptr [%".13", %".8"], [%".38", %"endif"]
  %".20" = phi  ptr [%".14", %".8"], [%".45", %"endif"]
  %".21" = phi  ptr [%".11", %".8"], [%".40", %"endif"]
  %".22" = phi  ptr [@"_Py_NoneStruct", %".8"], [%".41", %"endif"]
  %".23" = phi  ptr [%".1", %".8"], [%".42", %"endif"]
  %"global.next" = inttoptr i64 140272991505600 to ptr
  %".24" = bitcast ptr @"const_string" to ptr
  %".25" = call ptr @"PyUnicode_FromString"(ptr %".24")
  %".26" = call ptr (ptr, ...) @"PyObject_CallFunctionObjArgs"(ptr %"global.next", ptr %".19", ptr %".25", ptr null)
  %".27" = call ptr @"PyObject_RichCompare"(ptr %".26", ptr %".25", i32 3)
  %".28" = call i32 @"PyObject_IsTrue"(ptr %".27")
  %".29" = icmp ne i32 0, %".28"
  br i1 %".29", label %"then", label %"else"
endloop:
  ret ptr %".40"
then:
  %".31" = call ptr @"PyNumber_InPlaceAdd"(ptr %".21", ptr %".26")
  %".32" = call ptr @"PyLong_FromSsize_t"(i64 0)
  br label %"endif"
else:
  %".34" = call ptr @"PyLong_FromSsize_t"(i64 1)
  br label %"endif"
endif:
  %".36" = phi  ptr [%".32", %"then"], [%".34", %"else"]
  %".37" = phi  ptr [%".22", %"then"], [%".22", %"else"]
  %".38" = phi  ptr [%".19", %"then"], [%".19", %"else"]
  %".39" = phi  ptr [%".20", %"then"], [%".20", %"else"]
  %".40" = phi  ptr [%".31", %"then"], [%".21", %"else"]
  %".41" = phi  ptr [%".26", %"then"], [%".26", %"else"]
  %".42" = phi  ptr [%".23", %"then"], [%".23", %"else"]
  %".43" = call i32 @"PyObject_Not"(ptr %".36")
  %".44" = zext i32 %".43" to i64
  %".45" = call ptr @"PyBool_FromLong"(i64 %".44")
  %".46" = call i32 @"PyObject_IsTrue"(ptr %".45")
  %".47" = icmp ne i32 0, %".46"
  br i1 %".47", label %"loopbody", label %"endloop"
}

declare ptr @"PyLong_FromSsize_t"(i64 %".1")

declare ptr @"PyNumber_Add"(ptr %".1", ptr %".2")

declare ptr @"PyObject_CallFunctionObjArgs"(ptr %".1", ...)

@"_Py_NoneStruct" = external global i8
declare void @"Py_IncRef"(ptr %".1")

@"const_string" = internal constant [18 x i8] c"__scfg_sentinel__\00"
declare ptr @"PyUnicode_FromString"(ptr %".1")

declare ptr @"PyObject_RichCompare"(ptr %".1", ptr %".2", i32 %".3")

declare i32 @"PyObject_IsTrue"(ptr %".1")

declare ptr @"PyNumber_InPlaceAdd"(ptr %".1", ptr %".2")

declare i32 @"PyObject_Not"(ptr %".1")

declare ptr @"PyBool_FromLong"(i64 %".1")

(12,)

79

79

Chapter 2: Adding the EGraph Middle-End

Imports and Setup

Simple EGraph Roundtripping

Convert RVSDG to EGraph

Extract from EGraph

Extended Compiler Pipeline

Example: Testing the EGraph Pipeline