diff --git a/Strata/Languages/Python/PythonToLaurel.lean b/Strata/Languages/Python/PythonToLaurel.lean index 8a3848421..0cb17b9e3 100644 --- a/Strata/Languages/Python/PythonToLaurel.lean +++ b/Strata/Languages/Python/PythonToLaurel.lean @@ -72,7 +72,7 @@ structure TranslationContext where /-- List of function signatures -/ functionSignatures : List PythonFunctionDecl := [] /-- Map from prelude procedure names to their full signatures -/ - preludeProcedures : List (String × CoreProcedureSignature) := [] + preludeProcedures : Std.HashMap String CoreProcedureSignature := {} /-- Names of prelude functions (non-procedure callables) -/ preludeFunctions : List String := [] /-- Names of user-defined functions -/ @@ -82,15 +82,15 @@ structure TranslationContext where /-- Map (Classname, Attribute) to its type -/ classAttributeType: Std.HashMap (String × String) String := {} /-- Names of prelude types -/ - preludeTypes : List String := [] + preludeTypes : Std.HashSet String := {} /-- Overload dispatch table from PySpec: function name → overloads -/ overloadTable : Specs.ToLaurel.OverloadTable := {} /-- Behavior for unmodeled functions -/ unmodeledBehavior : UnmodeledFunctionBehavior := .havocOutputs /-- File path for source location metadata -/ filePath : String := "" - /-- List of defined composite types (classes) -/ - compositeTypes : List CompositeType := [] + /-- Known composite type names (user-defined classes + PySpec types) -/ + compositeTypeNames : Std.HashSet String := {} /-- Track current class during method translation -/ currentClassName : Option String := none deriving Inhabited @@ -189,6 +189,13 @@ def pythonTypeToCoreType (typeStr : String) : Option String := | "timedelta" => some PyLauType.Int | _ => none +/-- Check if a type string is recognized (primitive, core mapping, or prelude/composite type). -/ +def isKnownType (ctx : TranslationContext) (typeStr : String) : Bool := + typeStr ∈ ["int", "bool", "str"] || + (pythonTypeToCoreType typeStr).isSome || + typeStr ∈ ctx.compositeTypeNames || + typeStr ∈ ctx.preludeTypes + /-- Translate Python type annotation to Laurel HighType -/ def translateType (ctx : TranslationContext) (typeStr : String) : Except TranslationError HighTypeMd := match typeStr with @@ -200,8 +207,11 @@ def translateType (ctx : TranslationContext) (typeStr : String) : Except Transla match pythonTypeToCoreType typeStr with | some coreType => .ok (mkCoreType coreType) | none => - -- Check if it's a prelude type - if ctx.preludeTypes.contains typeStr then + -- Check if it matches a known composite type (user-defined or PySpec) + if typeStr ∈ ctx.compositeTypeNames then + .ok (mkHighTypeMd (.UserDefined typeStr)) + -- Check if it's a prelude type (Core types like DictStrAny) + else if typeStr ∈ ctx.preludeTypes then .ok (mkCoreType typeStr) else -- Map it to a core PyAnyType @@ -258,7 +268,9 @@ def resolveDispatch (ctx : TranslationContext) arguments (expected a string literal first argument)") match args[0] with | .Constant _ (.ConString _ s) _ => - return (fnOverloads.get? s.val).map (·.name) + return (fnOverloads.get? s.val).map fun ident => + if ident.pythonModule.isEmpty then ident.name + else ident.pythonModule ++ "_" ++ ident.name | _ => return none /-! ## Expression Translation -/ @@ -266,8 +278,8 @@ def resolveDispatch (ctx : TranslationContext) /-- Check if a function has a model (is in prelude or user-defined) -/ def hasModel (ctx : TranslationContext) (funcName : String) : Bool := - ctx.preludeProcedures.any (·.1 == funcName) || ctx.userFunctions.contains funcName || ctx.preludeFunctions.contains funcName || - ctx.compositeTypes.any (fun ct => ct.name == funcName) + funcName ∈ ctx.preludeProcedures || funcName ∈ ctx.userFunctions || ctx.preludeFunctions.contains funcName || + funcName ∈ ctx.compositeTypeNames def ListAny_mk (es: List StmtExprMd) : StmtExprMd := match es with | [] => mkStmtExprMd (.StaticCall "ListAny_nil" []) @@ -679,7 +691,7 @@ These functions are mutually recursive. def withException (ctx : TranslationContext) (funcname: String) : Bool := if funcname ∈ ctx.preludeFunctions then false else - match ctx.preludeProcedures.lookup funcname with + match ctx.preludeProcedures[funcname]? with | some sig => sig.outputs.length > 0 && sig.outputs.getLast! == "Error" | _ => false @@ -712,7 +724,7 @@ partial def translateAssign (ctx : TranslationContext) let targetExpr := mkStmtExprMd (StmtExpr.Identifier n.val) let assignStmts := match rhs_trans.val with | .StaticCall fnname args => - if ctx.compositeTypes.any (fun ct => ct.name == fnname) then + if fnname.text ∈ ctx.compositeTypeNames then let newExpr := mkStmtExprMd (StmtExpr.New fnname) let varType := mkHighTypeMd (.UserDefined fnname) let newStmt := mkStmtExprMd (StmtExpr.LocalVariable n.val varType (some newExpr)) @@ -721,12 +733,18 @@ partial def translateAssign (ctx : TranslationContext) else if withException ctx fnname.text then [mkStmtExprMd (StmtExpr.Assign [targetExpr, maybeExceptVar] rhs_trans)] else [mkStmtExprMd (StmtExpr.Assign [targetExpr] rhs_trans)] + | .New className => + let varType := mkHighTypeMd (.UserDefined className) + let newStmt := mkStmtExprMd (StmtExpr.LocalVariable n.val varType (some rhs_trans)) + [newStmt] | _ => [mkStmtExprMd (StmtExpr.Assign [targetExpr] rhs_trans)] newctx := match rhs_trans.val with | .StaticCall fnname _ => - if ctx.compositeTypes.any (fun ct => ct.name == fnname) then + if fnname.text ∈ ctx.compositeTypeNames then {newctx with variableTypes:= newctx.variableTypes ++ [(n.val, fnname.text)]} else newctx + | .New className => + {newctx with variableTypes:= newctx.variableTypes ++ [(n.val, className.text)]} | _=> newctx if n.val ∈ newctx.variableTypes.unzip.1 then return (newctx, assignStmts) @@ -735,7 +753,10 @@ partial def translateAssign (ctx : TranslationContext) let type := match annotation with | none => inferType | some annotation => - pyExprToString annotation + let annStr := pyExprToString annotation + -- If the annotation isn't a recognized type, prefer the + -- inferred type from the RHS (e.g., overload dispatch). + if isKnownType ctx annStr then annStr else inferType let initStmt := mkStmtExprMd (StmtExpr.LocalVariable n.val AnyTy AnyNone) newctx := {ctx with variableTypes:=(n.val, type)::ctx.variableTypes} return (newctx, initStmt::assignStmts) @@ -1049,7 +1070,7 @@ def translateFunction (ctx : TranslationContext) (funcDecl : PythonFunctionDecl) let mut inputs : List Parameter := [] inputs := funcDecl.args.map (fun (name, ty, _) => - if ctx.compositeTypes.any (fun ct => ct.name == ty) then + if ty ∈ ctx.compositeTypeNames then { name := name, type := mkHighTypeMd (.UserDefined ty) } else { name := name, type := AnyTy}) @@ -1298,18 +1319,113 @@ def getPreludeProcedures (prelude: Core.Program) : List String := |.proc => some decl.name.name | _ => none) -/-- Translate Python module to Laurel Program -/ -def pythonToLaurel (prelude: Core.Program) +/-- Information extracted from the prelude that `pythonToLaurel'` needs. + This decouples the translation from a specific `Core.Program` representation, + allowing the caller to supply prelude info from Laurel-level declarations. -/ +structure PreludeInfo where + /-- Type names (datatype, synonym, constructor names) -/ + types : Std.HashSet String := {} + /-- Composite type names (need UserDefined in Laurel, not TCore) -/ + compositeTypes : Std.HashSet String := {} + /-- Procedure names with input/output type signatures -/ + procedures : Std.HashMap String CoreProcedureSignature := {} + /-- Procedure signatures as PythonFunctionDecl (with arg names) -/ + functionSignatures : List PythonFunctionDecl := [] + /-- Function names (Core functions + datatype constructors/destructors/testers) -/ + functions : List String := [] + /-- Procedure names (non-function callables) -/ + procedureNames : List String := [] + +/-- Extract `PreludeInfo` from a `Core.Program`. -/ +def PreludeInfo.ofCoreProgram (prelude : Core.Program) : PreludeInfo where + types := .ofList (extractPreludeTypes prelude) + procedures := .ofList (extractPreludeProcedures prelude) + functionSignatures := preludeSignatureToPythonFunctionDecl prelude + functions := getPreludeFunctions prelude + procedureNames := getPreludeProcedures prelude + +/-- Convert a Laurel `HighType` to the same string name that `getTypeName` would + produce from the corresponding Core `LMonoTy` after translation. -/ +def getHighTypeName : Laurel.HighType → String + | .TInt => "int" + | .TBool => "bool" + | .TString => "string" + | .TVoid => "bool" + | .TFloat64 => "real" + | .THeap => "Heap" + | .TTypedField _ => "Field" + | .TCore s => s + | .UserDefined name => name.text + | .TSet _ => "Map" + | .TMap _ _ => "Map" + | _ => "unknown" + +/-- Extract `PreludeInfo` from a Laurel `Program`. -/ +def PreludeInfo.ofLaurelProgram (prog : Laurel.Program) : PreludeInfo where + types := + prog.types.foldl (init := {}) fun s td => + match td with + | .Composite _ => s + | .Constrained ct => s.insert ct.name.text + | .Datatype dt => s.insert dt.name.text + compositeTypes := + prog.types.foldl (init := {}) fun s td => + match td with + | .Composite ct => s.insert ct.name.text + | _ => s + procedures := + prog.staticProcedures.foldl (init := {}) fun m p => + if p.body.isExternal || p.isFunctional then m + else + let ins := p.inputs.map fun param => getHighTypeName param.type.val + let outs := p.outputs.map fun param => getHighTypeName param.type.val + m.insert p.name.text { inputs := ins, outputs := outs } + functionSignatures := + prog.staticProcedures.filterMap fun p => + if p.body.isExternal then none + else + let noneexpr : Python.expr SourceRange := .Constant default (.ConNone default) default + let args := p.inputs.map fun param => + (param.name.text, getHighTypeName param.type.val, some noneexpr) + let ret := p.outputs.head?.map fun param => getHighTypeName param.type.val + some { name := p.name.text, args := args, hasKwargs := false, ret := ret } + functions := + let funcNames := prog.staticProcedures.filterMap fun p => + if p.body.isExternal || !p.isFunctional then none else some p.name.text + let dtFuncs := prog.types.flatMap fun td => + match td with + | .Datatype dt => + let ctors := dt.constructors.map fun c => c.name.text + let destrs := dt.constructors.flatMap fun c => + c.args.flatMap fun a => + [dt.name.text ++ ".." ++ a.name.text, + dt.name.text ++ ".." ++ a.name.text ++ "!"] + let testers := dt.constructors.map fun c => "is" ++ c.name.text + ctors ++ destrs ++ testers + | _ => [] + funcNames ++ dtFuncs + procedureNames := + prog.staticProcedures.filterMap fun p => + if p.body.isExternal || p.isFunctional then none else some p.name.text + +/-- Merge two `PreludeInfo` values by concatenating each field. -/ +def PreludeInfo.merge (a b : PreludeInfo) : PreludeInfo where + types := b.types.fold (init := a.types) fun s n => s.insert n + compositeTypes := b.compositeTypes.fold (init := a.compositeTypes) fun s n => s.insert n + procedures := b.procedures.fold (init := a.procedures) fun m k v => m.insert k v + functionSignatures := a.functionSignatures ++ b.functionSignatures + functions := a.functions ++ b.functions + procedureNames := a.procedureNames ++ b.procedureNames + +/-- Translate Python module to Laurel Program using pre-extracted prelude info. -/ +def pythonToLaurel' (info : PreludeInfo) (pyModule : Python.Command SourceRange) - (prev_ctx: Option TranslationContext:= none) + (prev_ctx: Option TranslationContext := none) (filePath : String := "") (overloadTable : Specs.ToLaurel.OverloadTable := {}) : Except TranslationError (Laurel.Program × TranslationContext) := do match pyModule with | .Module _ body _ => do - let preludeProcedures := extractPreludeProcedures prelude - let preludeTypes := extractPreludeTypes prelude - -- Collect user function names let userFunctions := body.val.toList.filterMap fun stmt => match stmt with @@ -1318,18 +1434,19 @@ def pythonToLaurel (prelude: Core.Program) -- FIRST PASS: Collect all class definitions let mut compositeTypes : List CompositeType := [] + let mut compositeTypeNames := info.compositeTypes for stmt in body.val do match stmt with | .ClassDef _ _ _ _ _ _ _ => - -- Create initial context with just prelude info for class translation let initCtx : TranslationContext := { - preludeProcedures := preludeProcedures, - preludeTypes := preludeTypes, - compositeTypes := compositeTypes, + preludeProcedures := info.procedures, + preludeTypes := info.types, + compositeTypeNames := compositeTypeNames, filePath := filePath } let composite ← translateClass initCtx stmt compositeTypes := compositeTypes ++ [composite] + compositeTypeNames := compositeTypeNames.insert composite.name.text | _ => pure () let mut ctx : TranslationContext := match prev_ctx with @@ -1337,12 +1454,12 @@ def pythonToLaurel (prelude: Core.Program) | _ => { currentClassName := none, - preludeProcedures := preludeProcedures, - functionSignatures := preludeSignatureToPythonFunctionDecl prelude - preludeFunctions := getPreludeFunctions prelude - preludeTypes := preludeTypes, + preludeProcedures := info.procedures, + functionSignatures := info.functionSignatures + preludeFunctions := info.functions + preludeTypes := info.types, userFunctions := userFunctions, - compositeTypes := compositeTypes, + compositeTypeNames := compositeTypeNames, overloadTable := overloadTable, filePath := filePath } @@ -1375,49 +1492,15 @@ def pythonToLaurel (prelude: Core.Program) inputs := [], outputs := [], preconditions := [], - determinism := .deterministic none, --TODO: need to set reads - decreases := none, - body := .Transparent bodyBlock - md := default - isFunctional := false - } - - /- -Compute partial Laurel functions and procedures from the Core functions and procedures -These are needed by the Laurel pipeline to determine how to translate calls. -In the future, we will replace this Core=>Laurel translation by defining the Python prelude -in Laurel. - -/ - let preludeFunctions : List Procedure := (getPreludeFunctions prelude).map (λ funcname => - { - name := { text:= funcname}, - inputs := [], - outputs := [], - preconditions := [], determinism := .deterministic none, decreases := none, - body := .External - md := default - isFunctional := true - } - ) - - let preludeProcedures : List Procedure := (getPreludeProcedures prelude).map (λ funcname => - { - name := { text:= funcname}, - inputs := [], - outputs := [], - preconditions := [], - determinism := .deterministic none, - decreases := none, - body := .External + body := .Transparent bodyBlock md := default isFunctional := false } - ) let program : Laurel.Program := { - staticProcedures := preludeFunctions ++ preludeProcedures ++ procedures ++ [mainProc] + staticProcedures := procedures ++ [mainProc] staticFields := [] types := compositeTypes.map TypeDefinition.Composite constants := [] @@ -1427,6 +1510,37 @@ in Laurel. | _ => throw (.internalError "Expected Module") +/-- Generate External procedure stubs for prelude names so the Laurel + `resolve` pass can see them. -/ +def preludeStubs (info : PreludeInfo) : List Laurel.Procedure := + let functionStubs := info.functions.map fun funcname => + { name := { text := funcname }, inputs := [], outputs := [], + preconditions := [], determinism := .deterministic none, + decreases := none, body := .External, md := default, + isFunctional := true } + let procedureStubs := info.procedureNames.map fun funcname => + { name := { text := funcname }, inputs := [], outputs := [], + preconditions := [], determinism := .deterministic none, + decreases := none, body := .External, md := default, + isFunctional := false } + functionStubs ++ procedureStubs + +/-- Translate Python module to Laurel Program. + Delegates to `pythonToLaurel'` after extracting prelude info, + then prepends External stubs so the Laurel resolve pass can + see prelude names. -/ +def pythonToLaurel (prelude: Core.Program) + (pyModule : Python.Command SourceRange) + (prev_ctx: Option TranslationContext := none) + (filePath : String := "") + (overloadTable : Specs.ToLaurel.OverloadTable := {}) + : Except TranslationError (Laurel.Program × TranslationContext) := do + let info := PreludeInfo.ofCoreProgram prelude + let (program, ctx) ← pythonToLaurel' info pyModule prev_ctx filePath overloadTable + let stubs := preludeStubs info + return ({ program with + staticProcedures := stubs ++ program.staticProcedures }, ctx) + end -- public section end Strata.Python diff --git a/Strata/Languages/Python/ReadPython.lean b/Strata/Languages/Python/ReadPython.lean index 472f4cf75..59151dd03 100644 --- a/Strata/Languages/Python/ReadPython.lean +++ b/Strata/Languages/Python/ReadPython.lean @@ -43,70 +43,100 @@ private def formatParseFailureStderr (stderr : String) : Option String := do | none => none | none => none + +structure PythonToStrataOptions where + logPerf : Bool := false + extraPythonArgs : Array String := #[] + +/-- Runs an action, logging its elapsed time to stderr if `options.logPerf` is set. -/ +private def runWithOptions {α} (options : PythonToStrataOptions) (label : String) + (action : EIO String α) : EIO String α := do + if !options.logPerf then + return ← action + let start ← IO.monoNanosNow + let result ← action + let stop ← IO.monoNanosNow + let elapsedMs := (stop - start) / 1000000 + let _ ← IO.eprintln s!"[perf] {label}: {elapsedMs}ms" |>.toBaseIO + pure result + +/-- Runs `python -m strata.gen py_to_strata` to convert a Python file into a Strata file. -/ +private def runPyToStrata (pythonCmd : String) (extraPythonArgs : Array String) + (dialectFile pythonFile strataFile : System.FilePath) + : EIO String Unit := do + let spawnArgs : IO.Process.SpawnArgs := { + cmd := pythonCmd + args := extraPythonArgs ++ #["-m", "strata.gen", "py_to_strata", + "--dialect", dialectFile.toString, + pythonFile.toString, + strataFile.toString + ] + cwd := none + inheritEnv := true + stdin := .null + stdout := .piped + stderr := .piped + } + let child ← + match ← IO.Process.spawn spawnArgs |>.toBaseIO with + | .ok c => pure c + | .error msg => throw s!"Could not run Python: {msg}" + let stdout ← IO.asTask child.stdout.readToEnd Task.Priority.dedicated + let stderr ← + match ← child.stderr.readToEnd |>.toBaseIO with + | .ok c => pure c + | .error msg => throw s!"Could not read stderr from Python: {msg}" + let exitCode ← + match ← child.wait |>.toBaseIO with + | .ok c => pure c + | .error msg => throw s!"Could not wait for process exit code: {msg}" + let stdout ← + match stdout.get with + | .ok c => pure c + | .error msg => throw s!"Could not read stdout: {msg}" + if exitCode = 100 then + if let some msg := formatParseFailureStderr stderr then + throw <| s!"{pythonFile} parse error:\n {msg}" + if exitCode ≠ 0 then + let msg := s!"Internal: Python strata.gen failed (exitCode = {exitCode})\n" + let msg := s!"{msg}Standard output:\n" + let msg := stdout.splitOn.foldl (init := msg) fun msg ln => s!"{msg} {ln}\n" + let msg := s!"{msg}Standard error:\n" + let msg := stderr.splitOn.foldl (init := msg) fun msg ln => s!"{msg} {ln}\n" + throw <| msg + +/-- Reads a Strata temp file and parses it into Python AST statements. -/ +private def readParseStrataFile (strataFile : System.FilePath) + : EIO String (Array (Strata.Python.stmt Strata.SourceRange)) := do + let bytes ← + match ← IO.FS.readBinFile strataFile |>.toBaseIO with + | .ok b => pure b + | .error msg => + throw <| s!"Error reading Strata temp file {strataFile}: {msg}" + match readPythonStrataBytes strataFile.toString bytes with + | .ok stmts => pure stmts + | .error msg => throw msg + /-- This runs `python -m strata.gen py_to_strata` to convert a Python file into a Strata file, and then reads it in. This function fails if the environment isn't configured correctly or the Python file cannot be parsed. + -/ def pythonToStrata (dialectFile pythonFile : System.FilePath) - (pythonCmd : String := "python") : - EIO String (Array (Strata.Python.stmt Strata.SourceRange)) := do + (pythonCmd : String := "python") + (options : PythonToStrataOptions := {}) + : EIO String (Array (Strata.Python.stmt Strata.SourceRange)) := do let (_handle, strataFile) ← match ← IO.FS.createTempFile |>.toBaseIO with | .ok p => pure p | .error msg => throw s!"Cannot create temporary file: {msg}" try - let spawnArgs : IO.Process.SpawnArgs := { - cmd := pythonCmd - args := #["-m", "strata.gen", "py_to_strata", - "--dialect", dialectFile.toString, - pythonFile.toString, - strataFile.toString - ] - cwd := none - inheritEnv := true - stdin := .null - stdout := .piped - stderr := .piped - } - let child ← - match ← IO.Process.spawn spawnArgs |>.toBaseIO with - | .ok c => pure c - | .error msg => throw s!"Could not run Python: {msg}" - let stdout ← IO.asTask child.stdout.readToEnd Task.Priority.dedicated - let stderr ← - match ← child.stderr.readToEnd |>.toBaseIO with - | .ok c => pure c - | .error msg => throw s!"Could not read stderr from Python: {msg}" - let exitCode ← - match ← child.wait |>.toBaseIO with - | .ok c => pure c - | .error msg => throw s!"Could not wait for process exit code: {msg}" - let stdout ← - match stdout.get with - | .ok c => pure c - | .error msg => throw s!"Could not read stdout: {msg}" - if exitCode = 100 then - if let some msg := formatParseFailureStderr stderr then - throw <| s!"{pythonFile} parse error:\n {msg}" - if exitCode ≠ 0 then - let msg := s!"Internal: Python strata.gen failed (exitCode = {exitCode})\n" - let msg := s!"{msg}Standard output:\n" - let msg := stdout.splitOn.foldl (init := msg) fun msg ln => s!"{msg} {ln}\n" - let msg := s!"{msg}Standard error:\n" - let msg := stderr.splitOn.foldl (init := msg) fun msg ln => s!"{msg} {ln}\n" - throw <| msg - let bytes ← - match ← IO.FS.readBinFile strataFile |>.toBaseIO with - | .ok b => pure b - | .error msg => - throw <| s!"Error reading Strata temp file {strataFile}: {msg}" - match readPythonStrataBytes strataFile.toString bytes with - | .ok stmts => pure stmts - | .error msg => throw msg + runWithOptions options s!"parsing {pythonFile}" (runPyToStrata pythonCmd options.extraPythonArgs dialectFile pythonFile strataFile) + runWithOptions options s!"reading {pythonFile}" (readParseStrataFile strataFile) finally match ← IO.FS.removeFile strataFile |>.toBaseIO with | .ok () => pure () diff --git a/Strata/Languages/Python/Specs.lean b/Strata/Languages/Python/Specs.lean index 35fe2431a..f7834b29d 100644 --- a/Strata/Languages/Python/Specs.lean +++ b/Strata/Languages/Python/Specs.lean @@ -34,6 +34,10 @@ def baseLogEvent (events : Std.HashSet EventType) let _ ← IO.eprintln s!"[{event}]: {message}" |>.toBaseIO pure () +/-- Creates `PythonToStrataOptions` from an event set. Enables `logPerf` when `"perf"` is present. -/ +def PythonToStrataOptions.ofEventSet (events : Std.HashSet EventType) : PythonToStrataOptions where + logPerf := events.contains "perf" + /-- A Python module name split into its dot-separated components. For example, `typing.List` has components `["typing", "List"]`. @@ -203,6 +207,9 @@ structure PySpecContext where strataDir : System.FilePath /-- Callback that takes a module name and provides filepath to module -/ moduleReader : ModuleReader + /-- Python module name for the current file (e.g., "boto3.dynamodb"). + Used as `pythonModule` for locally-defined classes. -/ + currentModule : String def preludeAtoms : List (String × PythonIdent) := [ ("bool", .builtinsBool), @@ -1211,7 +1218,8 @@ private def resolveBaseClasses (bases : Array (expr SourceRange)) | some (.ident pyIdent _) => result := result.push pyIdent | some (.pyClass clsName _) => - result := result.push { pythonModule := "", name := clsName } + let mod := (← read).currentModule + result := result.push { pythonModule := mod, name := clsName } | _ => specError base.ann s!"Unknown base class '{name}'" | _ => @@ -1408,8 +1416,9 @@ partial def resolveModule (loc : SourceRange) (modName : String) : let pythonCmd := (←read).pythonCmd let dialectFile := (←read).dialectFile + let options := PythonToStrataOptions.ofEventSet (←read).eventSet let commands ← - match ← pythonToStrata (pythonCmd := pythonCmd) dialectFile pythonFile |>.toBaseIO with + match ← pythonToStrata (pythonCmd := pythonCmd) (options := options) dialectFile pythonFile |>.toBaseIO with | .ok r => pure r | .error msg => specError loc msg @@ -1418,7 +1427,7 @@ partial def resolveModule (loc : SourceRange) (modName : String) : let warnings := (←get).warnings let errorCount := errors.size modify fun s => { s with errors := #[], warnings := #[] } - let ctx := { (←read) with pythonFile := pythonFile } + let ctx := { (←read) with pythonFile := pythonFile, currentModule := modName } let initState : PySpecState := { errors, warnings } let (sigs, t) ← translateModuleAux commands |>.run ctx |>.run initState let newWarnings := t.warnings.size - warnings.size @@ -1537,11 +1546,16 @@ partial def translate (body : Array (stmt Strata.SourceRange)) : PySpecM Unit := partial def translateModuleAux (body : Array (Strata.Python.stmt Strata.SourceRange)) : PySpecM (Array Signature) := do + let ctx ← read + let start ← IO.monoNanosNow translate body let s ← get for ⟨cl, t⟩ in s.typeReferences do if let .unresolved loc := t then specError loc s!"Class {cl} not defined." + let stop ← IO.monoNanosNow + let elapsedMs := (stop - start) / 1000000 + baseLogEvent ctx.eventSet "perf" s!"translating {ctx.pythonFile}: {elapsedMs}ms" return s.elements end @@ -1555,6 +1569,7 @@ def translateModule (dialectFile searchPath strataDir pythonFile : System.FilePath) (fileMap : Lean.FileMap) (body : Array (Strata.Python.stmt Strata.SourceRange)) + (currentModule : String) (pythonCmd : String := "python") (events : Std.HashSet EventType := {}) (skipNames : Std.HashSet PythonIdent := {}) : @@ -1579,6 +1594,7 @@ def translateModule throw s!"Could not read file {pythonPath}: {msg}" strataDir := strataDir pythonFile := pythonFile + currentModule := currentModule } let (res, s) ← translateModuleAux body |>.run ctx |>.run {} let fmm ← fileMapsRef.get @@ -1608,10 +1624,12 @@ public def translateFile throw s!"{pythonFile} must be a file." | _ => throw s!"{pythonFile} could not be read: {msg}" + let options := PythonToStrataOptions.ofEventSet events let body ← - match ← pythonToStrata (pythonCmd := pythonCmd) dialectFile pythonFile |>.toBaseIO with + match ← pythonToStrata (pythonCmd := pythonCmd) (options := options) dialectFile pythonFile |>.toBaseIO with | .ok r => pure r | .error msg => throw msg + let currentModule := pythonFile.fileStem.getD pythonFile.toString let (fmm, sigs, errors, warnings) ← translateModule (pythonCmd := pythonCmd) @@ -1623,6 +1641,7 @@ public def translateFile (pythonFile := pythonFile) (.ofString contents) body + currentModule let ppErr (e : SpecError) : EIO String String := match fmm[e.file]? with | none => diff --git a/Strata/Languages/Python/Specs/IdentifyOverloads.lean b/Strata/Languages/Python/Specs/IdentifyOverloads.lean new file mode 100644 index 000000000..74c2350b4 --- /dev/null +++ b/Strata/Languages/Python/Specs/IdentifyOverloads.lean @@ -0,0 +1,293 @@ +/- + Copyright Strata Contributors + + SPDX-License-Identifier: Apache-2.0 OR MIT +-/ + +import Strata.Languages.Python.PythonDialect +import Strata.Languages.Python.Specs.ToLaurel + +/-! +# Overload Resolution for Python Programs + +Walks a Python AST and collects which overloaded service modules +are actually used. Given an `OverloadTable` (from a dispatch +`.pyspec.st.ion` file), the walker finds every `Call` whose +function name appears in the table and whose first argument is +a string literal matching an overload entry, then records the +`pythonModule` of the resolved return type. + +The result is a deduplicated set of module names that can be used +to determine which `.pyspec.st.ion` files are needed. +-/ + +namespace Strata.Python.Specs.IdentifyOverloads + +open Strata.Python (stmt expr) +open Strata.Python.Specs (PythonIdent) +open Strata.Python.Specs.ToLaurel (OverloadTable) + +/-- State accumulated while walking the AST. -/ +structure ResolveState where + modules : Std.HashSet String := {} + warnings : Array String := #[] + +/-- Monad for the overload-resolution walker. -/ +abbrev ResolveM := StateM ResolveState + +/-- Record a warning about an unhandled AST node. -/ +def warn (msg : String) : ResolveM Unit := + modify fun s => + { s with warnings := s.warnings.push msg } + +/-- Record a module name from a resolved overload. -/ +def recordModule (mod : String) : ResolveM Unit := + modify fun s => + { s with modules := s.modules.insert mod } + +/-! ## Recursive AST Walker -/ + +mutual + +/-- Walk an expression, checking `Call` nodes against + the overload table and recursing into sub-expressions. -/ +partial def walkExpr + (tbl : OverloadTable) + (e : expr SourceRange) + : ResolveM Unit := do + match e with + -- The interesting case: function calls + | .Call _ f args kwargs => do + -- Check dispatch + let funcName := match f with + | .Attribute _ _ attr _ => attr.val + | .Name _ n _ => n.val + | _ => "" + match tbl.get? funcName with + | some fnOverloads => + if h : args.val.size > 0 then + match args.val[0] with + | .Constant _ (.ConString _ s) _ => + if let some pyId := fnOverloads.get? s.val then + recordModule pyId.pythonModule + | _ => pure () + | none => pure () + -- Recurse into func, args, keyword values + walkExpr tbl f + for arg in args.val do + walkExpr tbl arg + for kw in kwargs.val do + match kw with + | .mk_keyword _ _ kwVal => walkExpr tbl kwVal + + -- Recurse into sub-expressions for all other forms + | .BoolOp _ _ values => do + for v in values.val do walkExpr tbl v + | .NamedExpr _ target value => + walkExpr tbl target + walkExpr tbl value + | .BinOp _ left _ right => + walkExpr tbl left + walkExpr tbl right + | .UnaryOp _ _ operand => + walkExpr tbl operand + | .Lambda _ _ body => + walkExpr tbl body + | .IfExp _ test body orelse => + walkExpr tbl test + walkExpr tbl body + walkExpr tbl orelse + | .Dict _ keys values => do + for k in keys.val do + match k with + | .some_expr _ ke => walkExpr tbl ke + | _ => pure () + for v in values.val do walkExpr tbl v + | .Set _ elts => do + for e in elts.val do walkExpr tbl e + | .ListComp _ elt gens => + walkExpr tbl elt + for g in gens.val do walkComprehension tbl g + | .SetComp _ elt gens => + walkExpr tbl elt + for g in gens.val do walkComprehension tbl g + | .DictComp _ key value gens => + walkExpr tbl key + walkExpr tbl value + for g in gens.val do walkComprehension tbl g + | .GeneratorExp _ elt gens => + walkExpr tbl elt + for g in gens.val do walkComprehension tbl g + | .Await _ value => + walkExpr tbl value + | .Yield _ value => do + if let some v := value.val then walkExpr tbl v + | .YieldFrom _ value => + walkExpr tbl value + | .Compare _ left _ comparators => do + walkExpr tbl left + for c in comparators.val do walkExpr tbl c + | .FormattedValue _ value _ fmtSpec => do + walkExpr tbl value + if let some fs := fmtSpec.val then + walkExpr tbl fs + | .Interpolation _ value _ _ fmtSpec => do + walkExpr tbl value + if let some fs := fmtSpec.val then + walkExpr tbl fs + | .JoinedStr _ values => do + for v in values.val do walkExpr tbl v + | .TemplateStr _ values => do + for v in values.val do walkExpr tbl v + | .Subscript _ value slice _ => + walkExpr tbl value + walkExpr tbl slice + | .Starred _ value _ => + walkExpr tbl value + | .List _ elts _ => do + for e in elts.val do walkExpr tbl e + | .Tuple _ elts _ => do + for e in elts.val do walkExpr tbl e + | .Slice _ lower upper step => do + if let some l := lower.val then walkExpr tbl l + if let some u := upper.val then walkExpr tbl u + if let some s := step.val then walkExpr tbl s + | .Attribute _ value _ _ => + walkExpr tbl value + -- Leaf nodes — no sub-expressions + | .Constant .. | .Name .. => + pure () + +/-- Walk a comprehension's sub-expressions. -/ +partial def walkComprehension + (tbl : OverloadTable) + (g : Strata.Python.comprehension SourceRange) + : ResolveM Unit := do + match g with + | .mk_comprehension _ target iter ifs _ => + walkExpr tbl target + walkExpr tbl iter + for cond in ifs.val do walkExpr tbl cond + +/-- Walk a single statement, recursing into + sub-expressions and sub-statement bodies. -/ +partial def walkStmt + (tbl : OverloadTable) + (s : stmt SourceRange) + : ResolveM Unit := do + match s with + | .FunctionDef _ _ _ body _ _ _ _ => + walkStmts tbl body.val + | .AsyncFunctionDef _ _ _ body _ _ _ _ => + walkStmts tbl body.val + | .ClassDef _ _ _ _ body _ _ => + walkStmts tbl body.val + | .Return _ value => do + if let some v := value.val then walkExpr tbl v + | .Delete _ targets => do + for t in targets.val do walkExpr tbl t + | .Assign _ targets value _ => do + for t in targets.val do walkExpr tbl t + walkExpr tbl value + | .AugAssign _ target _ value => + walkExpr tbl target + walkExpr tbl value + | .AnnAssign _ target _ value _ => do + walkExpr tbl target + if let some v := value.val then walkExpr tbl v + | .For _ target iter body orelse _ => + walkExpr tbl target + walkExpr tbl iter + walkStmts tbl body.val + walkStmts tbl orelse.val + | .AsyncFor _ target iter body orelse _ => + walkExpr tbl target + walkExpr tbl iter + walkStmts tbl body.val + walkStmts tbl orelse.val + | .While _ test body orelse => + walkExpr tbl test + walkStmts tbl body.val + walkStmts tbl orelse.val + | .If _ test body orelse => + walkExpr tbl test + walkStmts tbl body.val + walkStmts tbl orelse.val + | .With _ items body _ => do + for item in items.val do + match item with + | .mk_withitem _ ctxExpr optVars => + walkExpr tbl ctxExpr + if let some v := optVars.val then + walkExpr tbl v + walkStmts tbl body.val + | .AsyncWith _ items body _ => do + for item in items.val do + match item with + | .mk_withitem _ ctxExpr optVars => + walkExpr tbl ctxExpr + if let some v := optVars.val then + walkExpr tbl v + walkStmts tbl body.val + | .Raise _ exc cause => do + if let some e := exc.val then walkExpr tbl e + if let some c := cause.val then walkExpr tbl c + | .Try _ body handlers orelse finalbody => do + walkStmts tbl body.val + for h in handlers.val do + match h with + | .ExceptHandler _ exType _ hBody => + if let some t := exType.val then + walkExpr tbl t + walkStmts tbl hBody.val + walkStmts tbl orelse.val + walkStmts tbl finalbody.val + | .TryStar _ body handlers orelse finalbody => do + walkStmts tbl body.val + for h in handlers.val do + match h with + | .ExceptHandler _ exType _ hBody => + if let some t := exType.val then + walkExpr tbl t + walkStmts tbl hBody.val + walkStmts tbl orelse.val + walkStmts tbl finalbody.val + | .Assert _ test msg => do + walkExpr tbl test + if let some m := msg.val then walkExpr tbl m + | .Expr _ value => + walkExpr tbl value + | .Match _ subject cases => do + walkExpr tbl subject + for c in cases.val do + match c with + | .mk_match_case _ _pat guard cBody => + if let some g := guard.val then + walkExpr tbl g + walkStmts tbl cBody.val + | .TypeAlias _ _ _ value => + walkExpr tbl value + -- Leaf statements — no sub-expressions to walk + | .Import .. | .ImportFrom .. | .Global .. + | .Nonlocal .. | .Pass .. | .Break .. + | .Continue .. => + pure () + +/-- Walk an array of statements. -/ +partial def walkStmts + (tbl : OverloadTable) + (stmts : Array (stmt SourceRange)) + : ResolveM Unit := do + for s in stmts do walkStmt tbl s + +end + +/-- Run the walker over the top-level statements and return + the final state containing collected modules and warnings. -/ +def resolveOverloads + (overloads : OverloadTable) + (stmts : Array (stmt SourceRange)) + : ResolveState := + (walkStmts overloads stmts |>.run {}).2 + +end Strata.Python.Specs.IdentifyOverloads diff --git a/Strata/Languages/Python/Specs/ToLaurel.lean b/Strata/Languages/Python/Specs/ToLaurel.lean index 3de8990b1..5475f0566 100644 --- a/Strata/Languages/Python/Specs/ToLaurel.lean +++ b/Strata/Languages/Python/Specs/ToLaurel.lean @@ -51,6 +51,9 @@ and only string literal values are extracted. -/ /-- Context for PySpec to Laurel translation. -/ structure ToLaurelContext where filepath : System.FilePath + /-- Module prefix prepended to generated type and procedure names + to avoid collisions when multiple PySpec files are combined. -/ + modulePrefix : String := "" /-- State for PySpec to Laurel translation. -/ structure ToLaurelState where @@ -83,6 +86,13 @@ def pushOverloadEntry (funcName : String) (literalValue : String) let updated := existing.insert literalValue returnType { s with overloads := s.overloads.insert funcName updated } +/-- Prepend the module prefix to a name. Returns the name unchanged + if the prefix is empty. -/ +def prefixName (name : String) : ToLaurelM String := do + let ctx ← read + if ctx.modulePrefix.isEmpty then return name + return ctx.modulePrefix ++ "_" ++ name + /-! ## Helper Functions -/ /-- Create a HighTypeMd with default metadata. -/ @@ -243,7 +253,8 @@ def specTypeToLaurelType (ty : SpecType) : ToLaurelM HighTypeMd := do if args.size > 0 then reportError default s!"Generic class '{name}' with type args unsupported" - return mkTy (.UserDefined { text := name }) + let prefixed ← prefixName name + return mkTy (.UserDefined { text := prefixed }) | .intLiteral _ => return mkTy .TInt | .stringLiteral _ => return mkTy .TString | .typedDict _ _ _ => return mkCore "DictStrAny" @@ -281,23 +292,30 @@ def funcDeclToLaurel (procName : String) (func : FunctionDecl) /-- Convert a class definition to Laurel types and procedures. -/ def classDefToLaurel (cls : ClassDef) : ToLaurelM Unit := do + let prefixedName ← prefixName cls.name let laurelFields ← cls.fields.toList.mapM fun f => do let ty ← specTypeToLaurelType f.type pure { name := f.name, isMutable := true, type := ty : Laurel.Field } + let prefixedBases ← cls.bases.toList.mapM fun cd => do + -- Local bases (empty pythonModule) get prefixed; external ones don't + let baseName ← if cd.pythonModule.isEmpty then prefixName cd.name + else pure (toString cd) + return mkId baseName pushType (.Composite { - name := cls.name - extending := cls.bases.toList.map (fun cd => mkId $ toString cd) + name := prefixedName + extending := prefixedBases fields := laurelFields instanceProcedures := [] }) for method in cls.methods do - let proc ← funcDeclToLaurel (cls.name ++ "_" ++ method.name) method + let proc ← funcDeclToLaurel (prefixedName ++ "_" ++ method.name) method pushProcedure proc /-- Convert a type definition to a Laurel composite type placeholder. -/ -def typeDefToLaurel (td : TypeDef) : ToLaurelM Unit := +def typeDefToLaurel (td : TypeDef) : ToLaurelM Unit := do + let prefixedName ← prefixName td.name pushType (.Composite { - name := td.name + name := prefixedName extending := [] fields := [] instanceProcedures := [] @@ -336,7 +354,10 @@ def extractOverloadEntry (func : FunctionDecl) : ToLaurelM Unit := do return let retType ← match func.returnType.atoms[0] with - | .pyClass name _ => pure (PythonIdent.mk "" name) + | .pyClass name _ => do + let ctx ← read + let prefixed ← prefixName name + pure (PythonIdent.mk ctx.modulePrefix prefixed) | .ident nm _ => pure nm | _ => reportError func.loc @@ -358,8 +379,9 @@ def signatureToLaurel (sig : Signature) : ToLaurelM Unit := | .functionDecl func => do if func.isOverload then extractOverloadEntry func - else - let proc ← funcDeclToLaurel func.name func + else do + let procName ← prefixName func.name + let proc ← funcDeclToLaurel procName func pushProcedure proc | .classDef cls => classDefToLaurel cls @@ -372,8 +394,9 @@ structure TranslationResult where /-- Run the translation and return a Laurel Program, dispatch table, and any errors. -/ def signaturesToLaurel (filepath : System.FilePath) (sigs : Array Signature) + (modulePrefix : String := "") : TranslationResult := - let ctx : ToLaurelContext := { filepath } + let ctx : ToLaurelContext := { filepath, modulePrefix } let ((), state) := (sigs.forM signatureToLaurel).run ctx |>.run {} let pgm : Laurel.Program := { staticProcedures := state.procedures.toList diff --git a/StrataMain.lean b/StrataMain.lean index fc789a2d3..0ba0a6017 100644 --- a/StrataMain.lean +++ b/StrataMain.lean @@ -12,6 +12,7 @@ import Strata.Languages.Laurel.Grammar.ConcreteToAbstractTreeTranslator import Strata.Languages.Laurel.LaurelToCoreTranslator import Strata.Languages.Python.Python import Strata.Languages.Python.Specs +import Strata.Languages.Python.Specs.IdentifyOverloads import Strata.Languages.Python.Specs.ToLaurel import Strata.Languages.Laurel.LaurelFormat import Strata.Transform.ProcedureInlining @@ -329,61 +330,167 @@ def pyAnalyzeCommand : Command where | none => Map.empty Core.Sarif.writeSarifOutput .deductive files vcResults (filePath ++ ".sarif") -/-- Result of building the PySpec-augmented prelude. -/ -structure PySpecPrelude where - corePrelude : Core.Program +/-- Result of reading PySpec files: combined Laurel declarations and overload table. -/ +structure PySpecLaurelResult where + laurelProgram : Strata.Laurel.Program overloads : Strata.Python.Specs.ToLaurel.OverloadTable -/-- Build the Core prelude augmented with declarations from PySpec Ion files. - Each Ion file is translated PySpec → Laurel → Core, and the resulting declarations - are appended to the base prelude (with duplicates filtered out). - Also accumulates overload dispatch tables. -/ -def buildPySpecPrelude (pyspecPaths : Array String) : IO PySpecPrelude := do - -- The Laurel prelude is now included during HeapParameterization at the Laurel level. - -- We no longer need to strip it from translate output. - let laurelPreludeSize := 0 - let mut preludeDecls : Array Core.Decl := - Strata.Python.Core.PythonLaurelPrelude.decls.toArray - let mut existingNames : Std.HashSet String := - preludeDecls.foldl (init := {}) fun s d => - (Core.Decl.names d).foldl (init := s) fun s n => s.insert n.name - let mut allOverloads : Strata.Python.Specs.ToLaurel.OverloadTable := {} +open Strata.Python.Specs.ToLaurel (OverloadTable) + +def mergeOverloads (old new : OverloadTable) : OverloadTable := + new.fold (init := old) fun o name n => + o.alter name fun s => some <| s.getD {} |>.union n + + +/-- Read PySpec Ion files and collect their Laurel declarations and overload + tables into a single combined result. Each Ion file is parsed and translated + to Laurel via `signaturesToLaurel`. The resulting procedures and types are + accumulated into one `Laurel.Program`, and overload dispatch entries are + merged into a single table. -/ +def buildPySpecLaurel (pyspecPaths : Array String) (overloads : OverloadTable) : IO PySpecLaurelResult := do + let mut combinedProcedures : Array (Strata.Laurel.Procedure × String) := #[] + let mut combinedTypes : Array (Strata.Laurel.TypeDefinition × String) := #[] + let mut allOverloads := overloads for ionPath in pyspecPaths do let ionFile : System.FilePath := ionPath let some mod := ionFile.fileStem | exitFailure s!"No stem {ionFile}" - let .ok _mod := Strata.Python.Specs.ModuleName.ofString mod + let .ok parsedMod := Strata.Python.Specs.ModuleName.ofString mod | exitFailure s!"Invalid module {mod}" + let modulePrefix := parsedMod.components[0]'parsedMod.componentsSizePos let sigs ← match ← Strata.Python.Specs.readDDM ionFile |>.toBaseIO with | .ok t => pure t | .error msg => exitFailure s!"Could not read {ionFile}: {msg}" - let result := Strata.Python.Specs.ToLaurel.signaturesToLaurel ionPath sigs - if result.errors.size > 0 then - IO.eprintln s!"{result.errors.size} PySpec translation warning(s) for {ionPath}:" - for err in result.errors do + let { program, errors, overloads } := Strata.Python.Specs.ToLaurel.signaturesToLaurel ionPath sigs modulePrefix + if errors.size > 0 then + IO.eprintln s!"{errors.size} PySpec translation warning(s) for {ionPath}:" + for err in errors do IO.eprintln s!" {err.file}: {err.message}" - -- Merge overload table entries - for (funcName, overloads) in result.overloads do - let existing := allOverloads.getD funcName {} - allOverloads := allOverloads.insert funcName - (overloads.fold (init := existing) fun acc k v => acc.insert k v) - match Strata.Laurel.translate result.program with - | .error diagnostics => - exitFailure s!"PySpec Laurel to Core translation failed for {ionPath}: {diagnostics}" - | .ok (coreSpec, _modifiesDiags) => - -- The Laurel prelude is now included at the Laurel level during HeapParameterization, - -- so translate output already contains the prelude declarations as normal decls. - let pyspecDecls := coreSpec.decls.drop laurelPreludeSize - -- Register new names, failing on collisions - for d in pyspecDecls do - for n in Core.Decl.names d do - if existingNames.contains n.name then - exitFailure s!"Core name collision in PySpec {ionPath}: {n.name}" - existingNames := existingNames.insert n.name - preludeDecls := preludeDecls ++ pyspecDecls.toArray - let pyPrelude : Core.Program := { decls := preludeDecls.toList } - return { corePrelude := pyPrelude, overloads := allOverloads } + allOverloads := mergeOverloads allOverloads overloads + for td in program.types do + combinedTypes := combinedTypes.push (td, ionPath) + for proc in program.staticProcedures do + combinedProcedures := combinedProcedures.push (proc, ionPath) + -- Sanity check: detect name collisions across PySpec files + let mut seenTypes : Std.HashMap String String := {} + for (td, srcFile) in combinedTypes do + let name := match td with + | .Composite ct => ct.name.text + | .Constrained ct => ct.name.text + | .Datatype dt => dt.name.text + match seenTypes.get? name with + | some prevFile => + IO.eprintln s!"warning: PySpec type name collision: '{name}' defined in both {prevFile} and {srcFile}" + | none => pure () + seenTypes := seenTypes.insert name srcFile + let mut seenProcs : Std.HashMap String String := {} + for (proc, srcFile) in combinedProcedures do + match seenProcs.get? proc.name.text with + | some prevFile => + IO.eprintln s!"warning: PySpec procedure name collision: '{proc.name.text}' defined in both {prevFile} and {srcFile}" + | none => pure () + seenProcs := seenProcs.insert proc.name.text srcFile + let combinedLaurel : Strata.Laurel.Program := { + staticProcedures := combinedProcedures.toList.map Prod.fst + staticFields := [] + types := combinedTypes.toList.map Prod.fst + constants := [] + } + return { laurelProgram := combinedLaurel, overloads := allOverloads } + +/-- Read dispatch Ion files and merge their overload tables. -/ +def readDispatchOverloads + (dispatchPaths : Array String) + : IO Strata.Python.Specs.ToLaurel.OverloadTable := do + let mut tbl : + Strata.Python.Specs.ToLaurel.OverloadTable := {} + for dispatchPath in dispatchPaths do + let ionFile : System.FilePath := dispatchPath + let sigs ← + match ← Strata.Python.Specs.readDDM ionFile + |>.toBaseIO with + | .ok t => pure t + | .error msg => + exitFailure + s!"Could not read dispatch file {ionFile}: {msg}" + let (overloads, errors) := + Strata.Python.Specs.ToLaurel.extractOverloads + dispatchPath sigs + if errors.size > 0 then + IO.eprintln + s!"{errors.size} dispatch warning(s) \ + for {ionFile}:" + for err in errors do + IO.eprintln s!" {err.file}: {err.message}" + for (funcName, fnOverloads) in overloads do + let existing := tbl.getD funcName {} + tbl := tbl.insert funcName + (fnOverloads.fold (init := existing) + fun acc k v => acc.insert k v) + return tbl + +/-- Extract top-level Python statements from a + Strata program. Calls `toPyCommands` and + `unwrapModule` on the single command. -/ +def extractStmtsFromProgram + (pgm : Strata.Program) + : Array (Strata.Python.stmt Strata.SourceRange) := + let cmds := Strata.toPyCommands pgm.commands + Strata.unwrapModule cmds[0]! + +/-- Build dispatch overload table, auto-resolve pyspec files + from the program AST, and return combined Laurel declarations + and overload table. + + Auto-resolved pyspec files that are missing on disk are + skipped with a warning. Explicitly provided `--pyspec` + and `--dispatch` paths still produce a hard error when + unreadable. -/ +def resolveAndBuildLaurelPrelude + (dispatchPaths : Array String) + (pyspecPaths : Array String) + (stmts : Array (Strata.Python.stmt Strata.SourceRange)) + : IO PySpecLaurelResult := do + -- Build dispatch overload table + let dispatchOverloads ← + readDispatchOverloads dispatchPaths + + -- Auto-resolve pyspec files used by the program + let resolveState := + Strata.Python.Specs.IdentifyOverloads.resolveOverloads + dispatchOverloads stmts + for w in resolveState.warnings do + IO.eprintln s!"warning: {w}" + let mut autoSpecPaths : Array String := #[] + if h : dispatchPaths.size > 0 then + let firstDispatch : System.FilePath := + dispatchPaths[0] + let dispatchDir := + firstDispatch.parent.getD "." + let resolvedMods := + resolveState.modules.toArray.qsort (· < ·) + for modName in resolvedMods do + match Strata.Python.Specs.ModuleName.ofString + modName with + | .error _ => + IO.eprintln + s!"warning: invalid module name \ + '{modName}', skipping" + | .ok mod => + let specPath : System.FilePath := + dispatchDir / mod.strataFileName + if ← specPath.pathExists then + autoSpecPaths := + autoSpecPaths.push specPath.toString + else + IO.eprintln + s!"warning: auto-resolved pyspec \ + not found: {specPath}" + let allSpecPaths := autoSpecPaths ++ pyspecPaths + + -- Read PySpec files into combined Laurel program + buildPySpecLaurel allSpecPaths dispatchOverloads def pyAnalyzeLaurelCommand : Command where name := "pyAnalyzeLaurel" @@ -413,34 +520,27 @@ def pyAnalyzeLaurelCommand : Command where IO.print pgm assert! cmds.size == 1 - let pySpecResult ← buildPySpecPrelude (pflags.getRepeated "pyspec") - let pyPrelude := pySpecResult.corePrelude - - -- Extract overload dispatch tables from --dispatch files - let mut allOverloads := pySpecResult.overloads - for dispatchPath in pflags.getRepeated "dispatch" do - let ionFile : System.FilePath := dispatchPath - let sigs ← - match ← Strata.Python.Specs.readDDM ionFile |>.toBaseIO with - | .ok t => pure t - | .error msg => - exitFailure s!"Could not read dispatch file {ionFile}: {msg}" - let (overloads, errors) := - Strata.Python.Specs.ToLaurel.extractOverloads dispatchPath sigs - if errors.size > 0 then - IO.eprintln s!"{errors.size} dispatch warning(s) for {ionFile}:" - for err in errors do - IO.eprintln s!" {err.file}: {err.message}" - for (funcName, fnOverloads) in overloads do - let existing := allOverloads.getD funcName {} - allOverloads := allOverloads.insert funcName - (fnOverloads.fold (init := existing) fun acc k v => acc.insert k v) + let stmts := Strata.unwrapModule cmds[0]! + let { laurelProgram := pySpecLaurelPgm, overloads } ← + resolveAndBuildLaurelPrelude + (pflags.getRepeated "dispatch") + (pflags.getRepeated "pyspec") + stmts let sourcePathForMetadata := match pySourceOpt with | some (pyPath, _) => pyPath | none => filePath - let laurelPgm := Strata.Python.pythonToLaurel pyPrelude cmds[0]! none - sourcePathForMetadata allOverloads + + -- Build PreludeInfo from base Core prelude + Laurel-level PySpec info + let baseInfo := Strata.Python.PreludeInfo.ofCoreProgram + Strata.Python.Core.PythonLaurelPrelude + let pySpecInfo := Strata.Python.PreludeInfo.ofLaurelProgram pySpecLaurelPgm + let preludeInfo := baseInfo.merge pySpecInfo + + let laurelPgm := + Strata.Python.pythonToLaurel' + preludeInfo cmds[0]! none + sourcePathForMetadata overloads match laurelPgm with | .error e => exitFailure s!"Python to Laurel translation failed: {e}" @@ -449,32 +549,32 @@ def pyAnalyzeLaurelCommand : Command where IO.println "\n==== Laurel Program ====" IO.println f!"{laurelProgram}" - -- Translate Laurel to Core - match Strata.Laurel.translate laurelProgram with + -- Combine PySpec Laurel + user Laurel into a single program. + let combinedLaurel : Strata.Laurel.Program := { + staticProcedures := pySpecLaurelPgm.staticProcedures ++ laurelProgram.staticProcedures + staticFields := pySpecLaurelPgm.staticFields ++ laurelProgram.staticFields + types := pySpecLaurelPgm.types ++ laurelProgram.types + constants := pySpecLaurelPgm.constants ++ laurelProgram.constants + } + + -- Single Laurel.translate call over the combined program + match Strata.Laurel.translate combinedLaurel with | .error diagnostics => exitFailure s!"Laurel to Core translation failed: {diagnostics}" - | .ok (coreProgramDecls, modifiesDiags) => + | .ok (coreFromLaurel, modifiesDiags) => if verbose then IO.println "\n==== Core Program ====" - IO.print (coreProgramDecls, modifiesDiags) - - -- The Laurel prelude is now included at the Laurel level during - -- HeapParameterization, so translate output contains prelude decls as normal decls. - -- No stripping needed. - let programDecls := coreProgramDecls.decls.filter (λ d=> d.name.name != "Box") - -- Check for name collisions between program and prelude - let preludeNames : Std.HashSet String := - pyPrelude.decls.flatMap Core.Decl.names + IO.print (coreFromLaurel, modifiesDiags) + + -- Merge with base PythonLaurelPrelude, deduplicating by name + let baseDecls := Strata.Python.Core.PythonLaurelPrelude.decls + let baseNames : Std.HashSet String := + baseDecls.flatMap Core.Decl.names |>.foldl (init := {}) fun s n => s.insert n.name - let collisions := programDecls.flatMap fun d => - d.names.filter fun n => preludeNames.contains n.name - if !collisions.isEmpty then - let names := ", ".intercalate (collisions.map (·.name)) - exitFailure s!"Core name collision between program and prelude: {names}" - let coreProgram := {decls := pyPrelude.decls ++ programDecls } - -- dbg_trace "=== Generated Strata Core Program ===" - -- dbg_trace (toString (Std.Format.pretty (Strata.Core.formatProgram coreProgram) 100)) - -- dbg_trace "=================================" + let newDecls := coreFromLaurel.decls.filter fun d => + (Core.Decl.names d).all fun n => !baseNames.contains n.name + let coreProgram := + { decls := baseDecls ++ newDecls } -- Verify using Core verifier let baseOptions : VerifyOptions := @@ -774,6 +874,36 @@ def pySpecToLaurelCommand : Command where for proc in pgm.staticProcedures do IO.println s!" {Strata.Laurel.formatProcedure proc}" +def pyResolveOverloadsCommand : Command where + name := "pyResolveOverloads" + args := [ "python_path", "dispatch_ion" ] + help := "Identify which overloaded service modules a \ + Python program uses. Prints one module name per \ + line to stdout." + callback := fun v _ => do + let pythonFile : System.FilePath := v[0] + let dispatchPath := v[1] + -- Read dispatch overload table + let overloads ← + readDispatchOverloads #[dispatchPath] + -- Convert .py to Python AST + let stmts ← + IO.FS.withTempFile fun _handle dialectFile => do + IO.FS.writeBinFile dialectFile + Strata.Python.Python.toIon + match ← Strata.Python.pythonToStrata dialectFile pythonFile |>.toBaseIO with + | .ok s => pure s + | .error msg => exitFailure msg + -- Walk AST and collect modules + let state := + Strata.Python.Specs.IdentifyOverloads.resolveOverloads + overloads stmts + for w in state.warnings do + IO.eprintln s!"warning: {w}" + let sorted := state.modules.toArray.qsort (· < ·) + for m in sorted do + IO.println m + def laurelParseCommand : Command where name := "laurelParse" args := [ "file" ] @@ -999,9 +1129,12 @@ def commandGroups : List CommandGroup := [ commands := [javaGenCommand] }, { name := "Python" commands := [pyAnalyzeCommand, pyAnalyzeLaurelCommand, + pyResolveOverloadsCommand, pySpecsCommand, pySpecToLaurelCommand, - pyAnalyzeLaurelToGotoCommand, pyAnalyzeToGotoCommand, - pyTranslateCommand, pyTranslateLaurelCommand] }, + pyAnalyzeLaurelToGotoCommand, + pyAnalyzeToGotoCommand, + pyTranslateCommand, + pyTranslateLaurelCommand] }, { name := "Laurel" commands := [laurelAnalyzeCommand, laurelAnalyzeBinaryCommand, laurelAnalyzeToGotoCommand, laurelParseCommand, diff --git a/StrataTest/Backends/CBMC/GOTO/ToCProverGOTO.lean b/StrataTest/Backends/CBMC/GOTO/ToCProverGOTO.lean index 6dff3608f..489e85cce 100644 --- a/StrataTest/Backends/CBMC/GOTO/ToCProverGOTO.lean +++ b/StrataTest/Backends/CBMC/GOTO/ToCProverGOTO.lean @@ -426,6 +426,10 @@ info: ok: #[ASSERT skip] ------------------------------------------------------------------------------- -- Test that contracts are included in the function symbol's code type +/-- +info: Except.ok () +-/ +#guard_msgs in #eval do let reqExpr ← CProverGOTO.exprToJson (CProverGOTO.Expr.gt (.symbol "x" .Integer) (.constant "0" .Integer)) @@ -467,6 +471,10 @@ info: ok: #[DECL (decl (i : unsignedbv[32])), return format ans.instructions -- The backward GOTO (location 5, targeting location 2) should have the invariant +/-- +info: ok: () +-/ +#guard_msgs in #eval do let ans ← Imperative.Stmts.toGotoTransform Lambda.TEnv.default "testInv" ExampleLoopInvariant let backGotos := ans.instructions.toList.filter (fun (i : CProverGOTO.Instruction) => @@ -493,6 +501,10 @@ private def ExampleLoopMeasure : List (Imperative.Stmt LExprTP (Imperative.Cmd L [] {}] +/-- +info: ok: () +-/ +#guard_msgs in #eval do let ans ← Imperative.Stmts.toGotoTransform Lambda.TEnv.default "testMeas" ExampleLoopMeasure let backGotos := ans.instructions.toList.filter (fun (i : CProverGOTO.Instruction) => diff --git a/Tools/Python/strata/gen.py b/Tools/Python/strata/gen.py index fc3535be1..8ff4339f5 100755 --- a/Tools/Python/strata/gen.py +++ b/Tools/Python/strata/gen.py @@ -85,6 +85,8 @@ def main(): parser = argparse.ArgumentParser( prog='strata_python', description='Strata interface to Python parser') + parser.add_argument('-q', '--quiet', action='store_true', + help='Suppress warnings.') subparsers = parser.add_subparsers(help="subcommand help") def write_python_dialect(args): @@ -109,6 +111,11 @@ def write_python_dialect(args): checkast_command.set_defaults(func=check_ast_imp) args = parser.parse_args() + if not args.quiet and not ion.__IS_C_EXTENSION_SUPPORTED: + print("warning: amazon.ion C extension is not available. " + "Ion serialization will be significantly slower. " + "Use -q to suppress this warning.", + file=sys.stderr) if hasattr(args, 'func'): args.func(args) else: diff --git a/laurel_intro.md b/laurel_intro.md new file mode 100644 index 000000000..ad3dcdbeb --- /dev/null +++ b/laurel_intro.md @@ -0,0 +1,169 @@ +# Laurel to Core Translation Pipeline + +## Overview + +`Strata.Laurel.translate` converts a Laurel program into a Strata Core program +through a series of transformation passes. Each pass rewrites the Laurel AST, +and name resolution is re-run after each pass to rebuild the semantic model. + +The pipeline is defined in `Strata/Languages/Laurel/LaurelToCoreTranslator.lean:591`. + +## Pipeline Steps + +### 1. Prepend Core Definitions for Laurel + +``` +program.staticProcedures := coreDefinitionsForLaurel.staticProcedures ++ program.staticProcedures +``` + +Adds external procedure stubs (`const`, `select`, `update`) so that references +to these built-in map operations resolve during later passes. These are marked +`External` and are dropped from the final Core output. + +### 2. First Resolution + +Builds a `SemanticModel` that maps identifiers to their definitions (types, +fields, procedures, etc.). Also validates diamond-inherited field accesses. + +### 3. Heap Parameterization + +**File:** `Strata/Languages/Laurel/HeapParameterization.lean` +**Constants:** `Strata/Languages/Laurel/HeapParameterizationConstants.lean` + +Transforms field access on composite types into explicit heap map operations. +After this pass, composite objects no longer have fields directly -- instead, +fields are stored in a global heap as `Map Composite (Map Field Box)`. + +**What it adds to the program:** + +Types (from `heapConstants`): +- `Composite { MkComposite(ref: int) }` -- an object reference +- `Box { BoxInt(int), BoxBool(bool), BoxFloat64(float64), BoxComposite(Composite) }` -- tagged union for field values +- `Heap { MkHeap(data: Map Composite Map Field Box, nextReference: int) }` -- the heap itself + +Types (generated per-program): +- `Field` -- one constructor per field across all composite types + (e.g., `Client.region`, `Plan.id`). This is program-specific because + different programs define different composite types with different fields. + +Procedures (from `heapConstants`): +- `readField(heap, obj, field) -> Box` +- `updateField(heap, obj, field, val) -> Heap` +- `increment(heap) -> Heap` + +**What it transforms:** +- Removes fields from composite type definitions +- Rewrites field reads/writes to use `readField`/`updateField` on the heap + +### 4. Type Hierarchy Transform + +**File:** `Strata/Languages/Laurel/TypeHierarchy.lean` + +Generates runtime type-tag infrastructure so that `isinstance` checks +(represented as `IsType` in Laurel) can be translated to Core. In Python, +`isinstance(obj, SomeClass)` needs to check whether `obj`'s runtime type is +`SomeClass` or any subclass of it. This pass encodes that relationship using +maps. + +**What it adds to the program:** + +Types: +- `TypeTag` datatype -- one nullary constructor `_TypeTag` per + composite type. For example, if the program has `Client` and `Plan`, + the datatype is `TypeTag { Client_TypeTag, Plan_TypeTag }`. + +Constants: +- `ancestorsFor` -- one per composite type. A `Map TypeTag Bool` + where `ancestorsForClient[Plan_TypeTag] = true` means `Plan` is an + ancestor of (or is the same as) `Client`. Computed by walking the + `extending` (inheritance) chain transitively. +- `ancestorsPerType` -- combines all per-type maps into a single + `Map TypeTag (Map TypeTag Bool)`. This is the main lookup table: + `select(select(ancestorsPerType, obj_tag), TypeName_TypeTag)` returns + `true` if `obj` is an instance of `TypeName`. + +**What it transforms:** +- Modifies `Composite` to add a `typeTag: TypeTag` field to `MkComposite` +- Rewrites `IsType target ty` into: + `select(select(ancestorsPerType(), Composite..typeTag!(target)), TypeName_TypeTag())` +- Rewrites `New TypeName` into heap allocation + `MkComposite(ref, TypeName_TypeTag())` + +**Everything in this step is program-specific** -- different programs have +different composite types, so the `TypeTag` constructors, ancestor maps, and +`ancestorsPerType` are all different. + +### 5. Modifies Clauses Transform + +Processes `modifies` clauses on procedures. No new names introduced. + +### 6. Lift Expression Assignments + +Lifts assignments out of expressions into separate statements. No new names. + +### 7. Eliminate Returns in Expressions + +Eliminates return statements inside expressions. No new names. + +### 8. Constrained Type Elimination + +Eliminates constrained (refinement) types. No new names introduced. + +### 9. Final Core Translation + +Converts the fully-transformed Laurel program into a `Core.Program`: + +- Laurel datatypes -> `Core.Decl.datatype` +- Laurel constants -> `Core.Decl.func` (0-ary functions) +- Laurel procedures marked `isFunctional` -> `Core.Decl.func` +- Remaining procedures -> `Core.Decl.proc` +- External procedures are **dropped** + +Output declaration order: +``` +datatypeDecls ++ constantDecls ++ pureFuncDecls ++ procDecls +``` + +## Names Introduced by the Pipeline + +### Static names (from `heapConstants`, same for every program) + +| Name | Kind | +|------|------| +| `Composite`, `MkComposite` | Datatype + constructor (modified by typeHierarchy to add `typeTag` field) | +| `Box`, `BoxInt`, `BoxBool`, `BoxFloat64`, `BoxComposite` | Datatype + constructors | +| `Heap`, `MkHeap` | Datatype + constructors | +| `readField`, `updateField`, `increment` | Functions | + +### Program-specific names + +| Name pattern | Kind | Source | +|-------------|------|--------| +| `Field`, `.` constructors | Datatype | heapParameterization | +| `TypeTag`, `_TypeTag` constructors | Datatype | typeHierarchyTransform | +| `ancestorsFor` | Constant | typeHierarchyTransform | +| `ancestorsPerType` | Constant | typeHierarchyTransform | + +## Issue: Using `translate` for Prelude Building + +`buildPySpecPrelude` in `StrataMain.lean` runs each PySpec file through +`Laurel.translate` independently, then combines the results into a single +prelude. This is problematic because: + +1. **Static names are duplicated** across every translation (e.g., `Box` + appears in every output). These just need deduplication. + +2. **Program-specific names conflict** across translations. Each PySpec file + defines different composite types, producing different `TypeTag` + constructors and `ancestorsFor*` constants. When combined, these collide. + +3. **Program-specific names conflict between prelude and program.** The + combined prelude has a `TypeTag` with constructors from ALL pyspec + composite types. The user program's `TypeTag` has constructors from only + ITS composite types. These are structurally different definitions with the + same name. + +The fundamental issue is that `Laurel.translate` is designed as a monolithic +pipeline for a complete program. It does not separate infrastructure +declarations from user declarations in its output. Using it piecemeal for +prelude building produces conflicts that require filtering heuristics to work +around. diff --git a/simplify_python_to_laurel.md b/simplify_python_to_laurel.md new file mode 100644 index 000000000..e0b4e9bed --- /dev/null +++ b/simplify_python_to_laurel.md @@ -0,0 +1,127 @@ +# Simplifying the pythonToLaurel Prelude Dependency + +## Problem + +`pythonToLaurel` takes a `Core.Program` as its prelude argument. This forces +the PySpec Laurel declarations to be translated to Core *before* the user +program can be translated to Laurel. But `Laurel.translate` is a whole-program +pipeline — `typeHierarchyTransform` generates a single `TypeTag` datatype +covering all composite types in its input. Running it separately on PySpec and +the user program produces two incompatible `TypeTag` definitions. + +The ideal fix is a single `Laurel.translate` call over the combined PySpec + +user Laurel program. But that requires breaking the circular dependency: + +``` +pythonToLaurel needs Core prelude (including PySpec) + -> PySpec must be translated to Core first + -> Laurel.translate called on PySpec alone + -> produces TypeTag that conflicts with program's TypeTag +``` + +## What pythonToLaurel Actually Uses from the Core Prelude + +`pythonToLaurel` (PythonToLaurel.lean:1299) calls four extraction functions on +the `Core.Program`: + +### 1. `extractPreludeTypes` -> `List String` +Line 1097. Returns type names from `.type` decls (constructors, synonyms, +datatypes). Used to recognize type annotations in Python code. + +### 2. `extractPreludeProcedures` -> `List (String x CoreProcedureSignature)` +Line 1106. Returns procedure names + input/output type name lists. Used to +translate procedure calls with correct argument handling. + +### 3. `preludeSignatureToPythonFunctionDecl` -> `List PythonFunctionDecl` +Line 1115. Returns procedure names + arg names/types + return type. Stored in +`TranslationContext.functionSignatures` for call translation. + +### 4. `getPreludeFunctions` -> `List String` +Line 1291. Returns function names + datatype constructor/destructor/tester +names. Used to distinguish function calls from procedure calls. + +**Key observation:** All four are pure name/signature extractions. No bodies, +no definitions, no Core expressions. Just: "what names exist and what are their +parameter types?" + +## Proposed Solution + +### Step 1: Define a prelude info structure + +Create a structure that holds just the information `pythonToLaurel` needs: + +```lean +structure PythonToLaurelPreludeInfo where + types : List String + procedures : List (String x CoreProcedureSignature) + functionSignatures : List PythonFunctionDecl + functions : List String +``` + +### Step 2: Extract from base prelude + Laurel PySpec declarations + +The base `PythonLaurelPrelude` is already a `Core.Program`, so extraction +works as-is. For PySpec Laurel declarations, we extract at the Laurel level: + +- **Types**: PySpec composite type names (from `Laurel.TypeDefinition`) +- **Procedures**: PySpec procedure names + input/output parameter types + (from `Laurel.Procedure` inputs/outputs) +- **Functions**: PySpec functional procedure names + datatype + constructor/destructor names + +The Laurel-level types use `HighType` rather than Core's `LMonoTy`, so +we need a mapping from Laurel type names to the string names that +`pythonToLaurel` expects. This should be straightforward since +`extractPreludeProcedures` already just calls `getTypeName` which returns +a string. + +### Step 3: Change `pythonToLaurel` signature + +```lean +def pythonToLaurel (preludeInfo : PythonToLaurelPreludeInfo) + (pyModule : Python.Command SourceRange) + ... +``` + +Or, more conservatively, keep the existing `Core.Program` parameter but +*also* accept supplemental info that gets merged in. This would be less +invasive. + +### Step 4: Single translate call + +With `pythonToLaurel` no longer needing the PySpec Core translation, the +pipeline becomes: + +1. `buildPySpecLaurel` -> PySpec Laurel declarations + overloads +2. Extract prelude info from `PythonLaurelPrelude` (Core) + PySpec (Laurel) +3. `pythonToLaurel(preludeInfo, ...)` -> user Laurel program +4. Combine PySpec Laurel types/procedures + user Laurel program +5. Single `Laurel.translate` -> Core +6. Merge with `PythonLaurelPrelude` base decls -> final Core program + +This eliminates the duplicate `TypeTag`/`ancestorsPerType`/`Box` problem +entirely, because there's only one `translate` call that sees all composite +types. + +## Risks and Open Questions + +- **Type name mapping**: Laurel `HighType` names need to map to the same + strings that `getTypeName` produces from Core types. Need to verify this + is a 1:1 correspondence. + +- **`preludeSignatureToPythonFunctionDecl`** is the most complex extraction. + It needs arg names, arg types, and return type. Laurel `Procedure` has + `inputs : List Parameter` and `outputs : List Parameter` which should + provide this, but the field names may differ. + +- **Conservative alternative**: Instead of changing `pythonToLaurel`'s + signature, we could construct a "fake" `Core.Program` from the Laurel + declarations that contains just enough decls (procedure headers, type + stubs) for the extraction functions to work. This avoids touching + `pythonToLaurel` at all but is less clean. + +- **`translateLaurelToPrelude` simplification**: Once we have a single + translate call, we can also skip `heapParameterization` and + `typeHierarchyTransform` for a "prelude-only" translation path if we + ever need one. But with the single-call approach, this becomes + unnecessary. diff --git a/testcase.sh b/testcase.sh new file mode 100755 index 000000000..f256f4cc7 --- /dev/null +++ b/testcase.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -ex + +DISPATCH_FILE="botomoog-python/pyspec/botomoog.pyspec.st.ion" +ION_FILE="kiro_tests_annotated/apigateway_key_manager.python.st.ion" + +lake exe strata pyAnalyzeLaurel --dispatch "$DISPATCH_FILE" "$ION_FILE"