Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 31 additions & 46 deletions src/worker/lambda/packages/packagePuller.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,15 @@ type Package struct {

// PackageMeta is the metadata describing an installed package; the
// pip-install admin lambda returns this.
type PackageMeta struct {
	// Deps lists the package's dependencies.
	//
	// Deprecated: this field is marked deprecated; avoid relying on it
	// in new code.
	Deps []string `json:"Deps"`

	// TopLevel lists the package's top-level modules.
	TopLevel []string `json:"TopLevel"`
}

// ModuleInfo describes one entry reported by IterModules.
type ModuleInfo struct {
// Name is the directory name (for a package) or the file name with
// its ".py" suffix removed (for a plain module).
Name string
// IsPkg is true when the entry is a directory holding an __init__.py.
IsPkg bool
}

func NewPackagePuller(sbPool sandbox.SandboxPool, depTracer *DepTracer) (*PackagePuller, error) {
// create a lambda function for installing pip packages. We do
// each install in a Sandbox for two reasons:
Expand Down Expand Up @@ -74,48 +79,7 @@ func NewPackagePuller(sbPool sandbox.SandboxPool, depTracer *DepTracer) (*Packag
// be case insensitive, and MUST consider hyphens and
// underscores to be equivalent."
//
// NormalizePkg lower-cases pkg and replaces underscores with hyphens.
// Anything from the first ';' onward (an environment marker such as
// "; python_version < '3'") is discarded first, and surrounding
// whitespace is trimmed, so a requirement line that carries a marker
// still maps to the bare package name.
func NormalizePkg(pkg string) string {
	if i := strings.IndexByte(pkg, ';'); i >= 0 {
		pkg = pkg[:i]
	}
	pkg = strings.TrimSpace(pkg)
	return strings.ReplaceAll(strings.ToLower(pkg), "_", "-")
}

// "pip install" missing packages to Conf.Pkgs_dir
//
// InstallRecursive fetches every entry of installs via GetPkg and then
// keeps going through the dependencies each fetched package reports,
// until no previously unseen dependency remains. It returns the
// original entries followed by the dependencies discovered, in
// discovery order. The caller's slice is never modified. The first
// GetPkg error aborts the walk and is returned as-is.
//
// NOTE(review): names are compared verbatim (only a "==version" suffix
// is dropped from the initial entries), so this presumably relies on
// dependency names already being in normalized form -- confirm.
func (pp *PackagePuller) InstallRecursive(installs []string) ([]string, error) {
	// shrink capacity to length so that our appends are not
	// visible to caller
	installs = installs[:len(installs):len(installs)]

	// names already queued, keyed without any "==version" suffix
	installSet := make(map[string]bool, len(installs))
	for _, install := range installs {
		name := strings.Split(install, "==")[0]
		installSet[name] = true
	}

	// Installs may grow as we loop, because some installs have
	// deps, leading to other installs
	for i := 0; i < len(installs); i++ {
		pkg := installs[i]
		if common.Conf.Trace.Package {
			log.Printf("On %v of %v", pkg, installs)
		}
		p, err := pp.GetPkg(pkg)
		if err != nil {
			return nil, err
		}

		if common.Conf.Trace.Package {
			log.Printf("Package '%s' has deps %v", pkg, p.Meta.Deps)
			log.Printf("Package '%s' has top-level modules %v", pkg, p.Meta.TopLevel)
		}

		// push any previously unseen deps on the list of ones to install
		for _, dep := range p.Meta.Deps {
			if !installSet[dep] {
				installs = append(installs, dep)
				installSet[dep] = true
			}
		}
	}

	return installs, nil
}

// GetPkg does the pip install in a Sandbox, taking care to never install the
Expand Down Expand Up @@ -169,6 +133,7 @@ func (pp *PackagePuller) sandboxInstall(p *Package) (err error) {
// assume dir existence means it is installed already
log.Printf("%s appears already installed from previous run of OL", p.Name)
alreadyInstalled = true
return nil
} else {
log.Printf("run pip install %s from a new Sandbox to %s on host", p.Name, scratchDir)
if err := os.Mkdir(scratchDir, 0700); err != nil {
Expand Down Expand Up @@ -219,9 +184,29 @@ func (pp *PackagePuller) sandboxInstall(p *Package) (err error) {
return err
}

for i, pkg := range p.Meta.Deps {
p.Meta.Deps[i] = NormalizePkg(pkg)
return nil
}

// IterModules is a simplified implementation of Python's
// pkgutil.iter_modules. It lists the importable entries found directly
// inside the directory path:
//
//   - a sub-directory that contains an __init__.py is reported as a
//     package (IsPkg true);
//   - a "*.py" file other than __init__.py is reported as a plain
//     module, named after the file without its ".py" suffix.
//
// Names that are empty or contain a '.' are skipped, as
// pkgutil.iter_modules does, because they cannot be imported. Entries
// come back in the file-name order produced by ioutil.ReadDir. An error
// is returned only when path itself cannot be read.
//
// TODO: implement every detail of pkgutil.iter_modules, or find an
// efficient way to call pkgutil.iter_modules in Python.
func IterModules(path string) ([]ModuleInfo, error) {
	files, err := ioutil.ReadDir(path)
	if err != nil {
		return nil, err
	}

	var modules []ModuleInfo
	for _, file := range files {
		name := file.Name()

		if file.IsDir() {
			if strings.Contains(name, ".") {
				continue
			}
			// Only a successful stat proves __init__.py is there; any
			// failure (missing file, unreadable directory, ...) means
			// the directory is not treated as a package.
			if _, err := os.Stat(filepath.Join(path, name, "__init__.py")); err == nil {
				modules = append(modules, ModuleInfo{Name: name, IsPkg: true})
			}
			continue
		}

		if !strings.HasSuffix(name, ".py") || name == "__init__.py" {
			continue
		}
		modName := strings.TrimSuffix(name, ".py")
		if modName == "" || strings.Contains(modName, ".") {
			continue
		}
		modules = append(modules, ModuleInfo{Name: modName, IsPkg: false})
	}
	return modules, nil
}
162 changes: 162 additions & 0 deletions src/worker/lambda/zygote/huge/bare.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
package huge

import (
"encoding/json"
"fmt"
"io/ioutil"
"strings"

"github.com/open-lambda/open-lambda/ol/common"
)

// bare.go just manipulates the tree structure with some search
// algorithms. Does not include any sandboxes and does not require
// locking (it is immutable).

// Node is one entry in the import-cache tree built by LoadTreeFromConfig.
type Node struct {
// parent is filled in by recursiveNodeInit; it stays nil for the root
parent *Node

// ID is assigned via pre-order traversal, starting at 0, so it equals
// the node's index in the slice returned by LoadTreeFromConfig
ID int

// Packages and Children are parsed from JSON
Packages []string `json:"packages"`
Children []*Node `json:"children"`
}

// LoadTreeFromConfig returns a list of Node pointers upon success.
// Each Node has an ID corresponding to its index. The Node at index
// 0 is the root.
//
// The tree is read from common.Conf.Import_cache_tree, which may be:
//
//   - a string that starts with "{" and ends with "}": parsed directly
//     as JSON;
//   - any other non-empty string: treated as the path of a JSON file;
//   - a map[string]any: re-encoded to JSON and decoded into a Node.
//
// An empty string, or a value of any other type, leaves the tree as a
// single empty root node. An error is returned when the file cannot be
// read or the JSON cannot be decoded.
func LoadTreeFromConfig() ([]*Node, error) {
	root := &Node{}

	switch treeConf := common.Conf.Import_cache_tree.(type) {
	case string:
		if treeConf == "" {
			// nothing configured: keep the empty root
			break
		}

		var b []byte
		if strings.HasPrefix(treeConf, "{") && strings.HasSuffix(treeConf, "}") {
			// inline JSON document
			b = []byte(treeConf)
		} else {
			var err error
			if b, err = ioutil.ReadFile(treeConf); err != nil {
				return nil, fmt.Errorf("could not open import tree file (%v): %w", treeConf, err)
			}
		}

		if err := json.Unmarshal(b, root); err != nil {
			return nil, fmt.Errorf("could not parse import tree file (%v): %w", treeConf, err)
		}
	case map[string]any:
		// round-trip through JSON so the generic map is decoded with
		// the same field tags as a JSON document would be
		b, err := json.Marshal(treeConf)
		if err != nil {
			return nil, err
		}
		if err := json.Unmarshal(b, root); err != nil {
			return nil, err
		}
	}

	// assign every node an ID, 0-N
	nodes := []*Node{}
	recursiveNodeInit(root, &nodes)

	return nodes, nil
}



// recursiveNodeInit numbers node and all of its descendants in
// pre-order, appending each one to *nodes so that a node's ID is its
// index in that slice, and points every child back at its parent.
func recursiveNodeInit(node *Node, nodes *[]*Node) {
	list := append(*nodes, node)
	node.ID = len(list) - 1
	*nodes = list

	for i := range node.Children {
		kid := node.Children[i]
		kid.parent = node
		recursiveNodeInit(kid, nodes)
	}
}

// isSubset reports whether each element of A also occurs somewhere in
// B. An empty A is a subset of anything.
func isSubset(A []string, B []string) bool {
nextItem:
	for i := range A {
		for j := range B {
			if A[i] == B[j] {
				continue nextItem
			}
		}
		// scanned all of B without a match
		return false
	}
	return true
}

// findDiff collects, in their original order, the elements of A that do
// not occur in B. The result is never nil, even when nothing is left.
func findDiff(A []string, B []string) []string {
	missing := make([]string, 0)
	for _, item := range A {
		// advance until a match is found or B is exhausted
		idx := 0
		for idx < len(B) && B[idx] != item {
			idx++
		}
		if idx == len(B) {
			missing = append(missing, item)
		}
	}
	return missing
}

// FindEligibleZygotes finds the IDs of Zygotes that could be used to
// create a Sandbox with the desired packages. A Zygote is eligible
// only if it (and its ancestors) together have a subset of the desired
// package set (we don't want to expose functions to packages they
// didn't ask for).
//
// IDs are appended to eligible along a single path from this node down
// to the most specific eligible Zygote, so eligible[i] is the parent of
// eligible[i+1]. The return value reports whether this node itself is
// eligible; when it is not, eligible is left untouched.
func (node *Node) FindEligibleZygotes(packages []string, eligible *[]int) bool {
	// a node that imports anything the lambda did not ask for rules
	// out both itself and its whole subtree
	if wanted := isSubset(node.Packages, packages); !wanted {
		return false
	}

	*eligible = append(*eligible, node.ID)

	// Descendants are preferred over this node because they have more
	// packages pre-imported. They only need to be checked against what
	// this node has not already covered.
	remainingPackages := findDiff(packages, node.Packages)
	for i := 0; i < len(node.Children); i++ {
		// the first eligible child wins: we want one root-to-leaf
		// chain, not every alternative
		if node.Children[i].FindEligibleZygotes(remainingPackages, eligible) {
			break
		}
	}

	return true
}

// AllPackages returns the packages of this node followed by those of
// each ancestor in turn, ending with the root's. The result is never
// nil.
func (node *Node) AllPackages() []string {
	all := []string{}
	// walk up the parent chain, collecting from the node currently
	// being visited (not from the starting node each time)
	for curr := node; curr != nil; curr = curr.parent {
		all = append(all, curr.Packages...)
	}
	return all
}
Loading