Educational Framework
The Educational Framework (EDF) is a deep learning framework for educational purposes, written in about 150 lines of Python and NumPy. It was written by David McAllester for the course TTIC 31230: Fundamentals of Deep Learning (Fall 2020, with lecture videos).
Here is the complete source code for the framework:
import numpy as np

DT = np.float32
learning_rate = .001

######################## Clearing the Computation Graph ######################

# This should be done immediately before creating a new computation graph.

def clear_compgraph():
    global CompNodes, Parameters
    CompNodes = []
    Parameters = []

########################## Forward-Backward and SGD ##########################

def Forward():
    for c in CompNodes:
        c.forward()

def Backward(loss):
    for c in CompNodes + Parameters:
        c.grad = np.zeros(c.value.shape, dtype=DT)
    # The convention is to compute the average gradient over the batch.
    loss.grad = np.ones(loss.value.shape) / len(loss.value)
    for c in CompNodes[::-1]:
        c.backward()

def SGD():
    for p in Parameters:
        p.SGD()

############################ Computation Graphs ##############################

"""
There are three kinds of nodes in a computation graph: inputs, parameters,
and computed nodes (CompNodes).  Computed nodes are defined on a case-by-case
basis.  The Sigmoid class is defined as an example.  Other classes are
defined below.
"""

class Input:
    def __init__(self):
        pass

    def addgrad(self, delta):
        pass

class Parameter:
    def __init__(self, value):
        self.value = DT(value)
        Parameters.append(self)

    def addgrad(self, delta):
        self.grad += np.sum(delta, axis=0)

    def SGD(self):  # this is a default
        self.value -= learning_rate * self.grad

class CompNode:
    def addgrad(self, delta):
        self.grad += delta

################## Parameter Packages and some CompNodes #####################

class ParameterPackage:
    pass

class AffineParams(ParameterPackage):
    def __init__(self, nInputs, nOutputs):
        X = Xavier(nInputs)
        self.w = Parameter(np.random.uniform(-X, X, (nInputs, nOutputs)))
        self.b = Parameter(np.zeros(nOutputs))

def Xavier(nInputs):
    return np.sqrt(3.0 / nInputs)

class Affine(CompNode):
    def __init__(self, Phi, x):
        CompNodes.append(self)
        self.x = x
        self.Phi = Phi

    def forward(self):
        # The addition broadcasts b over the batch.
        self.value = np.matmul(self.x.value, self.Phi.w.value) + self.Phi.b.value

    def backward(self):
        self.x.addgrad(np.matmul(self.grad, self.Phi.w.value.transpose()))
        self.Phi.b.addgrad(self.grad)
        self.Phi.w.addgrad(self.x.value[:, :, np.newaxis] * self.grad[:, np.newaxis, :])

class Sigmoid(CompNode):
    def __init__(self, x):
        CompNodes.append(self)
        self.x = x

    def forward(self):
        bounded = np.maximum(-10, np.minimum(10, self.x.value))  # blocks numerical warnings
        self.value = 1 / (1 + np.exp(-bounded))

    def backward(self):
        self.x.addgrad(self.grad * self.value * (1 - self.value))

class Relu(CompNode):
    def __init__(self, x):
        CompNodes.append(self)
        self.x = x

    def forward(self):
        self.value = np.maximum(0, self.x.value)

    def backward(self):
        self.x.addgrad(np.greater(self.x.value, 0) * self.grad)

class Softmax(CompNode):
    def __init__(self, s):  # s has shape (nBatch, nLabels)
        CompNodes.append(self)
        self.s = s

    def forward(self):
        smax = np.max(self.s.value, axis=1, keepdims=True)
        bounded = np.maximum(-10, self.s.value - smax)  # blocks numerical warnings
        es = np.exp(bounded)
        self.value = es / np.sum(es, axis=1, keepdims=True)

    def backward(self):
        # p dot p.grad, with shape (nBatch, 1)
        p_dot_pgrad = np.matmul(self.value[:, np.newaxis, :],
                                self.grad[:, :, np.newaxis]).squeeze(-1)
        # p_dot_pgrad is broadcast over the labels.
        self.s.addgrad(self.value * (self.grad - p_dot_pgrad))

class LogLoss(CompNode):
    def __init__(self, p, y):
        if not isinstance(p, CompNode):
            print('LogLoss probability vector is not a CompNode')
            raise ValueError
        CompNodes.append(self)
        self.p = p
        self.y = y

    def forward(self):
        dLabels = self.p.value.shape[1]
        dBatch = len(self.y.value)
        # p.reshape((-1,))[picker][b] = p[b, y[b]] where b is the batch index.
        self.picker = np.arange(dBatch) * dLabels + self.y.value
        self.value = -np.log(self.p.value.reshape((-1,))[self.picker])

    def backward(self):
        flatpval = self.p.value.reshape((-1,))
        flatpgrad = self.p.grad.reshape((-1,))
        flatpgrad[self.picker] -= self.grad / flatpval[self.picker]
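The source above defines the graph-building API but no example of using it. The following is a minimal sketch of a training loop, assuming the EDF code above is in scope; the toy classification task, network sizes, learning schedule, and step count are illustrative choices and not part of the framework itself.

# A minimal usage sketch (not part of the framework): train a small MLP
# on a toy 2-D problem, labeling points by the sign of x0 + x1.
np.random.seed(0)
nBatch, nInputs, nHidden, nLabels = 64, 2, 16, 2

clear_compgraph()                      # reset the global node lists
x = Input()                            # will hold a (nBatch, nInputs) array
y = Input()                            # will hold (nBatch,) integer labels
Phi1 = AffineParams(nInputs, nHidden)
h = Relu(Affine(Phi1, x))
Phi2 = AffineParams(nHidden, nLabels)
p = Softmax(Affine(Phi2, h))
loss = LogLoss(p, y)

for step in range(500):
    batch = np.random.uniform(-1, 1, (nBatch, nInputs)).astype(DT)
    x.value = batch
    y.value = (batch.sum(axis=1) > 0).astype(np.int64)
    Forward()                          # run CompNodes in creation order
    Backward(loss)                     # zero grads, then backpropagate
    SGD()                              # gradient step on every Parameter
    if step % 100 == 0:
        print(step, loss.value.mean())

Note that the graph is built once and reused: each step overwrites the Input values with the next minibatch, and Forward, Backward, and SGD walk the global CompNodes and Parameters lists that the constructors populated.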