axes: tuple or list of ints, optional
If specified, it must be a tuple or list which contains a permutation of [0,1,…,N-1] where N is the number of axes of a. The i’th axis of the returned array will correspond to the axis numbered axes[i] of the input. If not specified, defaults to range(a.ndim)[::-1], which reverses the order of the axes.
In short, axes[i] = n means that the i-th dimension of the result is the n-th dimension of the original tensor array. For example (see the NumPy check below):
axes = [1, 0, 2]
i = 0, axes[0] = 1, so dimension 0 of the result is dimension 1 of the original
i = 1, axes[1] = 0, so dimension 1 of the result is dimension 0 of the original
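As a quick NumPy check of this rule (the shape (2, 3, 4) below is just an arbitrary example):

import numpy as np

a = np.zeros((2, 3, 4))                  # original shape: (2, 3, 4)
b = np.transpose(a, axes=[1, 0, 2])
print(b.shape)                           # (3, 2, 4): result dim 0 <- original dim 1,
                                         # result dim 1 <- original dim 0, dim 2 unchanged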
def compute(self, a):
    ### BEGIN YOUR SOLUTION
    dim = list(range(len(a.shape)))
    if self.axes is None:
        # Default behaviour: swap the last two axes
        dim[-2], dim[-1] = dim[-1], dim[-2]
    else:
        # Swap exactly the two axes given in self.axes
        dim[self.axes[0]], dim[self.axes[1]] = dim[self.axes[1]], dim[self.axes[0]]
    return array_api.transpose(a, dim)
    ### END YOUR SOLUTION
""" Sample code automatically generated on 2024-04-23 08:02:21 by www.matrixcalculus.org from input d/dX X' = \mathbbT where X is a matrix The generated code is provided "as is" without warranty of any kind. """
from __future__ import division, print_function, absolute_import
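Consistent with the derivative above (transposition is a linear permutation of entries, so its adjoint is just the same permutation applied to the upstream gradient), here is a minimal sketch of the matching gradient method, assuming a needle-style transpose(out_grad, axes) helper:

def gradient(self, out_grad, node):
    ### BEGIN YOUR SOLUTION
    # The adjoint of a transpose is the same axis swap applied
    # to the incoming gradient.
    return transpose(out_grad, axes=self.axes)
    ### END YOUR SOLUTION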
def find_topo_sort(node_list: List[Value]) -> List[Value]:
    """Given a list of nodes, return a topological sort list of nodes ending in them.

    A simple algorithm is to do a post-order DFS traversal on the given nodes,
    going backwards based on input edges. Since a node is added to the ordering
    after all its predecessors are traversed due to post-order DFS, we get a
    topological sort.
    """
    ### BEGIN YOUR SOLUTION
    # visited: a set recording the nodes that have already been visited
    # topo_order: a list collecting the nodes in topological order
    visited = set()
    topo_order = []
    # root_node = node_list[-1]
    # for node in root_node.inputs:
    #     topo_sort_dfs(node, visited, topo_order)
    # visited.add(root_node)
    # topo_order.append(root_node)
    for node in node_list:
        topo_sort_dfs(node, visited=visited, topo_order=topo_order)
    return topo_order
    ### END YOUR SOLUTION
def topo_sort_dfs(node, visited, topo_order):
    """Post-order DFS"""
    ### BEGIN YOUR SOLUTION
    if node in visited:
        return
    visited.add(node)
    # Recurse into every predecessor first; leaf nodes have an empty
    # inputs list, so the loop is simply skipped for them.
    for pre_node in node.inputs:
        topo_sort_dfs(pre_node, visited, topo_order)
    # Post-order: a node is appended only after all of its inputs,
    # which is exactly what makes the result a topological order.
    topo_order.append(node)
    ### END YOUR SOLUTION
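To see the post-order property in isolation, here is a toy version on a plain dict-based graph (independent of needle's Value/Tensor classes; the names are made up for illustration):

def toy_topo_sort(outputs, inputs_of):
    visited, order = set(), []

    def dfs(n):
        if n in visited:
            return
        visited.add(n)
        for p in inputs_of.get(n, []):   # visit all predecessors first
            dfs(p)
        order.append(n)                  # post-order append

    for n in outputs:
        dfs(n)
    return order

# d feeds b and c, which both feed a
inputs_of = {"a": ["b", "c"], "b": ["d"], "c": ["d"]}
print(toy_topo_sort(["a"], inputs_of))   # ['d', 'b', 'c', 'a']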
def compute_gradient_of_variables(output_tensor, out_grad):
    """Take gradient of output node with respect to each node in node_list.

    Store the computed result in the grad field of each Variable.
    """
    # a map from node to a list of gradient contributions from each output node
    node_to_output_grads_list: Dict[Tensor, List[Tensor]] = {}
    # Special note on initializing gradient of output_tensor:
    # we are really taking a derivative of the scalar reduce_sum(output_node)
    # instead of the vector output_node. But this is the common case for loss functions.
    node_to_output_grads_list[output_tensor] = [out_grad]
    # Traverse graph in reverse topological order given the output_node that we are taking gradient wrt.
    reverse_topo_order = list(reversed(find_topo_sort([output_tensor])))
    ### BEGIN YOUR SOLUTION
    for inode in reverse_topo_order:
        # Sum up all partial adjoints contributed to this node by its users
        v_i = sum_node_list(node_to_output_grads_list[inode])
        inode.grad = v_i

        # Leaf nodes (no op) have no inputs to propagate into
        if inode.op is None:
            continue

        # Ask the op for the partial adjoint of each of its inputs, then
        # accumulate them into the contribution lists of those inputs
        v_k_i_tuple = inode.op.gradient_as_tuple(out_grad=v_i, node=inode)
        for knode, v_k_i in zip(inode.inputs, v_k_i_tuple):
            if knode not in node_to_output_grads_list:
                node_to_output_grads_list[knode] = []
            node_to_output_grads_list[knode].append(v_k_i)
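A tiny hand-computed example of why the contributions have to be summed: for f(x1, x2) = x1 * x2 + x1, the node x1 is used by both the multiply and the add, so it receives two partial adjoints that sum_node_list adds together:

# f(x1, x2) = x1 * x2 + x1, evaluated at x1 = 2, x2 = 3; adjoint of f is 1.
x1, x2 = 2.0, 3.0
g_x1_from_mul = 1.0 * x2                 # contribution through the multiply node
g_x1_from_add = 1.0 * 1.0                # contribution through the add node
g_x1 = g_x1_from_mul + g_x1_from_add     # 4.0 == df/dx1 = x2 + 1
g_x2 = 1.0 * x1                          # 2.0 == df/dx2 = x1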
class ReLU(TensorOp):
    def compute(self, a):
        ### BEGIN YOUR SOLUTION
        return array_api.maximum(0, a)
        ### END YOUR SOLUTION
    def gradient(self, out_grad, node):
        ### BEGIN YOUR SOLUTION
        a = node.inputs[0].realize_cached_data()
        # a > 0 yields a tensor whose elements are booleans
        return out_grad * Tensor(a > 0)
        ### END YOUR SOLUTION
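A quick NumPy-only sanity check of this masking behaviour (illustrative values only):

import numpy as np

a = np.array([-1.0, 0.0, 2.0])
out_grad = np.array([10.0, 10.0, 10.0])
print(out_grad * (a > 0))                # [ 0.  0. 10.]: gradient is zeroed wherever a <= 0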
def nn_epoch(X, y, W1, W2, lr=0.1, batch=100):
    """Run a single epoch of SGD for a two-layer neural network defined by the
    weights W1 and W2 (with no bias terms):
        logits = ReLU(X * W1) * W2
    The function should use the step size lr, and the specified batch size (and
    again, without randomizing the order of X).

    Args:
        X (np.ndarray[np.float32]): 2D input array of size (num_examples x input_dim).
        y (np.ndarray[np.uint8]): 1D class label array of size (num_examples,)
        W1 (ndl.Tensor[np.float32]): 2D array of first layer weights, of shape
            (input_dim, hidden_dim)
        W2 (ndl.Tensor[np.float32]): 2D array of second layer weights, of shape
            (hidden_dim, num_classes)
        lr (float): step size (learning rate) for SGD
        batch (int): size of SGD mini-batch

    Returns:
        Tuple: (W1, W2)
            W1: ndl.Tensor[np.float32]
            W2: ndl.Tensor[np.float32]
    """
    ### BEGIN YOUR SOLUTION
    num_examples, num_classes = X.shape[0], W2.shape[1]
    for i in range(0, num_examples, batch):
        sample = ndl.Tensor(X[i:i+batch, :])
        label = y[i:i+batch]