diff --git a/htlcswitch/circuit_map.go b/htlcswitch/circuit_map.go index 0e56d43e..27c9096f 100644 --- a/htlcswitch/circuit_map.go +++ b/htlcswitch/circuit_map.go @@ -225,8 +225,10 @@ func (cm *circuitMap) initBuckets() error { // restoreMemState loads the contents of the half circuit and full circuit // buckets from disk and reconstructs the in-memory representation of the -// circuit map. Afterwards, the state of the hash index is reconstructed using -// the recovered set of full circuits. +// circuit map. Afterwards, the state of the hash index is reconstructed using +// the recovered set of full circuits. This method will also remove any stray +// keystones, which are those that appear fully-opened, but have no pending +// circuit related to the intended incoming link. func (cm *circuitMap) restoreMemState() error { log.Infof("Restoring in-memory circuit state from disk") @@ -235,7 +237,7 @@ func (cm *circuitMap) restoreMemState() error { pending = make(map[CircuitKey]*PaymentCircuit) ) - if err := cm.cfg.DB.View(func(tx *bolt.Tx) error { + if err := cm.cfg.DB.Update(func(tx *bolt.Tx) error { // Restore any of the circuits persisted in the circuit bucket // back into memory. circuitBkt := tx.Bucket(circuitAddKey) @@ -264,6 +266,7 @@ func (cm *circuitMap) restoreMemState() error { return ErrCorruptedCircuitMap } + var strayKeystones []Keystone if err := keystoneBkt.ForEach(func(k, v []byte) error { var ( inKey CircuitKey @@ -280,15 +283,45 @@ func (cm *circuitMap) restoreMemState() error { // Retrieve the pending circuit, set its keystone, then // add it to the opened map. - circuit := pending[inKey] - circuit.Outgoing = outKey - opened[*outKey] = circuit + circuit, ok := pending[inKey] + if ok { + circuit.Outgoing = outKey + opened[*outKey] = circuit + } else { + strayKeystones = append(strayKeystones, Keystone{ + InKey: inKey, + OutKey: *outKey, + }) + } return nil }); err != nil { return err } + // If any stray keystones were found, we'll proceed to prune + // them from the circuit map's persistent storage. This may + // manifest on older nodes that had updated channels before + // their short channel id was set properly. We believe this + // issue has been fixed, though this will allow older nodes to + // recover without additional intervention. + for _, strayKeystone := range strayKeystones { + // As a precaution, we will only cleanup keystones + // related to locally-initiated payments. If a + // documented case of stray keystones emerges for + // forwarded payments, this check should be removed, but + // with extreme caution. + if strayKeystone.OutKey.ChanID != sourceHop { + continue + } + + log.Infof("Removing stray keystone: %v", strayKeystone) + err := keystoneBkt.Delete(strayKeystone.OutKey.Bytes()) + if err != nil { + return err + } + } + return nil }); err != nil { @@ -495,8 +528,13 @@ func (cm *circuitMap) LookupByPaymentHash(hash [32]byte) []*PaymentCircuit { func (cm *circuitMap) CommitCircuits(circuits ...*PaymentCircuit) ( *CircuitFwdActions, error) { + inKeys := make([]CircuitKey, 0, len(circuits)) + for _, circuit := range circuits { + inKeys = append(inKeys, circuit.Incoming) + } + log.Tracef("Committing fresh circuits: %v", newLogClosure(func() string { - return spew.Sdump(circuits) + return spew.Sdump(inKeys) })) actions := &CircuitFwdActions{} @@ -765,10 +803,12 @@ func (cm *circuitMap) CloseCircuit(outKey CircuitKey) (*PaymentCircuit, error) { return circuit, nil } -// DeleteCircuits destroys the target circuit by removing it from the circuit map, -// additionally removing the circuit's keystone if the HTLC was forwarded -// through an outgoing link. The circuit should be identified by its incoming -// circuit key. +// DeleteCircuits destroys the target circuits by removing them from the circuit +// map, additionally removing the circuits' keystones if any HTLCs were +// forwarded through an outgoing link. The circuits should be identified by its +// incoming circuit key. If a given circuit is not found in the circuit map, it +// will be ignored from the query. This would typically indicate that the +// circuit was already cleaned up at a different point in time. func (cm *circuitMap) DeleteCircuits(inKeys ...CircuitKey) error { log.Tracef("Deleting resolved circuits: %v", newLogClosure(func() string { @@ -781,22 +821,15 @@ func (cm *circuitMap) DeleteCircuits(inKeys ...CircuitKey) error { ) cm.mtx.Lock() - // First check that all provided keys are still known to the circuit - // map. + // Remove any references to the circuits from memory, keeping track of + // which circuits were removed, and which ones had been marked closed. + // This can be used to restore these entries later if the persistent + // removal fails. for _, inKey := range inKeys { - if _, ok := cm.pending[inKey]; !ok { - cm.mtx.Unlock() - return ErrUnknownCircuit + circuit, ok := cm.pending[inKey] + if !ok { + continue } - } - - // If no offenders were found, remove any references to the circuit from - // memory, keeping track of which circuits were removed, and which ones - // had been marked closed. This can be used to restore these entries - // later if the persistent removal fails. - for _, inKey := range inKeys { - circuit := cm.pending[inKey] - delete(cm.pending, inKey) if _, ok := cm.closed[inKey]; ok { diff --git a/htlcswitch/circuit_test.go b/htlcswitch/circuit_test.go index 046c9fde..b693ebe7 100644 --- a/htlcswitch/circuit_test.go +++ b/htlcswitch/circuit_test.go @@ -483,8 +483,9 @@ func TestCircuitMapPersistence(t *testing.T) { // Removing already-removed circuit should return an error. err = circuitMap.DeleteCircuits(circuit1.Incoming) - if err == nil { - t.Fatal("Remove did not return expected not found error") + if err != nil { + t.Fatal("Unexpected failure when deleting already "+ + "deleted circuit: %v", err) } // Verify that nothing related to hash1 has changed @@ -518,10 +519,17 @@ func TestCircuitMapPersistence(t *testing.T) { assertNumCircuitsWithHash(t, circuitMap, hash2, 0) assertNumCircuitsWithHash(t, circuitMap, hash3, 1) - // Remove last remaining circuit with payment hash hash3. - err = circuitMap.DeleteCircuits(circuit3.Incoming) + // In removing the final circuit, we will try and remove all other known + // circuits as well. Any circuits that are unknown to the circuit map + // will be ignored, and only circuit 3 should be cause any change in the + // state. + err = circuitMap.DeleteCircuits( + circuit1.Incoming, circuit2.Incoming, + circuit3.Incoming, circuit4.Incoming, + ) if err != nil { - t.Fatalf("Remove returned unexpected error: %v", err) + t.Fatalf("Unexpected failure when removing circuit while also "+ + "deleting already deleted circuits: %v", err) } // Check that the circuit map is empty, even after restarting. diff --git a/htlcswitch/link.go b/htlcswitch/link.go index 3112b8ed..0c1df9dc 100644 --- a/htlcswitch/link.go +++ b/htlcswitch/link.go @@ -585,7 +585,7 @@ func (l *channelLink) syncChanStates() error { // Ensure that all packets have been have been removed from the // link's mailbox. - if err := l.ackDownStreamPackets(true); err != nil { + if err := l.ackDownStreamPackets(); err != nil { return err } @@ -1493,12 +1493,7 @@ func (l *channelLink) handleUpstreamMsg(msg lnwire.Message) { // removed from the circuit map before removing them from the link's mailbox, // otherwise it could be possible for some circuit to be missed if this link // flaps. -// -// The `forgive` flag allows this method to tolerate restarts, and ignores -// errors that could be caused by a previous circuit deletion. Under normal -// operation, this is set to false so that we would fail the link if we were -// unable to remove a circuit. -func (l *channelLink) ackDownStreamPackets(forgive bool) error { +func (l *channelLink) ackDownStreamPackets() error { // First, remove the downstream Add packets that were included in the // previous commitment signature. This will prevent the Adds from being // replayed if this link disconnects. @@ -1524,21 +1519,6 @@ func (l *channelLink) ackDownStreamPackets(forgive bool) error { case nil: // Successful deletion. - case ErrUnknownCircuit: - if forgive { - // After a restart, we may have already removed this - // circuit. Since it shouldn't be possible for a - // circuit to be closed by different htlcs, we assume - // this error signals that the whole batch was - // successfully removed. - l.warnf("forgiving unknown circuit error after " + - "attempting deletion, circuit was probably " + - "removed before shutting down.") - break - } - - return err - default: l.errorf("unable to delete %d circuits: %v", len(l.closedCircuits), err) @@ -1603,7 +1583,7 @@ func (l *channelLink) updateCommitTx() error { return err } - if err := l.ackDownStreamPackets(false); err != nil { + if err := l.ackDownStreamPackets(); err != nil { return err }