Merge pull request #3164 from joostjager/persistent-mc

routing: persistent mission control
Olaoluwa Osuntokun 2019-08-02 15:45:49 -07:00 committed by GitHub
commit 7767eecbb8
18 changed files with 1015 additions and 376 deletions

@ -484,7 +484,7 @@ func serializePaymentAttemptInfo(w io.Writer, a *PaymentAttemptInfo) error {
return err
}
if err := serializeRoute(w, a.Route); err != nil {
if err := SerializeRoute(w, a.Route); err != nil {
return err
}
@ -497,7 +497,7 @@ func deserializePaymentAttemptInfo(r io.Reader) (*PaymentAttemptInfo, error) {
if err != nil {
return nil, err
}
a.Route, err = deserializeRoute(r)
a.Route, err = DeserializeRoute(r)
if err != nil {
return nil, err
}
@ -533,7 +533,8 @@ func deserializeHop(r io.Reader) (*route.Hop, error) {
return h, nil
}
func serializeRoute(w io.Writer, r route.Route) error {
// SerializeRoute serializes a route.
func SerializeRoute(w io.Writer, r route.Route) error {
if err := WriteElements(w,
r.TotalTimeLock, r.TotalAmount, r.SourcePubKey[:],
); err != nil {
@ -553,7 +554,8 @@ func serializeRoute(w io.Writer, r route.Route) error {
return nil
}
func deserializeRoute(r io.Reader) (route.Route, error) {
// DeserializeRoute deserializes a route.
func DeserializeRoute(r io.Reader) (route.Route, error) {
rt := route.Route{}
if err := ReadElements(r,
&rt.TotalTimeLock, &rt.TotalAmount,

@ -203,12 +203,12 @@ func TestRouteSerialization(t *testing.T) {
t.Parallel()
var b bytes.Buffer
if err := serializeRoute(&b, testRoute); err != nil {
if err := SerializeRoute(&b, testRoute); err != nil {
t.Fatal(err)
}
r := bytes.NewReader(b.Bytes())
route2, err := deserializeRoute(r)
route2, err := DeserializeRoute(r)
if err != nil {
t.Fatal(err)
}

@ -3,26 +3,29 @@ package routerrpc
import (
"time"
"github.com/lightningnetwork/lnd/lnwire"
"github.com/btcsuite/btcutil"
)
// RoutingConfig contains the configurable parameters that control routing.
type RoutingConfig struct {
// PenaltyHalfLife defines after how much time a penalized node or
// channel is back at 50% probability.
PenaltyHalfLife time.Duration
// PaymentAttemptPenalty is the virtual cost in path finding weight
// units of executing a payment attempt that fails. It is used to trade
// off potentially better routes against their probability of
// succeeding.
PaymentAttemptPenalty lnwire.MilliSatoshi
// MinProbability defines the minimum success probability of the
// returned route.
MinRouteProbability float64
// MinRouteProbability is the minimum required route success probability
// to attempt the payment.
MinRouteProbability float64 `long:"minrtprob" description:"Minimum required route success probability to attempt the payment"`
// AprioriHopProbability is the assumed success probability of a hop in
// a route when no other information is available.
AprioriHopProbability float64
AprioriHopProbability float64 `long:"apriorihopprob" description:"Assumed success probability of a hop in a route when no other information is available."`
// PenaltyHalfLife defines after how much time a penalized node or
// channel is back at 50% probability.
PenaltyHalfLife time.Duration `long:"penaltyhalflife" description:"Defines the duration after which a penalized node or channel is back at 50% probability"`
// AttemptCost is the virtual cost in path finding weight units of
// executing a payment attempt that fails. It is used to trade off
// potentially better routes against their probability of succeeding.
AttemptCost btcutil.Amount `long:"attemptcost" description:"The (virtual) cost in sats of a failed payment attempt"`
// MaxMcHistory defines the maximum number of payment results that
// are held on disk by mission control.
MaxMcHistory int `long:"maxmchistory" description:"the maximum number of payment results that are held on disk by mission control"`
}
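
The `long` and `description` struct tags added above are what turn these fields into user-facing options. Below is a minimal sketch of how such tags are typically consumed, assuming the jessevdk/go-flags parser that lnd uses for its configuration; demoConfig and the un-namespaced option names are illustrative only (in lnd these surface under the routerrpc sub-config).

package main

import (
	"fmt"

	flags "github.com/jessevdk/go-flags"
)

// demoConfig mirrors two RoutingConfig fields; the tag values are copied
// from the diff above, the struct itself is hypothetical.
type demoConfig struct {
	MinRouteProbability float64 `long:"minrtprob" description:"Minimum required route success probability to attempt the payment"`
	MaxMcHistory        int     `long:"maxmchistory" description:"the maximum number of payment results that are held on disk by mission control"`
}

func main() {
	// Field values set before parsing act as defaults; command line flags
	// such as --minrtprob=0.05 override them.
	cfg := demoConfig{MinRouteProbability: 0.01, MaxMcHistory: 1000}
	if _, err := flags.Parse(&cfg); err != nil {
		return
	}
	fmt.Printf("minrtprob=%v maxmchistory=%v\n",
		cfg.MinRouteProbability, cfg.MaxMcHistory)
}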

@ -3,10 +3,6 @@
package routerrpc
import (
"time"
"github.com/btcsuite/btcutil"
"github.com/lightningnetwork/lnd/lnwire"
"github.com/lightningnetwork/lnd/macaroons"
"github.com/lightningnetwork/lnd/routing"
)
@ -17,28 +13,13 @@ import (
// options, while if able to be populated, the latter fields MUST also be
// specified.
type Config struct {
RoutingConfig
// RouterMacPath is the path for the router macaroon. If unspecified
// then we assume that the macaroon will be found under the network
// directory, named DefaultRouterMacFilename.
RouterMacPath string `long:"routermacaroonpath" description:"Path to the router macaroon"`
// MinProbability is the minimum required route success probability to
// attempt the payment.
MinRouteProbability float64 `long:"minrtprob" description:"Minimum required route success probability to attempt the payment"`
// AprioriHopProbability is the assumed success probability of a hop in
// a route when no other information is available.
AprioriHopProbability float64 `long:"apriorihopprob" description:"Assumed success probability of a hop in a route when no other information is available."`
// PenaltyHalfLife defines after how much time a penalized node or
// channel is back at 50% probability.
PenaltyHalfLife time.Duration `long:"penaltyhalflife" description:"Defines the duration after which a penalized node or channel is back at 50% probability"`
// AttemptCost is the virtual cost in path finding weight units of
// executing a payment attempt that fails. It is used to trade off
// potentially better routes against their probability of succeeding.
AttemptCost int64 `long:"attemptcost" description:"The (virtual) cost in sats of a failed payment attempt"`
// NetworkDir is the main network directory wherein the router rpc
// server will find the macaroon named DefaultRouterMacFilename.
NetworkDir string
@ -62,13 +43,17 @@ type Config struct {
// DefaultConfig defines the config defaults.
func DefaultConfig() *Config {
return &Config{
defaultRoutingConfig := RoutingConfig{
AprioriHopProbability: routing.DefaultAprioriHopProbability,
MinRouteProbability: routing.DefaultMinRouteProbability,
PenaltyHalfLife: routing.DefaultPenaltyHalfLife,
AttemptCost: int64(
routing.DefaultPaymentAttemptPenalty.ToSatoshis(),
),
AttemptCost: routing.DefaultPaymentAttemptPenalty.
ToSatoshis(),
MaxMcHistory: routing.DefaultMaxMcHistory,
}
return &Config{
RoutingConfig: defaultRoutingConfig,
}
}
@ -77,9 +62,8 @@ func GetRoutingConfig(cfg *Config) *RoutingConfig {
return &RoutingConfig{
AprioriHopProbability: cfg.AprioriHopProbability,
MinRouteProbability: cfg.MinRouteProbability,
PaymentAttemptPenalty: lnwire.NewMSatFromSatoshis(
btcutil.Amount(cfg.AttemptCost),
),
PenaltyHalfLife: cfg.PenaltyHalfLife,
AttemptCost: cfg.AttemptCost,
PenaltyHalfLife: cfg.PenaltyHalfLife,
MaxMcHistory: cfg.MaxMcHistory,
}
}
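
A quick sketch of how the two helpers now compose, assuming the routerrpc package as defined in this diff; the printed fields are just a sample.

package main

import (
	"fmt"

	"github.com/lightningnetwork/lnd/lnrpc/routerrpc"
)

func main() {
	// Start from the package defaults, then extract the routing
	// parameters that the router server consumes.
	cfg := routerrpc.DefaultConfig()
	rc := routerrpc.GetRoutingConfig(cfg)

	// AttemptCost stays a btcutil.Amount in satoshis here; the conversion
	// to millisatoshi now happens at the call site (see the server.go
	// hunk at the end of this diff).
	fmt.Println(rc.AttemptCost, rc.PenaltyHalfLife, rc.MaxMcHistory)
}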

@ -19,7 +19,9 @@ func GetRoutingConfig(cfg *Config) *RoutingConfig {
return &RoutingConfig{
AprioriHopProbability: routing.DefaultAprioriHopProbability,
MinRouteProbability: routing.DefaultMinRouteProbability,
PaymentAttemptPenalty: routing.DefaultPaymentAttemptPenalty,
PenaltyHalfLife: routing.DefaultPenaltyHalfLife,
AttemptCost: routing.DefaultPaymentAttemptPenalty.
ToSatoshis(),
PenaltyHalfLife: routing.DefaultPenaltyHalfLife,
MaxMcHistory: routing.DefaultMaxMcHistory,
}
}

@ -64,7 +64,7 @@ type MissionControl interface {
// ResetHistory resets the history of MissionControl returning it to a state as
// if no payment attempts have been made.
ResetHistory()
ResetHistory() error
// GetHistorySnapshot takes a snapshot from the current mission control state
// and actual probability estimates.

@ -147,7 +147,9 @@ func (m *mockMissionControl) GetEdgeProbability(fromNode route.Vertex,
return testMissionControlProb
}
func (m *mockMissionControl) ResetHistory() {}
func (m *mockMissionControl) ResetHistory() error {
return nil
}
func (m *mockMissionControl) GetHistorySnapshot() *routing.MissionControlSnapshot {
return nil

@ -436,7 +436,10 @@ func marshallChannelUpdate(update *lnwire.ChannelUpdate) *ChannelUpdate {
func (s *Server) ResetMissionControl(ctx context.Context,
req *ResetMissionControlRequest) (*ResetMissionControlResponse, error) {
s.cfg.RouterBackend.MissionControl.ResetHistory()
err := s.cfg.RouterBackend.MissionControl.ResetHistory()
if err != nil {
return nil, err
}
return &ResetMissionControlResponse{}, nil
}
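
Because ResetHistory can now fail (the persisted results must be cleared from the db), the error reaches RPC callers. A sketch of such a caller, assuming an established *grpc.ClientConn that already carries the router macaroon credentials:

package mcclient

import (
	"context"

	"github.com/lightningnetwork/lnd/lnrpc/routerrpc"
	"google.golang.org/grpc"
)

// resetMissionControl wipes the persisted mission control history. A failed
// db update now surfaces here instead of being silently dropped.
func resetMissionControl(conn *grpc.ClientConn) error {
	client := routerrpc.NewRouterClient(conn)
	_, err := client.ResetMissionControl(
		context.Background(), &routerrpc.ResetMissionControlRequest{},
	)
	return err
}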

@ -1113,10 +1113,16 @@ func DecodeFailure(r io.Reader, pver uint32) (FailureMessage, error) {
dataReader := bytes.NewReader(failureData)
return DecodeFailureMessage(dataReader, pver)
}
// DecodeFailureMessage decodes just the failure message, ignoring any padding
// that may be present at the end.
func DecodeFailureMessage(r io.Reader, pver uint32) (FailureMessage, error) {
// Once we have the failure data, we can obtain the failure code from
// the first two bytes of the buffer.
var codeBytes [2]byte
if _, err := io.ReadFull(dataReader, codeBytes[:]); err != nil {
if _, err := io.ReadFull(r, codeBytes[:]); err != nil {
return nil, fmt.Errorf("unable to read failure code: %v", err)
}
failCode := FailCode(binary.BigEndian.Uint16(codeBytes[:]))
@ -1132,10 +1138,9 @@ func DecodeFailure(r io.Reader, pver uint32) (FailureMessage, error) {
// well.
switch f := failure.(type) {
case Serializable:
if err := f.Decode(dataReader, pver); err != nil {
if err := f.Decode(r, pver); err != nil {
return nil, fmt.Errorf("unable to decode error "+
"update (type=%T, len_bytes=%v, bytes=%x): %v",
failure, failureLength, failureData[:], err)
"update (type=%T): %v", failure, err)
}
}
@ -1147,26 +1152,11 @@ func DecodeFailure(r io.Reader, pver uint32) (FailureMessage, error) {
func EncodeFailure(w io.Writer, failure FailureMessage, pver uint32) error {
var failureMessageBuffer bytes.Buffer
// First, we'll write out the error code itself into the failure
// buffer.
var codeBytes [2]byte
code := uint16(failure.Code())
binary.BigEndian.PutUint16(codeBytes[:], code)
_, err := failureMessageBuffer.Write(codeBytes[:])
err := EncodeFailureMessage(&failureMessageBuffer, failure, pver)
if err != nil {
return err
}
// Next, some messages have an additional message payload; if this is
// one of those types, then we'll also encode the error payload as
// well.
switch failure := failure.(type) {
case Serializable:
if err := failure.Encode(&failureMessageBuffer, pver); err != nil {
return err
}
}
// The combined size of this message must be below the max allowed
// failure message length.
failureMessage := failureMessageBuffer.Bytes()
@ -1187,6 +1177,32 @@ func EncodeFailure(w io.Writer, failure FailureMessage, pver uint32) error {
)
}
// EncodeFailureMessage encodes just the failure message, without adding a
// length prefix or padding the message for the onion protocol.
func EncodeFailureMessage(w io.Writer, failure FailureMessage, pver uint32) error {
// First, we'll write out the error code itself into the failure
// buffer.
var codeBytes [2]byte
code := uint16(failure.Code())
binary.BigEndian.PutUint16(codeBytes[:], code)
_, err := w.Write(codeBytes[:])
if err != nil {
return err
}
// Next, some messages have an additional message payload; if this is
// one of those types, then we'll also encode the error payload as
// well.
switch failure := failure.(type) {
case Serializable:
if err := failure.Encode(w, pver); err != nil {
return err
}
}
return nil
}
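
Splitting the encoder the same way makes the failure wire format reusable outside the onion context, which is exactly what the mission control store needs. A round-trip sketch using only functions introduced above; protocol version 0 mirrors what the store passes:

package main

import (
	"bytes"
	"log"

	"github.com/lightningnetwork/lnd/lnwire"
)

func main() {
	failure := lnwire.NewTemporaryChannelFailure(nil)

	// Encode just the two failure code bytes plus any payload, without
	// the length prefix and padding used for the onion protocol.
	var b bytes.Buffer
	if err := lnwire.EncodeFailureMessage(&b, failure, 0); err != nil {
		log.Fatal(err)
	}

	// Decode it back; there is no padding to ignore in this case.
	decoded, err := lnwire.DecodeFailureMessage(bytes.NewReader(b.Bytes()), 0)
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("round-tripped failure code: %v", decoded.Code())
}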
// makeEmptyOnionError creates a new empty onion error of the proper concrete
// type based on the passed failure code.
func makeEmptyOnionError(code FailCode) (FailureMessage, error) {

@ -5,6 +5,8 @@ import (
"sync"
"time"
"github.com/coreos/bbolt"
"github.com/lightningnetwork/lnd/channeldb"
"github.com/lightningnetwork/lnd/lnwire"
"github.com/lightningnetwork/lnd/routing/route"
)
@ -38,6 +40,9 @@ const (
// Nodes forward non-strict, so it isn't necessary to apply a less
// restrictive channel level tracking scheme here.
minSecondChanceInterval = time.Minute
// DefaultMaxMcHistory is the default maximum history size.
DefaultMaxMcHistory = 1000
)
// MissionControl contains state which summarizes the past attempts of HTLC
@ -62,6 +67,8 @@ type MissionControl struct {
cfg *MissionControlConfig
store *missionControlStore
sync.Mutex
// TODO(roasbeef): further counters, if vertex continually unavailable,
@ -80,6 +87,10 @@ type MissionControlConfig struct {
// AprioriHopProbability is the assumed success probability of a hop in
// a route when no other information is available.
AprioriHopProbability float64
// MaxMcHistory defines the maximum number of payment results that are
// held on disk.
MaxMcHistory int
}
// nodeHistory contains a summary of payment attempt outcomes involving a
@ -146,30 +157,82 @@ type MissionControlChannelSnapshot struct {
SuccessProb float64
}
// paymentResult is the information that becomes available when a payment
// attempt completes.
type paymentResult struct {
id uint64
timeFwd, timeReply time.Time
route *route.Route
success bool
failureSourceIdx *int
failure lnwire.FailureMessage
}
// NewMissionControl returns a new instance of MissionControl.
func NewMissionControl(cfg *MissionControlConfig) *MissionControl {
func NewMissionControl(db *bbolt.DB, cfg *MissionControlConfig) (
*MissionControl, error) {
log.Debugf("Instantiating mission control with config: "+
"PenaltyHalfLife=%v, AprioriHopProbability=%v",
cfg.PenaltyHalfLife, cfg.AprioriHopProbability)
return &MissionControl{
store, err := newMissionControlStore(db, cfg.MaxMcHistory)
if err != nil {
return nil, err
}
mc := &MissionControl{
history: make(map[route.Vertex]*nodeHistory),
lastSecondChance: make(map[DirectedNodePair]time.Time),
now: time.Now,
cfg: cfg,
store: store,
}
if err := mc.init(); err != nil {
return nil, err
}
return mc, nil
}
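
A sketch of the new constructor in isolation, using a throwaway bolt file; in lnd itself the shared channel database handle is passed in (see the server.go hunk at the end of this diff), and the config values here are illustrative:

package main

import (
	"log"
	"time"

	"github.com/coreos/bbolt"
	"github.com/lightningnetwork/lnd/routing"
)

func main() {
	db, err := bbolt.Open("mc.db", 0600, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// The constructor now replays all persisted payment results via
	// init() before returning, so penalties survive restarts.
	mc, err := routing.NewMissionControl(db, &routing.MissionControlConfig{
		PenaltyHalfLife:       time.Hour,
		AprioriHopProbability: 0.6,
		MaxMcHistory:          routing.DefaultMaxMcHistory,
	})
	if err != nil {
		log.Fatal(err)
	}
	_ = mc
}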
// init initializes mission control with historical data.
func (m *MissionControl) init() error {
log.Debugf("Mission control state reconstruction started")
start := time.Now()
results, err := m.store.fetchAll()
if err != nil {
return err
}
for _, result := range results {
m.applyPaymentResult(result)
}
log.Debugf("Mission control state reconstruction finished: "+
"n=%v, time=%v", len(results), time.Now().Sub(start))
return nil
}
// ResetHistory resets the history of MissionControl returning it to a state as
// if no payment attempts have been made.
func (m *MissionControl) ResetHistory() {
func (m *MissionControl) ResetHistory() error {
m.Lock()
defer m.Unlock()
if err := m.store.clear(); err != nil {
return err
}
m.history = make(map[route.Vertex]*nodeHistory)
m.lastSecondChance = make(map[DirectedNodePair]time.Time)
log.Debugf("Mission control history cleared")
return nil
}
// GetEdgeProbability is expected to return the success probability of a payment
@ -282,22 +345,22 @@ func (m *MissionControl) createHistoryIfNotExists(vertex route.Vertex) *nodeHist
return node
}
// ReportVertexFailure reports a node level failure.
func (m *MissionControl) ReportVertexFailure(v route.Vertex) {
log.Debugf("Reporting vertex %v failure to Mission Control", v)
// reportVertexFailure reports a node level failure.
func (m *MissionControl) reportVertexFailure(timestamp time.Time,
v route.Vertex) {
now := m.now()
log.Debugf("Reporting vertex %v failure to Mission Control", v)
m.Lock()
defer m.Unlock()
history := m.createHistoryIfNotExists(v)
history.lastFail = &now
history.lastFail = &timestamp
}
// ReportEdgePolicyFailure reports a policy related failure.
func (m *MissionControl) ReportEdgePolicyFailure(failedEdge edge) {
now := m.now()
// reportEdgePolicyFailure reports a policy related failure.
func (m *MissionControl) reportEdgePolicyFailure(timestamp time.Time,
failedEdge edge) {
m.Lock()
defer m.Unlock()
@ -306,32 +369,30 @@ func (m *MissionControl) ReportEdgePolicyFailure(failedEdge edge) {
// immediately. If some time has passed since the last policy failure,
// we grant the node a second chance at forwarding the payment.
if m.requestSecondChance(
now, failedEdge.from, failedEdge.to,
timestamp, failedEdge.from, failedEdge.to,
) {
return
}
history := m.createHistoryIfNotExists(failedEdge.from)
history.lastFail = &now
history.lastFail = &timestamp
}
// ReportEdgeFailure reports a channel level failure.
// reportEdgeFailure reports a channel level failure.
//
// TODO(roasbeef): also add value attempted to send and capacity of channel
func (m *MissionControl) ReportEdgeFailure(failedEdge edge,
func (m *MissionControl) reportEdgeFailure(timestamp time.Time, failedEdge edge,
minPenalizeAmt lnwire.MilliSatoshi) {
log.Debugf("Reporting channel %v failure to Mission Control",
failedEdge.channel)
now := m.now()
m.Lock()
defer m.Unlock()
history := m.createHistoryIfNotExists(failedEdge.from)
history.channelLastFail[failedEdge.channel] = &channelHistory{
lastFail: now,
lastFail: timestamp,
minPenalizeAmt: minPenalizeAmt,
}
}
@ -387,3 +448,286 @@ func (m *MissionControl) GetHistorySnapshot() *MissionControlSnapshot {
return &snapshot
}
// ReportPaymentFail reports a failed payment to mission control as input for
// future probability estimates. The failureSourceIdx argument indicates the
// failure source. If it is nil, the failure source is unknown. This function
// returns a bool indicating whether this error is a final error. If it is
// final, a failure reason is returned and no further payment attempts need to
// be made.
func (m *MissionControl) ReportPaymentFail(paymentID uint64, rt *route.Route,
failureSourceIdx *int, failure lnwire.FailureMessage) (bool,
channeldb.FailureReason, error) {
timestamp := m.now()
// TODO(joostjager): Use actual payment initiation time for timeFwd.
result := &paymentResult{
success: false,
timeFwd: timestamp,
timeReply: timestamp,
id: paymentID,
failureSourceIdx: failureSourceIdx,
failure: failure,
route: rt,
}
// Store complete result in database.
if err := m.store.AddResult(result); err != nil {
return false, 0, err
}
// Apply result to update mission control state.
final, reason := m.applyPaymentResult(result)
return final, reason, nil
}
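
A sketch of what a caller of ReportPaymentFail looks like, mirroring the test helper later in this diff; the surrounding package name, the route and payment id are assumed, and the failure value is only an example:

package mcexample

import (
	"log"

	"github.com/lightningnetwork/lnd/lnwire"
	"github.com/lightningnetwork/lnd/routing"
	"github.com/lightningnetwork/lnd/routing/route"
)

// reportFailedAttempt persists a failed attempt and applies it to the
// in-memory mission control state in one call.
func reportFailedAttempt(mc *routing.MissionControl, paymentID uint64,
	rt *route.Route) error {

	// The failure was reported by the first hop of the route.
	srcIdx := 1
	final, reason, err := mc.ReportPaymentFail(
		paymentID, rt, &srcIdx, lnwire.NewTemporaryChannelFailure(nil),
	)
	if err != nil {
		// The result could not be written to the store.
		return err
	}
	if final {
		log.Printf("no further attempts, reason: %v", reason)
	}
	return nil
}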
// applyPaymentResult applies a payment result as input for future probability
// estimates. It returns a bool indicating whether this error is a final error
// and no further payment attempts need to be made.
func (m *MissionControl) applyPaymentResult(result *paymentResult) (
bool, channeldb.FailureReason) {
var (
failureSourceIdxInt int
failure lnwire.FailureMessage
)
if result.failureSourceIdx == nil {
// If the failure message could not be decrypted, attribute the
// failure to our own outgoing channel.
//
// TODO(joostjager): Penalize all channels in the route.
failureSourceIdxInt = 0
failure = lnwire.NewTemporaryChannelFailure(nil)
} else {
failureSourceIdxInt = *result.failureSourceIdx
failure = result.failure
}
var failureVertex route.Vertex
if failureSourceIdxInt > 0 {
failureVertex = result.route.Hops[failureSourceIdxInt-1].PubKeyBytes
} else {
failureVertex = result.route.SourcePubKey
}
log.Tracef("Node %x (index %v) reported failure when sending htlc",
failureVertex, result.failureSourceIdx)
// Always determine the chan id ourselves, because a channel update with
// the id may not be available.
failedEdge, failedAmt := getFailedEdge(
result.route, failureSourceIdxInt,
)
switch failure.(type) {
// If the end destination didn't know the payment
// hash or we sent the wrong payment amount to the
// destination, then we'll terminate immediately.
case *lnwire.FailUnknownPaymentHash:
// TODO(joostjager): Check onionErr.Amount() whether it matches
// what we expect. (Will it ever not match, because if not
// final_incorrect_htlc_amount would be returned?)
return true, channeldb.FailureReasonIncorrectPaymentDetails
// If we sent the wrong amount to the destination, then
// we'll exit early.
case *lnwire.FailIncorrectPaymentAmount:
return true, channeldb.FailureReasonIncorrectPaymentDetails
// If the time-lock that was extended to the final node
// was incorrect, then we can't proceed.
case *lnwire.FailFinalIncorrectCltvExpiry:
// TODO(joostjager): Take into account that second last hop may
// have deliberately handed out an htlc that expires too soon.
// In that case we should continue routing.
return true, channeldb.FailureReasonError
// If we crafted an invalid onion payload for the final
// node, then we'll exit early.
case *lnwire.FailFinalIncorrectHtlcAmount:
// TODO(joostjager): Take into account that second last hop may
// have deliberately handed out an htlc with a too low value. In
// that case we should continue routing.
return true, channeldb.FailureReasonError
// Similarly, if the HTLC expiry that we extended to
// the final hop expires too soon, then we'll fail the
// payment.
//
// TODO(roasbeef): can happen due to a race condition, try
// again with recent block height
case *lnwire.FailFinalExpiryTooSoon:
// TODO(joostjager): Take into account that any hop may have
// delayed. Ideally we should continue routing. Knowing the
// delaying node at this point would help.
return true, channeldb.FailureReasonIncorrectPaymentDetails
// If we erroneously attempted to cross a chain border,
// then we'll cancel the payment.
case *lnwire.FailInvalidRealm:
return true, channeldb.FailureReasonError
// If we get a notice that the expiry was too soon for
// an intermediate node, then we'll prune out the node
// that sent us this error, as it doesn't know what the
// correct block height is.
case *lnwire.FailExpiryTooSoon:
m.reportVertexFailure(result.timeReply, failureVertex)
return false, 0
// If we hit an instance of onion payload corruption or an invalid
// version, then we'll exit early as this shouldn't happen in the
// typical case.
//
// TODO(joostjager): Take into account that the previous hop may have
// tampered with the onion. Routing should continue using other paths.
case *lnwire.FailInvalidOnionVersion:
return true, channeldb.FailureReasonError
case *lnwire.FailInvalidOnionHmac:
return true, channeldb.FailureReasonError
case *lnwire.FailInvalidOnionKey:
return true, channeldb.FailureReasonError
// If we get a failure due to violating the minimum
// amount, we'll apply the new minimum amount and retry
// routing.
case *lnwire.FailAmountBelowMinimum:
m.reportEdgePolicyFailure(result.timeReply, failedEdge)
return false, 0
// If we get a failure due to a fee, we'll apply the
// new fee update, and retry our attempt using the
// newly updated fees.
case *lnwire.FailFeeInsufficient:
m.reportEdgePolicyFailure(result.timeReply, failedEdge)
return false, 0
// If we get the failure for an intermediate node that
// disagrees with our time lock values, then we'll
// apply the new delta value and try it once more.
case *lnwire.FailIncorrectCltvExpiry:
m.reportEdgePolicyFailure(result.timeReply, failedEdge)
return false, 0
// The outgoing channel that this node was meant to
// forward on is currently disabled, so we'll apply
// the update and continue.
case *lnwire.FailChannelDisabled:
m.reportEdgeFailure(result.timeReply, failedEdge, 0)
return false, 0
// It's likely that the outgoing channel didn't have
// sufficient capacity, so we'll prune this edge for
// now, and continue onwards with our path finding.
case *lnwire.FailTemporaryChannelFailure:
m.reportEdgeFailure(result.timeReply, failedEdge, failedAmt)
return false, 0
// If the send failed due to a node not having the
// required features, then we'll note this error and
// continue.
case *lnwire.FailRequiredNodeFeatureMissing:
m.reportVertexFailure(result.timeReply, failureVertex)
return false, 0
// If the send failed due to a node not having the
// required features, then we'll note this error and
// continue.
case *lnwire.FailRequiredChannelFeatureMissing:
m.reportVertexFailure(result.timeReply, failureVertex)
return false, 0
// If the next hop in the route wasn't known or
// offline, we'll only prune the channel which we attempted
// to route over. This is conservative, and it can
// handle faulty channels between nodes properly.
// Additionally, this guards against routing nodes
// returning errors in order to attempt to black list
// another node.
case *lnwire.FailUnknownNextPeer:
m.reportEdgeFailure(result.timeReply, failedEdge, 0)
return false, 0
// If the node wasn't able to forward for whichever
// reason, then we'll note this and continue with the
// routes.
case *lnwire.FailTemporaryNodeFailure:
m.reportVertexFailure(result.timeReply, failureVertex)
return false, 0
case *lnwire.FailPermanentNodeFailure:
m.reportVertexFailure(result.timeReply, failureVertex)
return false, 0
// If we crafted a route that contains a too long time
// lock for an intermediate node, we'll prune the node.
// As there currently is no way of knowing that node's
// maximum acceptable cltv, we cannot take this
// constraint into account during routing.
//
// TODO(joostjager): Record the rejected cltv and use
// that as a hint during future path finding through
// that node.
case *lnwire.FailExpiryTooFar:
m.reportVertexFailure(result.timeReply, failureVertex)
return false, 0
// If we get a permanent channel or node failure, then
// we'll prune the channel in both directions and
// continue with the rest of the routes.
case *lnwire.FailPermanentChannelFailure:
m.reportEdgeFailure(result.timeReply, failedEdge, 0)
m.reportEdgeFailure(result.timeReply, edge{
from: failedEdge.to,
to: failedEdge.from,
channel: failedEdge.channel,
}, 0)
return false, 0
// Any other failure or an empty failure will get the node pruned.
default:
m.reportVertexFailure(result.timeReply, failureVertex)
return false, 0
}
}
// getFailedEdge tries to locate the failing channel given a route and the
// pubkey of the node that sent the failure. It will assume that the failure is
// associated with the outgoing channel of the failing node. As a second result,
// it returns the amount sent over the edge.
func getFailedEdge(route *route.Route, failureSource int) (edge,
lnwire.MilliSatoshi) {
// Determine if we have a failure from the final hop. If so, we
// assume that the failing channel is the incoming channel.
//
// TODO(joostjager): In this case, certain types of failures are not
// expected. For example FailUnknownNextPeer. This could be a reason to
// prune the node?
if failureSource == len(route.Hops) {
failureSource--
}
// As this failure indicates that the target channel was unable to carry
// this HTLC (for whatever reason), we'll return the _outgoing_ channel that
// the source of the failure was meant to pass the HTLC along to.
if failureSource == 0 {
return edge{
from: route.SourcePubKey,
to: route.Hops[0].PubKeyBytes,
channel: route.Hops[0].ChannelID,
}, route.TotalAmount
}
return edge{
from: route.Hops[failureSource-1].PubKeyBytes,
to: route.Hops[failureSource].PubKeyBytes,
channel: route.Hops[failureSource].ChannelID,
}, route.Hops[failureSource-1].AmtToForward
}

@ -0,0 +1,269 @@
package routing
import (
"bytes"
"encoding/binary"
"fmt"
"time"
"github.com/btcsuite/btcd/wire"
"github.com/coreos/bbolt"
"github.com/lightningnetwork/lnd/channeldb"
"github.com/lightningnetwork/lnd/lnwire"
)
var (
// resultsKey is the fixed key under which the attempt results are
// stored.
resultsKey = []byte("missioncontrol-results")
// Big endian is the preferred byte order: cursor scans over big-endian
// integer keys iterate in numerical order.
byteOrder = binary.BigEndian
)
const (
// unknownFailureSourceIdx is the database encoding of an unknown error
// source.
unknownFailureSourceIdx = -1
)
// missionControlStore is a bolt db based implementation of a mission control
// store. It stores the raw payment attempt data from which the internal
// mission control state can be rederived on startup. This allows the mission
// control internal data structure to be changed without requiring a database
// migration. Also, changes to mission control parameters can be applied to
// historical data. Finally, it enables importing raw data from an external
// source.
type missionControlStore struct {
db *bbolt.DB
maxRecords int
numRecords int
}
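
The byteOrder choice above matters because bolt cursors iterate keys in byte-wise lexicographic order; big-endian integer encodings sort in the same order as their numeric values, little-endian ones do not. A self-contained demonstration:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

func main() {
	var a, b [8]byte

	// Big endian: byte-wise order matches numeric order, so a cursor
	// scan visits results chronologically.
	binary.BigEndian.PutUint64(a[:], 1)
	binary.BigEndian.PutUint64(b[:], 256)
	fmt.Println(bytes.Compare(a[:], b[:])) // -1: 1 sorts before 256

	// Little endian breaks that property.
	binary.LittleEndian.PutUint64(a[:], 1)
	binary.LittleEndian.PutUint64(b[:], 256)
	fmt.Println(bytes.Compare(a[:], b[:])) // 1: 1 sorts after 256
}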
func newMissionControlStore(db *bbolt.DB, maxRecords int) (*missionControlStore, error) {
store := &missionControlStore{
db: db,
maxRecords: maxRecords,
}
// Create buckets if not yet existing.
err := db.Update(func(tx *bbolt.Tx) error {
resultsBucket, err := tx.CreateBucketIfNotExists(resultsKey)
if err != nil {
return fmt.Errorf("cannot create results bucket: %v",
err)
}
// Count initial number of results and track this number in
// memory to avoid calling Stats().KeyN. The reliability of
// Stats() is doubtful and appears to have caused crashes in the
// past (see #1874).
c := resultsBucket.Cursor()
for k, _ := c.First(); k != nil; k, _ = c.Next() {
store.numRecords++
}
return nil
})
if err != nil {
return nil, err
}
return store, nil
}
// clear removes all results from the db.
func (b *missionControlStore) clear() error {
return b.db.Update(func(tx *bbolt.Tx) error {
if err := tx.DeleteBucket(resultsKey); err != nil {
return err
}
_, err := tx.CreateBucket(resultsKey)
return err
})
}
// fetchAll returns all results currently stored in the database.
func (b *missionControlStore) fetchAll() ([]*paymentResult, error) {
var results []*paymentResult
err := b.db.View(func(tx *bbolt.Tx) error {
resultBucket := tx.Bucket(resultsKey)
results = make([]*paymentResult, 0)
return resultBucket.ForEach(func(k, v []byte) error {
result, err := deserializeResult(k, v)
if err != nil {
return err
}
results = append(results, result)
return nil
})
})
if err != nil {
return nil, err
}
return results, nil
}
// serializeResult serializes a payment result and returns a key and value byte
// slice to insert into the bucket.
func serializeResult(rp *paymentResult) ([]byte, []byte, error) {
// Write timestamps, success status, failure source index and route.
var b bytes.Buffer
var dbFailureSourceIdx int32
if rp.failureSourceIdx == nil {
dbFailureSourceIdx = unknownFailureSourceIdx
} else {
dbFailureSourceIdx = int32(*rp.failureSourceIdx)
}
err := channeldb.WriteElements(
&b,
uint64(rp.timeFwd.UnixNano()),
uint64(rp.timeReply.UnixNano()),
rp.success, dbFailureSourceIdx,
)
if err != nil {
return nil, nil, err
}
if err := channeldb.SerializeRoute(&b, *rp.route); err != nil {
return nil, nil, err
}
// Write failure. If there is no failure message, write an empty
// byte slice.
var failureBytes bytes.Buffer
if rp.failure != nil {
err := lnwire.EncodeFailureMessage(&failureBytes, rp.failure, 0)
if err != nil {
return nil, nil, err
}
}
err = wire.WriteVarBytes(&b, 0, failureBytes.Bytes())
if err != nil {
return nil, nil, err
}
// Compose key that identifies this result.
key := getResultKey(rp)
return key, b.Bytes(), nil
}
// deserializeResult deserializes a payment result.
func deserializeResult(k, v []byte) (*paymentResult, error) {
// Parse payment id.
result := paymentResult{
id: byteOrder.Uint64(k[8:]),
}
r := bytes.NewReader(v)
// Read timestamps, success status and failure source index.
var (
timeFwd, timeReply uint64
dbFailureSourceIdx int32
)
err := channeldb.ReadElements(
r, &timeFwd, &timeReply, &result.success, &dbFailureSourceIdx,
)
if err != nil {
return nil, err
}
// Convert time stamps to local time zone for consistent logging.
result.timeFwd = time.Unix(0, int64(timeFwd)).Local()
result.timeReply = time.Unix(0, int64(timeReply)).Local()
// Convert from unknown index magic number to nil value.
if dbFailureSourceIdx != unknownFailureSourceIdx {
failureSourceIdx := int(dbFailureSourceIdx)
result.failureSourceIdx = &failureSourceIdx
}
// Read route.
route, err := channeldb.DeserializeRoute(r)
if err != nil {
return nil, err
}
result.route = &route
// Read failure.
failureBytes, err := wire.ReadVarBytes(
r, 0, lnwire.FailureMessageLength, "failure",
)
if err != nil {
return nil, err
}
if len(failureBytes) > 0 {
result.failure, err = lnwire.DecodeFailureMessage(
bytes.NewReader(failureBytes), 0,
)
if err != nil {
return nil, err
}
}
return &result, nil
}
// AddResult adds a new result to the db.
func (b *missionControlStore) AddResult(rp *paymentResult) error {
return b.db.Update(func(tx *bbolt.Tx) error {
bucket := tx.Bucket(resultsKey)
// Prune oldest entries.
if b.maxRecords > 0 {
for b.numRecords >= b.maxRecords {
cursor := bucket.Cursor()
cursor.First()
if err := cursor.Delete(); err != nil {
return err
}
b.numRecords--
}
}
// Serialize result into key and value byte slices.
k, v, err := serializeResult(rp)
if err != nil {
return err
}
// The store is assumed to be idempotent. It could be that the
// same result is added twice and in that case the counter
// shouldn't be increased.
if bucket.Get(k) == nil {
b.numRecords++
}
// Put into results bucket.
return bucket.Put(k, v)
})
}
// getResultKey returns a byte slice representing a unique key for this payment
// result.
func getResultKey(rp *paymentResult) []byte {
var keyBytes [8 + 8 + 33]byte
// Identify records by a combination of time, payment id and sender pub
// key. This allows importing mission control data from an external
// source without key collisions and keeps the records sorted
// chronologically.
byteOrder.PutUint64(keyBytes[:], uint64(rp.timeReply.UnixNano()))
byteOrder.PutUint64(keyBytes[8:], rp.id)
copy(keyBytes[16:], rp.route.SourcePubKey[:])
return keyBytes[:]
}

@ -0,0 +1,140 @@
package routing
import (
"io/ioutil"
"os"
"reflect"
"testing"
"time"
"github.com/lightningnetwork/lnd/lnwire"
"github.com/coreos/bbolt"
"github.com/lightningnetwork/lnd/routing/route"
)
const testMaxRecords = 2
func TestMissionControlStore(t *testing.T) {
// Set time zone explicitly to keep test deterministic.
time.Local = time.UTC
file, err := ioutil.TempFile("", "*.db")
if err != nil {
t.Fatal(err)
}
dbPath := file.Name()
db, err := bbolt.Open(dbPath, 0600, nil)
if err != nil {
t.Fatal(err)
}
defer db.Close()
defer os.Remove(dbPath)
store, err := newMissionControlStore(db, testMaxRecords)
if err != nil {
t.Fatal(err)
}
results, err := store.fetchAll()
if err != nil {
t.Fatal(err)
}
if len(results) != 0 {
t.Fatal("expected no results")
}
testRoute := route.Route{
SourcePubKey: route.Vertex{1},
Hops: []*route.Hop{
{
PubKeyBytes: route.Vertex{2},
},
},
}
failureSourceIdx := 1
result1 := paymentResult{
route: &testRoute,
failure: lnwire.NewFailUnknownPaymentHash(100),
failureSourceIdx: &failureSourceIdx,
id: 99,
timeReply: testTime,
timeFwd: testTime.Add(-time.Minute),
}
result2 := result1
result2.timeReply = result1.timeReply.Add(time.Hour)
result2.timeFwd = result1.timeReply.Add(time.Hour)
result2.id = 2
// Store result.
err = store.AddResult(&result2)
if err != nil {
t.Fatal(err)
}
// Store again to test idempotency.
err = store.AddResult(&result2)
if err != nil {
t.Fatal(err)
}
// Store second result which has an earlier timestamp.
err = store.AddResult(&result1)
if err != nil {
t.Fatal(err)
}
results, err = store.fetchAll()
if err != nil {
t.Fatal(err)
}
if len(results) != 2 {
t.Fatal("expected two results")
}
// Check that results are stored in chronological order.
if !reflect.DeepEqual(&result1, results[0]) {
t.Fatal()
}
if !reflect.DeepEqual(&result2, results[1]) {
t.Fatal()
}
// Recreate store to test pruning.
store, err = newMissionControlStore(db, testMaxRecords)
if err != nil {
t.Fatal(err)
}
// Add a newer result.
result3 := result1
result3.timeReply = result1.timeReply.Add(2 * time.Hour)
result3.timeFwd = result1.timeReply.Add(2 * time.Hour)
result3.id = 3
err = store.AddResult(&result3)
if err != nil {
t.Fatal(err)
}
// Check that results are pruned.
results, err = store.fetchAll()
if err != nil {
t.Fatal(err)
}
if len(results) != 2 {
t.Fatal("expected two results")
}
if !reflect.DeepEqual(&result2, results[0]) {
t.Fatal()
}
if !reflect.DeepEqual(&result3, results[1]) {
t.Fatal()
}
}

@ -1,25 +1,50 @@
package routing
import (
"io/ioutil"
"os"
"testing"
"time"
"github.com/coreos/bbolt"
"github.com/lightningnetwork/lnd/lnwire"
"github.com/lightningnetwork/lnd/routing/route"
)
var (
mcTestNode = route.Vertex{}
mcTestEdge = EdgeLocator{
ChannelID: 123,
ChannelID: 2,
}
mcTestTime = time.Date(2018, time.January, 9, 14, 00, 00, 0, time.UTC)
mcTestRoute = &route.Route{
SourcePubKey: route.Vertex{10},
Hops: []*route.Hop{
{
ChannelID: 1,
PubKeyBytes: route.Vertex{11},
AmtToForward: 1000,
},
{
ChannelID: 2,
PubKeyBytes: route.Vertex{12},
},
},
}
mcTestTime = time.Date(2018, time.January, 9, 14, 00, 00, 0, time.UTC)
mcTestNode1 = mcTestRoute.Hops[0].PubKeyBytes
mcTestNode2 = mcTestRoute.Hops[1].PubKeyBytes
)
type mcTestContext struct {
t *testing.T
mc *MissionControl
now time.Time
db *bbolt.DB
dbPath string
pid uint64
}
func createMcTestContext(t *testing.T) *mcTestContext {
@ -28,17 +53,44 @@ func createMcTestContext(t *testing.T) *mcTestContext {
now: mcTestTime,
}
mc := NewMissionControl(
file, err := ioutil.TempFile("", "*.db")
if err != nil {
t.Fatal(err)
}
ctx.dbPath = file.Name()
ctx.db, err = bbolt.Open(ctx.dbPath, 0600, nil)
if err != nil {
t.Fatal(err)
}
ctx.restartMc()
return ctx
}
// restartMc creates a new instance of mission control on the same database.
func (ctx *mcTestContext) restartMc() {
mc, err := NewMissionControl(
ctx.db,
&MissionControlConfig{
PenaltyHalfLife: 30 * time.Minute,
AprioriHopProbability: 0.8,
},
)
if err != nil {
ctx.t.Fatal(err)
}
mc.now = func() time.Time { return ctx.now }
ctx.mc = mc
}
return ctx
// cleanup closes the database and removes the temp file.
func (ctx *mcTestContext) cleanup() {
ctx.db.Close()
os.Remove(ctx.dbPath)
}
// Assert that mission control returns a probability for an edge.
@ -47,30 +99,41 @@ func (ctx *mcTestContext) expectP(amt lnwire.MilliSatoshi,
ctx.t.Helper()
p := ctx.mc.GetEdgeProbability(mcTestNode, mcTestEdge, amt)
p := ctx.mc.GetEdgeProbability(mcTestNode1, mcTestEdge, amt)
if p != expected {
ctx.t.Fatalf("unexpected probability %v", p)
}
}
// reportFailure reports a failure by using a test route.
func (ctx *mcTestContext) reportFailure(t time.Time,
amt lnwire.MilliSatoshi, failure lnwire.FailureMessage) {
mcTestRoute.Hops[0].AmtToForward = amt
errorSourceIdx := 1
ctx.mc.ReportPaymentFail(
ctx.pid, mcTestRoute, &errorSourceIdx, failure,
)
}
// TestMissionControl tests mission control probability estimation.
func TestMissionControl(t *testing.T) {
ctx := createMcTestContext(t)
defer ctx.cleanup()
ctx.now = testTime
testTime := time.Date(2018, time.January, 9, 14, 00, 00, 0, time.UTC)
testNode := route.Vertex{}
testEdge := edge{
channel: 123,
}
// Initial probability is expected to be the apriori probability.
ctx.expectP(1000, 0.8)
// Expect probability to be zero after reporting the edge as failed.
ctx.mc.ReportEdgeFailure(testEdge, 1000)
ctx.reportFailure(
testTime, 1000,
lnwire.NewTemporaryChannelFailure(nil),
)
ctx.expectP(1000, 0)
// As we reported with a min penalization amt, a lower amt than reported
@ -83,7 +146,10 @@ func TestMissionControl(t *testing.T) {
// Edge fails again, this time without a min penalization amt. The edge
// should be penalized regardless of amount.
ctx.mc.ReportEdgeFailure(testEdge, 0)
ctx.reportFailure(
ctx.now, 0,
lnwire.NewTemporaryChannelFailure(nil),
)
ctx.expectP(1000, 0)
ctx.expectP(500, 0)
@ -91,9 +157,16 @@ func TestMissionControl(t *testing.T) {
ctx.now = testTime.Add(60 * time.Minute)
ctx.expectP(1000, 0.4)
// Restart mission control to test persistence.
ctx.restartMc()
ctx.expectP(1000, 0.4)
// A node level failure should bring probability of every channel back
// to zero.
ctx.mc.ReportVertexFailure(testNode)
ctx.reportFailure(
ctx.now, 0,
lnwire.NewExpiryTooSoon(lnwire.ChannelUpdate{}),
)
ctx.expectP(1000, 0)
// Check whether history snapshot looks sane.
@ -111,20 +184,21 @@ func TestMissionControl(t *testing.T) {
// penalizing the channel yet.
func TestMissionControlChannelUpdate(t *testing.T) {
ctx := createMcTestContext(t)
testEdge := edge{
channel: 123,
}
defer ctx.cleanup()
// Report a policy related failure. Because it is the first, we don't
// expect a penalty.
ctx.mc.ReportEdgePolicyFailure(testEdge)
ctx.reportFailure(
ctx.now, 0,
lnwire.NewFeeInsufficient(0, lnwire.ChannelUpdate{}),
)
ctx.expectP(0, 0.8)
// Report another failure for the same channel. We expect it to be
// pruned.
ctx.mc.ReportEdgePolicyFailure(testEdge)
ctx.reportFailure(
ctx.now, 0,
lnwire.NewFeeInsufficient(0, lnwire.ChannelUpdate{}),
)
ctx.expectP(0, 0)
}

@ -98,6 +98,13 @@ type mockMissionControl struct {
var _ MissionController = (*mockMissionControl)(nil)
func (m *mockMissionControl) ReportPaymentFail(paymentID uint64,
rt *route.Route, failureSourceIdx *int, failure lnwire.FailureMessage) (
bool, channeldb.FailureReason, error) {
return false, 0, nil
}
func (m *mockMissionControl) ReportEdgeFailure(failedEdge edge,
minPenalizeAmt lnwire.MilliSatoshi) {
}

@ -343,7 +343,7 @@ func (p *paymentLifecycle) sendPaymentAttempt(firstHop lnwire.ShortChannelID,
func (p *paymentLifecycle) handleSendError(sendErr error) error {
final, reason := p.router.processSendError(
&p.attempt.Route, sendErr,
p.attempt.PaymentID, &p.attempt.Route, sendErr,
)
if !final {
// Save the forwarding error so it can be returned if

@ -174,15 +174,13 @@ type PaymentSessionSource interface {
// MissionController is an interface that exposes failure reporting and
// probability estimation.
type MissionController interface {
// ReportEdgeFailure reports a channel level failure.
ReportEdgeFailure(failedEdge edge,
minPenalizeAmt lnwire.MilliSatoshi)
// ReportEdgePolicyFailure reports a policy related failure.
ReportEdgePolicyFailure(failedEdge edge)
// ReportVertexFailure reports a node level failure.
ReportVertexFailure(v route.Vertex)
// ReportPaymentFail reports a failed payment to mission control as
// input for future probability estimates. It returns a bool indicating
// whether this error is a final error and no further payment attempts
// need to be made.
ReportPaymentFail(paymentID uint64, rt *route.Route,
failureSourceIdx *int, failure lnwire.FailureMessage) (bool,
channeldb.FailureReason, error)
// GetEdgeProbability is expected to return the success probability of a
// payment from fromNode along edge.
@ -1895,21 +1893,33 @@ func (r *ChannelRouter) tryApplyChannelUpdate(rt *route.Route,
// error type, this error is either the final outcome of the payment or we need
// to continue with an alternative route. This is indicated by the boolean
// return value.
func (r *ChannelRouter) processSendError(rt *route.Route, sendErr error) (
bool, channeldb.FailureReason) {
func (r *ChannelRouter) processSendError(paymentID uint64, rt *route.Route,
sendErr error) (bool, channeldb.FailureReason) {
// If the failure message could not be decrypted, attribute the failure
// to our own outgoing channel.
//
// TODO(joostjager): Penalize all channels in the route.
if sendErr == htlcswitch.ErrUnreadableFailureMessage {
sendErr = &htlcswitch.ForwardingError{
FailureSourceIdx: 0,
FailureMessage: lnwire.NewTemporaryChannelFailure(nil),
reportFail := func(srcIdx *int, msg lnwire.FailureMessage) (bool,
channeldb.FailureReason) {
// Report outcome to mission control.
final, reason, err := r.cfg.MissionControl.ReportPaymentFail(
paymentID, rt, srcIdx, msg,
)
if err != nil {
log.Errorf("Error reporting payment result to mc: %v",
err)
return true, channeldb.FailureReasonError
}
return final, reason
}
// If an internal, non-forwarding error occurred, we can stop trying.
if sendErr == htlcswitch.ErrUnreadableFailureMessage {
log.Tracef("Unreadable failure when sending htlc")
return reportFail(nil, nil)
}
// If an internal, non-forwarding error occurred, we can stop
// trying.
fErr, ok := sendErr.(*htlcswitch.ForwardingError)
if !ok {
return true, channeldb.FailureReasonError
@ -1929,195 +1939,10 @@ func (r *ChannelRouter) processSendError(rt *route.Route, sendErr error) (
}
}
var failureVertex route.Vertex
log.Tracef("Node=%v reported failure when sending htlc",
failureSourceIdx)
// For any non-self failure, look up the source pub key in the hops
// slice. Otherwise return the self node pubkey.
if failureSourceIdx > 0 {
failureVertex = rt.Hops[failureSourceIdx-1].PubKeyBytes
} else {
failureVertex = r.selfNode.PubKeyBytes
}
log.Tracef("Node %x (index %v) reported failure when sending htlc",
failureVertex, failureSourceIdx)
// Always determine chan id ourselves, because a channel
// update with the id may not be available.
failedEdge, failedAmt := getFailedEdge(rt, failureSourceIdx)
switch fErr.FailureMessage.(type) {
// If the end destination didn't know the payment
// hash or we sent the wrong payment amount to the
// destination, then we'll terminate immediately.
case *lnwire.FailUnknownPaymentHash:
// TODO(joostjager): Check onionErr.Amount() whether it matches
// what we expect. (Will it ever not match, because if not
// final_incorrect_htlc_amount would be returned?)
return true, channeldb.FailureReasonIncorrectPaymentDetails
// If we sent the wrong amount to the destination, then
// we'll exit early.
case *lnwire.FailIncorrectPaymentAmount:
return true, channeldb.FailureReasonIncorrectPaymentDetails
// If the time-lock that was extended to the final node
// was incorrect, then we can't proceed.
case *lnwire.FailFinalIncorrectCltvExpiry:
// TODO(joostjager): Take into account that second last hop may
// have deliberately handed out an htlc that expires too soon.
// In that case we should continue routing.
return true, channeldb.FailureReasonError
// If we crafted an invalid onion payload for the final
// node, then we'll exit early.
case *lnwire.FailFinalIncorrectHtlcAmount:
// TODO(joostjager): Take into account that second last hop may
// have deliberately handed out an htlc with a too low value. In
// that case we should continue routing.
return true, channeldb.FailureReasonError
// Similarly, if the HTLC expiry that we extended to
// the final hop expires too soon, then we'll fail the
// payment.
//
// TODO(roasbeef): can happen due to a race condition, try
// again with recent block height
case *lnwire.FailFinalExpiryTooSoon:
// TODO(joostjager): Take into account that any hop may have
// delayed. Ideally we should continue routing. Knowing the
// delaying node at this point would help.
return true, channeldb.FailureReasonIncorrectPaymentDetails
// If we erroneously attempted to cross a chain border,
// then we'll cancel the payment.
case *lnwire.FailInvalidRealm:
return true, channeldb.FailureReasonError
// If we get a notice that the expiry was too soon for
// an intermediate node, then we'll prune out the node
// that sent us this error, as it doesn't know what the
// correct block height is.
case *lnwire.FailExpiryTooSoon:
r.cfg.MissionControl.ReportVertexFailure(failureVertex)
return false, 0
// If we hit an instance of onion payload corruption or an invalid
// version, then we'll exit early as this shouldn't happen in the
// typical case.
//
// TODO(joostjager): Take into account that the previous hop may have
// tampered with the onion. Routing should continue using other paths.
case *lnwire.FailInvalidOnionVersion:
return true, channeldb.FailureReasonError
case *lnwire.FailInvalidOnionHmac:
return true, channeldb.FailureReasonError
case *lnwire.FailInvalidOnionKey:
return true, channeldb.FailureReasonError
// If we get a failure due to violating the minimum
// amount, we'll apply the new minimum amount and retry
// routing.
case *lnwire.FailAmountBelowMinimum:
r.cfg.MissionControl.ReportEdgePolicyFailure(failedEdge)
return false, 0
// If we get a failure due to a fee, we'll apply the
// new fee update, and retry our attempt using the
// newly updated fees.
case *lnwire.FailFeeInsufficient:
r.cfg.MissionControl.ReportEdgePolicyFailure(failedEdge)
return false, 0
// If we get the failure for an intermediate node that
// disagrees with our time lock values, then we'll
// apply the new delta value and try it once more.
case *lnwire.FailIncorrectCltvExpiry:
r.cfg.MissionControl.ReportEdgePolicyFailure(failedEdge)
return false, 0
// The outgoing channel that this node was meant to
// forward on is currently disabled, so we'll apply
// the update and continue.
case *lnwire.FailChannelDisabled:
r.cfg.MissionControl.ReportEdgeFailure(failedEdge, 0)
return false, 0
// It's likely that the outgoing channel didn't have
// sufficient capacity, so we'll prune this edge for
// now, and continue onwards with our path finding.
case *lnwire.FailTemporaryChannelFailure:
r.cfg.MissionControl.ReportEdgeFailure(failedEdge, failedAmt)
return false, 0
// If the send failed due to a node not having the
// required features, then we'll note this error and
// continue.
case *lnwire.FailRequiredNodeFeatureMissing:
r.cfg.MissionControl.ReportVertexFailure(failureVertex)
return false, 0
// If the send failed due to a node not having the
// required features, then we'll note this error and
// continue.
case *lnwire.FailRequiredChannelFeatureMissing:
r.cfg.MissionControl.ReportVertexFailure(failureVertex)
return false, 0
// If the next hop in the route wasn't known or
// offline, we'll only prune the channel which we attempted
// to route over. This is conservative, and it can
// handle faulty channels between nodes properly.
// Additionally, this guards against routing nodes
// returning errors in order to attempt to black list
// another node.
case *lnwire.FailUnknownNextPeer:
r.cfg.MissionControl.ReportEdgeFailure(failedEdge, 0)
return false, 0
// If the node wasn't able to forward for whichever
// reason, then we'll note this and continue with the
// routes.
case *lnwire.FailTemporaryNodeFailure:
r.cfg.MissionControl.ReportVertexFailure(failureVertex)
return false, 0
case *lnwire.FailPermanentNodeFailure:
r.cfg.MissionControl.ReportVertexFailure(failureVertex)
return false, 0
// If we crafted a route that contains a too long time
// lock for an intermediate node, we'll prune the node.
// As there currently is no way of knowing that node's
// maximum acceptable cltv, we cannot take this
// constraint into account during routing.
//
// TODO(joostjager): Record the rejected cltv and use
// that as a hint during future path finding through
// that node.
case *lnwire.FailExpiryTooFar:
r.cfg.MissionControl.ReportVertexFailure(failureVertex)
return false, 0
// If we get a permanent channel or node failure, then
// we'll prune the channel in both directions and
// continue with the rest of the routes.
case *lnwire.FailPermanentChannelFailure:
r.cfg.MissionControl.ReportEdgeFailure(failedEdge, 0)
r.cfg.MissionControl.ReportEdgeFailure(edge{
from: failedEdge.to,
to: failedEdge.from,
channel: failedEdge.channel,
}, 0)
return false, 0
// Any other failure or an empty failure will get the node pruned.
default:
r.cfg.MissionControl.ReportVertexFailure(failureVertex)
return false, 0
}
return reportFail(&failureSourceIdx, failureMessage)
}
// extractChannelUpdate examines the error and extracts the channel update.
@ -2143,46 +1968,6 @@ func (r *ChannelRouter) extractChannelUpdate(
return update
}
// getFailedEdge tries to locate the failing channel given a route and the
// pubkey of the node that sent the failure. It will assume that the failure is
// associated with the outgoing channel of the failing node. As a second result,
// it returns the amount sent over the edge.
func getFailedEdge(route *route.Route, failureSource int) (edge,
lnwire.MilliSatoshi) {
// Determine if we have a failure from the final hop. If so, we
// assume that the failing channel is the incoming channel. In this
// function the outgoing channel of the hop indicated by failureSource
// is returned, where index zero is the self node. By decrementing
// failureSource by one, the outgoing channel of the penultimate hop is
// returned, which is the same as the incoming channel of the final
// node.
//
// TODO(joostjager): In this case, certain types of failures are not
// expected. For example FailUnknownNextPeer. This could be a reason to
// prune the node?
if failureSource == len(route.Hops) {
failureSource--
}
// As this failure indicates that the target channel was unable to carry
// this HTLC (for whatever reason), we'll return the _outgoing_ channel that
// the source of the failure was meant to pass the HTLC along to.
if failureSource == 0 {
return edge{
from: route.SourcePubKey,
to: route.Hops[0].PubKeyBytes,
channel: route.Hops[0].ChannelID,
}, route.TotalAmount
}
return edge{
from: route.Hops[failureSource-1].PubKeyBytes,
to: route.Hops[failureSource].PubKeyBytes,
channel: route.Hops[failureSource].ChannelID,
}, route.Hops[failureSource-1].AmtToForward
}
// applyChannelUpdate validates a channel update and if valid, applies it to the
// database. It returns a bool indicating whether the update was successful.
func (r *ChannelRouter) applyChannelUpdate(msg *lnwire.ChannelUpdate,

@ -90,7 +90,7 @@ func createTestCtxFromGraphInstance(startingHeight uint32, graphInstance *testGr
return nil, nil, err
}
pathFindingConfig := &PathFindingConfig{
pathFindingConfig := PathFindingConfig{
MinProbability: 0.01,
PaymentAttemptPenalty: 100,
}
@ -100,9 +100,13 @@ func createTestCtxFromGraphInstance(startingHeight uint32, graphInstance *testGr
AprioriHopProbability: 0.9,
}
mc := NewMissionControl(
mc, err := NewMissionControl(
graphInstance.graph.Database().DB,
mcConfig,
)
if err != nil {
return nil, nil, err
}
sessionSource := &SessionSource{
Graph: graphInstance.graph,
@ -110,11 +114,8 @@ func createTestCtxFromGraphInstance(startingHeight uint32, graphInstance *testGr
QueryBandwidth: func(e *channeldb.ChannelEdgeInfo) lnwire.MilliSatoshi {
return lnwire.NewMSatFromSatoshis(e.Capacity)
},
PathFindingConfig: PathFindingConfig{
MinProbability: 0.01,
PaymentAttemptPenalty: 100,
},
MissionControl: mc,
PathFindingConfig: pathFindingConfig,
MissionControl: mc,
}
router, err := New(Config{
@ -134,7 +135,7 @@ func createTestCtxFromGraphInstance(startingHeight uint32, graphInstance *testGr
next := atomic.AddUint64(&uniquePaymentID, 1)
return next, nil
},
PathFindingConfig: *pathFindingConfig,
PathFindingConfig: pathFindingConfig,
})
if err != nil {
return nil, nil, fmt.Errorf("unable to create router %v", err)

@ -653,21 +653,28 @@ func newServer(listenAddrs []net.Addr, chanDB *channeldb.DB,
// servers, the mission control instance itself can be moved there too.
routingConfig := routerrpc.GetRoutingConfig(cfg.SubRPCServers.RouterRPC)
s.missionControl = routing.NewMissionControl(
s.missionControl, err = routing.NewMissionControl(
chanDB.DB,
&routing.MissionControlConfig{
AprioriHopProbability: routingConfig.AprioriHopProbability,
PenaltyHalfLife: routingConfig.PenaltyHalfLife,
MaxMcHistory: routingConfig.MaxMcHistory,
},
)
if err != nil {
return nil, fmt.Errorf("can't create mission control: %v", err)
}
srvrLog.Debugf("Instantiating payment session source with config: "+
"PaymentAttemptPenalty=%v, MinRouteProbability=%v",
int64(routingConfig.PaymentAttemptPenalty.ToSatoshis()),
int64(routingConfig.AttemptCost),
routingConfig.MinRouteProbability)
pathFindingConfig := routing.PathFindingConfig{
PaymentAttemptPenalty: routingConfig.PaymentAttemptPenalty,
MinProbability: routingConfig.MinRouteProbability,
PaymentAttemptPenalty: lnwire.NewMSatFromSatoshis(
routingConfig.AttemptCost,
),
MinProbability: routingConfig.MinRouteProbability,
}
paymentSessionSource := &routing.SessionSource{