Source code for beansoup.transactions

"""Utilities to work with beancount.core.data.Transaction objects."""

import datetime
import itertools
from os import path

from beancount.core import data, flags, number


class TransactionCompleter:
    """A class capable of completing partial transactions.

    Importers typically generate incomplete transactions with a single
    posting to the main account related to the imported data. This class
    attempts to complete those transactions by adding a second posting to an
    account chosen based on the existing transaction history for the main
    account.

    It looks for existing transactions that have exactly two postings and
    where one of the two postings is to the main account. It scores each of
    these transactions based on the similarity of its payee and narration
    fields to the payee and narration fields of the incomplete transaction
    and selects the one with the highest score as a model to fill in the
    missing posting of the incomplete transaction. Equal scores are broken by
    selecting the most recent transaction.
    """

    def __init__(self, existing_entries, account, min_score=0.5,
                 max_age=None, interpolated=False):
        """Initialization.

        Args:
          existing_entries: The existing entries, ordered by increasing date.
            May be None or empty, in which case no model is available.
          account: The main account of the incomplete transactions (i.e. the
            account of their only posting).
          min_score: The minimum score an existing transaction must have to
            be used as a model for an incomplete transaction.
          max_age: A datetime.timedelta object giving the maximum age
            (measured from datetime.date.today()) a transaction can have in
            order to be used as a model to fill in an incomplete transaction;
            None disables the age limit.
          interpolated: If True, the missing posting will include an
            interpolated amount; otherwise, the amount will be left blank.
        """
        def is_model(entry):
            """A predicate asking whether an entry can be used as a model.

            An entry can be considered a model for incomplete transactions
            if it is a transaction with exactly two postings and it involves
            the main account.
            """
            return (isinstance(entry, data.Transaction)
                    and len(entry.postings) == 2
                    and any(posting.account == account
                            for posting in entry.postings))

        candidates = existing_entries or []
        # Compare against None explicitly: a zero timedelta is falsy but is
        # still a meaningful limit ("only entries dated today or later").
        if max_age is not None:
            min_date = datetime.date.today() - max_age
            # Entries are ordered by increasing date, so walk them backwards
            # and stop at the first one that is too old.
            candidates = itertools.takewhile(
                lambda entry: entry.date >= min_date,
                reversed(candidates))

        self.model_txns = [entry for entry in candidates if is_model(entry)]
        self.account = account
        self.min_score = min_score
        self.interpolated = interpolated

    def __call__(self, entries):
        """Same as `complete_entries` method."""
        return self.complete_entries(entries)

    def complete_entries(self, entries):
        """Complete the given entries.

        Only transactions with a single posting to the account bound to the
        completer may be modified; they are modified in place.

        Args:
          entries: The entries to be completed.
        Returns:
          The very same `entries` object that was passed in.
        """
        for entry in entries:
            self.complete_entry(entry)
        return entries

    def complete_entry(self, entry):
        """Complete the given entry.

        This method attempts to complete the entry only if it is a
        transaction with a single posting to the account bound to the
        completer. The entry will be completed only if a suitable model
        transaction can be found. If multiple model transactions are found
        that balance the transaction against different accounts, the missing
        posting will be flagged for review.

        Args:
          entry: The entry to be completed.
        Returns:
          True if a posting was added to the entry; False, otherwise.
        """
        if not (isinstance(entry, data.Transaction)
                and len(entry.postings) == 1
                and entry.postings[0].account == self.account):
            return False

        model_txn, model_accounts = self.find_best_model(entry)
        if model_txn is None:
            return False

        # Accounts the model uses to balance the posting to the main account.
        counter_accounts = [posting.account
                            for posting in model_txn.postings
                            if posting.account != self.account]
        if not counter_accounts:
            # The model posts only to the main account, so there is nothing
            # to copy and the entry is left incomplete.
            return False

        # If past transactions similar to this one were posted against
        # different accounts, flag the posting in the new entry.
        flag = flags.FLAG_WARNING if len(model_accounts) > 1 else None
        units = -entry.postings[0].units if self.interpolated else None

        # Add the missing posting to balance the transaction.
        entry.postings.extend(
            data.Posting(counter_account, units, None, None, flag, None)
            for counter_account in counter_accounts)
        return True

    def find_best_model(self, txn):
        """Return the best model for the given incomplete transaction.

        Args:
          txn: A beancount.core.data.Transaction object; an incomplete
            transaction with a single posting.
        Returns:
          A pair of a beancount.core.data.Transaction object and a set of
          account strings; the first part is the model transaction or None,
          if no suitable model could be found; the second part is the set of
          the different accounts used by the top-scoring transactions to
          balance the posting to the target account.
        """
        # Score every model and discard the low-score ones.
        scored = ((self.score_model(model_txn, txn), model_txn)
                  for model_txn in self.model_txns)
        scored_model_txns = [(score, model_txn)
                             for score, model_txn in scored
                             if score >= self.min_score]
        if not scored_model_txns:
            return (None, set())

        # Sort the scored transactions by descending score and date.
        # The first transaction in the sorted list is the best model.
        scored_model_txns.sort(key=lambda pair: (pair[0], pair[1].date),
                               reverse=True)
        best_score, best_model_txn = scored_model_txns[0]

        # Look at the other top-scoring transactions and collect the
        # accounts they post to; if they post to more than one account
        # (other than the target account), the model is ambiguous.
        top_model_txns = itertools.takewhile(
            lambda pair: pair[0] == best_score, scored_model_txns)
        accounts = {posting.account
                    for _, model_txn in top_model_txns
                    for posting in model_txn.postings
                    if posting.account != self.account}
        return (best_model_txn, accounts)

    def score_model(self, model_txn, txn):
        """Score an existing transaction for its ability to provide a model
        for an incomplete transaction.

        The score is the length of the common prefix of the two transaction
        descriptions ("payee narration", stripped) divided by the length of
        the description of the incomplete transaction. It is 0.0 when the
        incomplete transaction's description is shorter than two characters,
        when the model has no posting to the target account, or when the
        model's posting to the target account does not have the same sign as
        the posting of the incomplete transaction.

        Args:
          model_txn: The transaction to be scored.
          txn: The incomplete transaction.
        Returns:
          A float number representing the score, normalized in [0,1].
        """
        def get_description(transaction):
            return '{} {}'.format(transaction.payee or '',
                                  transaction.narration or '').strip()

        # If the target transaction does not have a description, there is
        # nothing we can do.
        txn_description = get_description(txn)
        n_max = len(txn_description)
        if n_max <= 1:
            return 0.0

        # Only consider model transactions whose posting to the target
        # account has the same sign as the transaction to be completed.
        main_posting = next((posting for posting in model_txn.postings
                             if posting.account == self.account), None)
        if main_posting is None:
            return 0.0
        if not number.same_sign(main_posting.units.number,
                                txn.postings[0].units.number):
            return 0.0

        # os.path.commonprefix compares character by character, so it works
        # as a plain longest-common-prefix on arbitrary strings.
        n_match = len(path.commonprefix(
            [get_description(model_txn), txn_description]))
        return float(n_match) / float(n_max)