Source code for beansoup.importers.filing
"""A file-only importer."""
import calendar
import datetime
import logging
from os import path
import re
from beancount.ingest import importer
from beansoup.utils import dates
[docs]class Importer(importer.ImporterProtocol):
"""A document-filing class for monthly files; it does not import anything.
This importer only supports bean-identify and bean-file. It does not
extract any transactions; in fact, it does not even open the file.
It uses a regular expression to match a filename to an account and to
a date (interpreted as the last day of a billing period).
"""
def __init__(self, account, basename=None,
first_day=1, filename_regexp=None):
"""Create a new filing importer for the given account.
Args:
account: An account string, the account to associate to the files.
basename: An optional string, the name of the new files.
filename_regexp: A regular expression string used to match the
basename (no path) of the target file. This regexp should include
capturing groups for `year`, `month`, and (optional) `day` of
the end of the period covered by the file; if `day` is
missing, the `first_day` argument will be used to compute the
end date.
Example: '^Statement_(?P<month>\w{3}) (?P<year>\d{4}).pdf$'
first_day: An int in [1,28]; the first day of the billing period.
"""
self.filename_re = re.compile(filename_regexp or self.filename_regexp)
self.account = account
self.basename = basename
self.first_day = first_day
[docs] def name(self):
"""Include the filing account in the name."""
return '{}: "{}"'.format(super().name(), self.file_account(None))
[docs] def identify(self, file):
"""Identify whether the file can be processed by this importer."""
# Match the file name.
return self.filename_re.match(path.basename(file.name))
[docs] def file_account(self, _):
"""Return the account associated with the file"""
return self.account
[docs] def file_name(self, file):
"""Return the optional renamed account file name."""
if self.basename:
return self.basename + path.splitext(file.name)[1]
[docs] def file_date(self, file):
"""Return the filing date for the file."""
matches = self.filename_re.match(path.basename(file.name))
if matches:
groups = matches.groupdict()
today = datetime.date.today()
year = int(groups['year']) if 'year' in groups else today.year
month = dates.month_number(groups.get('month')) or today.month
if 'day' in groups:
# The filename fully specifies the document date
day = int(groups['day'])
date = datetime.date(year, month, day)
else:
# Use the first day of the billing cycle to compute the
# last day of the period for the given year and month
if self.first_day > 1:
date = (datetime.date(year, month, self.first_day) -
datetime.timedelta(days=1))
else:
_, month_last_day = calendar.monthrange(year, month)
date = datetime.date(year, month, month_last_day)
return date