diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..7133ca7 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include README.md +include LICENCE.md +include requirements.txt diff --git a/main.py b/main.py deleted file mode 100644 index e39d82b..0000000 --- a/main.py +++ /dev/null @@ -1,152 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import argparse -import os -import subprocess -import sys - -import requests -from ruamel.yaml import YAML, scalarstring - - -class BlacklistImporter: - def __init__(self, args): - self.outfile = args.outfile - self.dryrun = args.dryrun - self.path = os.path.dirname(__file__) - self.url = "https://raw.githubusercontent.com/JabberSPAM/blacklist/master/blacklist.txt" - self.blacklist = "" - self.change = False - - def request(self): - """ - determine if the download is required - """ - etag_path = "/".join([self.path, ".etag"]) - blacklist_path = "/".join([self.path, "blacklist.txt"]) - - # check if etag header is present if not set local_etag to "" - if os.path.isfile(etag_path): - # catch special case were etag file is present and blacklist.txt is not - if not os.path.isfile(blacklist_path): - local_etag = "" - else: - # if both files are present continue normally - with open(etag_path, "r") as local_file: - local_etag = local_file.read() - else: - local_etag = "" - - with requests.Session() as s: - # head request to check etag - head = s.head(self.url) - etag = head.headers['etag'] - - # if etags match up or if the connection is not possible fall back to local cache - if local_etag == etag or head.status_code != 200: - # if local cache is present overwrite blacklist var - if os.path.isfile(blacklist_path): - with open(blacklist_path, "r", encoding="utf-8") as local_file: - self.blacklist = local_file.read() - - # in any other case request a new file - else: - r = s.get(self.url) - r.encoding = 'utf-8' - local_etag = head.headers['etag'] - self.blacklist = r.content.decode() - - with 
open(blacklist_path, "w") as local_file: - local_file.write(self.blacklist) - - with open(etag_path, 'w') as local_file: - local_file.write(local_etag) - - def main(self): - # first check if blacklist is updated - self.request() - - # only output the selected outfile - if self.dryrun: - print("outfile selected: %s" % self.outfile) - - # blacklist processing - self.process() - - # reload config if changes have been applied - if self.change: - # catch ejabberdctl missing - if os.path.isfile('/usr/sbin/ejabberdctl'): - subprocess.call(['/usr/sbin/ejabberdctl', 'reload_config'], shell=False) - - # report missing ejabberdctl reload_config - else: - print('/usr/sbin/ejabberdctl was not found', file=sys.stderr) - print('blacklist changes have been applied\nejabberd config was not reloaded', file=sys.stderr) - sys.exit(1) - - def process(self): - """ - function to build and compare the local yaml file to the remote file - if the remote file is different, the local file gets overwritten - """ - # init new YAML variable - local_file = YAML(typ="safe") - - # None catch - if self.outfile is not None: - # prevent FileNotFoundError on first run or file missing - if os.path.isfile(self.outfile): - local_file = local_file.load(open(self.outfile, "r", encoding="utf-8")) - - # blacklist frame - remote_file = { - "acl": { - "spamblacklist": { - "server": [] - } - } - } - - # build the blacklist with the given frame to compare to local blacklist - for entry in self.blacklist.split(): - entry = scalarstring.DoubleQuotedScalarString(entry) - remote_file["acl"]["spamblacklist"]["server"].append(entry) - - yml = YAML() - yml.indent(offset=2) - yml.default_flow_style = False - - # if dry-run true print expected content - if self.dryrun: - yml.dump(remote_file, sys.stdout) - - # only if the local_file and remote_file are unequal write new file - elif local_file != remote_file: - - # prevent FileNotFoundError if self.outfile is not assigned - if self.outfile is None: - print("no outfile 
assigned", file=sys.stderr) - print(parser.format_help(), file=sys.stderr) - sys.exit(2) - - # proceed to update the defined outfile - elif self.outfile is not None: - self.change = True - yml.dump(remote_file, open(self.outfile, "w")) - - # if that's impossible break and display help message - else: - print(parser.format_help(), file=sys.stderr) - sys.exit(1) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('-o', '--outfile', help='set path to output file', dest='outfile', default=None) - parser.add_argument('-dr', '--dry-run', help='perform a dry run', action='store_true', dest='dryrun', default=False) - args = parser.parse_args() - - # run - BlacklistImporter(args).main() diff --git a/requirements.txt b/requirements.txt index ac76703..0ee9f3a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ +appdirs==1.4.4 requests==2.25.1 -ruamel.yaml==0.16.13 +ruamel.yaml==0.17.2 ruamel.yaml.clib==0.2.2 -urllib3==1.26.3 +urllib3==1.26.4 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..920adaf --- /dev/null +++ b/setup.cfg @@ -0,0 +1,39 @@ +[metadata] +name = blimp +version = attr: blimp.__version__ +url = https://github.com/mightyBroccoli/blacklist_importer +license = GPLv3 +author = Nico Wellpott +author_email = nico@magicbroccoli.de +description = simple tool to download and update the JabberSPAM blacklist. 
+long_description = file: README.md +long_description_content_type = text/markdown +classifiers = + Development Status :: 4 - Beta + Intended Audience :: System Administrators + License :: OSI Approved :: GNU General Public License v3 (GPLv3) + Operating System :: Unix + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Programming Language :: Python :: Implementation :: CPython + Topic :: Internet :: WWW/HTTP :: Dynamic Content + +[options] +packages = find: +package_dir = = src +include_package_data = true +python_requires = >= 3.7 + +[options.packages.find] +where = src + +[options.entry_points] +console_scripts = + blimp = blimp.cli:cli + +[tool:pytest] +testpaths = tests +filterwarnings = + error diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..36e690b --- /dev/null +++ b/setup.py @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- +from setuptools import setup + +setup( + name="Blimp", + install_requires=[ + "appdirs>=1.4", + "requests>=2.25", + "ruamel.yaml>=0.17", + "ruamel.yaml.clib>=0.2", + "urllib3>=1.26" + ] +) diff --git a/src/blimp/__init__.py b/src/blimp/__init__.py new file mode 100644 index 0000000..19c2b68 --- /dev/null +++ b/src/blimp/__init__.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# version +__version__ = "0.1" diff --git a/src/blimp/bl_process.py b/src/blimp/bl_process.py new file mode 100644 index 0000000..06c9c0f --- /dev/null +++ b/src/blimp/bl_process.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- +import sys + +from ruamel.yaml import YAML, scalarstring + +from .misc import local_file_present + + +class ProcessBlocklist: + def __init__(self): + pass + + @classmethod + def process(self, blacklist, outfile, dryrun: bool): + """ + function to build and compare the local yaml file to the remote file + if the remote file is different, the local file gets overwritten + """ + # cheeky none catch + 
try: + # load local blacklist outfile + if local_file_present(outfile): + with open(outfile, "r", encoding="utf-8") as local_file: + local_blacklist = local_file.read() + + except TypeError: + # no local copy use empty one instead + local_blacklist = YAML(typ="safe") + + # blacklist frame + remote_file = {"acl": {"spamblacklist": {"server": []}}} + + # build the blacklist with the given frame to compare to local blacklist + for entry in blacklist.split(): + entry = scalarstring.DoubleQuotedScalarString(entry) + remote_file["acl"]["spamblacklist"]["server"].append(entry) + + yml = YAML() + yml.indent(offset=2) + yml.default_flow_style = False + + # if dry-run true print expected content + if dryrun: + yml.dump(remote_file, sys.stdout) + return + + if local_blacklist == remote_file: + return + + if outfile is None: + print("no outfile assigned", file=sys.stderr) + sys.exit(2) + + # proceed to update the defined outfile + with open(outfile, "w", encoding="utf-8") as new_local_file: + yml.dump(remote_file, new_local_file) diff --git a/src/blimp/cli.py b/src/blimp/cli.py new file mode 100644 index 0000000..12bd1dc --- /dev/null +++ b/src/blimp/cli.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import argparse + +from .main import Blimp + + +def cli(): + parser = argparse.ArgumentParser() + parser.add_argument("-out", "--outfile", help="set path to output file", action="store", default=None) + parser.add_argument("-dr", "--dry-run", help="perform a dry run", action="store_true", default=False) + args = parser.parse_args() + + # run + Blimp(args).main() diff --git a/src/blimp/main.py b/src/blimp/main.py new file mode 100644 index 0000000..93fdc29 --- /dev/null +++ b/src/blimp/main.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +from pathlib import Path + +import requests +from appdirs import user_cache_dir + +from .bl_process import ProcessBlocklist +from .misc import local_file_present + + +class Blimp: + def 
__init__(self, args): + self.outfile = args.outfile + self.dryrun = args.dry_run + self.path = Path(user_cache_dir("blimp")) + self.url = "https://raw.githubusercontent.com/JabberSPAM/blacklist/master/blacklist.txt" + self.blacklist = "" + self.apply_changes = False + + self.etag_path = self.path.joinpath(".etag") + self.blacklist_path = self.path.joinpath("blacklist.txt") + + def cache_dir_check(self): + if not self.path.is_dir(): + Path(self.path).mkdir(parents=True, exist_ok=True) + + def download_required(self, etag) -> bool: + """ + method to determine if a new download should be initiated + :param etag: requests etag object + :return: true if download is required + """ + # always trigger download if any local cache file is missing + if not local_file_present(self.blacklist_path): + return True + + if not local_file_present(self.etag_path): + return True + + with open(self.etag_path, "r") as local_file: + local_etag = local_file.read() + + # etag file is present but outdated + if local_etag != etag: + return True + + return False + + def start_request(self): + """ + determine if the download is required + """ + with requests.Session() as s: + # head request to check etag + head = s.head(self.url) + etag = head.headers["etag"] + + if head.status_code != requests.codes.ok: + return + + if not self.download_required(etag): + with open(self.blacklist_path, "r", encoding="utf-8") as local_file: + self.blacklist = local_file.read() + + else: + r = s.get(self.url) + r.encoding = "utf-8" + local_etag = head.headers["etag"] + self.blacklist = r.content.decode() + + with open(self.blacklist_path, "w", encoding="utf-8") as local_file: + local_file.write(self.blacklist) + + with open(self.etag_path, "w", encoding="utf-8") as local_file: + local_file.write(local_etag) + + def main(self): + # check the cache dir first + self.cache_dir_check() + + # only output the selected outfile + if self.dryrun: + print("outfile selected: %s" % self.outfile) + + # go + 
self.start_request() + + # blacklist processing + ProcessBlocklist.process(self.blacklist, self.outfile, self.dryrun) + + +if __name__ == "__main__": + from .cli import cli + cli() diff --git a/src/blimp/misc.py b/src/blimp/misc.py new file mode 100644 index 0000000..7242e3f --- /dev/null +++ b/src/blimp/misc.py @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- +from pathlib import Path + + +def local_file_present(somepath) -> bool: + """ + check if a given local filepath exists + :return: true if present + """ + if not Path(somepath).is_file(): + return False + + return True