"""Scan a set of APT repos and generate a JSON catalog of the contents."""
import argparse
import json
import os
from pathlib import Path

# Control-file fields that are copied verbatim into the catalog entry:
# (line prefix, key in the resulting dictionary).
_SIMPLE_FIELDS = (
    ("Package: ", "Package"),
    ("Architecture: ", "Architecture"),
    ("Homepage: ", "Url"),
    ("Maintainer: ", "Maintainer"),
)


def get_package_files(repo_dir):
    """Return the absolute paths of all 'Packages' files below repo_dir.

    The search is recursive. Directories that happen to be named
    'Packages' are skipped, because they cannot be opened as index files.
    """
    return [
        path.absolute()
        for path in Path(repo_dir).rglob('Packages')
        if path.is_file()
    ]


def get_distribution(package_file):
    """Return the distribution name for a Packages file.

    The name is read from the 'Archive: ' line of the 'Release' file that
    sits in the same directory as package_file. Surrounding whitespace
    (including the line terminator) is removed.

    Returns None if there is no Release file next to package_file, or if
    it contains no 'Archive: ' line.
    """
    prefix = "Archive: "
    # os.path.join keeps the path relative when package_file has no
    # directory component, instead of pointing at '/Release'.
    release_file = os.path.join(os.path.dirname(package_file), 'Release')

    try:
        with open(release_file, "r", encoding="utf-8") as release_data:
            for release_line in release_data:
                if release_line.startswith(prefix):
                    return release_line[len(prefix):].strip()
    except FileNotFoundError:
        # No Release file: the distribution is simply unknown.
        return None

    return None


def get_licence(deb_file):
    """Return the licence of a .deb file.

    Currently always returns an empty string; deb_file is not inspected.
    """
    # TODO: Figure out a sane way to get the license
    licence = ''

    return licence


def get_packages(package_file):
    """Split a Packages file into one text blob per package.

    A new entry starts at every line beginning with 'Package: ' (anything
    before the first such line is kept as part of the first entry). If the
    distribution can be determined, a 'Distribution: <name>' line is
    appended to every entry.

    Returns a list of strings, one per package, in file order.
    """
    # The distribution is the same for every entry of this file, so look
    # it up once rather than re-reading the Release file per package.
    distribution = get_distribution(package_file)
    suffix = ''
    if distribution is not None:
        suffix = '\nDistribution: ' + distribution

    packages = []
    current = []

    with open(package_file, "r", encoding="utf-8") as package_data:
        for line in package_data:
            if line.startswith('Package: ') and current:
                packages.append(''.join(current) + suffix)
                current = []

            current.append(line)

    # Flush the final entry: no further 'Package: ' line follows it.
    # Whitespace-only leftovers (e.g. an empty file) are not an entry.
    remainder = ''.join(current)
    if remainder.strip():
        packages.append(remainder + suffix)

    return packages


def _split_version(raw_version):
    """Split a 'Version: ' value into a (version, build) tuple."""
    # The build is normally prefixed with a -, but sometimes
    # just .pgdg
    # NOTE(review): segments after the first '-' are concatenated without
    # a separator, so '1-2-3' yields build '23' - confirm this is intended.
    version, *build_parts = raw_version.split('-')
    build = ''.join(build_parts)

    if '.pgdg' in version:
        pieces = raw_version.split('.pgdg')
        version = pieces[0]
        build = 'pgdg' + pieces[1]

    return version, build


def get_package(package_data):
    """Decode a Package entry into a dictionary.

    package_data is the text of one entry as produced by get_packages().
    Recognised fields are stored under these keys: Package, Version,
    Build, Architecture, Filename, Licence, Description, Distribution,
    Repo, Url and Maintainer. Fields missing from the entry are missing
    from the result.
    """
    package = {}
    in_description = False

    for line in package_data.splitlines():
        # Description. This can be multi-line. The first line is handled
        # like any other field below; continuation lines are consumed
        # here until a line that doesn't start with a space is hit.
        if in_description:
            if line.strip() == ".":
                # A lone '.' stands for an empty line.
                package['Description'] += "\n"
                continue
            if line.startswith(" "):
                package['Description'] += '\n' + line.strip()
                continue
            # Not a continuation: the description is over and this line
            # is processed as a regular field.
            in_description = False

        for prefix, key in _SIMPLE_FIELDS:
            if line.startswith(prefix):
                package[key] = line[len(prefix):]

        if line.startswith("Version: "):
            package['Version'], package['Build'] = _split_version(line[9:])

        elif line.startswith("Filename: "):
            package['Filename'] = line[10:]

            licence = get_licence(line[10:])
            if licence is not None:
                package['Licence'] = licence

        elif line.startswith("Description: "):
            package['Description'] = line[13:]
            in_description = True

        # Distribution/Repo
        elif line.startswith("Distribution: "):
            package['Distribution'] = line[14:].split('-')[0]
            package['Repo'] = line[14:]

    return package


def main():
    """Parse the command line, scan the repo(s) and write 'apt.json'.

    The catalog is written to 'apt.json' in the current working directory
    as a JSON list with one object per package.
    """
    # Command line arguments
    parser = argparse.ArgumentParser(description='Scan a set of APT repos and '
                                                 'generate a JSON catalog of '
                                                 'the contents.')
    parser.add_argument("repo", help="the repo directory, or directory "
                                     "containing multiple repos")
    args = parser.parse_args()

    package_info = []
    for package_file in get_package_files(args.repo):
        for package in get_packages(package_file):
            package_info.append(get_package(package))

    with open('apt.json', 'w') as output_file:
        json.dump(package_info, output_file, indent=2, sort_keys=True)


if __name__ == "__main__":
    main()