#!/usr/bin/env python2
"""cacheclean - a simple python script to clean up the /data/storage/disk0/pacman/pkg directory.
More versatile than 'pacman -Sc' in that you can select how many old versions
to keep.
Usage: cacheclean {-p} {-v} <# of copies to keep>
    # of copies to keep - (required) how many generations of each package to keep
    -p - (optional) preview what would be deleted; forces verbose (-v) mode.
    -v - (optional) show deleted packages.
    
Adapted from https://github.com/graysky2/cacheclean for LinHES pacman cache directories
and python 2.6."""

# Note that the determination of package age is done by simply looking at the date-time
# modified stamp on the file. There is just enough variation in the way package version
# is done that I thought this would be simpler & just as good.
# Also note that you must be root to run this script.

import getopt
import os
import re
import sys

# helper: stat a file and package up the fields cleanup() sorts and sums on
def fn_stats(fn):
    """Return the tuple (modification time, file name, size in bytes) for fn.

    The modification time comes first so that a plain sort of these tuples
    orders files oldest-first. The values are read through the tuple
    interface of the stat result (index 8 = mtime, index 6 = size).
    """
    info = os.stat(fn)
    modified = info[8]
    size = info[6]
    return (modified, fn, size)

# cleanup prunes one run of package files down to the newest n_to_keep
def cleanup(run_list):
    """Delete all but the n_to_keep most recently modified files of a run.

    run_list is a list of file names belonging to one package. Reads the
    module globals n_to_keep, opt_verbose and opt_preview, and adds to the
    running totals n_deleted and bytes_deleted. In preview mode the totals
    are still updated but nothing is removed from disk.
    """
    # only the names that get rebound here have to be declared global
    global n_deleted, bytes_deleted

    # nothing to prune when the run already fits within the quota
    if len(run_list) <= n_to_keep:
        return

    # (mtime, name, size) tuples, oldest first
    by_age = sorted(fn_stats(name) for name in run_list)

    # everything except the newest n_to_keep entries is a candidate;
    # entries that are not regular files are left alone
    doomed = [(name, size)
              for (_mtime, name, size) in by_age[:-n_to_keep]
              if os.path.isfile(name)]
    kill_list = [name for (name, _size) in doomed]

    bytes_deleted += sum(size for (_name, size) in doomed)
    n_deleted += len(kill_list)

    if opt_verbose and kill_list:
        print (kill_list)

    # preview mode stops short of touching the disk
    if opt_preview:
        return
    for victim in kill_list:
        os.unlink(victim)

######################################################################
# mainline processing

# process command line options
try:
    opts, pargs = getopt.getopt(sys.argv[1:], 'vp')
    opt_dict = dict(opts)
    opt_preview = '-p' in opt_dict
    # a preview that prints nothing is useless, so -p forces verbose mode
    opt_verbose = opt_preview or '-v' in opt_dict
    if len(pargs) != 1:
        raise getopt.GetoptError("missing required argument.")
    try:
        n_to_keep = int(pargs[0])
    except ValueError:
        n_to_keep = 0  # non-numeric input is rejected by the range check below
    if n_to_keep <= 0:
        raise getopt.GetoptError("# of copies to keep must be numeric > 0!")
except getopt.GetoptError as msg:
    # Build one string by hand: under Python 2 `print (a, b, c)` prints the
    # repr of a tuple instead of a readable message. Errors go to stderr,
    # like the other diagnostics below.
    sys.stderr.write("Error: %s\n%s\n" % (msg, __doc__))
    sys.exit(1)

# change to the pkg directory & get a sorted list of its contents
os.chdir('/data/storage/disk0/pacman/pkg')
pkg_fns = os.listdir('.')
pkg_fns.sort()

# Pattern to use to extract the package name from the tar file name:
# for pkg e.g. 'gnome-common-2.8.0-1-i686.pkg.tar.gz' group(1) is 'gnome-common'
# and group(2) is '-i686' (None when the name carries no architecture).
bpat = re.compile(r"""
^([^-/][^/]*?)-         # package name  -> capture group 1
[^-/\s]+-               # version
[^-/\s]+                # release
(-i686|-x86_64|-any)?   # architecture  -> capture group 2 (optional)
\.pkg\.tar              # extension
(?:\.(?:gz|bz2|xz|Z))?  # compression extension
(?:\.aria2|\.sig)?      # other extension
(?:\.part)?$            # partially-downloaded files' extension
""", re.X)

n_deleted = 0
bytes_deleted = 0

# Collect the files of each package into its own list, keyed by
# (package name, architecture). Grouping by key instead of looking for
# adjacent names in the sorted listing means that
#   * directories and files that do not match the pattern are never handed
#     to cleanup(), so they can neither be deleted nor use up a "keep" slot;
#   * the same package built for several architectures is still pruned even
#     though its file names interleave in sorted order;
#   * an empty directory simply results in no work instead of a NameError.
# NOTE: every matching file counts as one copy, including .sig/.aria2/.part
# companions of a package file.
runs = {}
for fn in pkg_fns:
    # make sure we skip directories
    if not os.path.isfile(fn):
        sys.stdout.write("skipping directory '" + fn + "'!\n")
        continue
    mo = bpat.match(fn)  # test for a match of the package name pattern
    if mo is None:
        sys.stderr.write("File '" + fn + "' doesn't match package pattern!\n")
        continue
    # the architecture is part of the key so that e.g. the i686 and x86_64
    # builds of one package are aged independently
    key = (mo.group(1), mo.group(2) or '')
    runs.setdefault(key, []).append(fn)

# process the packages in name order so verbose output stays predictable
for key in sorted(runs):
    cleanup(runs[key])

if opt_verbose:
    # '//' keeps the whole-number units on both Python 2 and Python 3
    summary = "%d files deleted, %d kbytes / %d MBytes / %d GBytes." % (
        n_deleted,
        bytes_deleted // (2**10),
        bytes_deleted // (2**20),
        bytes_deleted // (2**30))
    if opt_preview:
        summary = "Preview mode (no files deleted): " + summary
    print (summary)