Skip to content

Python snippets

October 18, 2023
December 11, 2014

TODO: add code from ~/wip/python-snippets/

Site package

installation - How do I find the location of my Python site-packages directory? - Stack Overflow

# Print sys.path together with the user base / user site-packages locations
python -m site
# Print the list of global site-packages directories
python -c 'import site; print(site.getsitepackages())'

# Print the module search path of this interpreter
python -c 'import sys; print(sys.path)'

clipping

def clip(n, smallest, largest):
    """Clamp `n` so the result is at most `largest` and at least `smallest`.

    The upper bound is applied first, so `smallest` wins if the bounds cross.
    """
    capped = min(n, largest)
    return max(smallest, capped)

loop files in folder

Python: List Files in a Directory

import os

# Recursively walk the current directory and print the path of every *.py file.
for dirpath, _subdirs, filenames in os.walk("."):
    python_names = (name for name in filenames if name.endswith(".py"))
    for name in python_names:
        print(os.path.join(dirpath, name))
import pathlib

# Non-recursive variant: print each entry directly inside the current
# directory whose name matches the glob pattern.
currentPattern = "*.txt"
currentDirectory = pathlib.Path('.')

matches = currentDirectory.glob(currentPattern)
for currentFile in matches:
    print(currentFile)

Flatten list

Fastest Way to Flatten a List in Python - Chris Conlan
Flattening Lists in Python: Reducing Dimensionality Without Prior Knowledge of Data - αlphαrithms

from typing import Any, Iterable, Iterator


def flatten(any_list: Iterable[Any]) -> Iterator[Any]:
    """Lazily yield the leaf elements of an arbitrarily nested iterable.

    Nested iterables are expanded depth-first, in order. `str` and `bytes`
    are treated as single elements, not expanded character by character.

    Args:
        any_list: iterable whose items may themselves be iterables.

    Yields:
        Each non-iterable (or str/bytes) element, in traversal order.
    """
    for element in any_list:
        # Strings are iterable too; excluding them here avoids infinite
        # recursion on single-character strings.
        if isinstance(element, Iterable) and not isinstance(element, (str, bytes)):
            yield from flatten(element)
        else:
            yield element

CSV

14.1. csv — CSV File Reading and Writing — Python documentation

Reading and Writing CSV Files in Python – Real Python
Reading and Writing CSV Files in Python
The fastest way to read a CSV in Pandas read as arrow and parquet

import csv

# `filepath` is the path of the tab-separated file to read.
# The delimiter is an option of the csv reader, not of open();
# newline="" lets the csv module handle line endings inside quoted fields.
with open(filepath, "r", newline="", encoding="utf-8") as fp:
    reader = csv.DictReader(fp, delimiter="\t")
    # Each row is a dict keyed by the column names from the header line.
    for row in reader:
        print(row)
# extra noise needed for this script to work in Windows and
# create Excel readable CSV
import csv

# The "utf-8-sig" codec writes the UTF-8 BOM at the start of the file, so no
# manual fp.write("\ufeff") is needed; newline="" stops Windows from
# doubling the line endings that csv.writer emits.
with open(filepath, "w", newline="", encoding="utf-8-sig") as fp:
    writer = csv.writer(fp)
    writer.writerow(HEADER)
    writer.writerow(ROW)

groupby

# key by id, values are item (the first item seen for each id)
# for dict items use `itemgetter("id")`
from itertools import groupby
from operator import itemgetter
# groupby() only merges *consecutive* items with equal keys, so sort by the
# same key first; otherwise an id that reappears later in `labels` would
# start a new group and overwrite the earlier entry. sorted() is stable, so
# the first item of each id (in original order) is the one that is kept.
labelsById = {
    key: next(groupiter)
    for key, groupiter in groupby(
        sorted(labels, key=itemgetter("id")), key=itemgetter("id")
    )
}

# group by id, values are lists of items
# for class items use `attrgetter("id")`
from itertools import groupby
from operator import attrgetter
# groupby() only merges *consecutive* items with equal keys, so sort by the
# same key first; otherwise items sharing an id but not adjacent in `labels`
# would end up in separate groups and the later group would win.
labelsById = {
    key: list(groupiter)
    for key, groupiter in groupby(
        sorted(labels, key=attrgetter("id")), key=attrgetter("id")
    )
}

sort

# this creates a new list
# for dict items use `itemgetter("id")`
from operator import itemgetter
sorted(dicts, key=itemgetter("id"))

# this modifies the list in place (list.sort() is a method and returns None;
# there is no builtin `sort()` function)
# for class items use `attrgetter("id")`
from operator import attrgetter
classes.sort(key=attrgetter("id"))

sort list of dict

people = [
    {"name": "John", "age": 64},
    {"name": "Janet", "age": 34},
    {"name": "Ed", "age": 24},
    {"name": "Sara", "age": 64},
    {"name": "John", "age": 32},
    {"name": "Jane", "age": 34},
    {"name": "John", "age": 99},
]

import operator

# list.sort() is stable, so sorting by the secondary key ("age") first and by
# the primary key ("name") last orders by name with ties broken by age.
for sort_field in ("age", "name"):
    people.sort(key=operator.itemgetter(sort_field))

# resulting content of `people`:
[
    {"name": "Ed", "age": 24},
    {"name": "Jane", "age": 34},
    {"name": "Janet", "age": 34},
    {"name": "John", "age": 32},
    {"name": "John", "age": 64},
    {"name": "John", "age": 99},
    {"name": "Sara", "age": 64},
]

sort dict

people = {3: "Jim", 2: "Jack", 4: "Jane", 1: "Jill"}
# use `.items()` and work on (key, value) pairs, ordering by value.
# Python 3 removed tuple parameters (`lambda (key, value): ...`), so the
# key function receives the pair as one argument and indexes into it.
people_by_name = dict(sorted(people.items(), key=lambda item: item[1]))
# use dict comprehension, swapping key and value
name_to_key = {
    value: key
    for key, value in sorted(people.items(), key=lambda item: item[1])
}

partition list

import itertools
from typing import Any, Iterator

def partition(l: list[Any], size: int) -> Iterator[tuple[Any, ...]]:
    """
    partition a `list` into chunks of `size`
    return an `Iterator` of the chunks (in `tuples`)

    The last chunk is shorter when `len(l)` is not a multiple of `size`.
    Any iterable is accepted; it is consumed once, front to back.

    Raises:
        ValueError: if `size` is smaller than 1 (raised on first iteration,
            because this is a generator).
    """
    if size < 1:
        raise ValueError("size must be a positive integer")
    # Share one iterator across all chunks so every element is visited once;
    # calling islice(l, i, i + size) on the list itself would rescan from the
    # start for each chunk.
    remaining = iter(l)
    while chunk := tuple(itertools.islice(remaining, size)):
        yield chunk

# usage examples; the last chunk is shorter when the list does not divide evenly
l = [1, 2, 3, 4, 5]
list(partition(l, 3)) # [(1, 2, 3), (4, 5)]
list(partition(l, 2)) # [(1, 2), (3, 4), (5,)]
list(partition(l, 1)) # [(1,), (2,), (3,), (4,), (5,)]

bail

import sys

def bail(msg: str):
    """Print `msg` to stderr and terminate with exit status 1.

    Raises:
        SystemExit: always, with code 1.
    """
    print(msg, file=sys.stderr)
    # sys.exit() is always available; the bare exit() helper is installed by
    # the `site` module and can be missing (e.g. `python -S`, frozen apps).
    sys.exit(1)

JSON

import json
import os

def json_load(infile: os.PathLike, **kwargs) -> dict:
    """Read the UTF-8 encoded JSON file at `infile` and return the decoded value.

    Any extra keyword arguments are forwarded unchanged to `json.load`.
    """
    with open(infile, encoding="utf-8") as source:
        parsed = json.load(source, **kwargs)
    return parsed

def json_dump(data: dict, outfile: os.PathLike, minify: bool = False, **kwargs):
    """Write `data` as UTF-8 JSON to the path `outfile`.

    With `minify` the output uses compact separators and no indentation;
    otherwise it is indented by two spaces. Non-ASCII characters are written
    as-is and keys of unsupported types are skipped. Extra keyword arguments
    are forwarded to `json.dump`.
    """
    # The two output flavours differ only in these layout options.
    layout = {"separators": (",", ":")} if minify else {"indent": 2}
    with open(outfile, "w", encoding="utf-8") as sink:
        json.dump(data, sink, ensure_ascii=False, skipkeys=True, **layout, **kwargs)

Markdown

#!/usr/bin/env python3

import argparse
import sys
from os import path
from pprint import pprint

import mistune


def bail(msg: str):
    """Print `msg` to stderr and terminate with exit status 1.

    Raises:
        SystemExit: always, with code 1.
    """
    print(msg, file=sys.stderr)
    # sys.exit() is always available; the bare exit() helper is installed by
    # the `site` module and can be missing (e.g. `python -S`, frozen apps).
    sys.exit(1)


def md_load(path: str):
    """Read the Markdown file at `path` and return what mistune parses from it.

    The parser is created with `renderer=None`; the surrounding script treats
    the result as an iterable of AST elements (presumably mistune's token
    list -- confirm against the installed mistune version).
    """
    markdown = mistune.create_markdown(renderer=None)
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding.
    with open(path, encoding="utf-8") as f:
        return markdown(f.read())


# Command-line interface: one positional Markdown file plus a repeatable -v flag.
parser = argparse.ArgumentParser(
    description="Parse Markdown file and dump AST",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument("input", metavar="MD", help="Input file")
parser.add_argument(
    "-v",
    "--verbose",
    help="verbose verbose verbose verbose!!!",
    action="count",  # each -v increments the counter
    default=0,
)

args = parser.parse_args()
# Any verbosity level above zero echoes the parsed arguments.
if args.verbose:
    print(args)

# Stop early with an error message when the input path is not an existing file.
if not path.isfile(args.input):
    bail(f"Input [{args.input}] is not a file")

doc = md_load(args.input)

# Pretty-print every top-level element of the parsed document.
for elem in doc:
    pprint(elem)

Lodash

pydash — pydash documentation

# _.pick()
# keep only the entries whose key starts with "foo"
newdict = {k: v for k, v in olddict.items() if k.startswith("foo")}
# keep only the entries whose key is one of the listed names
newdict = {k: v for k, v in olddict.items() if k in ["a", "b", "c"]}

RLE

Run length encode with itertools.groupby()

from itertools import groupby
# Run-length encode `string` as (character, run length) pairs; groupby()
# yields one group per run of consecutive equal characters.
[(char, len(list(run))) for char, run in groupby(string)]

Natural sort

sorting - Does Python have a built in function for string natural sort? - Stack Overflow

import re

def natural_sort(l):
    """Return a new list of the strings in `l` in natural (human) order.

    Runs of ASCII digits compare by numeric value and everything else
    compares case-insensitively, so "a2" sorts before "a10".

    Args:
        l: iterable of strings.

    Returns:
        A new sorted list; `l` itself is not modified.
    """
    split_digit_runs = re.compile(r"([0-9]+)").split

    def alphanum_key(key):
        # re.split with one capture group always alternates text, digits,
        # text, ... so odd positions are exactly the [0-9]+ runs. Using the
        # position instead of str.isdigit() avoids int() failing on
        # characters such as "²" that isdigit() accepts but int() rejects.
        return [
            int(chunk) if index % 2 else chunk.lower()
            for index, chunk in enumerate(split_digit_runs(key))
        ]

    return sorted(l, key=alphanum_key)

FourCC

import struct

def encode_fourcc(fourcc):
    """Convert a four-character ASCII code into its unsigned 32-bit integer.

    The four bytes are read in network (big-endian) order.
    """
    raw = fourcc.encode('ascii')
    (value,) = struct.unpack("!I", raw)
    return value

def decode_fourcc(v):
    """Convert an unsigned 32-bit integer back into its four-character code.

    The integer is packed in network (big-endian) order and decoded as ASCII.
    """
    packed = struct.pack("!I", v)
    return packed.decode("ascii")

find duplicates in list

l = [ 1, 2, 3, 4, 3, 1]

# Only look for repeats when the list is longer than its set of distinct values.
if len(l) != len(set(l)):
    s = set()  # values seen so far
    # s.add(x) returns None (falsy), so an item is selected only when it was
    # already in `s`; unseen items are recorded as a side effect.
    print("Duplicates", {x for x in l if x in s or s.add(x)})

http server

# Python 2: serve the current directory over HTTP on port 8000
python2 -m SimpleHTTPServer 8000
# Python 3 equivalent
python3 -m http.server 8000

test network

import socket
# Open a TCP connection to host `mariadb` on port 3306 and read up to 1024
# bytes of whatever the server sends first. The `with` block closes the
# socket even when connect() or recv() raises.
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
    s.connect(('mariadb', 3306))
    s.recv(1024)
import socket
# Send a minimal HTTP/1.1 GET request and read the start of the response.
# The `with` block closes the socket even when an operation raises.
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
    s.connect(('10.6.64.48', 80))
    # Sockets carry bytes in Python 3, HTTP header lines end with CRLF, and
    # sendall() keeps sending until the whole request has been written.
    s.sendall(b'GET / HTTP/1.1\r\nHost: 10.6.64.48\r\n\r\n')
    s.recv(1024)
import socket

# Resolve the host name; the last field of each getaddrinfo() entry is the
# socket address, whose first element is the IP string. Going through a set
# removes duplicate addresses.
ais = socket.getaddrinfo("www.yahoo.com", 0, 0, 0, 0)
unique_ips = {entry[-1][0] for entry in ais}
ip_list = list(unique_ips)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Technical support: https://www.jianshu.com/u/69f40328d4f0
# Technical support https://china-testing.github.io/
# https://github.com/china-testing/python-api-tesing/blob/master/practices/ping.py
# Discuss nail free group 21745728 qq q group 144081101 567351477
# CreateDate: 2018-11-22

import os
import argparse
import socket
import struct
import select
import time


ICMP_ECHO_REQUEST = 8  # Platform specific
DEFAULT_TIMEOUT = 2  # seconds to wait for each reply
DEFAULT_COUNT = 4  # echo requests sent per run


class Pinger(object):
    """Ping a host with ICMP echo requests over a raw socket.

    Attributes set by the constructor:
        target_host: host name or IPv4 address to ping.
        count: number of echo requests `ping()` sends.
        timeout: seconds to wait for each reply.
    """

    def __init__(self, target_host, count=DEFAULT_COUNT, timeout=DEFAULT_TIMEOUT):
        self.target_host = target_host
        self.count = count
        self.timeout = timeout

    def do_checksum(self, source_string):
        """Compute the 16-bit checksum placed in the ICMP header.

        Args:
            source_string: the packet as `bytes` (header with a zero checksum
                field followed by the payload).

        Returns:
            The complemented, carry-folded sum of the 16-bit words of the
            packet, with its two bytes swapped.
        """
        total = 0
        # Integer division: `/` would produce a float and make the loop run
        # one pair past the end of an odd-length packet.
        paired_length = (len(source_string) // 2) * 2
        for offset in range(0, paired_length, 2):
            # Indexing bytes yields ints; each pair forms a word with the
            # second byte as the high byte.
            word = source_string[offset + 1] * 256 + source_string[offset]
            total = (total + word) & 0xFFFFFFFF

        if paired_length < len(source_string):
            # Odd length: add the trailing byte on its own (already an int,
            # so no ord() is needed).
            total = (total + source_string[-1]) & 0xFFFFFFFF

        # Fold the carries above 16 bits back into the low 16 bits.
        total = (total >> 16) + (total & 0xFFFF)
        total = total + (total >> 16)
        answer = ~total & 0xFFFF
        # Swap the two bytes of the result.
        answer = answer >> 8 | (answer << 8 & 0xFF00)
        return answer

    def receive_pong(self, sock, ID, timeout):
        """
        Receive ping from the socket.

        Waits until a packet whose ICMP identifier equals `ID` arrives or
        `timeout` seconds have been spent waiting.

        Returns:
            The delay in seconds between the timestamp stored in the packet
            payload and its reception, or None on timeout.
        """
        time_remaining = timeout
        while True:
            start_time = time.time()
            readable = select.select([sock], [], [], time_remaining)
            time_spent = time.time() - start_time
            if not readable[0]:  # Timeout
                return None

            time_received = time.time()
            recv_packet, _addr = sock.recvfrom(1024)
            # Bytes 20-27 are read as the ICMP header; this assumes a
            # 20-byte IP header without options in front of it.
            icmp_header = recv_packet[20:28]
            _icmp_type, _code, _checksum, packet_ID, _sequence = struct.unpack(
                "bbHHh", icmp_header
            )
            if packet_ID == ID:
                # The payload starts with the send time packed as a double.
                bytes_In_double = struct.calcsize("d")
                time_sent = struct.unpack(
                    "d", recv_packet[28 : 28 + bytes_In_double]
                )[0]
                return time_received - time_sent

            # Packet belonged to someone else: keep waiting with what is left.
            time_remaining = time_remaining - time_spent
            if time_remaining <= 0:
                return None

    def send_ping(self, sock, ID):
        """
        Send ping to the target host

        Builds an echo request whose payload starts with the current time
        (packed as a double) and sends it through `sock`.

        Raises:
            socket.gaierror: if `target_host` cannot be resolved.
        """
        target_addr = socket.gethostbyname(self.target_host)

        my_checksum = 0

        # Create a dummy header with a 0 checksum.
        header = struct.pack("bbHHh", ICMP_ECHO_REQUEST, 0, my_checksum, ID, 1)
        bytes_In_double = struct.calcsize("d")
        # Pad with "Q" bytes so timestamp plus padding is 192 bytes.
        padding = b"Q" * (192 - bytes_In_double)
        data = struct.pack("d", time.time()) + padding

        # Get the checksum on the data and the dummy header.
        my_checksum = self.do_checksum(header + data)
        header = struct.pack(
            "bbHHh", ICMP_ECHO_REQUEST, 0, socket.htons(my_checksum), ID, 1
        )
        packet = header + data
        sock.sendto(packet, (target_addr, 1))

    def ping_once(self):
        """
        Returns the delay (in seconds) or none on timeout.

        Opens a raw ICMP socket for a single request/reply exchange (raw
        sockets usually require elevated privileges).
        """
        icmp = socket.getprotobyname("icmp")
        # The context manager closes the socket even if sending or
        # receiving raises.
        with socket.socket(socket.AF_INET, socket.SOCK_RAW, icmp) as sock:
            # The process id, truncated to 16 bits, identifies our packets.
            my_ID = os.getpid() & 0xFFFF

            self.send_ping(sock, my_ID)
            return self.receive_pong(sock, my_ID, self.timeout)

    def ping(self):
        """
        Run the ping process

        Sends `count` echo requests and prints the outcome of each; stops
        early when the host name cannot be resolved.
        """
        for _ in range(self.count):
            print("Ping to %s..." % self.target_host)
            try:
                delay = self.ping_once()
            except socket.gaierror as e:
                # Exceptions are not subscriptable in Python 3; strerror
                # holds the message that e[1] used to provide.
                print("Ping failed. (socket error: '%s')" % (e.strerror or e))
                break

            if delay is None:
                print("Ping failed. (timeout within %ssec.)" % self.timeout)
            else:
                delay = delay * 1000
                print("Get pong in %0.4fms" % delay)


if __name__ == "__main__":
    # Command-line entry point: ping the host given as the positional argument.
    parser = argparse.ArgumentParser(description="Python ping")
    parser.add_argument("host", action="store", help=u"host name")
    target_host = parser.parse_args().host
    Pinger(target_host=target_host).ping()

Random number

Generating Random Data in Python (Guide) – Real Python
Fastest way to generate a random-like unique string with random length in Python 3 - Stack Overflow

9.6. random — Generate pseudo-random numbers — Python documentation
15.3. secrets — Generate secure random numbers for managing secrets — Python documentation
16.1. os.urandom() — Miscellaneous operating system interfaces — Python documentation
22.20. uuid — UUID objects according to RFC 4122 — Python documentation

TTS

from pygame import mixer
from gtts import gTTS

def main():
    """Synthesize a fixed phrase with gTTS, save it as MP3 and play it.

    Writes `output.mp3` to the current directory and returns once playback
    has finished.
    """
    import time

    tts = gTTS('Like This Article')
    tts.save('output.mp3')
    mixer.init()
    mixer.music.load('output.mp3')
    mixer.music.play()
    # play() starts playback and returns immediately; wait here so the
    # script does not exit before the audio has been heard.
    while mixer.music.get_busy():
        time.sleep(0.1)


if __name__ == "__main__":
    main()