git_author_stats

Frequency `dataclass`

A frequency of time.

Source code in src/git_author_stats/_stats.py

@dataclass
class Frequency:
    """
    A frequency of time, expressed as a count of time units.

    Properties:

    - quantity (int): How many `unit`s make up one period
    - unit (FrequencyUnit): The unit of time in which `quantity` is counted

    NOTE(review): `iter_stats` documents the frequency string "1 week";
    presumably that corresponds to `Frequency(1, FrequencyUnit.WEEK)`, but
    the string parsing happens outside this class -- confirm.
    """

    # Number of units per period
    quantity: int
    # Unit of time that `quantity` counts
    unit: FrequencyUnit

FrequencyUnit

Bases: enum.Enum

A unit of time.

Source code in src/git_author_stats/_stats.py

class FrequencyUnit(Enum):
    """
    A unit of time.

    Each member's value is the lowercase English name of the unit.
    """

    # Members are declared in this order: week, month, day, year
    WEEK = "week"
    MONTH = "month"
    DAY = "day"
    YEAR = "year"

Stats `dataclass`

This object represents metrics obtained from the output of git log --numstat. Each record is unique when grouped by url + commit + author_name + file. The fields since and before are provided as a convenience for easy aggregation of stats, but do not provide any additional information about the commit or file.

Properties:

url (str): The URL of the repository
since (date|None): The start date for a pre-defined time period by which these metrics will be analyzed
before (date|None): The end date (non-inclusive) for a pre-defined time period by which these metrics will be analyzed
author_date (datetime|None): The date and time of the author's commit
author_name (str): The name of the author
commit (str): The abbreviated commit hash
file (str): The file path
insertions (int): The number of lines inserted in this commit
deletions (int): The number of lines deleted in this commit

Source code in src/git_author_stats/_stats.py

@dataclass
class Stats:
    """
    This object represents metrics obtained from the output of
    `git log --numstat`. Each record is unique when grouped by
    url + commit + author_name + file. The fields `since` and `before`
    are provided as a convenience for easy aggregation of stats, but do not
    provide any additional information about the commit or file.

    Properties:

    - url (str): The URL of the repository
    - since (date|None): The start date for a pre-defined time period by which
      these metrics will be analyzed
    - before (date|None): The end date (non-inclusive) for a pre-defined time
      period by which these metrics will be analyzed
    - author_date (datetime|None): The date and time of the author's commit
    - author_name (str): The name of the author
    - commit (str): The abbreviated commit hash
    - file (str): The file path
    - insertions (int): The number of lines inserted in this commit
    - deletions (int): The number of lines deleted in this commit
    """

    url: str = ""
    since: date | None = None
    before: date | None = None
    author_date: datetime | None = None
    author_name: str = ""
    commit: str = ""
    file: str = ""
    insertions: int = 0
    deletions: int = 0

    def __init__(
        self,
        url: str = "",
        since: date | str | None = None,
        before: date | str | None = None,
        author_date: datetime | str | None = None,
        author_name: str = "",
        commit: str = "",
        file: str = "",
        insertions: int | str = 0,
        deletions: int | str = 0,
    ) -> None:
        if isinstance(since, str):
            since = get_iso_date(since)
        if isinstance(before, str):
            before = get_iso_date(before)
        if isinstance(author_date, str):
            author_date = get_iso_datetime(author_date)
        if isinstance(insertions, str):
            insertions = int(insertions)
        if isinstance(deletions, str):
            deletions = int(deletions)
        self.url: str = url
        self.since: date = since
        self.before: date = before
        self.author_date: datetime = author_date
        self.author_name: str = author_name
        self.commit: str = commit
        self.file: str = file
        self.insertions: int = insertions
        self.deletions: int = deletions

iter_stats

iter_stats(
    urls: str | collections.abc.Iterable[str],
    user: str = "",
    password: str = "",
    since: datetime.date | None = None,
    after: datetime.date | None = None,
    before: datetime.date | None = None,
    until: datetime.date | None = None,
    frequency: (
        str | git_author_stats._stats.Frequency | None
    ) = None,
    *,
    no_mailmap: bool = False
) -> collections.abc.Iterable[
    git_author_stats._stats.Stats
]

Yield stats for all specified repositories, by author, for the specified time period and frequency (if provided).

Parameters:

urls (str|[str]): One or more git URLs, as you would pass to git clone, or the URL of a GitHub organization
user (str) = "": A username with which to authenticate. Note: If neither a username nor a password is provided, the default system configuration will be used.
password (str) = "": A password/token with which to authenticate.
since (date|None) = None: If provided, only yield stats after this date
before (date|None) = None: If provided, only yield stats before this date
frequency (str|Frequency|None) = None: If provided, yield stats broken down by the specified frequency. For example, if frequency is "1 week", stats will be yielded for each week in the specified time, starting with since and ending with before (if provided).
no_mailmap (bool) = False: If True, do not use the mailmap file

Source code in src/git_author_stats/_stats.py

def iter_stats(  # noqa: C901
    urls: str | Iterable[str],
    user: str = "",
    password: str = "",
    since: date | None = None,
    after: date | None = None,
    before: date | None = None,
    until: date | None = None,
    frequency: str | Frequency | None = None,
    *,
    no_mailmap: bool = False,
) -> Iterable[Stats]:
    """
    Clone the specified repositories and yield one `Stats` record at a time
    for each of them, limited to the requested time period and, if a
    frequency is given, labelled with the period each record falls into.

    Parameters:

    - urls (str|[str]): One or more git URLs, as you would pass to
      `git clone`, or the URL of a GitHub organization
    - user (str) = "": A username with which to authenticate.
      Note: If neither a username nor a password is provided, the default
      system configuration will be used.
    - password (str) = "": A password/token with which to authenticate.
    - since (date|None) = None: If provided, only yield stats after this
      date. If not provided, the earliest first author date found among the
      cloned repositories is used.
    - after (date|None) = None: Passed, together with `since`, `before` and
      `until`, to `get_date_range`, which returns the effective date range
      (NOTE(review): presumably an alternative lower bound -- confirm
      against `get_date_range`)
    - before (date|None) = None: If provided, only yield stats before this
      date. If not provided, tomorrow's date (UTC) is used.
    - until (date|None) = None: Passed to `get_date_range` along with the
      other bounds (NOTE(review): presumably an alternative upper bound --
      confirm against `get_date_range`)
    - frequency (str|Frequency|None) = None: If provided, yield stats
      broken down by the specified frequency. For example, if `frequency` is
      "1 week", stats will be yielded for each week in the specified time,
      starting with `since` and ending with `before` (if provided).
    - no_mailmap (bool) = False: If `True`, do not use the mailmap file

    Raises `ValueError` if `since` was not provided and no start date could
    be determined from the cloned repositories.
    """
    if isinstance(frequency, str):
        frequency = parse_frequency_string(frequency)
    urls_paths: Iterable[tuple[str, str]] = iter_clone(
        urls, user, password, since=since
    )
    if since is None:
        # The clones are walked twice (once here, once below), so they
        # must be materialized before the first pass
        urls_paths = tuple(urls_paths)
        for url_path in urls_paths:
            first_author_date = get_first_author_date(url_path[1])
            since = (
                first_author_date
                if since is None
                else min(first_author_date, since)
            )
        if since is None:
            raise ValueError((since, after, before, until))
    if before is None:
        # `before` is non-inclusive, so default to tomorrow
        before = datetime.now(tz=timezone.utc).date() + timedelta(days=1)
    since, before = get_date_range(since, after, before, until)
    # Only populated when stats are broken down by frequency
    date_range_map: dict[date, tuple[date, date]] = (
        {}
        if frequency is None
        else get_date_range_map(frequency, since=since, before=before)
    )
    # Yield stats for each author, for each repository, for each time period
    for repo_url, repo_path in urls_paths:
        record: Stats
        for record in iter_local_repo_stats(
            repo_path,
            since=since,
            before=before,
            no_mailmap=no_mailmap,
        ):
            record.url = repo_url
            if (frequency is not None) and (record.author_date is not None):
                # Label the record with the period containing its author
                # date, or with (None, None) when the date is not mapped
                record.since, record.before = date_range_map.get(
                    record.author_date.date(), (None, None)
                )
            yield record

write_stats

write_stats(
    stats: collections.abc.Iterable[
        git_author_stats._stats.Stats
    ],
    file: str | pathlib.Path | typing.TextIO,
    *,
    no_header: bool = False,
    delimiter: str = "",
    markdown: bool | None = None
) -> None

Write stats for all specified repositories, by author, for the specified time period and frequency (if provided), to a CSV/TSV file or a markdown table.

Parameters:

stats (typing.Iterable[git_author_stats.Stats]): The stats to write
file (str|pathlib.Path|typing.TextIO): A file path or file-like object
delimiter (str) = "": The delimiter to use for CSV/TSV output. If not provided, the delimiter will be inferred based on the file extension if possible, otherwise it will default to ",".
markdown (bool|None) = None: If True, a markdown table will be written. If False, a CSV/TSV file will be written. If None, the output format will be inferred based on the file's extension.
no_header (bool) = False: Do not include a header in the output

Source code in src/git_author_stats/_stats.py

def write_stats(
    stats: Iterable[Stats],
    file: str | Path | TextIO,
    *,
    no_header: bool = False,
    delimiter: str = "",
    markdown: bool | None = None,
) -> None:
    """
    Write stats for all specified repositories, by author, for the specified
    time period and frequency (if provided), to a CSV/TSV file or a markdown
    table.

    If `file` is a path, the file is opened for writing and is closed again
    before this function returns, including when an error occurs. If `file`
    is a file-like object, it is written to but left open for the caller.

    Parameters:

    - stats (typing.Iterable[git_author_stats.Stats]): The stats to write
    - file (str|pathlib.Path|typing.TextIO): A file path or file-like object
    - delimiter (str) = "": The delimiter to use for CSV/TSV output.
      If not provided, the delimiter will be inferred based on the file
      extension if possible, otherwise it will default to ",".
    - markdown (bool|None) = None: If `True`, a markdown table
      will be written. If `False`, a CSV/TSV file will be written.
      If `None`, the output format will be inferred based on the file's
      extension.
    - no_header (bool) = False: Do not include a header in the output
    """
    # Determine the output format
    path: Path | None = _get_file_path(file)
    if (not (markdown or delimiter)) and (path is not None):
        delimiter = _get_path_delimiter(path)
    if markdown is None:
        markdown = bool(
            (not delimiter)
            and ((path is None) or path.suffix.lower().lstrip(".") == "md")
        )
    # Open a file for writing, if necessary, and remember whether this
    # function owns the handle: only handles opened here are closed here
    file_io: TextIO
    close_when_done: bool
    if isinstance(file, (str, Path)):
        file_io = open(file, "w")  # noqa: SIM115
        close_when_done = True
    else:
        file_io = file
        close_when_done = False
    try:
        # Get the header
        field_names: tuple[str, ...] = _get_stats_field_names()
        stat: Stats
        if markdown:
            # A markdown table is written in one go, so every row
            # (header first) is collected before anything is written
            rows: list[tuple[str, ...]] = [field_names]
            for stat in stats:
                rows.append(
                    tuple(
                        get_string_value(getattr(stat, field_name))
                        for field_name in field_names
                    )
                )
            write_markdown_table(file_io, rows, no_header=no_header)
        else:
            csv_writer: Any = csv.writer(
                file_io,
                # Accept a literal backslash-t (as typed on a command
                # line) as a tab character
                delimiter=(
                    delimiter.replace("\\t", "\t") if delimiter else ","
                ),
                lineterminator="\n",
            )
            if not no_header:
                csv_writer.writerow(field_names)
            # CSV/TSV rows are streamed as the stats are consumed
            for stat in stats:
                csv_writer.writerow(
                    tuple(
                        get_string_value(getattr(stat, field_name))
                        for field_name in field_names
                    )
                )
    finally:
        if close_when_done:
            file_io.close()

git_author_stats

Frequency dataclass

FrequencyUnit

Stats dataclass

iter_stats

write_stats

Frequency `dataclass`

Stats `dataclass`