Skip to content

git_author_stats

Frequency dataclass

A frequency of time.

Source code in src/git_author_stats/_stats.py
350
351
352
353
354
355
356
357
@dataclass
class Frequency:
    """
    A frequency of time, expressed as a quantity of a unit of time.

    Properties:

    - quantity (int): The number of units
    - unit (FrequencyUnit): The unit of time being counted
    """

    quantity: int
    unit: FrequencyUnit

FrequencyUnit

Bases: enum.Enum

A unit of time.

Source code in src/git_author_stats/_stats.py
339
340
341
342
343
344
345
346
347
class FrequencyUnit(Enum):
    """
    A unit of time.

    The value of each member is the lower-case, singular name of the unit.
    """

    WEEK = "week"
    MONTH = "month"
    DAY = "day"
    YEAR = "year"

Stats dataclass

This object represents metrics obtained from the output of git log --numstat. Each record is unique when grouped by url + commit + author_name + file. The fields since and before are provided as a convenience for easy aggregation of stats, but do not provide any additional information about the commit or file.

Properties:

  • url (str): The URL of the repository
  • since (date|None): The start date for a pre-defined time period by which these metrics will be analyzed
  • before (date|None): The end date (non-inclusive) for a pre-defined time period by which these metrics will be analyzed
  • author_date (datetime|None): The date and time of the author's commit
  • author_name (str): The name of the author
  • commit (str): The abbreviated commit hash
  • file (str): The file path
  • insertions (int): The number of lines inserted in this commit
  • deletions (int): The number of lines deleted in this commit
Source code in src/git_author_stats/_stats.py
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
@dataclass
class Stats:
    """
    This object represents metrics obtained from the output of
    `git log --numstat`. Each record is unique when grouped by
    url + commit + author_name + file. The fields `since` and `before`
    are provided as a convenience for easy aggregation of stats, but do not
    provide any additional information about the commit or file.

    Properties:

    - url (str): The URL of the repository
    - since (date|None): The start date for a pre-defined time period by which
      these metrics will be analyzed
    - before (date|None): The end date (non-inclusive) for a pre-defined time
      period by which these metrics will be analyzed
    - author_date (datetime|None): The date and time of the author's commit
    - author_name (str): The name of the author
    - commit (str): The abbreviated commit hash
    - file (str): The file path
    - insertions (int): The number of lines inserted in this commit
    - deletions (int): The number of lines deleted in this commit
    """

    url: str = ""
    since: date | None = None
    before: date | None = None
    author_date: datetime | None = None
    author_name: str = ""
    commit: str = ""
    file: str = ""
    insertions: int = 0
    deletions: int = 0

    def __init__(
        self,
        url: str = "",
        since: date | str | None = None,
        before: date | str | None = None,
        author_date: datetime | str | None = None,
        author_name: str = "",
        commit: str = "",
        file: str = "",
        insertions: int | str = 0,
        deletions: int | str = 0,
    ) -> None:
        if isinstance(since, str):
            since = get_iso_date(since)
        if isinstance(before, str):
            before = get_iso_date(before)
        if isinstance(author_date, str):
            author_date = get_iso_datetime(author_date)
        if isinstance(insertions, str):
            insertions = int(insertions)
        if isinstance(deletions, str):
            deletions = int(deletions)
        self.url: str = url
        self.since: date = since
        self.before: date = before
        self.author_date: datetime = author_date
        self.author_name: str = author_name
        self.commit: str = commit
        self.file: str = file
        self.insertions: int = insertions
        self.deletions: int = deletions

iter_stats

iter_stats(
    urls: str | collections.abc.Iterable[str],
    user: str = "",
    password: str = "",
    since: datetime.date | None = None,
    after: datetime.date | None = None,
    before: datetime.date | None = None,
    until: datetime.date | None = None,
    frequency: (
        str | git_author_stats._stats.Frequency | None
    ) = None,
    *,
    no_mailmap: bool = False
) -> collections.abc.Iterable[
    git_author_stats._stats.Stats
]

Yield stats for all specified repositories, by author, for the specified time period and frequency (if provided).

Parameters:

  • urls (str|[str]): One or more git URLs, as you would pass to git clone, or the URL of a GitHub organization
  • user (str) = "": A username with which to authenticate. Note: If neither a username nor a password is provided, the default system configuration will be used.
  • password (str) = "": A password/token with which to authenticate.
  • since (date|None) = None: If provided, only yield stats after this date
  • before (date|None) = None: If provided, only yield stats before this date
  • frequency (str|Frequency|None) = None: If provided, yield stats broken down by the specified frequency. For example, if frequency is "1 week", stats will be yielded for each week in the specified time, starting with since and ending with before (if provided).
  • no_mailmap (bool) = False: If True, do not use the mailmap file
Source code in src/git_author_stats/_stats.py
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
def iter_stats(  # noqa: C901
    urls: str | Iterable[str],
    user: str = "",
    password: str = "",
    since: date | None = None,
    after: date | None = None,
    before: date | None = None,
    until: date | None = None,
    frequency: str | Frequency | None = None,
    *,
    no_mailmap: bool = False,
) -> Iterable[Stats]:
    """
    Yield stats for every specified repository, by author, covering the
    requested time period and (optionally) broken down by a frequency.

    Parameters:

    - urls (str|[str]): One or more git URLs, as you would pass to `git clone`,
      or the URL of a GitHub organization
    - user (str) = "": A username with which to authenticate.
      Note: If neither a username nor a password is provided, the default
      system configuration will be used.
    - password (str) = "": A password/token with which to authenticate.
    - since (date|None) = None: If provided, only yield stats after this date.
      If not provided, the earliest first-author-date found among the
      repositories is used.
    - after (date|None) = None: Passed, together with `since`, `before` and
      `until`, to `get_date_range` in order to resolve the effective period.
      NOTE(review): presumably an alternative lower bound — confirm against
      `get_date_range`.
    - before (date|None) = None: If provided, only yield stats before this
      date. If not provided, tomorrow's date (UTC) is used.
    - until (date|None) = None: Passed to `get_date_range` (see `after`).
      NOTE(review): presumably an alternative upper bound — confirm.
    - frequency (str|Frequency|None) = None: If provided, yield stats
      broken down by the specified frequency. For example, if `frequency` is
      "1 week", stats will be yielded for each week in the specified time,
      starting with `since` and ending with `before` (if provided).
    - no_mailmap (bool) = False: If `True`, do not use the mailmap file

    Raises:

    - ValueError: If `since` is not provided and no start date could be
      determined from the repositories
    """
    # A frequency given as text is parsed into a `Frequency`
    if isinstance(frequency, str):
        frequency = parse_frequency_string(frequency)
    urls_paths: Iterable[tuple[str, str]] = iter_clone(
        urls, user, password, since=since
    )
    if since is None:
        # The (url, path) pairs are traversed here and again further down,
        # so they are materialized before the first pass
        urls_paths = tuple(urls_paths)
        url_path: tuple[str, str]
        for url_path in urls_paths:
            first_author_date = get_first_author_date(url_path[1])
            since = (
                first_author_date
                if since is None
                else min(first_author_date, since)
            )
        if since is None:
            raise ValueError((since, after, before, until))
    if before is None:
        # Default the end of the period to tomorrow (UTC)
        today: date = datetime.now(tz=timezone.utc).date()
        before = today + timedelta(days=1)
    since, before = get_date_range(since, after, before, until)
    # Only populated when a frequency was requested
    date_range_map: dict[date, tuple[date, date]] = (
        get_date_range_map(frequency, since=since, before=before)
        if frequency is not None
        else {}
    )
    # Yield stats for each author, for each repository, for each time period
    url: str
    path: str
    for url, path in urls_paths:
        repo_stats: Stats
        for repo_stats in iter_local_repo_stats(
            path,
            since=since,
            before=before,
            no_mailmap=no_mailmap,
        ):
            repo_stats.url = url
            if not (frequency is None or repo_stats.author_date is None):
                # Author dates missing from the map leave the period unset
                repo_stats.since, repo_stats.before = date_range_map.get(
                    repo_stats.author_date.date(), (None, None)
                )
            yield repo_stats

write_stats

write_stats(
    stats: collections.abc.Iterable[
        git_author_stats._stats.Stats
    ],
    file: str | pathlib.Path | typing.TextIO,
    *,
    no_header: bool = False,
    delimiter: str = "",
    markdown: bool | None = None
) -> None

Write stats for all specified repositories, by author, for the specified time period and frequency (if provided), to a CSV/TSV file or a markdown table.

Parameters:

  • stats (typing.Iterable[git_author_stats.Stats]): The stats to write
  • file (str|pathlib.Path|typing.TextIO): A file path or file-like object
  • delimiter (str) = "": The delimiter to use for CSV/TSV output. If not provided, the delimiter will be inferred based on the file extension if possible, otherwise it will default to ",".
  • markdown (bool|None) = None: If True, a markdown table will be written. If False, a CSV/TSV file will be written. If None, the output format will be inferred based on the file's extension.
  • no_header (bool) = False: Do not include a header in the output
Source code in src/git_author_stats/_stats.py
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
def write_stats(
    stats: Iterable[Stats],
    file: str | Path | TextIO,
    *,
    no_header: bool = False,
    delimiter: str = "",
    markdown: bool | None = None,
) -> None:
    """
    Write stats for all specified repositories, by author, for the specified
    time period and frequency (if provided), to a CSV/TSV file or a markdown
    table.

    If `file` is a path, the file is opened for writing by this function and
    is closed again before returning (including when an error occurs while
    writing). If `file` is a file-like object, it is written to but is left
    open, since the caller owns it.

    Parameters:

    - stats (typing.Iterable[git_author_stats.Stats]): The stats to write
    - file (str|pathlib.Path|typing.TextIO): A file path or file-like object
    - delimiter (str) = "": The delimiter to use for CSV/TSV output.
      If not provided, the delimiter will be inferred based on the file
      extension if possible, otherwise it will default to ",".
      An escaped tab (a backslash followed by "t") is treated as a tab.
    - markdown (bool|None) = None: If `True`, a markdown table
      will be written. If `False`, a CSV/TSV file will be written.
      If `None`, the output format will be inferred based on the file's
      extension.
    - no_header (bool) = False: Do not include a header in the output
    """
    # Determine the output format
    path: Path | None = _get_file_path(file)
    if (not (markdown or delimiter)) and (path is not None):
        delimiter = _get_path_delimiter(path)
    if markdown is None:
        markdown = bool(
            (not delimiter)
            and ((path is None) or path.suffix.lower().lstrip(".") == "md")
        )
    # Open a file for writing, if necessary, remembering whether the handle
    # is ours to close
    file_io: TextIO
    owns_file_io: bool
    if isinstance(file, (str, Path)):
        file_io = open(file, "w")  # noqa: SIM115
        owns_file_io = True
    else:
        file_io = file
        owns_file_io = False
    try:
        # Get the header
        field_names: tuple[str, ...] = _get_stats_field_names()
        # The `rows` list will only be needed for markdown output
        rows: list[tuple[str, ...]]
        # The CSV writer will only be needed for CSV/TSV output
        csv_writer: Any
        if markdown:
            # The header is always collected for markdown; whether or not it
            # is emitted is decided by `write_markdown_table`
            rows = []
            rows.append(field_names)
        else:
            csv_writer = csv.writer(
                file_io,
                delimiter=(
                    delimiter.replace("\\t", "\t") if delimiter else ","
                ),
                lineterminator="\n",
            )
            if not no_header:
                csv_writer.writerow(field_names)
        stat: Stats
        for stat in stats:
            row: tuple[str, ...] = tuple(
                map(
                    get_string_value,
                    map(stat.__getattribute__, field_names),
                )
            )
            if markdown:
                # Markdown rows are buffered and written in one go below
                rows.append(row)
            else:
                csv_writer.writerow(row)
        if markdown and rows:
            write_markdown_table(file_io, rows, no_header=no_header)
    finally:
        # Only close what this function opened; a caller-supplied file
        # object remains the caller's responsibility
        if owns_file_io:
            file_io.close()