Skip to content

Commit

Permalink
Merge pull request #271 from GeekMasher/graphql-limit-updates
Browse files Browse the repository at this point in the history
feat(deps): Update the getDependenciesGraphQL function
  • Loading branch information
GeekMasher authored Sep 20, 2024
2 parents 2c45f25 + 1460d50 commit eefc781
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 48 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@

public/
.data/
test.py
*.spdx

Expand Down
12 changes: 9 additions & 3 deletions examples/dependencies.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
import os
import json
from ghastoolkit.octokit.dependencygraph import DependencyGraph
from ghastoolkit.octokit.github import GitHub
from ghastoolkit import DependencyGraph, GitHub

GitHub.init("GeekMasher/ghastoolkit")
GitHub.init(repository=os.environ.get("GITHUB_REPOSITORY", "GeekMasher/ghastoolkit"))
print(f"Repository :: {GitHub.repository}")

depgraph = DependencyGraph()
dependencies = depgraph.getDependencies()

print(f"Total Dependencies :: {len(dependencies)}")

# or you can get the data from the GraphQL API as well
# This can be useful if you want to get more information about the dependencies
dependencies = depgraph.getDependenciesGraphQL()
print(f"Total Dependencies (GraphQL) :: {len(dependencies)}")

gpl = dependencies.findLicenses(["GPL-*", "AGPL-*"])
print(f"Total GPL Dependencies :: {len(gpl)}")

Expand Down
149 changes: 109 additions & 40 deletions src/ghastoolkit/octokit/dependencygraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,52 +127,121 @@ def getDependenciesSbom(self) -> Dependencies:

return result

def getDependenciesGraphQL(self) -> Dependencies:
"""Get Dependencies from GraphQL."""
def getDependenciesGraphQL(self, dependencies_count: int = 100) -> Dependencies:
"""Get Dependencies from GraphQL.
This function requests each manifest file in the repository and the
dependencies associated with it. It then paginates through both the manifests
and dependencies.
This is done to avoid the timeout errors in the GraphQL API when requesting
large projects with many manifests and dependencies.
"""
deps = Dependencies()
data = self.graphql.query(
"GetDependencyInfo",
{"owner": self.repository.owner, "repo": self.repository.repo},
)
graph_manifests = (
data.get("data", {})
.get("repository", {})
.get("dependencyGraphManifests", {})
)
logger.debug(
f"Graph Manifests Total Count :: {graph_manifests.get('totalCount')}"
)

for manifest in graph_manifests.get("edges", []):
node = manifest.get("node", {})
logger.debug(f"Processing :: '{node.get('filename')}'")
manifests = True
manifests_cursor = ""
dependencies_cursor = ""

while manifests:
# Query a single manifest at a time
data = self.graphql.query(
"GetDependencyInfo",
{
"owner": self.repository.owner,
"repo": self.repository.repo,
"manifests_cursor": manifests_cursor,
"dependencies_first": dependencies_count,
"dependencies_cursor": dependencies_cursor,
},
)

graph_manifests = (
data.get("data", {})
.get("repository", {})
.get("dependencyGraphManifests", {})
)
logger.debug(f"Processing :: '{graph_manifests.get('totalCount')}'")

# Runs at least once
has_next_page = True

for dep in node.get("dependencies", {}).get("edges", []):
dep = dep.get("node", {})
license = None
repository = None
while has_next_page:
for manifest in graph_manifests.get("edges", []):
node = manifest.get("node", {})
dependencies = node.get("dependencies", {})
logger.debug(f"Processing :: '{node.get('filename')}'")

if dep.get("repository"):
if dep.get("repository", {}).get("licenseInfo"):
license = (
dep.get("repository", {}).get("licenseInfo", {}).get("name")
# Pagination
has_next_page = dependencies.get("pageInfo", {}).get(
"hasNextPage", False
)
if has_next_page:
dependencies_cursor = f'after: "{dependencies.get("pageInfo", {}).get("endCursor")}"'
else:
dependencies_cursor = ""

for dep in dependencies.get("edges", []):
dep = dep.get("node", {})
license = None
repository = None

if dep.get("repository"):
if dep.get("repository", {}).get("licenseInfo"):
license = (
dep.get("repository", {})
.get("licenseInfo", {})
.get("name")
)
if dep.get("repository", {}).get("nameWithOwner"):
repository = dep.get("repository", {}).get(
"nameWithOwner"
)

version = dep.get("requirements")
if version:
version = version.replace("= ", "")

deps.append(
Dependency(
name=dep.get("packageName"),
manager=dep.get("packageManager"),
version=version,
license=license,
repository=repository,
)
)
if dep.get("repository", {}).get("nameWithOwner"):
repository = dep.get("repository", {}).get("nameWithOwner")

version = dep.get("requirements")
if version:
version = version.replace("= ", "")

deps.append(
Dependency(
name=dep.get("packageName"),
manager=dep.get("packageManager"),
version=version,
license=license,
repository=repository,

if has_next_page:
logger.debug(
f"Re-run and fetch next data page :: {manifests_cursor} ({dependencies_cursor})"
)
)

data = self.graphql.query(
"GetDependencyInfo",
{
"owner": self.repository.owner,
"repo": self.repository.repo,
"manifests_cursor": manifests_cursor,
"dependencies_first": dependencies_count,
"dependencies_cursor": dependencies_cursor,
},
)
graph_manifests = (
data.get("data", {})
.get("repository", {})
.get("dependencyGraphManifests", {})
)

# If there are no other manifest files, then we are done
if graph_manifests.get("pageInfo", {}).get("hasNextPage"):
cursor = graph_manifests.get("pageInfo", {}).get("endCursor")
manifests_cursor = f'after: "{cursor}"' if cursor != "" else ""
logger.debug(f"Cursor :: {manifests_cursor}")
else:
manifests = False
manifests_cursor = ""
logger.debug("No more manifests to be processed")

return deps

Expand Down
11 changes: 8 additions & 3 deletions src/ghastoolkit/octokit/graphql/GetDependencyInfo.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
licenseInfo {
name
}
dependencyGraphManifests {
dependencyGraphManifests(first: 1, $manifests_cursor) {
totalCount
pageInfo {
hasNextPage
Expand All @@ -13,7 +13,12 @@
edges {
node {
filename
dependencies {
dependencies(first: $dependencies_first, $dependencies_cursor) {
totalCount
pageInfo {
hasNextPage
endCursor
}
edges {
node {
packageName
Expand All @@ -38,4 +43,4 @@
}
}
}
}
}
9 changes: 7 additions & 2 deletions src/ghastoolkit/octokit/graphql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
licenseInfo {
name
}
dependencyGraphManifests {
dependencyGraphManifests(first: 1, $manifests_cursor) {
totalCount
pageInfo {
hasNextPage
Expand All @@ -61,7 +61,12 @@
edges {
node {
filename
dependencies {
dependencies(first: $dependencies_first, $dependencies_cursor) {
totalCount
pageInfo {
hasNextPage
endCursor
}
edges {
node {
packageName
Expand Down
6 changes: 6 additions & 0 deletions src/ghastoolkit/octokit/octokit.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
# a GitHub App which has a higher limit
# https://docs.github.com/en/rest/overview/resources-in-the-rest-api?apiVersion=2022-11-28#rate-limiting
REST_MAX_CALLS = 80 # ~5000 per hour
GRAPHQL_MAX_CALLS = 100 # ~5000 per hour

__OCTOKIT_PATH__ = os.path.dirname(os.path.realpath(__file__))

Expand Down Expand Up @@ -342,9 +343,13 @@ def __init__(self, repository: Optional[Repository] = None) -> None:
# load in default hardcoded queries
self.queries = QUERIES

@sleep_and_retry
@limits(calls=GRAPHQL_MAX_CALLS, period=60)
def query(self, name: str, options: dict[str, Any] = {}) -> dict:
"""Run a GraphQL query.
https://docs.github.com/en/enterprise-cloud@latest/graphql/overview/about-the-graphql-api
https://docs.github.com/en/enterprise-cloud@latest/graphql/overview/rate-limits-and-node-limits-for-the-graphql-api#primary-rate-limit
"""
logger.debug(f"Loading Query by Name :: {name}")
query_content = self.queries.get(name)
Expand All @@ -371,6 +376,7 @@ def query(self, name: str, options: dict[str, Any] = {}) -> dict:
)

rjson = response.json()

if rjson.get("errors"):
for err in rjson.get("errors"):
logger.warning(f"GraphQL Query failed :: {err.get('message')}")
Expand Down

0 comments on commit eefc781

Please sign in to comment.