1
0

Improve refresh-egg performance by re-using existing commits for new branches

This commit is contained in:
Mike Schwörer 2022-10-17 19:57:11 +02:00
parent 18af582343
commit 1084bd36a6
Signed by: Mikescher
GPG Key ID: D3C7172E0A70F8CF
6 changed files with 140 additions and 47 deletions

19
.idea/sqlDataSources.xml generated Normal file
View File

@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="SqlDataSourceStorage">
<option name="dataSources">
<list>
<State>
<option name="id" value="b1f51636-21bf-4ca9-a884-b0419489ab5f" />
<option name="name" value="EGG DB" />
<option name="languageId" value="SQLite" />
<option name="urls">
<array>
<option value="file://$PROJECT_DIR$/www/extern/egg/db_init.sql" />
</array>
</option>
</State>
</list>
</option>
</component>
</project>

5
.idea/sqldialects.xml generated
View File

@ -1,6 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="SqlDialectMappings">
<file url="file://$PROJECT_DIR$" dialect="MySQL" />
<file url="file://$PROJECT_DIR$/www/extern/egg" dialect="SQLite" />
<file url="PROJECT" dialect="MySQL" />
</component>
<component name="SqlResolveMappings">
<file url="file://$PROJECT_DIR$/www/extern/egg" scope="{&quot;node&quot;:{ &quot;@negative&quot;:&quot;1&quot;, &quot;group&quot;:{ &quot;@kind&quot;:&quot;root&quot;, &quot;node&quot;:{ &quot;name&quot;:{ &quot;@qname&quot;:&quot;b1f51636-21bf-4ca9-a884-b0419489ab5f&quot; }, &quot;group&quot;:{ &quot;@kind&quot;:&quot;schema&quot;, &quot;node&quot;:{ &quot;name&quot;:{ &quot;@qname&quot;:&quot;&quot; } } } } } }}" />
</component>
</project>

View File

@ -355,7 +355,7 @@ class EGGDatabase
public function deleteDanglingCommitdata(string $name)
{
$hashes = $this->sql_query_assoc_prep("SELECT metadata.hash FROM metadata LEFT JOIN commits ON metadata.hash = commits.hash WHERE commits.hash IS NULL", []);
$hashes = $this->sql_query_assoc_prep("SELECT metadata.hash AS mdh FROM metadata LEFT JOIN commits ON metadata.hash = commits.hash WHERE commits.hash IS NULL", []);
if (count($hashes) === 0) return;
@ -363,7 +363,7 @@ class EGGDatabase
$this->beginTransaction();
foreach ($hashes as $hash) {
$this->sql_query_assoc_prep("DELETE FROM metadata WHERE hash = :hash", [ [":hash", $hash, PDO::PARAM_STR] ]);
$this->sql_query_assoc_prep("DELETE FROM metadata WHERE hash = :hash", [ [":hash", $hash['mdh'], PDO::PARAM_STR] ]);
}
$this->commitTransaction();
@ -475,9 +475,13 @@ class EGGDatabase
/**
* @return Commit[]
*/
public function getCommits(Branch $branch): array
public function getCommitsForBranch(Branch $branch): array
{
$rows = $this->sql_query_assoc("SELECT metadata.*, commits.id AS commitid FROM commits LEFT JOIN metadata WHERE commits.branch_id = :bid", [[":bid", $branch->ID, PDO::PARAM_INT]]);
$rows = $this->sql_query_assoc_prep("SELECT metadata.*, commits.id AS commitid FROM commits LEFT JOIN metadata ON metadata.hash = commits.hash WHERE commits.branch_id = :bid",
[
[":bid", $branch->ID, PDO::PARAM_INT]
]);
$r = [];
foreach ($rows as $row)
{
@ -491,7 +495,35 @@ class EGGDatabase
$c->CommitterEmail = $row['committer_email'];
$c->Message = $row['message'];
$c->Date = $row['date'];
$c->Parents = $row['parent_commits'];
$c->Parents = array_filter(explode(';', $row['parent_commits']), fn($p) => $p !== '');
$r []= $c;
}
return $r;
}
/**
* @return Commit[]
*/
public function getCommitsForRepo(Repository $repo, Branch $branchValue): array
{
$rows = $this->sql_query_assoc_prep("SELECT DISTINCT metadata.* FROM branches INNER JOIN commits ON branches.id = commits.branch_id LEFT JOIN metadata ON metadata.hash = commits.hash WHERE branches.repo_id = :rid",
[
[":rid", $repo->ID, PDO::PARAM_INT]
]);
$r = [];
foreach ($rows as $row)
{
$c = new Commit();
$c->Branch = $branchValue;
$c->Hash = $row['hash'];
$c->AuthorName = $row['author_name'];
$c->AuthorEmail = $row['author_email'];
$c->CommitterName = $row['committer_name'];
$c->CommitterEmail = $row['committer_email'];
$c->Message = $row['message'];
$c->Date = $row['date'];
$c->Parents = array_filter(explode(';', $row['parent_commits']), fn($p) => $p !== '');
$r []= $c;
}
return $r;

View File

@ -32,10 +32,10 @@ class Branch
/** @var Repository */
public $Repo;
/** @var string */
/** @var string|null */
public $Head;
/** @var string */
/** @var string|null */
public $HeadFromAPI = null;
/** @var string */

View File

@ -72,15 +72,15 @@ abstract class StandardGitConnection implements IRemoteSource
continue;
}
$commits = $this->listAndUpdateCommits($db, $repo, $branch);
$newcommits = $this->listAndUpdateCommits($db, $repo, $branch);
$db->setUpdateDateOnBranch($branch);
if (count($commits) === 0)
if (count($newcommits) === 0)
{
$this->logger->proclog("Branch: [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "] has no new commits");
continue;
}
$this->logger->proclog("Found " . count($commits) . " new commits in Branch: [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "]");
$this->logger->proclog("Found " . count($newcommits) . " new commits in Branch: [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "]");
$repo_changed = true;
$db->setChangeDateOnBranch($branch);
@ -269,34 +269,36 @@ abstract class StandardGitConnection implements IRemoteSource
$targetFound = false;
$next_sha = [ $branch->HeadFromAPI ];
$visited = array_map(function(Commit $m):string{return $m->Hash;}, $db->getCommits($branch));
$visited = array_map(function(Commit $m):string{return $m->Hash;}, $db->getCommitsForBranch($branch));
$this->logger->proclog("Query commit for [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "] (initial @ {" . substr($next_sha[0], 0, 8) . "})");
$query_counter=0;
$json = $this->queryCommits($repo->Name, $branch->Name, $next_sha[0]);
$existing = [];
$reusedFromExisting = 0;
if ($branch->Head === null) {
for ($pg=2;;$pg++)
// new branch, perhaps we can mix'n'match existing commits+metadata
$this->logger->proclog("Query existing commits for [" . $this->name . "|" . $repo->Name . "] (potentially reuse for new branch '" . $branch->Name . "')");
foreach ($db->getCommitsForRepo($repo, $branch) as $c) $existing[$c->Hash] = $c;
}
$query_counter++;
$this->logger->proclog("Query commits for [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "] (initial @ {" . substr($next_sha[0], 0, 8) . "}) (target: {" . substr($target ?? 'NULL', 0, 8) . "})");
$unprocessed = array_map(fn($p) => $this->createCommit($branch, $p), $this->queryCommits($repo->Name, $branch->Name, $next_sha[0]));
for (;;)
{
foreach ($json as $result_commit)
foreach ($unprocessed as $commit)
{
$jdata = $this->readCommit($result_commit);
while (($rmshakey = array_search($commit->Hash, $next_sha)) !== false) unset($next_sha[$rmshakey]);
$sha = $jdata['sha'];
$author_name = $jdata['author_name'];
$author_email = $jdata['author_email'];
$committer_name = $jdata['committer_name'];
$committer_email = $jdata['committer_email'];
$message = $jdata['message'];
$date = $jdata['date'];
if (in_array($commit->Hash, $visited)) continue;
$visited []= $commit->Hash;
$parents = $jdata['parents'];
if (($rmshakey = array_search($sha, $next_sha)) !== false) unset($next_sha[$rmshakey]);
if (in_array($sha, $visited)) continue;
$visited []= $sha;
if ($sha === $target) $targetFound = true;
if ($commit->Hash === $target) $targetFound = true;
if ($targetFound && count($next_sha) === 0)
{
@ -316,20 +318,9 @@ abstract class StandardGitConnection implements IRemoteSource
}
}
$commit = new Commit();
$commit->Branch = $branch;
$commit->Hash = $sha;
$commit->AuthorName = $author_name;
$commit->AuthorEmail = $author_email;
$commit->CommitterName = $committer_name;
$commit->CommitterEmail = $committer_email;
$commit->Message = $message;
$commit->Date = $date;
$commit->Parents = $parents;
$newcommits []= $commit;
foreach ($parents as $p)
foreach ($commit->Parents as $p)
{
$next_sha []= $p;
}
@ -338,12 +329,29 @@ abstract class StandardGitConnection implements IRemoteSource
$next_sha = array_values($next_sha); // fix numeric keys
if (count($next_sha) === 0) break;
$this->logger->proclog("Query commit for [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "] (" . $pg . " @ {" . substr($next_sha[0], 0, 8) . "})");
if (array_key_exists($next_sha[0], $existing)) {
// fast-track for existing Commits
$unprocessed = [ $existing[$next_sha[0]] ];
$reusedFromExisting++;
} else {
$query_counter++;
$this->logger->proclog("Query commits for [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "] (" . $query_counter . " @ {" . substr($next_sha[0], 0, 8) . "})");
$unprocessed = array_map(fn($p) => $this->createCommit($branch, $p), $this->queryCommits($repo->Name, $branch->Name, $next_sha[0]));
$json = $this->queryCommits($repo->Name, $branch->Name, $next_sha[0]);
}
$this->logger->proclog("HEAD pointer in Branch: [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "] no longer matches. Re-query all " . count($newcommits) . " commits (old HEAD := {".substr($branch->Head ?? 'NULL', 0, 8)."}, missing: [" . join(", ", array_map(function($p){return substr($p ?? 'NULL', 0, 8);}, $next_sha)) . "] )");
}
if ($branch->Head === null) {
$this->logger->proclog("HEAD pointer in new Branch: [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "] set to ". $branch->HeadFromAPI ." Queried " . count($newcommits) . " commits (reused $reusedFromExisting commits from DB)");
} else {
$this->logger->proclog("HEAD pointer in Branch: [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "] no longer matches. Re-queried all " . count($newcommits) . " commits (old HEAD := {".substr($branch->Head ?? 'NULL', 0, 8)."}, missing: [" . join(", ", array_map(function($p){return substr($p ?? 'NULL', 0, 8);}, $next_sha)) . "] )");
}
$db->deleteAllCommits($branch);
@ -359,6 +367,35 @@ abstract class StandardGitConnection implements IRemoteSource
return $newcommits;
}
/**
 * Builds a Commit object for the given branch from a single raw commit entry.
 *
 * The raw entry is first passed through readCommit(); the resulting array is
 * expected to provide the keys 'sha', 'author_name', 'author_email',
 * 'committer_name', 'committer_email', 'message', 'date' and 'parents',
 * which are copied one-to-one onto a fresh Commit instance.
 *
 * @param Branch $branch        branch that is stored on the created commit
 * @param mixed  $result_commit one raw entry of a queryCommits() result
 * @return Commit
 */
private function createCommit(Branch $branch, $result_commit): Commit
{
    $data = $this->readCommit($result_commit);

    $created = new Commit();
    $created->Branch         = $branch;
    $created->Hash           = $data['sha'];
    $created->AuthorName     = $data['author_name'];
    $created->AuthorEmail    = $data['author_email'];
    $created->CommitterName  = $data['committer_name'];
    $created->CommitterEmail = $data['committer_email'];
    $created->Message        = $data['message'];
    $created->Date           = $data['date'];
    $created->Parents        = $data['parents'];

    return $created;
}
/** @inheritDoc */
public function getName() { return $this->name; }

View File

@ -1,3 +1,3 @@
This year I don't have too much free time. SO I won't use a completely new language but one which I'm currently using in my dayjob: golang.
This year I don't have too much free time. So I won't use a completely new language but one which I'm currently using in my dayjob: golang.
I'm not the biggest fan of go (I think it's ok, but could really do with more convenience features), but I'm pretty experienced in it and can hopefully work a bit faster through the problems this year.