diff --git a/kibana-dashboards/downloadsVisualizations.json b/kibana-dashboards/downloadsVisualizations.json new file mode 100644 index 0000000..6260da6 --- /dev/null +++ b/kibana-dashboards/downloadsVisualizations.json @@ -0,0 +1,30 @@ +[ + { + "_id": "AWJNwWr9xRjHDZlzGOX9", + "_type": "visualization", + "_source": { + "title": "Github Downloads to Date (Total)", + "visState": "{\"title\":\"Github Downloads to Date (Total)\",\"type\":\"table\",\"params\":{\"perPage\":200,\"showPartialRows\":false,\"showMeticsAtAllLevels\":false,\"sort\":{\"columnIndex\":null,\"direction\":null},\"showTotal\":true,\"totalFunc\":\"sum\",\"type\":\"table\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"assets.downloadsCount\",\"customLabel\":\"Total Downloads to Date\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"bucket\",\"params\":{\"field\":\"asOfYYYYMMDD\",\"interval\":\"d\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{},\"customLabel\":\"Date\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"repo_name\",\"size\":50,\"order\":\"desc\",\"orderBy\":\"1\",\"customLabel\":\"GitHub Project\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"release_name.keyword\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"customLabel\":\"Releases\"}}],\"listeners\":{}}", + "uiStateJSON": "{\"vis\":{\"params\":{\"sort\":{\"columnIndex\":null,\"direction\":null}}}}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"AWJNeisdxRjHDZlzGOUN\",\"query\":{\"match_all\":{}},\"filter\":[]}" + } + } + }, + { + "_id": "AWJNyMutxRjHDZlzGOX_", + "_type": "visualization", + "_source": { + "title": "Most Downloaded Projects", + "visState": "{\"title\":\"Most Downloaded Projects\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"Date\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Total Downloads to Date\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Total Downloads to Date\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"assets.downloadsCount\",\"customLabel\":\"Total Downloads to Date\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"asOfYYYYMMDD\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{},\"customLabel\":\"Date\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"repo_name\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\",\"customLabel\":\"Github Project\"}}],\"listeners\":{}}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"AWJNeisdxRjHDZlzGOUN\",\"query\":{\"match_all\":{}},\"filter\":[]}" + } + } + } +] \ No newline at end of file diff --git a/osstracker-scraper/src/main/scala/com/netflix/oss/tools/osstrackerscraper/GithubAccess.scala b/osstracker-scraper/src/main/scala/com/netflix/oss/tools/osstrackerscraper/GithubAccess.scala index bc855ad..a41d604 100644 --- a/osstracker-scraper/src/main/scala/com/netflix/oss/tools/osstrackerscraper/GithubAccess.scala +++ b/osstracker-scraper/src/main/scala/com/netflix/oss/tools/osstrackerscraper/GithubAccess.scala @@ -18,10 +18,11 @@ package com.netflix.oss.tools.osstrackerscraper import java.io.IOException import java.util.{Date, Properties} +import com.netflix.oss.tools.osstrackerscraper import com.netflix.oss.tools.osstrackerscraper.OssLifecycle.OssLifecycle import org.kohsuke.github._ -import org.slf4j.{Logger, LoggerFactory} -import play.api.libs.json.{JsObject, Json} +import org.slf4j.LoggerFactory +import play.api.libs.json._ import scala.collection.JavaConversions._ @@ -41,11 +42,17 @@ case class IssuesInfo( val openCountTrulyOpen: Int ) {} case class PRsInfo(val closedPRsSize: Int, val avgPRs: Int) {} +case class AssetInfo(assetName: String, downloadsCount: Long) {} +case class ReleaseInfo(releaseName: String, assets: JsArray){} class GithubAccess(val asOfYYYYMMDD: String, val asOfISO: String, val connectToGithub: Boolean) { + val logger = LoggerFactory.getLogger(getClass) val github: Option[GitHub] = if (connectToGithub) Some(GitHub.connect()) else None + implicit val releaseInfoWrites: OWrites[ReleaseInfo] = Json.writes[ReleaseInfo] + implicit val assetInfoWrites: OWrites[AssetInfo] = Json.writes[AssetInfo] + def getOSSMetaDataOSSLifecycle(repo: GHRepository): OssLifecycle = { try { val content: GHContent = repo.getFileContent("OSSMETADATA", "master") @@ -121,6 +128,24 @@ class GithubAccess(val asOfYYYYMMDD: String, val asOfISO: String, val connectToG repoJson } + def getRepoDownloads(repo: GHRepository, public: Boolean, ossLifecycle: osstrackerscraper.OssLifecycle.Value): List[JsObject] = { + logger.info(s"Getting downloads for repo = ${repo.getName()}") + + // Note that in this case, the github-api will crash on calls to listIssues with java.lang.Error + // https://github.com/kohsuke/github-api/issues/65 + val neverPushed = getCloseEnoughForSameDates(repo.getCreatedAt, repo.getPushedAt) + + val (releasesInfo: List[JsObject]) = if (neverPushed) { + logger.warn("repo has never been pushed, so providing fake zero counts for downloads") + List[JsObject]() + } else { + val relStats = getReleaseStats(repo) + relStats + } + releasesInfo + } + + // TODO: Is there a faster way to only pull the last commit? def getCommitInfo(repo: GHRepository) : CommitInfo = { val commits = repo.listCommits().asList() @@ -288,6 +313,40 @@ class GithubAccess(val asOfYYYYMMDD: String, val asOfISO: String, val connectToG openCountWithLabelBug } + def getReleaseStats(repo: GHRepository) : List[JsObject] = { + val allReleases = repo.listReleases().asList() + var releaseStatistics = List[JsObject]() + + var totalDownloadsToDate: Long = 0 + allReleases.foreach(release => { + + var assetsStatistics = List[AssetInfo]() + val assets = release.getAssets + assets.foreach( + asset =>{ + assetsStatistics = AssetInfo(asset.getName, asset.getDownloadCount) :: assetsStatistics + totalDownloadsToDate += asset.getDownloadCount + } + ) + + val releaseDownloadsJson: JsObject = Json.obj( + "asOfISO" -> asOfISO, + "asOfYYYYMMDD" -> asOfYYYYMMDD, + "repo_name" -> repo.getName(), + "release_name" -> release.getName, + "assets" -> assetsStatistics, + "total_downloads_to_date" -> totalDownloadsToDate + ) + logger.debug("repo downloads json = " + releaseDownloadsJson) + + + releaseStatistics = releaseDownloadsJson :: releaseStatistics + }) + + + releaseStatistics + } + def daysBetween(smaller: Date, bigger: Date): Int = { val diff = (bigger.getTime() - smaller.getTime()) / (1000 * 60 * 60 * 24) diff.toInt diff --git a/osstracker-scraper/src/main/scala/com/netflix/oss/tools/osstrackerscraper/GithubScraper.scala b/osstracker-scraper/src/main/scala/com/netflix/oss/tools/osstrackerscraper/GithubScraper.scala index 9d41839..63f3f47 100644 --- a/osstracker-scraper/src/main/scala/com/netflix/oss/tools/osstrackerscraper/GithubScraper.scala +++ b/osstracker-scraper/src/main/scala/com/netflix/oss/tools/osstrackerscraper/GithubScraper.scala @@ -79,9 +79,19 @@ class GithubScraper(githubOrg: String, cassHost: String, cassPort: Int, esHost: if (alreadyExistsDoc.isEmpty) { val stat = github.getRepoStats(ghRepo, public, ossLifecycle) - val indexed = es.indexDocInES("/osstracker/repo_stats", stat.toString) + var indexed = es.indexDocInES("/osstracker/repo_stats", stat.toString) if (!indexed) { return false + } else { + val releaseStats = github.getRepoDownloads(ghRepo, public, ossLifecycle) + + releaseStats.foreach( rel => { + indexed &= es.indexDocInES("/osstracker/repo_downloads", rel.toString) + }) + if (!indexed) { + return false + } + } docsList += stat }