ColaboratoryでGoogle APIを使う
最初は、libraryをつくったけど、簡便でなかった。
なので、手元でやるのが簡便という結論におちついた。
認証を通して、requestを整形(batch対応)するだけなので。
GoogleサービスのAPIはbatchRequestに対応しているので、1回の呼び出しに最大5個までreqを詰める形にしている。
GADataInColab.py
#@title Default title text
# OAuth 2.0 client credentials, editable through the Colab form fields
# (the trailing `#@param` markers). They are passed to OAuth2WebServerFlow
# further down in this script.
# NOTE(review): the values here are placeholders; avoid committing real
# credentials to a shared notebook.
CLIENT_ID = "xxxx" #@param {type:"string"}
CLIENT_SECRET = "xxx" #@param {type:"string"}
import httplib2
import math
import itertools
import pandas as pd
from apiclient import errors
from apiclient.discovery import build
from oauth2client.client import OAuth2WebServerFlow
# Read-only access to Google Analytics data.
# See https://developers.google.com/identity/protocols/oauth2/scopes for all
# available scopes.
OAUTH_SCOPE = 'https://www.googleapis.com/auth/analytics.readonly'
# Redirect URI for installed apps (copy/paste "out-of-band" flow).
# NOTE(review): Google has deprecated the OOB redirect; confirm it is still
# accepted for this OAuth client.
REDIRECT_URI = 'urn:ietf:wg:oauth:2.0:oob'
# Run through the OAuth flow and retrieve credentials: print the consent URL,
# let the user paste the verification code back, then exchange it for tokens.
flow = OAuth2WebServerFlow(CLIENT_ID, CLIENT_SECRET, OAUTH_SCOPE, REDIRECT_URI)
authorize_url = flow.step1_get_authorize_url()
print('Go to the following link in your browser: ' + authorize_url)
code = input('Enter verification code: ').strip()
credentials = flow.step2_exchange(code)
# reports().batchGet() is part of the Analytics Reporting API v4, whose
# discovery name is "analyticsreporting" ("analytics" is the older v3 API and
# has no v4 discovery document).
ga = build("analyticsreporting", "v4", credentials=credentials)
# Google Analytics view to query, editable through the Colab form field.
viewId = "ga:xxx" #@param {type:"string"}
# Report request template; the pagination step later adds pageSize/pageToken
# to copies of this dict.
req = {
    "viewId": viewId,
    "dateRanges": [{"startDate": "yesterday", "endDate": "yesterday"}],
    "metrics": [
        {"expression": "ga:pageViews"},
        {"expression": "ga:transactions"},
    ],
    "dimensions": [
        {"name": "ga:dateHour"},
        {"name": "ga:minute"},
        {"name": "ga:deviceCategory"},
        {"name": "ga:channelGrouping"},
        {"name": "ga:region"},
    ],
}
# Probe request: only used to learn the total row count so the number of
# pages can be computed up front.
res = ga.reports().batchGet(body={"reportRequests": [req]}).execute()
# A report without any rows may omit "rowCount" (and "data"), so fall back to
# 0 instead of raising KeyError.
total_rows = sum(
    report.get("data", {}).get("rowCount", 0)
    for report in res.get("reports", [])
)
n = math.ceil(total_rows / 10000)
print(n)  # number of 10,000-row pages that need to be fetched
# Rows requested per page; must match the page size used to compute `n`.
PAGE_SIZE = 10000
# Number of report requests packed into a single batchGet call.
MAX_REQUESTS_PER_BATCH = 5

# Fetch every page, MAX_REQUESTS_PER_BATCH pages per API call, and collect
# one DataFrame per returned report. Concatenating once at the end avoids
# re-copying the accumulated data on every iteration.
frames = []
for _, page_group in itertools.groupby(
    range(n), lambda page: page // MAX_REQUESTS_PER_BATCH
):
    batch = []
    for page in page_group:
        print(page)
        # Build a per-page copy so the shared `req` template is not mutated.
        paged_req = dict(
            req, pageSize=PAGE_SIZE, pageToken=str(page * PAGE_SIZE)
        )
        print(paged_req)
        batch.append(paged_req)
    res = ga.reports().batchGet(body={"reportRequests": batch}).execute()
    for report in res.get("reports", []):
        # "rows" is absent when a page comes back empty.
        rows = report.get("data", {}).get("rows", [])
        # One record per row: dimension values followed by the metric values
        # of the first (only) date range.
        records = [
            row["dimensions"] + row["metrics"][0]["values"] for row in rows
        ]
        frames.append(pd.DataFrame(records))
    print(sum(len(frame) for frame in frames))  # rows fetched so far

# pd.concat raises on an empty list, so keep an empty frame when nothing was
# fetched; ignore_index gives the result a unique 0..N-1 index.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
print(df.shape)
# Column names come from the header of the most recent response: dimension
# names first, then metric names, with the "ga:" prefix stripped. This is the
# same order in which each row's values were assembled.
header = res['reports'][0]['columnHeader']
dimension_names = header.get('dimensions', [])
metric_names = [
    entry['name'] for entry in header['metricHeader']['metricHeaderEntries']
]
names = [name.replace("ga:", "") for name in dimension_names + metric_names]
if df.empty:
    # No rows were fetched: the frame has no columns yet, so assigning
    # `df.columns` would fail with a length mismatch.
    df = pd.DataFrame(columns=names)
else:
    df.columns = names
# This part depends on which metrics were requested; here they are pageViews
# and transactions. The API returns metric values as strings.
# NOTE(review): the column names follow the metric names echoed in the
# response header -- confirm their casing matches 'pageViews'.
df['pageViews'] = pd.to_numeric(df['pageViews'])
df['transactions'] = pd.to_numeric(df['transactions'])
# Assumes the date dimensions dateHour (YYYYMMDDHH) and minute (MM) were
# requested. The explicit format avoids slow, ambiguous per-value inference
# on the 12-digit string.
df['tm'] = pd.to_datetime(df['dateHour'] + df['minute'], format="%Y%m%d%H%M")
df1 = df.drop(columns=['dateHour', 'minute'])
#@title Default title text
dim = "deviceCategory" #@param {type:"string"}
# Total page views for every (timestamp, dimension value) pair.
views_by_group = df1.groupby(["tm", dim])['pageViews'].sum()
# Reshape to one row per timestamp and one column per dimension value.
views_wide = views_by_group.reset_index().pivot(
    index="tm", columns=dim, values='pageViews'
)
# Draw one line per dimension value over time.
views_wide.plot(figsize=(15,6))