diff --git a/.gitignore b/.gitignore index 9b64b6d48..6c6784a96 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +data-dist/ + .idea/ *.py[cod] diff --git a/.stickler.yml b/.stickler.yml index acbe742ff..87c617bbc 100644 --- a/.stickler.yml +++ b/.stickler.yml @@ -5,13 +5,13 @@ linters: fixer: true flake8: - python: 3 fixer: true - - # Make sure to copy changes to tox.ini too + python: 3 + + # copied from tox.ini max-complexity: 15 ignore: W,E - exclude: .venv, .git, __pycache__, dist + exclude: .venv, .git, __pycache__, dist, data fixers: enable: true diff --git a/Dockerfile b/Dockerfile index e5221ee0b..e250076d1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,13 +2,18 @@ FROM python:3.6 MAINTAINER Sargun Vohra -RUN curl -sSL https://raw.githubusercontent.com/sdispater/poetry/master/get-poetry.py | python +ENV PYTHONUNBUFFERED 1 +ENV DITTO_BASE_URL http://localhost/ + +EXPOSE 80 RUN mkdir /ditto WORKDIR /ditto/ -ADD . /ditto/ -RUN poetry install +COPY pyproject.* /ditto/ +COPY pokeapi_ditto /ditto/pokeapi_ditto +COPY data /ditto/data -CMD poetry run ditto serve -EXPOSE 80 +RUN curl -sSL https://raw.githubusercontent.com/sdispater/poetry/master/get-poetry.py | python +RUN poetry install +CMD poetry run ditto serve "--base-url=$DITTO_BASE_URL" diff --git a/README.md b/README.md index 3e5a0dc32..0f28262a3 100644 --- a/README.md +++ b/README.md @@ -4,38 +4,44 @@ This repository contains: - - a static copy of the JSON data generated from - [PokeAPI](https://github.com/PokeAPI/pokeapi) based on - [Veekun’s data](https://github.com/veekun/pokedex) - - a PokeAPI schema generated from the above data - - a script to serve the data in the same form as PokeAPI - - a script to crawl an instance of PokeAPI to regenerate the data - - a script to analyze the generated data and produce a JSON Schema + - `ditto clone`: a script to crawl an instance of PokeAPI and download all objects + - `data/api`: a static copy of the JSON data generated with the above script + - `ditto analyze`: a script to generate a JSON schema of the above data + - `data/schema`: a static copy of the PokeAPI schema generated from the above data + - `ditto transform`: a script to apply a new base url to data in `data/api` + - `ditto serve`: a script to serve the data in the same form as PokeAPI + - with full support for dynamic pagination using GET args `offset` and `limit` -## Usage +## Docker + +This project is on Docker Hub. If you just want to serve a PokeApi clone, you +just have to run one command. -This project is on Docker Hub. If you just want to run it, you just have -to run one command. Replace `8080` with the port of your choice. + - Replace `8080` with the port of your choice + - Replace `http://localhost:8080` with the base url of your choice ``` bash -docker run -p 8080:80 sargunv/pokeapi-ditto +docker run -p 8080:80 -e DITTO_BASE_URL=http://localhost:8080 sargunv/pokeapi-ditto ``` -## Development +## Usage -If you plan to edit the project, you can install it locally for -development. [Poetry](https://poetry.eustace.io/) is required. +If you'd rather use the data for something else, you can generate a +copy with the base url of your choice applied. This assumes +[Poetry](https://poetry.eustace.io/) is installed and in your PATH. ``` bash -cd ~ git clone https://github.com/PokeAPI/ditto.git cd ditto poetry install - -# now you can run ditto! -poetry run ditto --help +poetry run ditto transform --base-url http://localhost:8080 ``` +For other ditto functionality, run `poetry run ditto --help` + +If you're on Windows, you'll have to adapt the commands above to your platform. +The general idea is the same. + ## Advanced You can manually update the data if necessary. If I abandon this @@ -43,7 +49,7 @@ project, here’s how to update it. It's a bit of an involved process. Before starting, you’ll need to install [Docker and Docker Compose](https://docs.docker.com/compose/install/). You'll -also need [Poetry](https://poetry.eustace.io/). +also need [Poetry](https://poetry.eustace.io/) in your PATH. First clone the PokeAPI and Ditto repositories: @@ -81,21 +87,21 @@ from data.v2.build import build_all build_all() ``` -The above step can take a really long time to complete. Once it’s done, -you can finally update Ditto’s data: +Once it’s done, you can update Ditto’s data: ``` bash cd ~/ditto rm -r ./data poetry install -poetry run ditto clone --source http://localhost/ --destination ./data +poetry run ditto clone --src-url http://localhost/ --dest-dir ./data poetry run ditto analyze --api-dir ./data/api --schema-dir ./data/schema ``` This will crawl your local instance of PokeAPI, copy all the data to -./data, and regenerate the schema. Once that's finished, you can serve -the freshly updated data! +./data, and regenerate the schema. + +Once that's finished, you can serve the freshly updated data! ``` bash -poetry run ditto serve --port 8080 -``` +poetry run ditto serve --port 8080 --base-url http://localhost:8080 +``` \ No newline at end of file diff --git a/pokeapi_ditto/__init__.py b/pokeapi_ditto/__init__.py index d3ec452c3..493f7415d 100644 --- a/pokeapi_ditto/__init__.py +++ b/pokeapi_ditto/__init__.py @@ -1 +1 @@ -__version__ = "0.2.0" +__version__ = "0.3.0" diff --git a/pokeapi_ditto/commands/__init__.py b/pokeapi_ditto/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pokeapi_ditto/analyze.py b/pokeapi_ditto/commands/analyze.py similarity index 68% rename from pokeapi_ditto/analyze.py rename to pokeapi_ditto/commands/analyze.py index 5d8041209..9bcbeebe6 100644 --- a/pokeapi_ditto/analyze.py +++ b/pokeapi_ditto/commands/analyze.py @@ -7,28 +7,14 @@ from genson import SchemaBuilder +from pokeapi_ditto.common import from_dir -def _from_dir(target_dir: str): - target_dir = os.path.abspath(target_dir) - def func_decorator(func: callable): - def func_wrapper(*args, **kwargs): - cwd = os.getcwd() - os.chdir(target_dir) - result = func(*args, **kwargs) - os.chdir(cwd) - return result - - return func_wrapper - - return func_decorator - - -def do_analyze(data_dir: str, schema_dir: str): +def do_analyze(api_dir: str, schema_dir: str): if not Path(schema_dir).exists(): Path(schema_dir).mkdir(parents=True) - @_from_dir(data_dir) + @from_dir(api_dir) def get_schema_paths() -> List[Path]: return sorted( { @@ -37,12 +23,11 @@ def get_schema_paths() -> List[Path]: } ) - @_from_dir(data_dir) + @from_dir(api_dir) def gen_single_schema(path: Path) -> SchemaBuilder: glob_exp = os.path.join( *["*" if part == "$id" else part for part in path.parts] ) - print(os.path.join(*Path(data_dir).parts, glob_exp)) file_names = glob.iglob(glob_exp, recursive=True) schema = SchemaBuilder() for file_name in file_names: @@ -50,9 +35,10 @@ def gen_single_schema(path: Path) -> SchemaBuilder: schema.add_object(json.load(f)) return schema - @_from_dir(schema_dir) + @from_dir(schema_dir) def gen_schemas(paths: List[Path]): for path in paths: + print(Path(schema_dir).joinpath(path)) if not path.parent.exists(): os.makedirs(path.parent) schema = gen_single_schema(path) diff --git a/pokeapi_ditto/clone.py b/pokeapi_ditto/commands/clone.py similarity index 70% rename from pokeapi_ditto/clone.py rename to pokeapi_ditto/commands/clone.py index d7e88e735..ce009d3b6 100644 --- a/pokeapi_ditto/clone.py +++ b/pokeapi_ditto/commands/clone.py @@ -4,29 +4,31 @@ import requests +from pokeapi_ditto.common import BASE_URL_PLACEHOLDER + + +def do_clone(src_url, dest_dir): + if not src_url.endswith("/"): + src_url += "/" + + if not dest_dir.endswith("/"): + dest_dir += "/" -def do_clone(base_url, target_dir, replacement_url): def safe_open_w(file_name): os.makedirs(os.path.dirname(file_name), exist_ok=True) return open(file_name, "w") def print_json(data, file_name): transformed_data = json.dumps(data, indent=4, sort_keys=True) - transformed_data = transformed_data.replace(base_url, replacement_url) + transformed_data = transformed_data.replace(src_url, BASE_URL_PLACEHOLDER + "/") print(transformed_data, file=safe_open_w(file_name)) - if not base_url.endswith("/"): - base_url += "/" - - if not target_dir.endswith("/"): - target_dir += "/" - # Root - url = base_url + "api/v2/" + url = src_url + "api/v2/" endpoints = requests.get(url) - path = target_dir + url.replace(base_url, "") + "index.json" + path = dest_dir + url.replace(src_url, "") + "index.json" print(path) print_json(endpoints.json(), path) @@ -41,14 +43,14 @@ def print_json(data, file_name): # Full index url = endpoint + "?limit=" + count resource_list = requests.get(url) - path = target_dir + endpoint.replace(base_url, "") + "index.json" + path = dest_dir + endpoint.replace(src_url, "") + "index.json" print(path) print_json(resource_list.json(), path) # All resources for resourceSummary in resource_list.json()["results"]: resource_url = resourceSummary["url"] - path = target_dir + resource_url.replace(base_url, "") + "index.json" + path = dest_dir + resource_url.replace(src_url, "") + "index.json" if not os.path.isfile(path): print(path) @@ -57,7 +59,7 @@ def print_json(data, file_name): if endpoint.endswith("/pokemon/"): resource_url += "encounters/" - path = target_dir + resource_url.replace(base_url, "") + "index.json" + path = dest_dir + resource_url.replace(src_url, "") + "index.json" if not os.path.isfile(path): print(path) resource = requests.get(resource_url) diff --git a/pokeapi_ditto/commands/serve.py b/pokeapi_ditto/commands/serve.py new file mode 100644 index 000000000..d0ad90e9e --- /dev/null +++ b/pokeapi_ditto/commands/serve.py @@ -0,0 +1,138 @@ +import json + +from flask import Flask, Response, request, url_for +from flask_cors import CORS + +from pokeapi_ditto import __version__ +from pokeapi_ditto.common import apply_base_url + + +def _to_json(obj): + return json.dumps(obj, indent=4, sort_keys=True) + + +def _safe_cast(val, to_type, default=None): + try: + return to_type(val) + except (TypeError, ValueError): + return default + + +class DittoApp: + def __init__(self, root_dir: str, base_url: str): + if not root_dir.endswith("/"): + root_dir += "/" + self.root_dir = root_dir + + if base_url.endswith("/"): + base_url = base_url[:-1] + self.base_url = base_url + + def _stream_file(self, file): + for line in file: + yield apply_base_url(line, self.base_url) + + def api_path(self, path: str): + return self.root_dir + "api/v2/" + path + "/index.json" + + def streamed_api_file(self, path: str): + return self._stream_file(open(self.api_path(path))) + + def get_index(self): + return self.streamed_api_file(".") + + def get_resource_list(self, category: str): + result_obj = json.loads(open(self.api_path(category)).read()) + + args = request.args.to_dict() + offset = max(_safe_cast(args.get("offset"), int, 0), 0) + limit = _safe_cast(args.get("limit"), int, 20) + + result_obj["results"] = result_obj["results"][offset : offset + limit] + if offset > 0: + prev_offset = max(offset - limit, 0) + prev_page = url_for( + "resource_list", + category=category, + limit=limit, + offset=prev_offset, + _external=True, + ) + result_obj["previous"] = prev_page + if offset + limit < result_obj["count"]: + next_offset = offset + limit + next_page = url_for( + "resource_list", + category=category, + limit=limit, + offset=next_offset, + _external=True, + ) + result_obj["next"] = next_page + + return apply_base_url(_to_json(result_obj), self.base_url) + + def get_resource_item(self, category: str, key: str): + return self.streamed_api_file("/".join([category, key])) + + def get_resource_extra(self, category: str, key: str, extra: str): + return self.streamed_api_file("/".join([category, key, extra])) + + +def create_app(root_dir: str, base_url: str): + app = Flask(__name__) + CORS(app) + app.url_map.strict_slashes = False + + ditto = DittoApp(root_dir, base_url) + + content_json = "application/json" + + @app.errorhandler(FileNotFoundError) + @app.errorhandler(404) + def not_found_404(_): + return Response( + _to_json({"error": "Not found"}), status=404, mimetype=content_json + ) + + @app.errorhandler(500) + def not_found_500(_): + return Response( + _to_json({"error": "Internal server error"}), + status=500, + mimetype=content_json, + ) + + @app.route("/") + def root(): + return Response( + _to_json({"application": "pokeapi-ditto", "version": __version__}), + status=200, + mimetype=content_json, + ) + + @app.route("/api/v2/") + def index(): + return Response(ditto.get_index(), status=200, mimetype=content_json) + + @app.route("/api/v2//") + def resource_list(category): + return Response( + ditto.get_resource_list(category), status=200, mimetype=content_json + ) + + @app.route("/api/v2///") + def resource_item(category, key): + return Response( + ditto.get_resource_item(category, key), status=200, mimetype=content_json + ) + + @app.route("/api/v2///") + def resource_extra(category, key, extra): + return Response( + ditto.get_resource_extra(category, key, extra), + status=200, + mimetype=content_json, + ) + + return app diff --git a/pokeapi_ditto/commands/transform.py b/pokeapi_ditto/commands/transform.py new file mode 100644 index 000000000..bfc13071f --- /dev/null +++ b/pokeapi_ditto/commands/transform.py @@ -0,0 +1,26 @@ +from pathlib import Path +from typing import List + +from pokeapi_ditto.common import apply_base_url + + +def do_transform(src_dir: str, dest_dir: str, base_url: str): + src_dir: Path = Path(src_dir) + dest_dir: Path = Path(dest_dir) + + if base_url.endswith("/"): + base_url = base_url[:-1] + + if not dest_dir.exists(): + dest_dir.mkdir(parents=True) + + orig_paths: List[Path] = src_dir.glob("api/**/*.json") + + for orig in orig_paths: + new = dest_dir.joinpath(orig.relative_to(src_dir)) + print(new) + + if not new.parent.exists(): + new.parent.mkdir(parents=True) + + new.write_text(apply_base_url(orig.read_text(), base_url)) diff --git a/pokeapi_ditto/common.py b/pokeapi_ditto/common.py new file mode 100644 index 000000000..6afce91b2 --- /dev/null +++ b/pokeapi_ditto/common.py @@ -0,0 +1,23 @@ +import os + +BASE_URL_PLACEHOLDER = "$BASE_URL_PLACEHOLDER" + + +def from_dir(target_dir: str) -> callable: + target_dir = os.path.abspath(target_dir) + + def func_decorator(func: callable) -> callable: + def func_wrapper(*args, **kwargs): + cwd = os.getcwd() + os.chdir(target_dir) + result = func(*args, **kwargs) + os.chdir(cwd) + return result + + return func_wrapper + + return func_decorator + + +def apply_base_url(data: str, base_url: str) -> str: + return data.replace(BASE_URL_PLACEHOLDER, base_url) diff --git a/pokeapi_ditto/main.py b/pokeapi_ditto/main.py index 6175bbf8c..f4f3d0f55 100644 --- a/pokeapi_ditto/main.py +++ b/pokeapi_ditto/main.py @@ -3,7 +3,7 @@ from gevent.pywsgi import WSGIServer -from pokeapi_ditto import analyze, clone, serve +from pokeapi_ditto.commands import analyze, clone, serve, transform class Ditto(object): @@ -12,37 +12,51 @@ def __init__(self): subparsers = parser.add_subparsers(dest="command") clone_args = subparsers.add_parser("clone") - clone_args.add_argument("--source", type=str, default="http://localhost/") - clone_args.add_argument("--destination", type=str, default="./data") - clone_args.add_argument( - "--replacement-url", type=str, default="https://pokeapi.co/" - ) + clone_args.add_argument("--src-url", type=str, default="http://localhost/") + clone_args.add_argument("--dest-dir", type=str, default="./data") - serve_args = subparsers.add_parser("serve") - serve_args.add_argument("--port", type=int, default=80) + transform_args = subparsers.add_parser("transform") + transform_args.add_argument("--src-dir", type=str, default="./data") + transform_args.add_argument("--dest-dir", type=str, default="./data-dist") + transform_args.add_argument("--base-url", type=str, required=True) analyze_args = subparsers.add_parser("analyze") analyze_args.add_argument("--api-dir", type=str, default="./data/api") analyze_args.add_argument("--schema-dir", type=str, default="./data/schema") - args = parser.parse_args(sys.argv[1:]) - if args.command is None: + serve_args = subparsers.add_parser("serve") + serve_args.add_argument("--port", type=int, default=80) + serve_args.add_argument("--base-url", type=str, default="") + serve_args.add_argument("--root-dir", type=str, default="./data") + + args = vars(parser.parse_args(sys.argv[1:])) + command = args.pop("command") + if command is None: parser.print_help() exit(1) - getattr(self, args.command)(args) - @staticmethod - def analyze(args): - analyze.do_analyze(args.api_dir, args.schema_dir) + print( + "Doing '{}' with configuration: {}".format(command, args), file=sys.stderr + ) + getattr(self, command)(args) @staticmethod def clone(args): - clone.do_clone(args.source, args.destination, args.replacement_url) + clone.do_clone(**args) + + @staticmethod + def transform(args): + transform.do_transform(**args) + + @staticmethod + def analyze(args): + analyze.do_analyze(**args) @staticmethod def serve(args): - print("Starting Ditto server with configuration: {}".format(vars(args))) - WSGIServer(("", args.port), serve.app).serve_forever() + port = args.pop("port") + app = serve.create_app(**args) + WSGIServer(("", port), app).serve_forever() if __name__ == "__main__": diff --git a/pokeapi_ditto/serve.py b/pokeapi_ditto/serve.py deleted file mode 100644 index e42704ce0..000000000 --- a/pokeapi_ditto/serve.py +++ /dev/null @@ -1,160 +0,0 @@ -import json -import re - -from flask import Flask, Response, request, url_for -from flask_cors import CORS - - -def to_json(obj): - return json.dumps(obj, indent=4, sort_keys=True) - - -def replace_host(string, url): - return re.sub("https?://[a-zA-Z0-9.]+/", url, string) - - -def safe_cast(val, to_type, default=None): - try: - return to_type(val) - except (TypeError, ValueError): - return default - - -def stream_file(file, url): - for line in file: - yield replace_host(line, url) - - -def host_url(): - return url_for("root", _external=True) - - -class DittoApp: - def __init__(self, root_dir: str): - if not root_dir.endswith("/"): - root_dir += "/" - self.root_dir = root_dir - - def api_path(self, path: str): - return self.root_dir + "api/v2/" + path + "/index.json" - - def media_path(self, path: str): - return self.root_dir + "media/" + path - - def streamed_api_file(self, path: str): - return stream_file(open(self.api_path(path)), host_url()) - - def get_index(self): - return self.streamed_api_file(".") - - def get_resource_list(self, category: str): - result_obj = json.loads(open(self.api_path(category)).read()) - - args = request.args.to_dict() - offset = max(safe_cast(args.get("offset"), int, 0), 0) - limit = safe_cast(args.get("limit"), int, 20) - - result_obj["results"] = result_obj["results"][offset : offset + limit] - if offset > 0: - prev_offset = max(offset - limit, 0) - prev_page = url_for( - "resource_list", - category=category, - limit=limit, - offset=prev_offset, - _external=True, - ) - result_obj["previous"] = prev_page - if offset + limit < result_obj["count"]: - next_offset = offset + limit - next_page = url_for( - "resource_list", - category=category, - limit=limit, - offset=next_offset, - _external=True, - ) - result_obj["next"] = next_page - - return replace_host(to_json(result_obj), host_url()) - - def get_resource_item(self, category: str, key: str): - return self.streamed_api_file(category + "/" + key) - - def get_resource_extra(self, category: str, key: str, extra: str): - return self.streamed_api_file(category + "/" + key + "/" + extra) - - def get_media(self, path: str): - return open(self.media_path(path), "r+b").read() - - -app = Flask(__name__) -CORS(app) -app.url_map.strict_slashes = False - -ditto = DittoApp("./data") - -error_404 = to_json({"error": "Not found"}) - -error_500 = to_json({"error": "Internal server error"}) - -information = to_json( - { - "docker": "sargunv/pokeapi-ditto", - "git": "https://github.com/PokeAPI/ditto", - "github": "PokeAPI/ditto", - "pypi": "pokeapi-ditto", - } -) - -content_json = "application/json" -content_png = "image/png" - - -@app.errorhandler(FileNotFoundError) -@app.errorhandler(404) -def not_found_404(_): - return Response(error_404, status=404, mimetype=content_json) - - -@app.errorhandler(500) -def not_found_500(_): - return Response(error_500, status=500, mimetype=content_json) - - -@app.route("/") -def root(): - return Response(information, status=200, mimetype=content_json) - - -@app.route("/media/") -def media(path): - return Response(ditto.get_media(path), status=200, mimetype=content_png) - - -@app.route("/api/v2/") -def index(): - return Response(ditto.get_index(), status=200, mimetype=content_json) - - -@app.route("/api/v2//") -def resource_list(category): - return Response( - ditto.get_resource_list(category), status=200, mimetype=content_json - ) - - -@app.route("/api/v2///") -def resource_item(category, key): - return Response( - ditto.get_resource_item(category, key), status=200, mimetype=content_json - ) - - -@app.route("/api/v2///") -def resource_extra(category, key, extra): - return Response( - ditto.get_resource_extra(category, key, extra), - status=200, - mimetype=content_json, - ) diff --git a/pyproject.toml b/pyproject.toml index a4e169a13..9acb007a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pokeapi-ditto" -version = "0.2.0" +version = "0.3.0" description = "Ditto is a server that serves a static copy of PokeAPI's data." license = "Apache-2.0" authors = ["Sargun Vohra "] diff --git a/tox.ini b/tox.ini index 052b132a0..ef4c41d57 100644 --- a/tox.ini +++ b/tox.ini @@ -2,15 +2,15 @@ # support pyproject.toml yet [isort] -skip = .venv, .git, __pycache__, dist +skip = .venv, .git, __pycache__, dist, data multi_line_output = 3 include_trailing_comma = True force_grid_wrap = 0 combine_as_imports = True line_length = 88 -# Make sure to copy changes to .stickler.yml too [flake8] +# copy changes to .stickler.yml max-complexity = 15 ignore = W,E # use black for formatting -exclude = .venv, .git, __pycache__, dist \ No newline at end of file +exclude = .venv, .git, __pycache__, dist, data \ No newline at end of file