diff --git a/README.md b/README.md index dbc0bc8..656ca9f 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,20 @@ touch results.csv && docker run -v $PWD/example-queries.txt:/example-queries -v file `results.csv` will contain the parsed results. -**If you want emails use additionally the `-email` parameter** +**If you want emails, additionally use the `-email` parameter** + +### REST API +The Google Maps Scraper provides a RESTful API for programmatic management of scraping tasks. + +### Key Endpoints + +- POST /api/v1/jobs: Create a new scraping job +- GET /api/v1/jobs: List all jobs +- GET /api/v1/jobs/{id}: Get details of a specific job +- DELETE /api/v1/jobs/{id}: Delete a job +- GET /api/v1/jobs/{id}/download: Download job results as CSV + +For detailed API documentation, refer to the OpenAPI 3.0.3 specification available through Swagger UI or Redoc when running the app at http://localhost:8080/api/docs ## 🌟 Support the Project! @@ -208,6 +221,8 @@ The results are written when they arrive in the `results` file you specified try `./google-maps-scraper -h` to see the command line options available: ``` + -addr string + address to listen on for web server (default ":8080") -aws-access-key string AWS access key -aws-lambda @@ -460,4 +475,3 @@ banner is generated using OpenAI's DALE If you register via the links on my page I may get a commission. 
This is another way to support my work - diff --git a/go.work.sum b/go.work.sum index b437384..19f3f06 100644 --- a/go.work.sum +++ b/go.work.sum @@ -25,27 +25,16 @@ dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9 h1:VpgP7xuJadIUu github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802 h1:1BDTz0u9nC3//pOCMdNH+CiXJVYJh5UQNCOBG7jbELc= github.com/DATA-DOG/go-sqlmock v1.5.0 h1:Shsta01QNfFxHCfpW6YH2STWB0MudeXXEWMr20OEh60= github.com/DATA-DOG/go-sqlmock v1.5.0/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= -github.com/Noooste/azuretls-client v1.5.11 h1:adoAsHjKbxcxDxineOMMS2z2qwauHXvNhqOC3+N2pYU= -github.com/Noooste/azuretls-client v1.5.11/go.mod h1:EmuuM4FlELR5XlTqJQfe02dIP6Fi92bhcEPtnjJg0SU= -github.com/Noooste/fhttp v1.0.12 h1:2N15bIATKaC6q+LVyRGyxPyuqEPvwAS3Uk1peC3YVHU= -github.com/Noooste/fhttp v1.0.12/go.mod h1:CMVxKOhNheqJN5HYE4Rlvz2SRdV8Uv7YWmi6OwmB/Bk= -github.com/Noooste/utls v1.2.12 h1:Zcm/7OB6W4Ro1q2OV1BrFb3qBI7uqYeC21wHYX+Ez9I= -github.com/Noooste/utls v1.2.12/go.mod h1:CJaLzDHOhjuKESY3/wTSEzs3N2QgdXTrNQE3sW2632M= -github.com/Noooste/websocket v1.0.3 h1:drW7tvZ3YqzqI9wApnaH1Q0syFMXO7gbLlsBWjZvMNA= -github.com/Noooste/websocket v1.0.3/go.mod h1:Qhw0Rtuju/fPPbcb3R5XGq7poa51qPDL462jTltl9nQ= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafoB+tBA3gMyHYHrpOtNuDiK/uB5uXxq5wM= github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d h1:UQZhZ2O0vMHr2cI+DC1Mbh0TJxzA3RcLoMsFw+aXw7E= -github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= -github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= github.com/armon/go-metrics v0.3.10 h1:FR+drcQStOe+32sYyJYyZ7FIdgoGGBnwLl+flodp8Uo= github.com/armon/go-metrics v0.3.10/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc= +github.com/bwesterb/go-ristretto v1.2.3/go.mod h1:fUIoIZaG73pV5biE2Blr2xEzDoMj7NFEuV9ekS419A0= github.com/census-instrumentation/opencensus-proto v0.2.1 
h1:glEXhBS5PSLLv4IXzLA5yPRVX4bilULVyxxbrfOtDAk= github.com/chzyer/logex v1.1.10 h1:Swpa1K6QvQznwJRcfTfQJmTE72DqScAa40E+fbHEXEE= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e h1:fY5BOSpyZCqRo5OhCuC+XN+r/bBCmeuuJtjz+bCNIf8= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 h1:q763qf9huN11kDQavWsoZXJNW3xEE4JJyHa5Q25/sd8= github.com/client9/misspell v0.3.4 h1:ta993UF76GwbvJcIo3Y68y/M3WxlpEHPWIGDkJYwzJI= -github.com/cloudflare/circl v1.5.0 h1:hxIWksrX6XN5a1L2TI/h53AGPhNHoUBo+TD1ms9+pys= -github.com/cloudflare/circl v1.5.0/go.mod h1:uddAzsPgqdMAYatqJ0lsjX1oECcQLIlRpzZh3pJrofs= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f h1:WBZRG4aNOuI15bLRrCgN8fCq8E5Xuty6jGbmSNEvSsU= github.com/confluentinc/confluent-kafka-go v1.9.2 h1:gV/GxhMBUb03tFWkN+7kdhg+zf+QUM+wVkI9zwh770Q= github.com/confluentinc/confluent-kafka-go v1.9.2/go.mod h1:ptXNqsuDfYbAE/LBW6pnwWZElUoWxHoV8E43DCrliyo= @@ -57,8 +46,6 @@ github.com/cristalhq/acmd v0.12.0 h1:RdlKnxjN+txbQosg8p/TRNZ+J1Rdne43MVQZ1zDhGWk github.com/cristalhq/acmd v0.12.0/go.mod h1:LG5oa43pE/BbxtfMoImHCQN++0Su7dzipdgBjMCBVDQ= github.com/envoyproxy/go-control-plane v0.9.4 h1:rEvIZUSZ3fx39WIi3JkQqQBitGwpELBIYWeBVh6wn+E= github.com/envoyproxy/protoc-gen-validate v0.1.0 h1:EQciDnbrYxy13PgWoY8AqoxGiPrpgBZ1R8UNe3ddc+A= -github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= -github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/go-chi/chi/v5 v5.0.8 h1:lD+NLqFcAi1ovnVZpsnObHGW4xb4J8lNmoYVfECH1Y0= @@ -103,8 +90,6 @@ github.com/gorilla/websocket v1.5.1 h1:gmztn0JnHVt9JZquRuzLw3g4wouNVzKL15iLr/zn/ github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY= github.com/gosom/scrapemate v0.8.1 h1:xyKeZTQ+mrnjCA6W7KnsXLuyUBEcbiChhpJtC8haI+8= 
github.com/gosom/scrapemate v0.8.1/go.mod h1:0EuH67Lz16HlyxQfoSOY46zpLNq/75/qlarYstMPHiQ= -github.com/gosom/scrapemate v0.8.3 h1:okEpdLKkdaivKlZt0GzCVouwRnWGMei5OU4okAuHVYA= -github.com/gosom/scrapemate v0.8.3/go.mod h1:k6nFr9vq78/JHPAq6MgLKo2lh3eMaYnRNmv/CrxNY0s= github.com/h2non/filetype v1.1.3 h1:FKkx9QbD7HR/zjK1Ia5XiBsq9zdLi5Kf3zGyFTAFkGg= github.com/h2non/filetype v1.1.3/go.mod h1:319b3zT68BvV+WRj7cwy856M2ehB3HqNOt6sy1HndBY= github.com/hashicorp/consul/api v1.12.0 h1:k3y1FYv6nuKyNTqj6w9gXOx5r5CfLj/k/euUeBXj1OY= @@ -137,8 +122,6 @@ github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4d github.com/kelseyhightower/envconfig v1.4.0 h1:Im6hONhd3pLkfDFsbRgu68RDNkGF1r3dvMUtDTo2cv8= github.com/kelseyhightower/envconfig v1.4.0/go.mod h1:cccZRl6mQpaq41TPp5QxidR+Sa3axMbJDNb//FQX6Gg= github.com/kisielk/gotool v1.0.0 h1:AV2c/EiW3KqPNT9ZKl07ehoAGi4C5/01Cfbblndcapg= -github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= -github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8= github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= @@ -167,8 +150,10 @@ github.com/pkg/sftp v1.13.6 h1:JFZT4XbOU7l77xGSpOdW+pwIMqP044IyjXX6FGyEKFo= github.com/pkg/sftp v1.13.6/go.mod h1:tz1ryNURKu77RL+GuCzmoJYxQczL3wLNNpPWagdg4Qk= github.com/quasilyte/go-ruleguard/rules v0.0.0-20211022131956-028d6511ab71 h1:CNooiryw5aisadVfzneSZPswRWvnVW8hF1bS/vo8ReI= github.com/quasilyte/go-ruleguard/rules v0.0.0-20211022131956-028d6511ab71/go.mod h1:4cgAphtvu7Ftv7vOT2ZOYhC6CvBxZixcasr8qIOTA50= +github.com/quic-go/quic-go v0.40.1/go.mod h1:PeN7kuVJ4xZbxSv/4OX6S1USOX8MJvydwpTx31vx60c= github.com/realclientip/realclientip-go v1.0.0 h1:+yPxeC0mEaJzq1BfCt2h4BxlyrvIIBzR6suDc3BEF1U= github.com/realclientip/realclientip-go 
v1.0.0/go.mod h1:CXnUdVwFRcXFJIRb/dTYqbT7ud48+Pi2pFm80bxDmcI= +github.com/refraction-networking/utls v1.6.2/go.mod h1:yil9+7qSl+gBwJqztoQseO6Pr3h62pQoY1lXiNR/FPs= github.com/rollbar/rollbar-go v1.4.5 h1:Z+5yGaZdB7MFv7t759KUR3VEkGdwHjo7Avvf3ApHTVI= github.com/rollbar/rollbar-go v1.4.5/go.mod h1:kLQ9gP3WCRGrvJmF0ueO3wK9xWocej8GRX98D8sa39w= github.com/rs/cors v1.8.2 h1:KCooALfAYGs415Cwu5ABvv9n9509fSiG5SQJn/AQo4U= @@ -223,29 +208,17 @@ go.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdga go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt39JTi4= go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ= golang.org/x/crypto v0.30.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= -golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= -golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/image v0.0.0-20190802002840-cff245a6509b h1:+qEpEAPhDZ1o0x3tHzZTQDArnOixOzGD9HUJfcg0mb4= golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 h1:VLliZ0d+/avPrXXH+OakdXhpJuEoBZuwh1m2j7U6Iug= golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028 h1:4+4C/Iv2U4fMZBiMCc98MG1In4gJY5YRhtpDNeDeHWs= golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= -golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI= -golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs= golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA= golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= -golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= -golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 
golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= -golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/telemetry v0.0.0-20240521205824-bda55230c457 h1:zf5N6UOrA487eEFacMePxjXAJctxKmyjKUsjA11Uzuk= golang.org/x/telemetry v0.0.0-20240521205824-bda55230c457/go.mod h1:pRgIJT+bRLFKnoM1ldnzKoxTIn14Yxz928LQRYYgIN0= -golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= -golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= -golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= -golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= diff --git a/runner/runner.go b/runner/runner.go index 5bbbb82..efb32d1 100644 --- a/runner/runner.go +++ b/runner/runner.go @@ -76,6 +76,7 @@ type Config struct { AwsLambdaChunkSize int FastMode bool Radius float64 + Addr string } func ParseConfig() *Config { @@ -119,6 +120,7 @@ func ParseConfig() *Config { flag.IntVar(&cfg.AwsLambdaChunkSize, "aws-lambda-chunk-size", 100, "AWS Lambda chunk size") flag.BoolVar(&cfg.FastMode, "fast-mode", false, "fast mode (reduced data collection)") flag.Float64Var(&cfg.Radius, "radius", 10000, "search radius in meters. 
Default is 10000 meters") + flag.StringVar(&cfg.Addr, "addr", ":8080", "address to listen on for web server") flag.Parse() diff --git a/runner/webrunner/webrunner.go b/runner/webrunner/webrunner.go index 55c6c82..9a19d2e 100644 --- a/runner/webrunner/webrunner.go +++ b/runner/webrunner/webrunner.go @@ -50,7 +50,7 @@ func New(cfg *runner.Config) (runner.Runner, error) { svc := web.NewService(repo, cfg.DataFolder) - srv, err := web.New(svc) + srv, err := web.New(svc, cfg.Addr) if err != nil { return nil, err } diff --git a/web/service.go b/web/service.go index 889295b..2dbc3c0 100644 --- a/web/service.go +++ b/web/service.go @@ -28,6 +28,10 @@ func (s *Service) All(ctx context.Context) ([]Job, error) { return s.repo.Select(ctx, SelectParams{}) } +func (s *Service) Get(ctx context.Context, id string) (Job, error) { + return s.repo.Get(ctx, id) +} + func (s *Service) Delete(ctx context.Context, id string) error { if strings.Contains(id, "/") || strings.Contains(id, "\\") || strings.Contains(id, "..") { return fmt.Errorf("invalid file name") diff --git a/web/static/css/main.css b/web/static/css/main.css index 784a2ba..9e59f37 100644 --- a/web/static/css/main.css +++ b/web/static/css/main.css @@ -332,3 +332,26 @@ th { margin-right: 8px; } +nav { + margin-bottom: 16px; + padding: 8px 0; +} + +nav a { + color: white; + text-decoration: none; + font-size: 16px; + font-weight: 500; + padding: 8px 16px; + background-color: var(--color-primary); + border: 1px solid var(--color-primary); + border-radius: 4px; + transition: all 0.2s ease; + display: inline-block; +} + +nav a:hover { + background-color: var(--color-primary-light); + transform: translateY(-1px); + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); +} diff --git a/web/static/spec/spec.yaml b/web/static/spec/spec.yaml new file mode 100644 index 0000000..2f71188 --- /dev/null +++ b/web/static/spec/spec.yaml @@ -0,0 +1,252 @@ +openapi: 3.0.3 +info: + title: Google Maps Scraper API + version: 1.0.0 + description: API for 
managing job google maps scraping tasks + +paths: + /api/v1/jobs: + post: + summary: Create a new job scraping task + x-code-samples: + - lang: curl + source: | + curl -X POST "http://localhost:8080/api/v1/jobs" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "Coffee shops Ilion", + "keywords": ["coffee in ilion"], + "lang": "el", + "zoom": 15, + "depth": 1, + "max_time": 3600 + }' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ApiScrapeRequest' + responses: + '201': + description: Job created successfully + content: + application/json: + schema: + $ref: '#/components/schemas/ApiScrapeResponse' + '422': + description: Unprocessable entity + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + '500': + description: Internal server error + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + + get: + summary: Get all jobs + x-code-samples: + - lang: curl + source: | + curl -X GET "http://localhost:8080/api/v1/jobs" + responses: + '200': + description: Successful response + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/Job' + '500': + description: Internal server error + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + + /api/v1/jobs/{id}: + get: + summary: Get a specific job + x-code-samples: + - lang: curl + source: | + curl -X GET "http://localhost:8080/api/v1/jobs/6f0c1af8-3c4e-4742-84bb-590938ae8930" + parameters: + - name: id + in: path + required: true + schema: + type: string + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '#/components/schemas/Job' + '404': + description: Job not found + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + '422': + description: Invalid ID + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + + delete: + summary: 
Delete a specific job + x-code-samples: + - lang: curl + source: | + curl -X DELETE "http://localhost:8080/api/v1/jobs/455a6a00-cefb-4a9d-9e7d-791f01873700" + parameters: + - name: id + in: path + required: true + schema: + type: string + responses: + '200': + description: Job deleted successfully + '422': + description: Invalid ID + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + '500': + description: Internal server error + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + + /api/v1/jobs/{id}/download: + get: + summary: Download job results as CSV + x-code-samples: + - lang: curl + source: | + curl -X GET "http://localhost:8080/api/v1/jobs/18eafda3-53a9-4970-ac96-8f8dfc7011c3/download" --output results.csv + parameters: + - name: id + in: path + required: true + schema: + type: string + responses: + '200': + description: Successful response + content: + text/csv: + schema: + type: string + format: binary + '404': + description: File not found + '422': + description: Invalid ID + '500': + description: Internal server error + +components: + schemas: + ApiError: + type: object + properties: + code: + type: integer + message: + type: string + + ApiScrapeRequest: + type: object + properties: + name: + type: string + keywords: + type: array + items: + type: string + lang: + type: string + zoom: + type: integer + lat: + type: string + lon: + type: string + fast_mode: + type: boolean + radius: + type: integer + depth: + type: integer + email: + type: boolean + max_time: + type: integer + proxies: + type: array + items: + type: string + + ApiScrapeResponse: + type: object + properties: + id: + type: string + + Job: + type: object + properties: + id: + type: string + name: + type: string + date: + type: string + format: date-time + status: + type: string + data: + $ref: '#/components/schemas/JobData' + + JobData: + type: object + properties: + keywords: + type: array + items: + type: string + lang: + type: string + zoom: + 
type: integer + lat: + type: string + lon: + type: string + fast_mode: + type: boolean + radius: + type: integer + depth: + type: integer + email: + type: boolean + max_time: + type: integer + proxies: + type: array + items: + type: string + diff --git a/web/static/templates/index.html b/web/static/templates/index.html index d68adec..365e6c1 100644 --- a/web/static/templates/index.html +++ b/web/static/templates/index.html @@ -11,6 +11,9 @@

Google Maps Scraper

+

If you find this tool useful, please consider starring our repository:

diff --git a/web/static/templates/redoc.html b/web/static/templates/redoc.html new file mode 100644 index 0000000..952cdef --- /dev/null +++ b/web/static/templates/redoc.html @@ -0,0 +1,13 @@ + + + + API Documentation + + + + + + + + + diff --git a/web/web.go b/web/web.go index 5dc8c5f..932cc89 100644 --- a/web/web.go +++ b/web/web.go @@ -3,6 +3,7 @@ package web import ( "context" "embed" + "encoding/json" "fmt" "html/template" "io" @@ -27,13 +28,17 @@ type Server struct { svc *Service } -func New(svc *Service) (*Server, error) { +func New(svc *Service, addr string) (*Server, error) { ans := Server{ svc: svc, tmpl: make(map[string]*template.Template), srv: &http.Server{ - Addr: ":8080", + Addr: addr, ReadHeaderTimeout: 10 * time.Second, + ReadTimeout: 60 * time.Second, + WriteTimeout: 60 * time.Second, + IdleTimeout: 120 * time.Second, + MaxHeaderBytes: 1 << 20, }, } @@ -47,17 +52,87 @@ func New(svc *Service) (*Server, error) { mux.Handle("/static/", http.StripPrefix("/static/", fileServer)) mux.HandleFunc("/scrape", ans.scrape) - mux.HandleFunc("/download", ans.download) - mux.HandleFunc("/delete", ans.delete) + mux.HandleFunc("/download", func(w http.ResponseWriter, r *http.Request) { + r = requestWithID(r) + + ans.download(w, r) + }) + mux.HandleFunc("/delete", func(w http.ResponseWriter, r *http.Request) { + r = requestWithID(r) + + ans.delete(w, r) + }) mux.HandleFunc("/jobs", ans.getJobs) mux.HandleFunc("/", ans.index) - ans.srv.Handler = mux + // api routes + mux.HandleFunc("/api/docs", ans.redocHandler) + mux.HandleFunc("/api/v1/jobs", func(w http.ResponseWriter, r *http.Request) { + switch r.Method { + case http.MethodPost: + ans.apiScrape(w, r) + case http.MethodGet: + ans.apiGetJobs(w, r) + default: + // RFC 9110 requires an Allow header on every 405 response + w.Header().Set("Allow", "GET, POST") + + apiErr := apiError{ + Code: http.StatusMethodNotAllowed, + Message: "Method not allowed", + } + + renderJSON(w, http.StatusMethodNotAllowed, apiErr) + } + }) + + mux.HandleFunc("/api/v1/jobs/{id}", func(w http.ResponseWriter, r *http.Request) { + r = 
requestWithID(r) + + switch r.Method { + case http.MethodGet: + ans.apiGetJob(w, r) + case http.MethodDelete: + ans.apiDeleteJob(w, r) + default: + w.Header().Set("Allow", "GET, DELETE") + + apiErr := apiError{ + Code: http.StatusMethodNotAllowed, + Message: "Method not allowed", + } + + renderJSON(w, http.StatusMethodNotAllowed, apiErr) + } + }) + + mux.HandleFunc("/api/v1/jobs/{id}/download", func(w http.ResponseWriter, r *http.Request) { + r = requestWithID(r) + + if r.Method != http.MethodGet { + w.Header().Set("Allow", "GET") + + apiErr := apiError{ + Code: http.StatusMethodNotAllowed, + Message: "Method not allowed", + } + + renderJSON(w, http.StatusMethodNotAllowed, apiErr) + + return + } + + ans.download(w, r) + }) + + handler := securityHeaders(mux) + ans.srv.Handler = handler tmplsKeys := []string{ "static/templates/index.html", "static/templates/job_rows.html", "static/templates/job_row.html", + "static/templates/redoc.html", } for _, key := range tmplsKeys { @@ -111,6 +179,33 @@ type formData struct { Proxies []string } +// ctxKey is a private type for context keys so they cannot collide with keys set by other packages. +type ctxKey string + +const idCtxKey ctxKey = "id" + +// requestWithID reads the job ID from the {id} path wildcard, falling back to the "id" query parameter, and attaches it to the request context when it parses as a UUID. +func requestWithID(r *http.Request) *http.Request { + id := r.PathValue("id") + if id == "" { + id = r.URL.Query().Get("id") + } + + parsed, err := uuid.Parse(id) + if err == nil { + r = r.WithContext(context.WithValue(r.Context(), idCtxKey, parsed)) + } + + return r +} + +// getIDFromRequest returns the UUID attached by requestWithID; ok is false when no valid ID was attached. +func getIDFromRequest(r *http.Request) (uuid.UUID, bool) { + id, ok := r.Context().Value(idCtxKey).(uuid.UUID) + + return id, ok +} + //nolint:gocritic // this is used in template func (f formData) ProxiesString() string { return strings.Join(f.Proxies, "\n") @@ -307,16 +399,15 @@ func (s *Server) download(w http.ResponseWriter, r *http.Request) { } ctx := r.Context() - id := r.URL.Query().Get("id") - _, err := uuid.Parse(id) - if err != nil { + id, ok := getIDFromRequest(r) + if !ok { http.Error(w, "Invalid ID", http.StatusUnprocessableEntity) return } - filePath, err := s.svc.GetCSV(ctx, id) + filePath, err := s.svc.GetCSV(ctx, id.String()) if err != nil { http.Error(w, err.Error(), 
http.StatusNotFound) return @@ -347,16 +438,14 @@ func (s *Server) delete(w http.ResponseWriter, r *http.Request) { return } - deleteID := r.URL.Query().Get("id") - - _, err := uuid.Parse(deleteID) - if err != nil { + deleteID, ok := getIDFromRequest(r) + if !ok { http.Error(w, "Invalid ID", http.StatusUnprocessableEntity) return } - err = s.svc.Delete(r.Context(), deleteID) + err := s.svc.Delete(r.Context(), deleteID.String()) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) @@ -366,6 +455,203 @@ func (s *Server) delete(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) } +// apiError is the JSON error envelope returned by the REST API. +type apiError struct { + Code int `json:"code"` + Message string `json:"message"` +} + +// apiScrapeRequest is the POST /api/v1/jobs payload: a job name plus the embedded JobData fields. +type apiScrapeRequest struct { + Name string `json:"name"` + JobData +} + +// apiScrapeResponse carries the ID of a newly created job. +type apiScrapeResponse struct { + ID string `json:"id"` +} + +// redocHandler serves the API documentation page by executing the redoc.html template. +func (s *Server) redocHandler(w http.ResponseWriter, _ *http.Request) { + tmpl, ok := s.tmpl["static/templates/redoc.html"] + if !ok { + http.Error(w, "missing tpl", http.StatusInternalServerError) + + return + } + + _ = tmpl.Execute(w, nil) +} + +// apiScrape decodes a JSON job request, validates it and stores it as a pending job, replying 201 with the new job ID. +func (s *Server) apiScrape(w http.ResponseWriter, r *http.Request) { + var req apiScrapeRequest + + // cap the body so a client cannot exhaust memory with an oversized payload + r.Body = http.MaxBytesReader(w, r.Body, 1<<20) + + err := json.NewDecoder(r.Body).Decode(&req) + if err != nil { + apiErr := apiError{ + Code: http.StatusUnprocessableEntity, + Message: err.Error(), + } + + renderJSON(w, http.StatusUnprocessableEntity, apiErr) + + return + } + + newJob := Job{ + ID: uuid.New().String(), + Name: req.Name, + Date: time.Now().UTC(), + Status: StatusPending, + Data: req.JobData, + } + + // convert to seconds + newJob.Data.MaxTime *= time.Second + + err = newJob.Validate() + if err != nil { + apiErr := apiError{ + Code: http.StatusUnprocessableEntity, + Message: err.Error(), + } + + renderJSON(w, http.StatusUnprocessableEntity, apiErr) + + return + } + + err = s.svc.Create(r.Context(), &newJob) + if err != nil { + apiErr := apiError{ + Code: http.StatusInternalServerError, + Message: err.Error(), + } + + renderJSON(w, 
http.StatusInternalServerError, apiErr) + + return + } + + ans := apiScrapeResponse{ + ID: newJob.ID, + } + + renderJSON(w, http.StatusCreated, ans) +} + +// apiGetJobs replies with every job known to the service as a JSON array. +func (s *Server) apiGetJobs(w http.ResponseWriter, r *http.Request) { + jobs, err := s.svc.All(r.Context()) + if err != nil { + apiErr := apiError{ + Code: http.StatusInternalServerError, + Message: err.Error(), + } + + renderJSON(w, http.StatusInternalServerError, apiErr) + + return + } + + // encode an empty list as [] rather than null so clients always receive an array + if jobs == nil { + jobs = []Job{} + } + + renderJSON(w, http.StatusOK, jobs) +} + +// apiGetJob replies with the job identified by the request's ID; any lookup error is reported as 404. +func (s *Server) apiGetJob(w http.ResponseWriter, r *http.Request) { + id, ok := getIDFromRequest(r) + if !ok { + apiErr := apiError{ + Code: http.StatusUnprocessableEntity, + Message: "Invalid ID", + } + + renderJSON(w, http.StatusUnprocessableEntity, apiErr) + + return + } + + job, err := s.svc.Get(r.Context(), id.String()) + if err != nil { + apiErr := apiError{ + Code: http.StatusNotFound, + Message: http.StatusText(http.StatusNotFound), + } + + renderJSON(w, http.StatusNotFound, apiErr) + + return + } + + renderJSON(w, http.StatusOK, job) +} + +// apiDeleteJob deletes the job identified by the request's ID and replies 200 with an empty body on success. +func (s *Server) apiDeleteJob(w http.ResponseWriter, r *http.Request) { + id, ok := getIDFromRequest(r) + if !ok { + apiErr := apiError{ + Code: http.StatusUnprocessableEntity, + Message: "Invalid ID", + } + + renderJSON(w, http.StatusUnprocessableEntity, apiErr) + + return + } + + err := s.svc.Delete(r.Context(), id.String()) + if err != nil { + apiErr := apiError{ + Code: http.StatusInternalServerError, + Message: err.Error(), + } + + renderJSON(w, http.StatusInternalServerError, apiErr) + + return + } + + w.WriteHeader(http.StatusOK) +} + +// renderJSON writes data as a JSON response with the given status code; the encode error is discarded since the status line has already been written. +func renderJSON(w http.ResponseWriter, code int, data any) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(code) + + _ = json.NewEncoder(w).Encode(data) +} + func formatDate(t time.Time) string { return t.Format("Jan 02, 2006 15:04:05") } + +// securityHeaders wraps next so that every response carries the hardening headers set below before the request is handled. +func securityHeaders(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r 
*http.Request) { + w.Header().Set("X-Content-Type-Options", "nosniff") + w.Header().Set("X-Frame-Options", "DENY") + w.Header().Set("X-XSS-Protection", "0") // legacy XSS auditors are deprecated and can themselves introduce vulnerabilities; the CSP below is the protection + w.Header().Set("Content-Security-Policy", + "default-src 'self'; "+ + "script-src 'self' cdn.redoc.ly cdnjs.cloudflare.com 'unsafe-inline' 'unsafe-eval'; "+ + "worker-src 'self' blob:; "+ + "style-src 'self' 'unsafe-inline' fonts.googleapis.com; "+ + "img-src 'self' data: cdn.redoc.ly; "+ + "font-src 'self' fonts.gstatic.com; "+ + "connect-src 'self'") + + next.ServeHTTP(w, r) + }) +}