From fa4bf1d76540e28dab4152e237ce571d9298cf4b Mon Sep 17 00:00:00 2001 From: Lucas Brown Date: Wed, 14 Aug 2019 01:18:21 -0800 Subject: [PATCH 1/8] Updates to readme files to fix some misspellings. --- README.md | 12 ++++++------ cmd/web-api/README.md | 2 +- cmd/web-app/README.md | 2 +- tools/devops/README.md | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 484939c..c5fb683 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ delivered to clients. a knowledge of a completely different expertise - DevOps. This project provides a complete continuous build pipeline that will push the code to production with minimal effort using serverless deployments to AWS Fargate with GitLab CI/CD. 5. Observability - Ensure the code is running as expected in a remote environment. This project implements Datadog to -facilitate exposing metrics, logs and request tracing to obversabe and validate your services are stable and responsive +facilitate exposing metrics, logs and request tracing to obverse and validate your services are stable and responsive for your clients (hopefully paying clients). @@ -71,7 +71,7 @@ facilitate exposing metrics, logs and request tracing to obversabe and validate The example project is a complete starter kit for building SasS with GoLang. It provides two example services: * Web App - Responsive web application to provide service to clients. Includes user signup and user authentication for direct client interaction via their web browsers. -* Web API - REST API with JWT authentication that renders results as JSON. This allows clients and other third-pary companies to develop deep +* Web API - REST API with JWT authentication that renders results as JSON. This allows clients and other third-party companies to develop deep integrations with the project. The example project also provides these tools: @@ -106,7 +106,7 @@ Accordingly, the project architecture is illustrated with the following diagram. With SaaS, a client subscribes to an online service you provide them. The example project provides functionality for clients to subscribe and then once subscribed they can interact with your software service. -The initial contributors to this project are building this saas-starter-kit based on their years of experience building enterprise B2B SaaS. Particularily, this saas-starter-kit is based on their most recent experience building the +The initial contributors to this project are building this saas-starter-kit based on their years of experience building enterprise B2B SaaS. Particularly, this saas-starter-kit is based on their most recent experience building the B2B SaaS for [standard operating procedure software](https://keeni.space) (written entirely in Golang). Please refer to the Keeni.Space website, its [SOP software pricing](https://keeni.space/pricing) and its signup process. The SaaS web app is then available at [app.keeni.space](https://app.keeni.space). They plan on leveraging this experience and build it into a simplified set @@ -175,7 +175,7 @@ $ git clone git@gitlab.com:geeks-accelerator/oss/saas-starter-kit.git $ cd saas-starter-kit/ ``` -If you have Go Modules enabled, you should be able compile the project locally. If you have Go Modulels disabled, see +If you have Go Modules enabled, you should be able compile the project locally. If you have Go Modules disabled, see the next section. @@ -386,7 +386,7 @@ Policy Document: { } ``` -Create a new user with programic access and directly attach it the policy `SaasStarterKitDevServices` +Create a new user with programmatic access and directly attach it the policy `SaasStarterKitDevServices` 4. Create a new docker-compose config file ```bash @@ -395,7 +395,7 @@ Create a new user with programic access and directly attach it the policy `SaasS 5. Update .env_docker_compose with the Access key ID and Secret access key -6. Update `.gitlab-ci.yml` with relevent details. +6. Update `.gitlab-ci.yml` with relevant details. ### Optional. Set AWS and Datadog Configs diff --git a/cmd/web-api/README.md b/cmd/web-api/README.md index 4aa4612..31eeb01 100644 --- a/cmd/web-api/README.md +++ b/cmd/web-api/README.md @@ -254,7 +254,7 @@ swag init ### Additional Swagger Annotations -Below are some additional example annotions that can be added to `main.go` +Below are some additional example annotations that can be added to `main.go` ```go // @title SaaS Example API // @description This provides a public API... diff --git a/cmd/web-app/README.md b/cmd/web-app/README.md index fb718d7..83f75b8 100644 --- a/cmd/web-app/README.md +++ b/cmd/web-app/README.md @@ -24,7 +24,7 @@ http://127.0.0.1:3000/ While the web-api service has significant functionality, this web-app service is still in development. Currently this web-app services only resizes -an image and displays resvised versions of it on the index page. See section below on Future Functionality. +an image and displays resized versions of it on the index page. See section below on Future Functionality. If you would like to help, please email twins@geeksinthewoods.com. diff --git a/tools/devops/README.md b/tools/devops/README.md index 1b1f8e2..7a6acad 100644 --- a/tools/devops/README.md +++ b/tools/devops/README.md @@ -26,7 +26,7 @@ in other configuration files. And since this project is open-source, we wanted t If you don't have an AWS account, signup for one now and then proceed with the deployment setup. -We assume that if you are deploying the SaaS Stater Kit, you are starting from scratch with no existing dependencies. +We assume that if you are deploying the SaaS Starter Kit, you are starting from scratch with no existing dependencies. This however, excludes any domain names that you would like to use for resolving your services publicly. To use any pre-purchased domain names, make sure they are added to Route 53 in the AWS account. Or you can let the deploy script create a new zone is Route 53 and update the DNS for the domain name when your ready to make the transition. It is From 8c28261fee80292826f0b87d7b9a74692c76b5b7 Mon Sep 17 00:00:00 2001 From: huyng Date: Thu, 15 Aug 2019 14:27:05 +0700 Subject: [PATCH 2/8] Update GetGeoNames and Migration functions. --- internal/geonames/geonames.go | 134 ++++++++++++++++++++++++++++++++++ internal/schema/migrations.go | 98 ++++++++++++++++++++----- 2 files changed, 214 insertions(+), 18 deletions(-) diff --git a/internal/geonames/geonames.go b/internal/geonames/geonames.go index 47a4e48..f184425 100644 --- a/internal/geonames/geonames.go +++ b/internal/geonames/geonames.go @@ -8,10 +8,13 @@ import ( "encoding/csv" "fmt" "io" + "net/http" "strconv" "strings" + "time" "geeks-accelerator/oss/saas-starter-kit/internal/platform/web/webcontext" + "github.com/huandu/go-sqlbuilder" "github.com/jmoiron/sqlx" "github.com/pkg/errors" @@ -325,3 +328,134 @@ func loadGeonameCountry(ctx context.Context, rr chan<- interface{}, country stri } } } + +// GetGeonameCountry downloads geoname data for the country. +// Parses data and returns slice of Geoname +func GetGeonameCountry(ctx context.Context, country string) ([]Geoname, error) { + res := make([]Geoname, 0) + var err error + var resp *http.Response + + u := fmt.Sprintf("http://download.geonames.org/export/zip/%s.zip", country) + resp, err = pester.Get(u) + if err != nil { + for i := 0; i < 3; i++ { + resp, err = pester.Get(u) + if err == nil { + break + } + time.Sleep(time.Second * 1) + } + if err != nil { + err = errors.WithMessagef(err, "Failed to read countries from '%s'", u) + return res, err + } + } + defer resp.Body.Close() + + br := bufio.NewReader(resp.Body) + + buff := bytes.NewBuffer([]byte{}) + size, err := io.Copy(buff, br) + if err != nil { + err = errors.WithStack(err) + return res, err + } + + b := bytes.NewReader(buff.Bytes()) + zr, err := zip.NewReader(b, size) + if err != nil { + err = errors.WithStack(err) + return res, err + } + + for _, f := range zr.File { + if f.Name == "readme.txt" { + continue + } + + fh, err := f.Open() + if err != nil { + err = errors.WithStack(err) + return res, err + } + + scanner := bufio.NewScanner(fh) + for scanner.Scan() { + line := scanner.Text() + + if strings.Contains(line, "\"") { + line = strings.Replace(line, "\"", "\\\"", -1) + } + + r := csv.NewReader(strings.NewReader(line)) + r.Comma = '\t' // Use tab-delimited instead of comma <---- here! + r.LazyQuotes = true + r.FieldsPerRecord = -1 + + lines, err := r.ReadAll() + if err != nil { + err = errors.WithStack(err) + continue + } + + for _, row := range lines { + + /* + fmt.Println("CountryCode: row[0]", row[0]) + fmt.Println("PostalCode: row[1]", row[1]) + fmt.Println("PlaceName: row[2]", row[2]) + fmt.Println("StateName: row[3]", row[3]) + fmt.Println("StateCode : row[4]", row[4]) + fmt.Println("CountyName: row[5]", row[5]) + fmt.Println("CountyCode : row[6]", row[6]) + fmt.Println("CommunityName: row[7]", row[7]) + fmt.Println("CommunityCode: row[8]", row[8]) + fmt.Println("Latitude: row[9]", row[9]) + fmt.Println("Longitude: row[10]", row[10]) + fmt.Println("Accuracy: row[11]", row[11]) + */ + + gn := Geoname{ + CountryCode: row[0], + PostalCode: row[1], + PlaceName: row[2], + StateName: row[3], + StateCode: row[4], + CountyName: row[5], + CountyCode: row[6], + CommunityName: row[7], + CommunityCode: row[8], + } + if row[9] != "" { + gn.Latitude, err = decimal.NewFromString(row[9]) + if err != nil { + err = errors.WithStack(err) + } + } + + if row[10] != "" { + gn.Longitude, err = decimal.NewFromString(row[10]) + if err != nil { + err = errors.WithStack(err) + } + } + + if row[11] != "" { + gn.Accuracy, err = strconv.Atoi(row[11]) + if err != nil { + err = errors.WithStack(err) + } + } + + res = append(res, gn) + } + } + + if err := scanner.Err(); err != nil { + err = errors.WithStack(err) + } + } + + return res, err +} diff --git a/internal/schema/migrations.go b/internal/schema/migrations.go index fe6a3c9..4523de1 100644 --- a/internal/schema/migrations.go +++ b/internal/schema/migrations.go @@ -9,6 +9,10 @@ import ( "strings" "geeks-accelerator/oss/saas-starter-kit/internal/geonames" + + "fmt" + "time" + "github.com/geeks-accelerator/sqlxmigrate" "github.com/jmoiron/sqlx" _ "github.com/lib/pq" @@ -240,33 +244,91 @@ func migrationList(ctx context.Context, db *sqlx.DB, log *log.Logger, isUnittest } } - q := "insert into geonames " + - "(country_code,postal_code,place_name,state_name,state_code,county_name,county_code,community_name,community_code,latitude,longitude,accuracy) " + - "values(?,?,?,?,?,?,?,?,?,?,?,?)" - q = db.Rebind(q) - stmt, err := db.Prepare(q) - if err != nil { - return errors.WithMessagef(err, "Failed to prepare sql query '%s'", q) - } - + countries := geonames.ValidGeonameCountries(context.Background()) if isUnittest { - } else { - resChan := make(chan interface{}) - go geonames.LoadGeonames(ctx, resChan) + } - for r := range resChan { - switch v := r.(type) { - case geonames.Geoname: - _, err = stmt.Exec(v.CountryCode, v.PostalCode, v.PlaceName, v.StateName, v.StateCode, v.CountyName, v.CountyCode, v.CommunityName, v.CommunityCode, v.Latitude, v.Longitude, v.Accuracy) + ncol := 12 + fn := func(geoNames []geonames.Geoname) error { + valueStrings := make([]string, 0, len(geoNames)) + valueArgs := make([]interface{}, 0, len(geoNames)*ncol) + for _, geoname := range geoNames { + valueStrings = append(valueStrings, "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") + + valueArgs = append(valueArgs, geoname.CountryCode) + valueArgs = append(valueArgs, geoname.PostalCode) + valueArgs = append(valueArgs, geoname.PlaceName) + + valueArgs = append(valueArgs, geoname.StateName) + valueArgs = append(valueArgs, geoname.StateCode) + valueArgs = append(valueArgs, geoname.CountyName) + + valueArgs = append(valueArgs, geoname.CountyCode) + valueArgs = append(valueArgs, geoname.CommunityName) + valueArgs = append(valueArgs, geoname.CommunityCode) + + valueArgs = append(valueArgs, geoname.Latitude) + valueArgs = append(valueArgs, geoname.Longitude) + valueArgs = append(valueArgs, geoname.Accuracy) + } + insertStmt := fmt.Sprintf("insert into geonames "+ + "(country_code,postal_code,place_name,state_name,state_code,county_name,county_code,community_name,community_code,latitude,longitude,accuracy) "+ + "VALUES %s", strings.Join(valueStrings, ",")) + insertStmt = db.Rebind(insertStmt) + + stmt, err := db.Prepare(insertStmt) + if err != nil { + return errors.WithMessagef(err, "Failed to prepare sql query '%s'", insertStmt) + } + + _, err = stmt.Exec(valueArgs...) + return err + } + start := time.Now() + for _, country := range countries { + //fmt.Println("LoadGeonames: start country: ", country) + v, err := geonames.GetGeonameCountry(context.Background(), country) + if err != nil { + return errors.WithStack(err) + } + //fmt.Println("Geoname records: ", len(v)) + + batch := 4500 + n := len(v) / batch + + //fmt.Println("Number of batch: ", n) + + if n == 0 { + err := fn(v) + if err != nil { + return errors.WithStack(err) + } + } else { + for i := 0; i < n; i++ { + vn := v[i*batch : (i+1)*batch] + err := fn(vn) + if err != nil { + return errors.WithStack(err) + } + if n > 0 && n%25 == 0 { + time.Sleep(200) + } + } + if len(v)%batch > 0 { + fmt.Println("Remain part: ", len(v)-n*batch) + vn := v[n*batch:] + err := fn(vn) if err != nil { return errors.WithStack(err) } - case error: - return v } } + + //fmt.Println("Insert Geoname took: ", time.Since(start)) + //fmt.Println("LoadGeonames: end country: ", country) } + fmt.Println("Total Geonames population took: ", time.Since(start)) queries := []string{ `create index idx_geonames_country_code on geonames (country_code)`, From 71713280729d79ee4207a3771cc96d574daa30de Mon Sep 17 00:00:00 2001 From: huyng Date: Thu, 15 Aug 2019 14:40:22 +0700 Subject: [PATCH 3/8] Use ctx param from outer function --- internal/schema/migrations.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/schema/migrations.go b/internal/schema/migrations.go index 4523de1..bab4c17 100644 --- a/internal/schema/migrations.go +++ b/internal/schema/migrations.go @@ -244,7 +244,7 @@ func migrationList(ctx context.Context, db *sqlx.DB, log *log.Logger, isUnittest } } - countries := geonames.ValidGeonameCountries(context.Background()) + countries := geonames.ValidGeonameCountries(ctx) if isUnittest { } From c61a934279b03fb77c87a2afdbff9fe7f7e46d79 Mon Sep 17 00:00:00 2001 From: huyng Date: Thu, 15 Aug 2019 14:46:44 +0700 Subject: [PATCH 4/8] Add more comment --- internal/geonames/geonames.go | 2 ++ internal/schema/migrations.go | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/internal/geonames/geonames.go b/internal/geonames/geonames.go index f184425..ae1c9c8 100644 --- a/internal/geonames/geonames.go +++ b/internal/geonames/geonames.go @@ -339,6 +339,8 @@ func GetGeonameCountry(ctx context.Context, country string) ([]Geoname, error) { u := fmt.Sprintf("http://download.geonames.org/export/zip/%s.zip", country) resp, err = pester.Get(u) if err != nil { + // Add re-try three times after failing first time + // This reduces the risk when network is lagy, we still have chance to re-try. for i := 0; i < 3; i++ { resp, err = pester.Get(u) if err == nil { diff --git a/internal/schema/migrations.go b/internal/schema/migrations.go index bab4c17..2ec3bec 100644 --- a/internal/schema/migrations.go +++ b/internal/schema/migrations.go @@ -293,7 +293,7 @@ func migrationList(ctx context.Context, db *sqlx.DB, log *log.Logger, isUnittest return errors.WithStack(err) } //fmt.Println("Geoname records: ", len(v)) - + // Max argument values of Postgres is about 54460. So the batch size for bulk insert is selected 4500*12 (ncol) batch := 4500 n := len(v) / batch From 83118e85ca67aac4d5858f2f276437a616c310e3 Mon Sep 17 00:00:00 2001 From: Lee Brown Date: Fri, 16 Aug 2019 14:52:57 -0800 Subject: [PATCH 5/8] Remove POD architecture from readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c5fb683..fc58cd3 100644 --- a/README.md +++ b/README.md @@ -489,7 +489,7 @@ For more details on this service, read [web-app readme](https://gitlab.com/geeks Schema is a minimalistic database migration helper that can manually be invoked via CLI. It provides schema versioning and migration rollback. -To support POD architecture, the schema for the entire project is defined globally and is located inside internal: +The schema for the entire project is defined globally and is located inside internal: [internal/schema](https://gitlab.com/geeks-accelerator/oss/saas-starter-kit/tree/master/internal/schema) Keeping a global schema helps ensure business logic can be decoupled across multiple packages. It is a firm belief that From c7106f089fae9638d91815244e2ac6299c91e757 Mon Sep 17 00:00:00 2001 From: Lee Brown Date: Fri, 16 Aug 2019 20:40:48 -0800 Subject: [PATCH 6/8] Copper Valley was here --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index fc58cd3..bfd2bdb 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ Copyright 2019, Geeks Accelerator twins@geeksaccelerator.com +Sponsored by Copper Valley Telecom The SaaS Starter Kit is a set of libraries for building scalable software-as-a-service (SaaS) applications that helps preventing both misuse and fraud. The goal of this project is to provide a proven starting point for new From 666eafceec47f4bd92897ec3f3bf9c0afbfb8da6 Mon Sep 17 00:00:00 2001 From: Lee Brown Date: Sat, 17 Aug 2019 10:58:45 -0800 Subject: [PATCH 7/8] Fix random errors from tests --- cmd/web-api/main.go | 7 +++---- cmd/web-app/handlers/projects.go | 2 +- cmd/web-app/handlers/users.go | 2 +- cmd/web-app/main.go | 9 ++++----- internal/mid/saas-swagger/example/main.go | 3 ++- internal/mid/saas-swagger/swagger_test.go | 3 ++- internal/platform/logger/log.go | 5 +++-- 7 files changed, 16 insertions(+), 15 deletions(-) diff --git a/cmd/web-api/main.go b/cmd/web-api/main.go index e331ead..08b3f51 100644 --- a/cmd/web-api/main.go +++ b/cmd/web-api/main.go @@ -66,10 +66,9 @@ func main() { // ========================================================================= // Logging - log.SetFlags(log.LstdFlags|log.Lmicroseconds|log.Lshortfile) - log.SetPrefix(service+" : ") - log := log.New(os.Stdout, log.Prefix() , log.Flags()) - + log.SetFlags(log.LstdFlags | log.Lmicroseconds | log.Lshortfile) + log.SetPrefix(service + " : ") + log := log.New(os.Stdout, log.Prefix(), log.Flags()) // ========================================================================= // Configuration diff --git a/cmd/web-app/handlers/projects.go b/cmd/web-app/handlers/projects.go index d3bda68..b10ed69 100644 --- a/cmd/web-app/handlers/projects.go +++ b/cmd/web-app/handlers/projects.go @@ -73,7 +73,7 @@ func (h *Projects) Index(ctx context.Context, w http.ResponseWriter, r *http.Req var v datatable.ColumnValue switch col.Field { case "id": - v.Value = fmt.Sprintf("%d", q.ID) + v.Value = fmt.Sprintf("%s", q.ID) case "name": v.Value = q.Name v.Formatted = fmt.Sprintf("%s", urlProjectsView(q.ID), v.Value) diff --git a/cmd/web-app/handlers/users.go b/cmd/web-app/handlers/users.go index e0b9526..637e16a 100644 --- a/cmd/web-app/handlers/users.go +++ b/cmd/web-app/handlers/users.go @@ -100,7 +100,7 @@ func (h *Users) Index(ctx context.Context, w http.ResponseWriter, r *http.Reques var v datatable.ColumnValue switch col.Field { case "id": - v.Value = fmt.Sprintf("%d", q.ID) + v.Value = fmt.Sprintf("%s", q.ID) case "name": if strings.TrimSpace(q.Name) == "" { v.Value = q.Email diff --git a/cmd/web-app/main.go b/cmd/web-app/main.go index 046b695..9b4e215 100644 --- a/cmd/web-app/main.go +++ b/cmd/web-app/main.go @@ -66,10 +66,9 @@ func main() { // ========================================================================= // Logging - log.SetFlags(log.LstdFlags|log.Lmicroseconds|log.Lshortfile) - log.SetPrefix(service+" : ") - log := log.New(os.Stdout, log.Prefix() , log.Flags()) - + log.SetFlags(log.LstdFlags | log.Lmicroseconds | log.Lshortfile) + log.SetPrefix(service + " : ") + log := log.New(os.Stdout, log.Prefix(), log.Flags()) // ========================================================================= // Configuration @@ -474,7 +473,7 @@ func main() { // URL Formatter projectRoutes, err := project_routes.New(cfg.Service.WebApiBaseUrl, cfg.Service.BaseUrl) if err != nil { - log.Fatalf("main : project routes : %+v", cfg.Service.BaseUrl, err) + log.Fatalf("main : project routes : %s : %+v", cfg.Service.BaseUrl, err) } // s3UrlFormatter is a help function used by to convert an s3 key to diff --git a/internal/mid/saas-swagger/example/main.go b/internal/mid/saas-swagger/example/main.go index 5f9f2ae..70e9625 100644 --- a/internal/mid/saas-swagger/example/main.go +++ b/internal/mid/saas-swagger/example/main.go @@ -2,6 +2,7 @@ package main import ( "context" + "geeks-accelerator/oss/saas-starter-kit/internal/platform/web/webcontext" "log" "net/http" "os" @@ -135,7 +136,7 @@ func main() { func API(shutdown chan os.Signal, log *log.Logger) http.Handler { // Construct the web.App which holds all routes as well as common Middleware. - app := web.NewApp(shutdown, log, mid.Trace(), mid.Logger(log), mid.Errors(log), mid.Metrics(), mid.Panics()) + app := web.NewApp(shutdown, log, webcontext.Env_Dev, mid.Logger(log)) app.Handle("GET", "/swagger/", saasSwagger.WrapHandler) app.Handle("GET", "/swagger/*", saasSwagger.WrapHandler) diff --git a/internal/mid/saas-swagger/swagger_test.go b/internal/mid/saas-swagger/swagger_test.go index e533de0..ea2e037 100644 --- a/internal/mid/saas-swagger/swagger_test.go +++ b/internal/mid/saas-swagger/swagger_test.go @@ -9,6 +9,7 @@ import ( _ "geeks-accelerator/oss/saas-starter-kit/internal/mid/saas-swagger/example/docs" "geeks-accelerator/oss/saas-starter-kit/internal/platform/web" + "geeks-accelerator/oss/saas-starter-kit/internal/platform/web/webcontext" "github.com/stretchr/testify/assert" ) @@ -17,7 +18,7 @@ func TestWrapHandler(t *testing.T) { log := log.New(os.Stdout, "", log.LstdFlags|log.Lmicroseconds|log.Lshortfile) log.SetOutput(ioutil.Discard) - app := web.NewApp(nil, log) + app := web.NewApp(nil, log, webcontext.Env_Dev) app.Handle("GET", "/swagger/*", WrapHandler) w1 := performRequest("GET", "/swagger/index.html", app) diff --git a/internal/platform/logger/log.go b/internal/platform/logger/log.go index 58a7fb5..ba5b8e5 100644 --- a/internal/platform/logger/log.go +++ b/internal/platform/logger/log.go @@ -3,12 +3,13 @@ package logger import ( "context" "fmt" - "geeks-accelerator/oss/saas-starter-kit/internal/platform/web" + + "geeks-accelerator/oss/saas-starter-kit/internal/platform/web/webcontext" ) // WithContext manual injects context values to log message including Trace ID func WithContext(ctx context.Context, msg string) string { - v, ok := ctx.Value(web.KeyValues).(*web.Values) + v, ok := ctx.Value(webcontext.KeyValues).(*webcontext.Values) if !ok { return msg } From 295e46a885eed2328b5a0b8f00d83cdb7d281d5a Mon Sep 17 00:00:00 2001 From: Lee Brown Date: Sat, 17 Aug 2019 11:15:45 -0800 Subject: [PATCH 8/8] Cache geonames download --- internal/geonames/geonames.go | 218 ++++++++-------------------------- internal/schema/migrations.go | 12 +- 2 files changed, 51 insertions(+), 179 deletions(-) diff --git a/internal/geonames/geonames.go b/internal/geonames/geonames.go index ae1c9c8..c7a7bc9 100644 --- a/internal/geonames/geonames.go +++ b/internal/geonames/geonames.go @@ -5,10 +5,13 @@ import ( "bufio" "bytes" "context" + "crypto/md5" "encoding/csv" "fmt" "io" "net/http" + "os" + "path/filepath" "strconv" "strings" "time" @@ -191,144 +194,6 @@ func FindGeonameRegions(ctx context.Context, dbConn *sqlx.DB, orderBy, where str return resp, nil } -// LoadGeonames enables streaming retrieval of GeoNames. The downloaded results -// will be written to the interface{} resultReceiver channel enabling processing the results while -// they're still being fetched. After all pages have been processed the channel is closed. -// Possible types sent to the channel are limited to: -// - error -// - GeoName -func LoadGeonames(ctx context.Context, rr chan<- interface{}, countries ...string) { - defer close(rr) - - if len(countries) == 0 { - countries = ValidGeonameCountries(ctx) - } - - for _, country := range countries { - loadGeonameCountry(ctx, rr, country) - } -} - -// loadGeonameCountry enables streaming retrieval of GeoNames. The downloaded results -// will be written to the interface{} resultReceiver channel enabling processing the results while -// they're still being fetched. -// Possible types sent to the channel are limited to: -// - error -// - GeoName -func loadGeonameCountry(ctx context.Context, rr chan<- interface{}, country string) { - u := fmt.Sprintf("http://download.geonames.org/export/zip/%s.zip", country) - resp, err := pester.Get(u) - if err != nil { - rr <- errors.WithMessagef(err, "Failed to read countries from '%s'", u) - return - } - defer resp.Body.Close() - - br := bufio.NewReader(resp.Body) - - buff := bytes.NewBuffer([]byte{}) - size, err := io.Copy(buff, br) - if err != nil { - rr <- errors.WithStack(err) - return - } - - b := bytes.NewReader(buff.Bytes()) - zr, err := zip.NewReader(b, size) - if err != nil { - rr <- errors.WithStack(err) - return - } - - for _, f := range zr.File { - if f.Name == "readme.txt" { - continue - } - - fh, err := f.Open() - if err != nil { - rr <- errors.WithStack(err) - return - } - - scanner := bufio.NewScanner(fh) - for scanner.Scan() { - line := scanner.Text() - - if strings.Contains(line, "\"") { - line = strings.Replace(line, "\"", "\\\"", -1) - } - - r := csv.NewReader(strings.NewReader(line)) - r.Comma = '\t' // Use tab-delimited instead of comma <---- here! - r.LazyQuotes = true - r.FieldsPerRecord = -1 - - lines, err := r.ReadAll() - if err != nil { - rr <- errors.WithStack(err) - continue - } - - for _, row := range lines { - - /* - fmt.Println("CountryCode: row[0]", row[0]) - fmt.Println("PostalCode: row[1]", row[1]) - fmt.Println("PlaceName: row[2]", row[2]) - fmt.Println("StateName: row[3]", row[3]) - fmt.Println("StateCode : row[4]", row[4]) - fmt.Println("CountyName: row[5]", row[5]) - fmt.Println("CountyCode : row[6]", row[6]) - fmt.Println("CommunityName: row[7]", row[7]) - fmt.Println("CommunityCode: row[8]", row[8]) - fmt.Println("Latitude: row[9]", row[9]) - fmt.Println("Longitude: row[10]", row[10]) - fmt.Println("Accuracy: row[11]", row[11]) - */ - - gn := Geoname{ - CountryCode: row[0], - PostalCode: row[1], - PlaceName: row[2], - StateName: row[3], - StateCode: row[4], - CountyName: row[5], - CountyCode: row[6], - CommunityName: row[7], - CommunityCode: row[8], - } - if row[9] != "" { - gn.Latitude, err = decimal.NewFromString(row[9]) - if err != nil { - rr <- errors.WithStack(err) - } - } - - if row[10] != "" { - gn.Longitude, err = decimal.NewFromString(row[10]) - if err != nil { - rr <- errors.WithStack(err) - } - } - - if row[11] != "" { - gn.Accuracy, err = strconv.Atoi(row[11]) - if err != nil { - rr <- errors.WithStack(err) - } - } - - rr <- gn - } - } - - if err := scanner.Err(); err != nil { - rr <- errors.WithStack(err) - } - } -} - // GetGeonameCountry downloads geoname data for the country. // Parses data and returns slice of Geoname func GetGeonameCountry(ctx context.Context, country string) ([]Geoname, error) { @@ -337,25 +202,51 @@ func GetGeonameCountry(ctx context.Context, country string) ([]Geoname, error) { var resp *http.Response u := fmt.Sprintf("http://download.geonames.org/export/zip/%s.zip", country) - resp, err = pester.Get(u) - if err != nil { - // Add re-try three times after failing first time - // This reduces the risk when network is lagy, we still have chance to re-try. - for i := 0; i < 3; i++ { - resp, err = pester.Get(u) - if err == nil { - break - } - time.Sleep(time.Second * 1) - } - if err != nil { - err = errors.WithMessagef(err, "Failed to read countries from '%s'", u) - return res, err - } - } - defer resp.Body.Close() - br := bufio.NewReader(resp.Body) + h := fmt.Sprintf("%x", md5.Sum([]byte(u))) + cp := filepath.Join(os.TempDir(), h+".zip") + + if _, err := os.Stat(cp); err != nil { + resp, err = pester.Get(u) + if err != nil { + // Add re-try three times after failing first time + // This reduces the risk when network is lagy, we still have chance to re-try. + for i := 0; i < 3; i++ { + resp, err = pester.Get(u) + if err == nil { + break + } + time.Sleep(time.Second * 1) + } + if err != nil { + err = errors.WithMessagef(err, "Failed to read countries from '%s'", u) + return res, err + } + } + defer resp.Body.Close() + + // Create the file + out, err := os.Create(cp) + if err != nil { + return nil, err + } + defer out.Close() + + // Write the body to file + _, err = io.Copy(out, resp.Body) + if err != nil { + return nil, err + } + + out.Close() + } + + f, err := os.Open(cp) + if err != nil { + return nil, err + } + defer f.Close() + br := bufio.NewReader(f) buff := bytes.NewBuffer([]byte{}) size, err := io.Copy(buff, br) @@ -403,21 +294,6 @@ func GetGeonameCountry(ctx context.Context, country string) ([]Geoname, error) { for _, row := range lines { - /* - fmt.Println("CountryCode: row[0]", row[0]) - fmt.Println("PostalCode: row[1]", row[1]) - fmt.Println("PlaceName: row[2]", row[2]) - fmt.Println("StateName: row[3]", row[3]) - fmt.Println("StateCode : row[4]", row[4]) - fmt.Println("CountyName: row[5]", row[5]) - fmt.Println("CountyCode : row[6]", row[6]) - fmt.Println("CommunityName: row[7]", row[7]) - fmt.Println("CommunityCode: row[8]", row[8]) - fmt.Println("Latitude: row[9]", row[9]) - fmt.Println("Longitude: row[10]", row[10]) - fmt.Println("Accuracy: row[11]", row[11]) - */ - gn := Geoname{ CountryCode: row[0], PostalCode: row[1], diff --git a/internal/schema/migrations.go b/internal/schema/migrations.go index 2ec3bec..3fa9e87 100644 --- a/internal/schema/migrations.go +++ b/internal/schema/migrations.go @@ -217,7 +217,7 @@ func migrationList(ctx context.Context, db *sqlx.DB, log *log.Logger, isUnittest }, // Load new geonames table. { - ID: "20190731-02h", + ID: "20190731-02l", Migrate: func(tx *sql.Tx) error { schemas := []string{ @@ -246,7 +246,7 @@ func migrationList(ctx context.Context, db *sqlx.DB, log *log.Logger, isUnittest countries := geonames.ValidGeonameCountries(ctx) if isUnittest { - + countries = []string{"US"} } ncol := 12 @@ -287,7 +287,6 @@ func migrationList(ctx context.Context, db *sqlx.DB, log *log.Logger, isUnittest } start := time.Now() for _, country := range countries { - //fmt.Println("LoadGeonames: start country: ", country) v, err := geonames.GetGeonameCountry(context.Background(), country) if err != nil { return errors.WithStack(err) @@ -316,7 +315,7 @@ func migrationList(ctx context.Context, db *sqlx.DB, log *log.Logger, isUnittest } } if len(v)%batch > 0 { - fmt.Println("Remain part: ", len(v)-n*batch) + log.Println("Remain part: ", len(v)-n*batch) vn := v[n*batch:] err := fn(vn) if err != nil { @@ -324,11 +323,8 @@ func migrationList(ctx context.Context, db *sqlx.DB, log *log.Logger, isUnittest } } } - - //fmt.Println("Insert Geoname took: ", time.Since(start)) - //fmt.Println("LoadGeonames: end country: ", country) } - fmt.Println("Total Geonames population took: ", time.Since(start)) + log.Println("Total Geonames population took: ", time.Since(start)) queries := []string{ `create index idx_geonames_country_code on geonames (country_code)`,