
references grossly re-worked

Sergey Konstantinov
2025-02-01 21:23:37 +02:00
parent 472c42482e
commit fb3bf0a6d0
32 changed files with 439 additions and 205 deletions


@@ -1,10 +1,7 @@
import { resolve } from 'node:path';
import { readdir, writeFile } from 'node:fs/promises';
import { statSync } from 'node:fs';
import { Path, Structure } from './builder-model';
import {
CustomTemplates,
Example,
@@ -12,7 +9,7 @@ import {
linker,
shareLink,
toc
} from './templates';
export const buildLanding = async ({
structure,


@@ -8,10 +8,10 @@ import {
plugins,
LogLevel,
applyAstPluginToStructure,
L10n
} from './builder-model';
import { buildLanding } from './build-landing';
import { CustomTemplates, ExtraStrings } from './templates';
const SRC = resolve('./src') as Path;
const LOCALES: { [language: string]: string } = {

scripts/builder-model.ts Normal file

@@ -0,0 +1,16 @@
export {
AImgParams,
DefaultTemplates,
escapeHtml,
HtmlString,
Bibliography,
init,
Path,
plugins,
LogLevel,
applyAstPluginToStructure,
L10n,
Section,
Strings,
Structure
} from '@twirl/book-builder';


@@ -9,7 +9,7 @@ import {
Section,
Strings,
Structure
} from './builder-model';
import { resolve } from 'node:path';
export interface ExtraStrings extends Strings {


@@ -1,4 +1,10 @@
{
"bellemare-event-driven-microservices": {
"authors": "Bellemare, A.",
"publicationDate": "2020",
"title": "Building Event-Driven Microservices",
"hrefs": ["isbn:9781492057895"]
},
"birrel-nelson-implementing-rpc": {
"authors": "Birrell, A. D., Nelson, B. J.",
"publicationDate": "1984",
@@ -34,6 +40,18 @@
"title": "High Performance Browser Networking",
"hrefs": ["isbn:9781449344764", "https://hpbn.co"]
},
"gorton-scalable-systems": {
"authors": "Gorton, I.",
"publicationDate": "2022",
"title": "Foundations of Scalable Systems",
"hrefs": ["isbn:9781098106065"]
},
"gourley-totty-http": {
"authors": "Gourley D., Totty, B.",
"publicationDate": "2002",
"title": ["HTTP: The Definitive Guide"],
"hrefs": ["isbn:9781565925090"]
},
"hoffman-web-application-security": {
"authors": "Hoffman, A.",
"publicationDate": "2024",
@@ -41,16 +59,76 @@
"subtitle": ["Second Edition"],
"hrefs": ["isbn:9781098143930"]
},
"kleppmann-data-intensive-applications": {
"authors": "Kleppmann, M.",
"publicationDate": "2017",
"title": "Designing Data-Intensive Applications",
"hrefs": ["isbn:9781449373320"]
},
"kung-robinson-occ": {
"authors": "Kung, H. T., Robinson, J. T.",
"publicationDate": "1981",
"title": "On Optimistic Methods for Concurrency Control",
"subtitle": [
"ACM Transactions on Database Systems,",
"Vol. 6, No. 2, June 1981, Pages 213-226"
],
"hrefs": ["https://dl.acm.org/doi/10.1145/319566.319567"]
},
"madden-api-security-in-action": {
"authors": "Madden, N.",
"publicationDate": "2020",
"title": "API Security in Action",
"hrefs": ["isbn:9781617296024"]
},
"martin-functional-design": {
"authors": "Martin, R. C.",
"publicationDate": "2023",
"title": "Functional Design: Principles, Patterns, and Practices",
"hrefs": ["isbn:9780138176518"]
},
"mcconnell-code-complete": {
"authors": "McConnell, S. C.",
"publicationDate": "2004",
"title": "Code Complete, Second Edition",
"hrefs": ["isbn:9780735619678"]
},
"nelson-rpc": {
"authors": "Nelson, B. J.",
"publicationDate": "1981",
"title": "Remote Procedure Call",
"hrefs": ["https://dl.acm.org/doi/10.5555/910306"]
},
"shapiro-et-al-crdt": {
"authors": "Shapiro, M., Preguiça, N., Baquero, C., Zawirski, M.",
"publicationDate": "2011",
"title": "Conflict-Free Replicated Data Types",
"subtitle": [
"13th International Conference Stabilization, Safety, and Security of Distributed Systems",
"page 386-400"
],
"hrefs": [
"https://link.springer.com/chapter/10.1007/978-3-642-24550-3_29"
]
},
"steen-tanenbaum-distributed-systems": {
"authors": "Van Steen, M., Tanenbaum A.",
"publicationDate": "2024",
"title": "Distributed Systems 4th edition",
"hrefs": [
"isbn:9789081540629",
"https://www.distributed-systems.net/index.php/books/ds4/"
]
},
"stevens-unix-network-programming-2": {
"authors": "Stevens, W. R.",
"publicationDate": "1990",
"title": "UNIX Network Programming",
"subtitle": [
"Interprocess Communication",
"Volume 2",
"Second Edition"
],
"hrefs": ["isbn:0130810819"]
}
}


@@ -1,13 +1,13 @@
### On the Structure of This Book
The book you're holding in your hands is dedicated to developing APIs as a separate engineering task. Although many concepts we're going to discuss apply to any type of software, our primary goal is to describe those problems and approaches to solving them that are most relevant in the context of the API subject area.
The book comprises the Introduction and six large sections. The first three (namely, “The API Design”, “The API Patterns”, and “The Backward Compatibility”) are fully abstract and not bound to any concrete technology. We hope they will help those readers who seek to build a systematic understanding of the API architecture in developing complex interface hierarchies. The proposed approach, as we see it, allows for designing APIs from start to finish, from a raw idea to concrete implementation.
The fourth and fifth sections are dedicated to specific technologies, namely developing HTTP APIs (in the “REST paradigm”) and SDKs (we will mostly talk about UI component libraries).
Finally, in the sixth section, which is the least technical of all, we will discuss APIs as products and focus on non-engineering aspects of the API lifecycle: doing market research, positioning the service, communicating to consumers, setting KPIs for the team, etc. We insist that the last section is equally important to both PMs and software engineers as products for developers thrive only if the product and technical teams work jointly on them.
We expect that the reader possesses expertise in software engineering, so we do not provide detailed definitions and explanations of the terms that a developer should already be familiar with in our understanding. Without this knowledge, it will be rather uncomfortable to read the last section of the book (and even more so, other sections). We sincerely apologize for this but that's the only way of writing the book without tripling its size. We provide the list of recommended readings in the “[Bibliography](#bibliography)” section.
Let's start.


@@ -1,4 +1,4 @@
### The API Definition
Before we start talking about the API design, we need to explicitly define what the API is. Encyclopedias tell us that “API” is an acronym for “Application Program Interface.” This definition is fine but useless, much like the “Man” definition by Plato: “Man stands upright on two legs without feathers.” This definition is fine again, but it gives us no understanding of what's so important about a Man. (Actually, it's not even “fine”: Diogenes of Sinope once brought a plucked chicken, saying “That's Plato's Man.” And Plato had to add “with broad nails” to his definition.)
@@ -22,4 +22,6 @@ What differs between a Roman aqueduct and a good API is that in the case of APIs
An aqueduct also illustrates another problem with the API design: your customers are engineers themselves. You are not supplying water to end-users. Suppliers are plugging their pipes into your engineering structure, building their own structures upon it. On the one hand, you may provide access to water to many more people through them, not spending your time plugging each individual house into your network. On the other hand, you can't control the quality of suppliers' solutions, and you are to blame every time there is a water problem caused by their incompetence.
The situation with API design becomes even more complicated when we acknowledge that modern APIs are typically interfaces to *distributed systems*. There is no single aqueduct but rather a collection of connections between multiple sources and destinations, often established on-demand — and your task is to make these connections work *coherently* so that clients don't even need to know how complex this water distribution architecture is internally.
That's why designing an API implies a larger area of responsibility. **An API is a multiplier to both your opportunities and your mistakes**.


@@ -1,13 +1,19 @@
### The API Contexts Pyramid
The approach we use to design APIs comprises four steps:
* Defining an application field
* Separating abstraction levels
* Isolating responsibility areas
* Describing final interfaces.
This four-step algorithm actually builds an API from top to bottom, from common requirements and use case scenarios down to a refined nomenclature of entities. In fact, moving this way will eventually conclude with a ready-to-use API, and that's why we value this approach highly.
It might seem that the most useful pieces of advice are given in the last chapter, but that's not true. The cost of a mistake made at certain levels differs. Fixing the naming is simple; revising the wrong understanding of what the API stands for is practically impossible.
Here and throughout we will illustrate the API design concepts using a hypothetical example of an API that allows ordering a cup of coffee in city cafes. Just in case: this example is totally synthetic. If we were to design such an API in the real world, it would probably have very little in common with our fictional example.
**NB**. A knowledgeable reader might notice that the approach we discuss is quite similar to the concept of “Levels of Design” proposed by Steve McConnell in his definitive book.[ref:mcconnell-code-complete 5.2 Key Design Concepts]() This is both true and not true at the same time. On one hand, as APIs are software, all the classical architecture design patterns work for them, including those described by McConnell. On the other hand, there is a major difference between exposing APIs and working on shared code: you only provide *the contract* to customers, as they are unable and/or unwilling to check the code itself. This shifts the focus significantly, starting from the very first McConnell's design level: while it is your number-one task to split the grand design into subsystems when you develop a software project as an architect, it is often undesirable to provide the notion of your subsystem split in the API, as API consumers do not need to know about it. In the following chapters, we will focus on providing a well-designed nomenclature of entities that is both convenient for external developers and allows for implementing efficient architecture under the hood.


@@ -1,4 +1,4 @@
### Isolating Responsibility Areas
In the previous chapter, we concluded that the hierarchy of abstractions in our hypothetical project would comprise:
* The user level (the entities formulated in terms understandable by users and acted upon by them: orders, coffee recipes)
@@ -210,7 +210,9 @@ It is also worth mentioning that unresolvable errors are useless to a user at th
From our own API development experience, we can tell without a doubt that the greatest final interface design mistake (and the greatest developer's pain accordingly) is the excessive overloading of entities' interfaces with fields, methods, events, parameters, and other attributes.
Meanwhile, there is the “Golden Rule” of interface design (applicable not only to APIs but almost to anything): humans can comfortably keep 7±2 entities in short-term memory. Manipulating a larger number of chunks complicates things for most humans. The rule is also known as Miller's Law.[ref Miller's Law](https://en.wikipedia.org/wiki/Working_memory#Capacity)
**NB**. The law shouldn't be taken literally, as its direct applicability to human cognition in general and software engineering in particular is quite controversial. Still, many influential works (such as the foundational research by Victor Basili, Lionel Briand, and Walcelio Melo[ref Basili, V., Briand, L., Melo, W. (1996) A validation of object-oriented design metrics as quality indicators](https://ieeexplore.ieee.org/document/544352/) and its numerous follow-ups by other authors) claim that an increased number of methods in classes and analogous metrics indicate poor code quality. While the exact numbers are debatable, we envision the “7±2” rule as good guidance.
The only possible method of overcoming this law is decomposition. Entities should be grouped under a single designation at every concept level of the API so that developers never have to operate on more than a reasonable amount of entities (let's say, ten) at a time.
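For illustration, a purely hypothetical sketch of such a grouping in the spirit of our coffee example (all names are made up):
```typescript
// Instead of a flat entity exposing a dozen
// unrelated attributes at the same level…
interface FlatOrder {
    coffeeMachineId: string;
    recipeId: string;
    volume: string;
    placeLatitude: number;
    placeLongitude: number;
    cardNumber: string;
    cardExpiry: string;
    // …and so on
}
// …group related attributes under designated sub-entities,
// so that developers deal with three concepts at this
// level, not with ten or more:
interface Order {
    beverage: {
        coffeeMachineId: string;
        recipeId: string;
        volume: string;
    };
    place: { latitude: number; longitude: number };
    payment: { cardNumber: string; expiry: string };
}
```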


@@ -890,11 +890,11 @@ However, be warned: clients are bad at implementing idempotency tokens. Two comm
If the author of this book were given a dollar each time he had to implement an additional security protocol invented by someone, he would be retired by now. API developers' inclination to create new signing procedures for requests or complex schemes of exchanging passwords for tokens is both obvious and meaningless.
**First**, there is no need to reinvent the wheel when it comes to security-enhancing procedures for various operations. All the algorithms you need are already invented, just adopt and implement them. No self-invented algorithm for request signature checking can provide the same level of protection against a Manipulator-in-the-middle (*MitM*) attack[ref Manipulator-in-the-middle Attack](https://owasp.org/www-community/attacks/Manipulator-in-the-middle_attack) as a mutual TLS authentication with certificate pinning.[ref:madden-api-security-in-action 11.4 Mutual TLS authentication]()
**Second**, assuming oneself to be an expert in security is presumptuous and dangerous. New attack vectors emerge daily, and staying fully aware of all actual threats is a full-time job. If you do something different during workdays, the security system you design will contain vulnerabilities that you have never heard about — for example, your password-checking algorithm might be susceptible to a timing attack[ref Timing Attack](https://en.wikipedia.org/wiki/Timing_attack) or your webserver might be vulnerable to a request splitting attack.[ref HTTP Request Splitting](https://capec.mitre.org/data/definitions/105.html)
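As an illustration of the timing attack vector: a minimal Node.js sketch of the standard mitigation, comparing secrets in constant time instead of relying on an early-exit equality check (the function names are ours):
```typescript
import { createHash, timingSafeEqual } from 'node:crypto';
// A naive check returns as soon as the first characters
// diverge, so the response time leaks how long the
// matching prefix is:
const naiveCheck = (token: string, expected: string) =>
    token === expected;
// Hashing both values first equalizes their lengths, and
// timingSafeEqual compares the digests in constant time:
const safeCheck = (token: string, expected: string) =>
    timingSafeEqual(
        createHash('sha256').update(token).digest(),
        createHash('sha256').update(expected).digest()
    );
```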
The OWASP Foundation compiles a list of the most common vulnerabilities in APIs every year,[ref OWASP API Security Project](https://owasp.org/www-project-api-security/) which we strongly recommend studying. We also recommend a definitive work by Andrew Hoffman[ref:hoffman-web-application-security Web Application Security. Second Edition]() for everyone interested in Web security.
And just in case: all APIs must be provided over TLS 1.2 or higher (preferably 1.3).


@@ -1,10 +1,10 @@
### On Design Patterns in the API Context
The concept of “patterns” in the field of software engineering was introduced by Kent Beck and Ward Cunningham in 1987[ref Software Design Pattern. History](https://en.wikipedia.org/wiki/Software_design_pattern#History) and popularized by “The Gang of Four” (Erich Gamma, Richard Helm, Ralph Johnson, and John Vlissides) in their book “Design Patterns: Elements of Reusable Object-Oriented Software,” which was published in 1994.[ref:gang-of-four-patterns Design Patterns. Elements of Reusable Object-Oriented Software]() According to the most widespread definition, a software design pattern is a “general, reusable solution to a commonly occurring problem within a given context.”
If we talk about APIs, especially those to which developers are end users (e.g., frameworks or operating system interfaces), the classical software design patterns are well applicable to them. Indeed, many examples in the previous Section of this book are just about applying some design patterns.
However, if we try to extend this approach to include API development in general (which, let us remind the reader, is typically about building interfaces to *distributed systems*), we will soon find that many typical API design issues are high-level and can't be reduced to basic software patterns. Let's say, caching resources (and invalidating the cache) or organizing paginated access are not covered in classical writings.
In this Section, we will specify those API design problems that we see as the most important ones. We are not aiming to encompass *every* problem, let alone every solution, and rather focus on describing approaches to solving typical problems with their pros and cons. We do understand that readers familiar with the works of “The Gang of Four,” Grady Booch, and Martin Fowler might expect a more systematic approach and greater depth of outreach from a section called “The API Patterns,” and we apologize to them in advance.


@@ -1,25 +1,30 @@
### Synchronization Strategies
Let's proceed to the technical problems that API developers face. We begin with the last one described in the introductory chapter: the distributed nature of modern software, which gives rise to the problem of synchronizing shared state. Let us imagine that a user creates a request to order coffee through our API. While this request travels from the client to the coffee house and back, many things might happen. Consider the following chain of events:
1. The client sends the order creation request
2. Because of network issues, the request propagates to the server very slowly, and the client gets a timeout
    * Therefore, the client does not know whether the query was served or not.
3. The client requests the current state of the system and gets an empty response as the initial request still hasn't reached the server:
```typescript
let pendingOrders = await
    api.getOngoingOrders(); // → []
```
4. The server finally gets the initial request for creating an order and serves it.
5. The client, being unaware of this, tries to create an order anew.
As the operations of reading the list of ongoing orders and of creating a new order happen at different moments of time, we can't guarantee that the system state hasn't changed in between. This might happen if the application backend state is replicated (i.e., the second request reads data from a different node of the data storage) or if the customer uses two client devices simultaneously. In other words, we encountered the classical problem of *state synchronization* in distributed systems. To solve this issue, we need to select a *consistency model*[ref Consistency model|ref:steen-tanenbaum-distributed-systems 7.3 Client-centric consistency models](https://en.wikipedia.org/wiki/Consistency_model) for our application and implement some *synchronization strategy*.
As clients are your customers, it is highly desirable to provide them with an API of the highest degree of robustness — *strong consistency*,[ref Strong Consistency|ref:gorton-scalable-systems Chapter 12. Strong Consistency](https://en.wikipedia.org/wiki/Strong_consistency) which guarantees that all clients read the most recent writes. It is not universally possible, and we will discuss relaxing this constraint in the following chapters. However, with APIs the rule of thumb is: if you can provide strongly consistent interfaces, do it.
There are two main approaches to solving this problem: the pessimistic one (implementing locks in the API) and the optimistic one (resource versioning).
**NB**: Generally speaking, the best solution is not having the issue at all. Let's say, if your API is idempotent, the duplicating calls are not a problem. However, in the real world, not every operation is idempotent; for example, creating new orders is not. We might add mechanisms to prevent *automatic* retries (such as client-generated idempotency tokens) but we can't forbid users from just creating a second identical order.
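A minimal sketch of the client-generated idempotency token technique (the `api` stub here is an assumption; the `X-Idempotency-Token` header itself appears in the book's later examples):
```typescript
import { randomUUID } from 'node:crypto';
// Hypothetical client stub
declare const api: {
    createOrder(
        order: unknown,
        opts: { headers: Record<string, string> }
    ): Promise<{ orderId: string }>;
};
async function createOrderWithRetries(order: unknown) {
    // The token is generated once per *logical* operation
    // and reused across all automatic retries, so the
    // server can deduplicate them…
    const token = randomUUID();
    for (let attempt = 0; ; attempt++) {
        try {
            return await api.createOrder(order, {
                headers: { 'X-Idempotency-Token': token }
            });
        } catch (e) {
            if (attempt >= 2) throw e;
        }
    }
}
// …but if the user deliberately repeats the order, a new
// token is generated, and the server can't tell a duplicate
// from a genuine second order.
```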
#### API Locks
@@ -29,9 +34,9 @@ The first approach is to literally implement standard synchronization primitives
```typescript
let lock;
try {
    // Capture the exclusive
    // right to manipulate orders
    lock = await api.
        acquireLock(ORDERS_ACCESS);
    // Get the list of current orders
    // known to the system
    let pendingOrders = await
```
@@ -50,16 +55,19 @@ try {
```typescript
}
```
This solution is quite similar to using mutexes to avoid race conditions in multithreaded systems,[ref Lock|ref:stevens-unix-network-programming-2 Chapter 7. Mutexes and Condition Variables](https://en.wikipedia.org/wiki/Lock_(computer_science)) just exposed via a formal API. Rather unsurprisingly, this approach sees very rare use in distributed client-server APIs because of the plethora of related problems:
1. Waiting for acquiring a lock introduces new latencies to the interaction that are hardly predictable and might potentially be quite significant.
2. The locks themselves [i.e., the storage for lock identifiers and its API] constitute a separate subsystem of their own and require additional effort from the API vendor to implement it.
3. As it's partners who develop client code, we can't guarantee it always works with locks correctly. Inevitably, “lost” locks will occur in the system, and that means we need to provide some tools to partners so they can find the problem and debug it.
4. A certain granularity of locks is to be developed so that partners can't affect each other. We are lucky if there are natural boundaries for a lock — for example, if it's limited to a specific user in the specific partner's system. If we are not so lucky (let's say all partners share the same user profile), we will have to develop even more complex systems to deal with potential errors in the partners' code — for example, introduce locking quotas.
#### Optimistic Concurrency Control
A less implementation-heavy approach is to develop an *optimistic concurrency control*[ref Optimistic concurrency control|ref:kung-robinson-occ](https://en.wikipedia.org/wiki/Optimistic_concurrency_control) system, i.e., to require clients to pass a flag proving they know the actual state of a shared resource.
```typescript
// Retrieve the state
```
@@ -85,10 +93,10 @@ try {
```typescript
}
```
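The diff elides most of the snippet above; a minimal sketch of the flow it illustrates, with the endpoint names assumed from the surrounding text, might look like this:
```typescript
// Hypothetical client stub
declare const api: {
    getOrderState(): Promise<{ version: number }>;
    createOrder(version: number, order: unknown): Promise<unknown>;
    ErrorTypes: { INCORRECT_VERSION: string };
};
async function createOrderOptimistically(order: unknown) {
    // Retrieve the state; the version is a part of it
    const { version } = await api.getOrderState();
    try {
        // The server creates the order only if the resource
        // version hasn't changed since it was read
        return await api.createOrder(version, order);
    } catch (e) {
        if ((e as any).type === api.ErrorTypes.INCORRECT_VERSION) {
            // A version mismatch is a *regular* outcome here:
            // re-read the state and let the user decide
        }
        throw e;
    }
}
```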
**NB**: An attentive reader might note that the necessity to implement locking has not disappeared: there must be a component in the system that performs a locking read of the resource version and its subsequent change. It's not entirely true as synchronization strategies and strongly consistent reading have disappeared *from the public API*. The distance between the client that sets the lock and the server that processes it became much smaller, and the entire interaction now happens in a controllable environment, being free from the problems we've described earlier.
Instead of a version, the date of the last modification of the resource might be used (which is much less reliable as clocks are not ideally synchronized across different system nodes; at least save it with the maximum possible precision!) or entity tags (ETags).
The advantage of optimistic concurrency control is therefore the possibility to hide under the hood the complexity of implementing locking mechanisms. The disadvantage is that the versioning errors are no longer exceptional situations — it's now a *regular behavior* of the system. Furthermore, client developers *must* implement handling them; otherwise, the application might become inoperable as users will infinitely create orders with a wrong version.
**NB**: Which resource to select for versioning is extremely important. If in our example we create a global system version that is incremented after any order comes, users' chances to successfully create an order will be close to zero.


@@ -1,6 +1,8 @@
### Eventual Consistency
The approach described in the previous chapter is in fact a trade-off: the API performance issues are traded for “normal” (i.e., expected) background errors that happen while working with the API. This is achieved by isolating the component responsible for controlling concurrency and exposing only revision tokens in the public API. Still, the achievable throughput of the API is limited as strong consistency implies strict constraints on backend implementation.
In many situations, given the rate of writes is much less than reads (as in our case, when making two orders from two different devices under one account is rather an exceptional situation), it might make sense to stick to *eventual consistency* rather than the strict one.[ref:steen-tanenbaum-distributed-systems 7.2.2 Eventual consistency]() The typical setup on the Web often involves having asynchronously replicated databases:
```typescript
// Reading the state,
```
@@ -20,9 +22,16 @@ try {
```typescript
}
```
As orders are created much more rarely than read, we might significantly increase the system performance if we drop the requirement of returning the most recent state of the resource from the state retrieval endpoints. The versioning will help us avoid possible problems: creating an order will still be impossible unless the client has the actual version. The client will be able to fulfill its request *eventually* when it finally gets the actual data.
**NB**: Strictly speaking, in this example, we're referring to the “single-leader replication” type of eventual consistency: while reads might return outdated data, *writes* are nevertheless strictly ordered, and the service that physically makes writes *can* resolve the actual state of the system. There is also the “multi-leader replication” class of systems, where there is no such thing as “the actual state” or “the latest version,” as every leader replica handles writes independently and concurrently — which, in our case, means clients *can always* create duplicate orders, whatever precautions we take. Typically, such systems are only used in the following cases:
* The operations are naturally idempotent.
* A certain percentage of duplicate entities is acceptable.
* There is a mechanism in place that always routes specific clients to specific replicas, so concurrent conflicting requests to different leaders are not possible.
The curious reader may refer to Martin Kleppmann's work on the subject.[ref:kleppmann-data-intensive-applications Chapter 5. Replication]()
Choosing weak consistency instead of a strict one, however, brings some disadvantages. For instance, we might require partners to wait until they get the actual resource state to make changes — but it is quite unobvious to partners (and actually inconvenient) that they must be prepared to wait for changes they made themselves to propagate.
@@ -38,7 +47,7 @@ let pendingOrders = await api.
If strict consistency is not guaranteed, the second call might easily return an empty result as it reads data from a replica, and the newest order might not have hit it yet.
An important pattern that helps in this situation is implementing the “read-your-writes[ref Consistency Model. Read-Your-Writes Consistency|ref:steen-tanenbaum-distributed-systems 7.3.3 Read your writes](https://en.wikipedia.org/wiki/Consistency_model#Read-your-writes_consistency)” model: it guarantees that clients observe the changes they have just made. In APIs, the read-your-writes strategy could be implemented by making clients pass some token that describes the last change known to the client.
```typescript
let order = await api
```
@@ -54,25 +63,36 @@ let pendingOrders = await api.
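The snippet above is truncated by the diff; a sketch of the complete idea, with a hypothetical `last_known_order_id` parameter serving as the token:
```typescript
// Hypothetical client stub
declare const api: {
    createOrder(order: unknown): Promise<{ id: string }>;
    getOngoingOrders(opts?: {
        last_known_order_id?: string;
    }): Promise<unknown[]>;
};
const order = await api.createOrder({ /* … */ });
// By passing the identifier of its own last write, the
// client lets the server check whether the list it is
// about to return already reflects that change
const pendingOrders = await api.getOngoingOrders({
    last_known_order_id: order.id
});
```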
Such a token might be:
* An identifier (or identifiers) of the last modifying operations carried out by the client
* The last known resource version (modification date, ETag) known to the client.
Upon getting the token, the server must check that the response (e.g., the list of ongoing operations it returns) matches the token, i.e., that the eventual consistency has converged. If it did not (the client passed a modification date / version / last order id newer than the one known to the server), one of the following policies or their combinations might be applied (a sketch of such a check follows the list):
* The server might repeat the request to the underlying DB or to the other kind of data storage in order to get the newest version (eventually)
* The server might return an error that requires the client to try again later
* The server queries the main node of the DB, if such a thing exists, or otherwise initiates retrieving the master data.
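A sketch of such a server-side check under the same assumptions (the `replica` and `master` stubs stand for the respective data sources):
```typescript
// Hypothetical data-source stubs
type Order = { id: string };
declare const replica: { readOngoingOrders(): Promise<Order[]> };
declare const master: { readOngoingOrders(): Promise<Order[]> };
async function getOngoingOrders(
    lastKnownOrderId?: string
): Promise<Order[]> {
    let orders = await replica.readOngoingOrders();
    const converged =
        lastKnownOrderId === undefined ||
        orders.some((o) => o.id === lastKnownOrderId);
    if (!converged) {
        // The replica lags behind the client's knowledge:
        // fall back to the master node (or, alternatively,
        // return a “try again later” error to the client)
        orders = await master.readOngoingOrders();
    }
    return orders;
}
```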
The advantage of this approach is client development convenience (compared to the absence of any guarantees): by preserving the version token, client developers get rid of the possible inconsistency of the data received from API endpoints. There are two disadvantages, however:
* It is still a trade-off between system scalability and a constant inflow of background errors:
* If you're querying master data or repeating the request upon the version mismatch, the load on the master storage is increased in a poorly predictable manner
* If you return a client error instead, the number of such errors might be considerable, and partners will need to write some additional code to deal with the errors.
* This approach is still probabilistic, and will only help in a limited number of use cases (to be discussed below).
There is also an important question regarding the default behavior of the server if no version token was passed. Theoretically, in this case, master data should be returned, as the absence of the token might be the result of an app crash and subsequent restart or corrupted data storage. However, this implies an additional load on the master node.
#### Evaluating the Risks of Switching to Eventual Consistency
First, let us stress that you might choose the approach only in the case of exposing new APIs. If you're already providing an endpoint implementing some consistency model, you can't just lower the consistency level (for instance, introduce eventual consistency instead of the strict one) even if you never documented the behavior. This will be discussed in detail in the “[On the Waterline of the Iceberg](#back-compat-iceberg-waterline)” chapter of “The Backward Compatibility” section of this book.
Second, let us state another important assertion: the methods of solving architectural problems we're discussing in this section are probabilistic. Abolishing strict consistency means that even if all components of the system work perfectly, client errors will still occur. It might appear that they could be simply ignored, but in reality, doing so means introducing risks.
Imagine that because of eventual consistency, users of our API sometimes cannot create orders with their first attempt. For example, a customer adds a new payment method in the application, but their subsequent order creation request is routed to a replica that hasn't yet received the information regarding the newest payment method. As these two actions (adding a bank card and making an order) often go in conjunction, there will be a noticeable percentage of errors — let's say, 1%. At this stage, we could disregard the situation as it appears harmless: in the worst-case scenario, the client will repeat the request.


@@ -60,12 +60,16 @@ Here we assume that task creation requires minimal checks and doesn't wait for a
Thus we naturally came to the pattern of organizing asynchronous APIs through task queues. Here we use the term “asynchronous” logically, meaning the absence of mutual *logical* locks: the party that makes a request gets a response immediately and does not wait until the requested procedure is fully carried out, remaining able to continue interacting with the API. *Technically* in modern application environments, locking (of both the client and server) almost universally doesn't happen during long-responding calls. However, *logically* allowing users to work with the API while waiting for a response from a modifying endpoint is error-prone and leads to collisions like the one we described above.
The asynchronous call pattern is useful for solving other practical tasks as well:
* Caching operation results and providing links to them (implying that if the client needs to reread the operation result or share it with another client, it might use the task identifier to do so)
* Ensuring operation idempotency (through introducing the task confirmation step we will actually get the draft-commit system as discussed in the “[Describing Final Interfaces](#api-design-describing-interfaces)” chapter)
* Naturally improving resilience to peak loads on the service as the new tasks will be queuing up (possibly prioritized)
* Organizing interaction in the cases of very long-lasting operations that require more time than typical timeouts (which are tens of seconds in the case of network calls) or can take unpredictable time.
Also, asynchronous communication is more robust from a future API development point of view: request handling procedures might evolve towards prolonging and extending the asynchronous execution pipelines whereas synchronous handlers must retain reasonable execution times which puts certain restrictions on possible internal architecture. One might refer to the definitive work by Adam Bellemare on the advantages of event-driven architectures.[ref:bellemare-event-driven-microservices Building Event-Driven Microservices]()
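A sketch of the client's side of such an interaction (the task-related endpoints are hypothetical):
```typescript
// Hypothetical client stub
declare const api: {
    createOrderTask(order: unknown): Promise<{ taskId: string }>;
    getTask(taskId: string): Promise<
        | { status: 'pending' }
        | { status: 'completed'; result: unknown }
        | { status: 'failed'; error: unknown }
    >;
};
// The modifying call returns a task identifier immediately…
const { taskId } = await api.createOrderTask({ /* … */ });
// …and the client checks for completion later (by polling
// here, though a push channel would work as well), staying
// free to interact with the API in the meantime.
let task = await api.getTask(taskId);
while (task.status === 'pending') {
    await new Promise((r) => setTimeout(r, 1000));
    task = await api.getTask(taskId);
}
```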
**NB**: In some APIs, an ambivalent decision is implemented where endpoints feature a double interface that might either return a result or a link to a task. Although from the API developer's point of view, this might look logical (if the request was processed “quickly”, e.g., served from cache, the result is to be returned immediately; otherwise, the asynchronous task is created), for API consumers, this solution is quite inconvenient as it forces them to maintain two execution branches in their code. Sometimes, a concept of providing a double set of endpoints (synchronous and asynchronous ones) is implemented, but this simply shifts the burden of making decisions onto partners.


@@ -227,8 +227,8 @@ The first request format allows for implementing the first scenario, i.e., retri
Another possible anchor to rely on is the record creation date. However, this approach is harder to implement for the following reasons:
* Creation dates for two records might be identical, especially if the records are mass-generated programmatically. In the worst-case scenario, it might happen that at some specific moment, more records were created than one request page contains, making it impossible to traverse them.
* If the storage supports parallel writing to several nodes (i.e., implements the “multi-leader replication” approach), the most recently created record might have a slightly earlier creation date than the second-recent one because clocks on different nodes might tick slightly differently, and it is challenging to achieve even microsecond-precision coherence.[ref Ranganathan, K. A Matter of Time: Evolving Clock Sync for Distributed Databases|ref:kleppmann-data-intensive-applications Chapter 8. The Trouble with Distributed Systems](https://www.yugabyte.com/blog/evolving-clock-sync-for-distributed-databases/) This breaks the monotonicity invariant, which makes it poorly fit for use in public APIs, as we discussed in the “[Eventual Consistency](#api-patterns-weak-consistency)” chapter. If there is no other choice but relying on such storage, one of two evils is to be chosen:
* Introducing artificial delays, i.e., returning only items created earlier than N seconds ago, selecting this N to be certainly more than the clock irregularity and the replication lag. This technique also works in the case of asynchronously populated lists. Keep in mind, however, that this solution is probabilistic, and wrong data will be served to clients in case of backend synchronization problems.
* Describe the instability of ordering list items in the docs (and thus make partners responsible for solving arising issues).
Often, the interfaces for traversing data by stating boundaries are generalized by introducing the concept of a “cursor”:
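A sketch of what such a generalization might look like from the client's perspective (the endpoint and field names are assumptions):
```typescript
// Hypothetical client stub
declare const api: {
    getOrderHistory(opts: {
        cursor?: string;
        limit: number;
    }): Promise<{ orders: unknown[]; cursor: string | null }>;
};
// The cursor is an opaque token encoding a position in the
// list; the client never interprets it, only passes it back,
// so the server is free to change how boundaries (ids,
// dates, offsets) are represented internally.
let cursor: string | undefined;
do {
    const page = await api.getOrderHistory({ cursor, limit: 10 });
    // …process page.orders…
    cursor = page.cursor ?? undefined;
} while (cursor !== undefined);
```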


@@ -1,6 +1,6 @@
### Bidirectional Data Flows. Push and Poll Models
In the previous chapter, we discussed the following scenario: a partner receives information about new events occurring in the system by periodically requesting an endpoint that supports retrieving ordered lists.
```json
GET /v1/orders/created-history↵
@@ -44,11 +44,23 @@ Duplex connections still suffer from the unreliability of the network and requir
##### Separate Callback Channels
Instead of a duplex connection, two separate channels might be used: one for sending requests to the server and one for receiving notifications from the server. In this setup, clients subscribe to message queues generated by the server (a “message broker”) or, sometimes, other clients, typically by implementing the publisher/subscriber (“pub/sub”) pattern.[ref Publish / Subscribe Pattern](https://en.wikipedia.org/wiki/Publish%E2%80%93subscribe_pattern) This implies that:
* The client sends requests either through regular API calls or by publishing events to a queue (or queues).
* The client receives callback notifications by listening for events on a queue. It might be the same queue the client used for sending events or a completely different queue (or queues).
Therefore, this approach follows neither the request-response pattern (even if a callback event is a direct response to the client’s actions, it is received asynchronously, requiring the client to match the response to its requests) nor the duplex connection pattern. However, we must note that this is a *logical* distinction for the convenience of client developers, as, under the hood, the underlying messaging framework typically relies on WebSockets or implements polling.
The most popular technology of this kind is *MQTT*[ref MQTT](https://docs.oasis-open.org/mqtt/mqtt/v5.0/mqtt-v5.0.html). Although it is considered highly efficient due to its use of low-level protocols, its disadvantages stem from its advantages:
* The technology is designed to implement the pub/sub pattern, and its primary value lies in the fact that the server software (MQTT Broker) is provided alongside the protocol itself. Applying it to other tasks, especially bidirectional communication, can be challenging.
* The use of low-level protocols requires developers to define their own data formats.
Another popular technology for organizing message queues is the Advanced Message Queuing Protocol (*AMQP*). AMQP is an open standard for implementing message queues,[ref AMQP](https://www.amqp.org/) with many independent client and server (broker) implementations. One notable broker implementation is RabbitMQ,[ref RabbitMQ](https://www.rabbitmq.com/) while AMQP clients are typically implemented as libraries for specific client platforms and programming languages.
There is also a web standard for sending server notifications called Server-Sent Events[ref HTML Living Standard. Server-Sent Events](https://html.spec.whatwg.org/multipage/server-sent-events.html) (*SSE*). However, SSE is less functional than WebSockets (supporting only text data and unidirectional flow) and is rarely used.
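For reference, consuming SSE on the client side takes just a few lines thanks to the standard `EventSource` interface (the endpoint URL is hypothetical):
```typescript
// A long-lived HTTP connection over which the server pushes
// text events; the browser reconnects automatically
const source = new EventSource('/v1/orders/updates');
source.onmessage = (event: MessageEvent) => {
    const update = JSON.parse(event.data);
    // …apply the update to the local state
};
```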
A curious reader may refer to the corresponding chapter in Ian Gorton’s influential book[ref:gorton-scalable-systems Chapter 7. Asynchronous Messaging]() or to Adam Bellemare’s compendium on the topic.[ref:bellemare-event-driven-microservices Building Event-Driven Microservices]()
##### Third-Party Push Notifications
@@ -89,7 +101,7 @@ What is important is that there *must* be a formal contract (preferably in a form
##### 2. Agree on Authorization and Authentication Methods
As a *webhook* is a callback channel, you will need to develop a separate authorization system to deal with it as it is the *partners'* duty to check that the request is genuinely coming from the API backend, not vice versa. We reiterate here our strictest recommendation to stick to existing standard techniques, such as mTLS; though in the real world, you will likely have to use archaic methods like fixing the caller server's IP address.
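Besides mTLS, another widespread standard option is signing the callback payload with a shared-secret HMAC; a minimal verification sketch for the partner's side (the header name is hypothetical):
```typescript
import { createHmac, timingSafeEqual } from 'node:crypto';
function verifyWebhook(
    rawBody: Buffer,
    signatureHeader: string, // e.g., an 'X-Signature' header
    secret: string
): boolean {
    // Recompute the signature over the raw payload…
    const computed = createHmac('sha256', secret)
        .update(rawBody)
        .digest();
    const received = Buffer.from(signatureHeader, 'hex');
    // …and compare in constant time
    return (
        received.length === computed.length &&
        timingSafeEqual(received, computed)
    );
}
```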
##### 3. Develop an Interface for Setting the URL of a *Webhook*
@@ -98,7 +110,7 @@ As the callback endpoint is developed by partners, we do not know its URL before
**Importantly**, the operation of setting a *webhook* URL is to be treated as a potentially hazardous one. It is highly desirable to request a second authentication factor to authorize the operation, as a potential attacker can wreak a lot of havoc if there is a vulnerability in the procedure:
* By setting an arbitrary URL, the perpetrator might get access to all partner's orders (and the partner might lose access)
* This vulnerability might be used for organizing DoS attacks on third parties
* If an internal URL can be set as a *webhook*, an SSRF attack[ref SSRF](https://en.wikipedia.org/wiki/SSRF) might be directed toward the API vendor's own infrastructure.
* If an internal URL can be set as a *webhook*, an SSRF attack[ref Server Side Request Forgery|ref:madden-api-security-in-action 10.2.7 Preventing SSRF attacks](https://owasp.org/www-community/attacks/Server_Side_Request_Forgery) might be directed toward the API vendor's own infrastructure.
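A minimal sketch of such a URL validation routine is shown below. The blocklist is purely illustrative and far from exhaustive: a production-grade check must also resolve the hostname and verify the resulting IP addresses against private ranges.

```typescript
// A sketch of validating a partner-supplied webhook URL.
// The blocklist below is illustrative, not exhaustive.
function validateWebhookUrl(rawUrl: string): void {
    const url = new URL(rawUrl); // throws on malformed input
    if (url.protocol !== 'https:') {
        throw new Error('Only https:// webhook URLs are allowed');
    }
    // Reject obviously internal destinations to reduce
    // the SSRF attack surface
    const forbiddenHosts = ['localhost', '127.0.0.1', '169.254.169.254'];
    if (
        forbiddenHosts.includes(url.hostname) ||
        url.hostname.endsWith('.internal')
    ) {
        throw new Error('Internal addresses cannot be used as webhooks');
    }
}
```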
#### Typical Problems of *Webhook*-Powered Integrations
@@ -127,9 +139,9 @@ Obviously, we can't guarantee partners don't make any of these mistakes. The onl
#### Message Queues
As for internal APIs, the *webhook* technology (i.e., the ability to programmatically set a callback URL) is either not needed at all or is replaced with the Service Discovery[ref Web Services Discovery](https://en.wikipedia.org/wiki/Web_Services_Discovery) protocol, as the services comprising a single backend are symmetrically able to call each other. However, the problems of callback-based integration discussed above are equally relevant for internal calls. Requesting an internal API might result in a false-negative error, internal clients might be unaware that ordering is not guaranteed, etc.
As for internal APIs, the *webhook* technology (i.e., the ability to programmatically set a callback URL) is typically not needed at all, as the services comprising a single backend are symmetrically able to call each other. However, the problems of callback-based integration discussed above are equally relevant for internal calls. Requesting an internal API might result in a false-negative error, internal clients might be unaware that ordering is not guaranteed, etc.
To solve these problems, and also to ensure better horizontal scalability, message queues[ref Message Queue](https://en.wikipedia.org/wiki/Message_queue) were developed, most notably numerous implementations of the pub/sub pattern.[ref Publish / Subscribe Pattern](https://en.wikipedia.org/wiki/Publish%E2%80%93subscribe_pattern) At present, pub/sub-based architectures are very popular in enterprise software development, sometimes to the point of switching all inter-service communication to message queues.
To solve these problems, message queues might be used instead of direct calls, just as with client-server interaction. At present, pub/sub-based architectures are very popular in enterprise software development, sometimes to the point of switching all inter-service communication to message queues.
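For illustration, a minimal producer-side sketch is given below; the `MessageQueue` interface is abstract, while real brokers (such as the aforementioned RabbitMQ) expose similar, though much richer, APIs.

```typescript
// An abstract producer-side sketch: instead of calling
// the downstream service directly, the order service
// emits an event and continues without waiting for
// the event to be processed
interface MessageQueue {
    publish(topic: string, message: object): Promise<void>;
}

async function onOrderCreated(
    queue: MessageQueue,
    orderId: string
): Promise<void> {
    // The publish call succeeds once the broker has
    // persisted the message, not when it is processed:
    // from this point on, consistency is only eventual
    await queue.publish('order.created', {
        orderId,
        occurredAt: new Date().toISOString()
    });
}
```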
**NB**: Let us note that everything comes with a price, and these delivery guarantees and horizontal scalability are no exception:
* All communication becomes eventually consistent, with all the ensuing implications

View File

@@ -230,4 +230,4 @@ X-Idempotency-Token: <token>
This approach is much more complex to implement, but it is the only viable technique for realizing collaborative editing as it explicitly reflects the exact actions the client applied to an entity. Having the changes in this format also allows for organizing offline editing, with changes accumulated on the client side and conflicts later resolved by the server based on the revision history.
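Such a list of changes might look like the following (the operation nomenclature is purely illustrative):

```json
{
  "parent_revision_id": 123,
  "operations": [
    { "type": "insert_text", "position": 100, "text": "Hello" },
    { "type": "delete_range", "from": 200, "to": 220 }
  ]
}
```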
**NB**: One approach to this task is developing a set of operations in which all actions are transitive (i.e., the final state of the entity does not change regardless of the order in which the changes were applied). One example of such a nomenclature is a conflict-free replicated data type (*CRDT*).[ref Conflict-Free Replicated Data Type](https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type) However, we consider this approach viable only in some subject areas, as in real life, non-transitive changes are always possible. If one user entered new text in the document and another user removed the document completely, there is no way to automatically resolve this conflict that would satisfy both users. The only correct way of resolving this conflict is explicitly asking users which option for mitigating the issue they prefer.
**NB**: One approach to this task is developing a set of operations in which all actions are transitive (i.e., the final state of the entity does not change regardless of the order in which the changes were applied). One example of such a nomenclature is a conflict-free replicated data type (*CRDT*).[ref Conflict-Free Replicated Data Type|ref:shapiro-et-al-crdt Conflict-Free Replicated Data Types](https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type) However, we consider this approach viable only in some subject areas, as in real life, non-transitive changes are always possible. If one user entered new text in the document and another user removed the document completely, there is no way to automatically resolve this conflict that would satisfy both users. The only correct way of resolving this conflict is explicitly asking users which option for mitigating the issue they prefer.
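To illustrate the idea of a transitive operation set, below is a sketch of arguably the simplest CRDT, a grow-only counter: merging replicas produces the same state regardless of the order in which increments and merges are applied.

```typescript
// The simplest CRDT: a grow-only counter (G-Counter).
// Each replica increments only its own slot; merging takes
// the per-replica maximum, so merges are commutative,
// associative, and idempotent, and order does not matter.
type GCounter = Record<string, number>;

function increment(c: GCounter, replicaId: string): GCounter {
    return { ...c, [replicaId]: (c[replicaId] ?? 0) + 1 };
}

function merge(a: GCounter, b: GCounter): GCounter {
    const result: GCounter = { ...a };
    for (const [id, count] of Object.entries(b)) {
        result[id] = Math.max(result[id] ?? 0, count);
    }
    return result;
}

function value(c: GCounter): number {
    return Object.values(c).reduce((sum, n) => sum + n, 0);
}
```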

View File

@@ -6,7 +6,7 @@ Let us summarize what we have written in the three previous chapters:
2. Higher-level entities are to be the informational contexts for low-level ones, meaning they don't prescribe any specific behavior but rather translate their state and expose functionality to modify it, either directly through calling some methods or indirectly through firing events.
3. Concrete functionality, such as working with “bare metal” hardware or underlying platform APIs, should be delegated to low-level entities.
**NB**: There is nothing novel about these rules: one might easily recognize them as the *SOLID* architecture principles[ref SOLID](https://en.wikipedia.org/wiki/SOLID)[ref:martin-functional-design 12. Solid](). This is not surprising either, as *SOLID* focuses on contract-oriented development, and APIs are contracts by definition. We have simply introduced the concepts of “abstraction levels” and “informational contexts” to these principles.
**NB**: There is nothing novel about these rules: one might easily recognize them as the *SOLID* architecture principles[ref SOLID|ref:martin-functional-design 12. Solid](https://en.wikipedia.org/wiki/SOLID). This is not surprising either, as *SOLID* focuses on contract-oriented development, and APIs are contracts by definition. We have simply introduced the concepts of “abstraction levels” and “informational contexts” to these principles.
However, there remains an unanswered question: how should we design the entity nomenclature from the beginning so that extending the API won't result in a mess of assorted inconsistent methods from different stages of development? The answer is quite obvious: to avoid clumsy situations when abstracting (as with the recipe properties), all the entities must be originally considered as specific implementations of a more general interface, even if no alternative implementations are planned for them.
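A minimal sketch of this principle might look like the following (all names are illustrative, not taken from our study API):

```typescript
// A sketch: even if only one kind of program exists today,
// declaring the general interface first leaves room for
// alternative implementations. All names are illustrative.
interface IProgram {
    run(parameters: Record<string, unknown>): Promise<IProgramRun>;
}

interface IProgramRun {
    getStatus(): 'pending' | 'executing' | 'done';
    cancel(): Promise<void>;
}

// The concrete entity is just one possible realization
// of the general contract
class MakeCoffeeProgram implements IProgram {
    async run(parameters: Record<string, unknown>): Promise<IProgramRun> {
        // …operate the underlying machine API…
        let status: 'pending' | 'executing' | 'done' = 'pending';
        return {
            getStatus: () => status,
            cancel: async () => {
                // …stop the machine…
                status = 'done';
            }
        };
    }
}
```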

View File

@@ -2,7 +2,7 @@
The problem of designing HTTP APIs is, unfortunately, one of the most “holywar”-inspiring issues. On one hand, it is one of the most popular technologies; on the other hand, it is quite complex and difficult to comprehend due to the large and fragmented standard split into many RFCs. As a result, the HTTP specification is doomed to be poorly understood and imperfectly interpreted by millions of software engineers and thousands of textbook writers. Therefore, before proceeding to the useful part of this Section, we must clarify exactly what we are going to discuss.
Let's start with a short historical overview. Performing users' requests on a remote server has been one of the basic tasks in software engineering since mainframes, and it naturally gained additional momentum with the development of ARPANET. The first high-level protocol for network communication worked in the paradigm of sending messages over the network (as an example, see the DEL protocol that was proposed in one of the very first RFCs — RFC-5 published in 1969[ref RFC-5. DEL](https://datatracker.ietf.org/doc/html/rfc5)). However, scholars quickly understood that it would be much more convenient if calling a remote server and accessing remote resources wasn't any different from working with local memory and resources *in terms of function signatures*. This concept was strictly formulated under the name “Remote Procedure Call” (RPC) by Bruce Nelson, an employee of the famous Xerox Palo Alto Research Center in 1981.[ref:nelson-rpc]() Nelson was also the co-author of the first practical implementation of the proposed paradigm, namely Sun RPC[ref:birrel-nelson-implementing-rpc]()[ref RPC: Remote Procedure Call Protocol Specification](https://datatracker.ietf.org/doc/html/rfc1050), which still exists as ONC RPC.
Let's start with a short historical overview. Performing users' requests on a remote server has been one of the basic tasks in software engineering since mainframes, and it naturally gained additional momentum with the development of ARPANET. The first high-level protocol for network communication worked in the paradigm of sending messages over the network (as an example, see the DEL protocol that was proposed in one of the very first RFCs — RFC-5 published in 1969[ref RFC-5. DEL](https://datatracker.ietf.org/doc/html/rfc5)). However, scholars quickly understood that it would be much more convenient if calling a remote server and accessing remote resources wasn't any different from working with local memory and resources *in terms of function signatures*. This concept was strictly formulated under the name “Remote Procedure Call” (RPC) by Bruce Nelson, an employee of the famous Xerox Palo Alto Research Center in 1981.[ref:nelson-rpc Remote Procedure Call]() Nelson was also the co-author of the first practical implementation of the proposed paradigm, namely Sun RPC[ref:birrel-nelson-implementing-rpc Implementing Remote Procedure Calls]()[ref RPC: Remote Procedure Call Protocol Specification](https://datatracker.ietf.org/doc/html/rfc1050), which still exists as ONC RPC.
The first widely adopted RPC protocols (such as the aforementioned Sun RPC, Java RMI[ref Remote Method Invocation (RMI)](https://www.oracle.com/java/technologies/javase/remote-method-invocation-home.html), and CORBA[ref CORBA](https://www.corba.org/)) strictly followed the paradigm. The technology allowed achieving exactly what Nelson was writing about, that is, making no difference between local and remote code execution. The “magic” is hidden within tooling that generates the implementation of working with remote servers, and developers don't need to know how the protocol works.

View File

@@ -95,6 +95,8 @@ However, on many occasions (including this book) developers prefer the textual J
Apart from being human-readable, JSON features another important advantage: it is strictly formal, meaning it does not contain any constructs that can be interpreted differently in different architectures (with the possible exception of the sizes of numbers and strings), and the deserialization result aligns very well with the native data structures (i.e., indexed and associative arrays) of almost every programming language. From this point of view, we actually had no other choice when selecting a format for code samples in this book.
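For example, in TypeScript (as in any other mainstream language), a JSON document deserializes directly into native data structures without any additional schema or tooling:

```typescript
// JSON maps directly onto native indexed and associative
// arrays; no extra deserialization schema is needed
const payload = JSON.parse(
    '{"order_ids": [1, 2, 3], "ready": true}'
);
console.log(payload.order_ids[0]); // → 1, a regular number
console.log(payload.ready); // → true, a regular boolean
```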
**NB**. To get a more thorough understanding of data formats and their features, the reader might refer to Kleppmann's overview.[ref:kleppmann-data-intensive-applications Chapter 4. Encoding and Evolution]()
#### Choosing a Client-Server Development Technology
As we see, HTTP APIs and alternative RPC protocols occupy different market niches:

View File

@@ -1,6 +1,6 @@
### Components of an HTTP Request and Their Semantics
The important exercise we must conduct is to describe the format of an HTTP request and response and explain the basic concepts. Many of these may seem obvious to the reader. However, the situation is that even the basic knowledge we require to move further is scattered across vast and fragmented documentation, causing even experienced developers to struggle with some nuances. Below, we will try to compile a structured overview that is sufficient to design HTTP APIs.
The important exercise we must conduct is to describe the format of an HTTP request and response and explain the basic concepts. Many of these may seem obvious to the reader. However, the situation is that even the basic knowledge we require to move further is scattered across vast and fragmented documentation, causing even experienced developers to struggle with some nuances. Below, we will try to compile a structured overview that is sufficient to design HTTP APIs. For a deeper understanding, a curious reader might refer to the comprehensive book by David Gourley and Brian Totty.[ref:gourley-totty-http HTTP: The Definitive Guide]()
To describe the semantics and formats, we will refer to the brand-new RFC 9110[ref RFC 9110. HTTP Semantics](https://www.rfc-editor.org/rfc/rfc9110.html), which replaces no fewer than nine previous specifications dealing with different aspects of the technology. However, a significant volume of additional functionality is still covered by separate standards. In particular, the HTTP caching principles are described in the standalone RFC 9111[ref RFC 9111. HTTP Caching](https://www.rfc-editor.org/rfc/rfc9111.html), while the popular `PATCH` method is omitted in the main RFC and is regulated by RFC 5789[ref PATCH Method for HTTP](https://www.rfc-editor.org/rfc/rfc5789.html).
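As a reference point for the discussion below, a complete HTTP exchange might look like this (the endpoint and payload are, of course, hypothetical). The request:

```
GET /v1/orders/1234 HTTP/1.1
Host: api.example.com
Authorization: Bearer <token>
Accept: application/json
```

And the corresponding response:

```
HTTP/1.1 200 OK
Content-Type: application/json
Cache-Control: no-store

{ "order_id": 1234, "status": "ready" }
```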

View File

@@ -210,4 +210,6 @@ In the case of the “triple-stacked” access checking endpoint, our only optio
This approach might be further enhanced by introducing granular permissions to carry out specific actions, access levels, additional ACL service calls, etc.
Importantly, the visible redundancy of the format ceases to exist: `user_id` in the request is now not duplicated in the token payload as these identifiers carry different semantics: *on which resource* the operation is performed against *who* performs it. The two often coincide, but this coincidence is just a special case. Unfortunately, this doesn't negate the fact that it's quite easy simply to forget to implement this unobvious check in the code. This is the way.
**NB**. A curious reader may note an important problem with this setup: the list of authorized entities (`user_ids` in our case) is encoded in the token itself *when it is issued*. If permissions change (let's say, if a specific ID is removed from the list), it will not affect existing tokens. This contributes to the general problem of invalidating stateless tokens; the usual approach to tackling it is (a) making tokens themselves short-lived so they are refreshed often, and (b) maintaining a cache of issued or revoked tokens.[ref:madden-api-security-in-action 6.5 Handling token revocation]() Though implementing these techniques might be challenging, it is still a more scalable solution than checking permissions on every call.
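A sketch of a token check with both mitigations applied might look like this; the token layout and the cache interface are our assumptions:

```typescript
// A sketch of checking a short-lived stateless token
// against a revocation cache. The token layout and the
// cache interface are illustrative assumptions.
interface AccessToken {
    jti: string; // unique token identifier
    exp: number; // expiration timestamp, in seconds
    userIds: string[];
}

interface RevocationCache {
    isRevoked(jti: string): Promise<boolean>;
}

async function checkToken(
    token: AccessToken,
    cache: RevocationCache
): Promise<boolean> {
    // (a) short lifetimes bound the window during which
    // stale permissions can be exploited
    if (token.exp * 1000 < Date.now()) {
        return false;
    }
    // (b) the cache only needs to remember revocations
    // until the corresponding tokens expire anyway
    return !(await cache.isRevoked(token.jti));
}
```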

View File

@@ -36,6 +36,6 @@ Additionally, we'd like to provide some code style advice:
* HTTP Response Splitting[ref HTTP Response Splitting](https://owasp.org/www-community/attacks/HTTP_Response_Splitting)
* Unvalidated Redirects and Forwards[ref Unvalidated Redirects and Forwards Cheat Sheet](https://cheatsheetseries.owasp.org/cheatsheets/Unvalidated_Redirects_and_Forwards_Cheat_Sheet.html)
and include protection against these attack vectors at the webserver software level. The OWASP community provides a good cheatsheet on the best HTTP API security practices.[ref REST Security Cheat Sheet](https://cheatsheetseries.owasp.org/cheatsheets/REST_Security_Cheat_Sheet.html)
and include protection against these attack vectors at the webserver software level. The OWASP community provides a good cheatsheet on the best HTTP API security practices,[ref REST Security Cheat Sheet](https://cheatsheetseries.owasp.org/cheatsheets/REST_Security_Cheat_Sheet.html) or one may refer to Andrew Hoffman's[ref:hoffman-web-application-security Web Application Security]() and Neil Madden's[ref:madden-api-security-in-action API Security in Action]() books that we have already recommended earlier.
In conclusion, we would like to make the following statement: building an HTTP API means relying on the common knowledge of HTTP call semantics and drawing benefits from it by leveraging the various software built upon this paradigm (from client frameworks to server gateways) as well as developers' ability to read and understand API specifications. In this sense, the HTTP ecosystem provides probably the most comprehensive vocabulary, both in terms of profoundness and adoption, compared to other technologies, allowing for describing many different situations that may arise in client-server communication. While the technology is not perfect and has its flaws, for a *public API* vendor, it is the default choice as of today, and opting for other technologies needs to be specifically substantiated.

View File

@@ -55,7 +55,7 @@ As we lose the ability to prepare data for subcomponents, we can no longer attac
* Finally, `SearchBox` doesn't interact with either of them and only provides a context, methods to change it, and the corresponding notifications.
By making these reductions, in fact, we end up with a setup that follows the “Model-View-Controller” (MVC) methodology[ref MVC](https://en.wikipedia.org/wiki/Model%E2%80%93view%E2%80%93controller). `OfferList` and `OfferPanel` (also, the code that displays the input field) constitute a *view* that the user observes and interacts with. `Composer` is a *controller* that listens to the *view*'s events and modifies a *model* (`SearchBox` itself).
By making these reductions, we in fact end up with a setup that follows the “Model-View-Controller” (MVC) methodology, one of the very first patterns for designing user interfaces, proposed as early as 1979 by Trygve Reenskaug.[ref MVC](https://en.wikipedia.org/wiki/Model%E2%80%93view%E2%80%93controller)[ref Reenskaug, T. (1979) MVC](https://folk.universitetetioslo.no/trygver/themes/mvc/mvc-index.html) `OfferList` and `OfferPanel` (along with the code that displays the input field) constitute a *view* that the user observes and interacts with. `Composer` is a *controller* that listens to the *view*'s events and modifies a *model* (`SearchBox` itself).
**NB**: To follow the letter of the paradigm, we must separate the *model*, which will be responsible only for the data, from `SearchBox` itself. We leave this exercise to the reader.
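A minimal sketch of the described decomposition might look like this (all signatures are simplified to the bare minimum):

```typescript
// A simplified sketch of the MVC decomposition described
// above. All signatures are illustrative.
type Offer = { title: string };

class SearchBox {
    // The model: stores state and notifies subscribers
    private offers: Offer[] = [];
    private listeners: Array<(offers: Offer[]) => void> = [];

    search(query: string): void {
        // …perform the actual search, then update the state…
        this.offers = [{ title: `Offers for "${query}"` }];
        this.listeners.forEach((listener) => listener(this.offers));
    }

    onOffersChanged(listener: (offers: Offer[]) => void): void {
        this.listeners.push(listener);
    }
}

class OfferList {
    // A view: renders the model state it is subscribed to
    constructor(model: SearchBox) {
        model.onOffersChanged((offers) => this.render(offers));
    }
    private render(offers: Offer[]): void {
        // …display the offers…
    }
}

class Composer {
    // The controller: translates UI events into model calls
    constructor(private model: SearchBox) {}
    onSearchRequested(query: string): void {
        this.model.search(query);
    }
}
```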
@@ -91,7 +91,7 @@ Another ideological problem is organizing nested controllers. If there are subor
If we take a closer look at modern UI libraries that claim to employ MV* paradigms, we will learn they employ it quite loosely. Usually, only the main principle that a model defines UI and can only be modified through controllers is adopted. Nested components usually have their own models (in most cases, comprising a subset of the parent model enriched with the component's own state), and the global model contains only a limited number of fields. This approach is implemented in many modern UI frameworks, including those that claim they have nothing to do with MV* paradigms (React, for instance[ref Why did we build React?](https://legacy.reactjs.org/blog/2013/06/05/why-react.html)[ref Mattiazzi, R. How React and Redux brought back MVC and everyone loved it](https://rangle.io/blog/how-react-and-redux-brought-back-mvc-and-everyone-loved-it)).
All these problems of the MVC paradigm were highlighted by Martin Fowler in his “GUI Architectures” essay.[ref:fowler-gui-architectures]() The proposed solution is the “Model-View-*Presenter*” framework, in which the controller entity is replaced with a *presenter*. The responsibility of the presenter is not only translating events, but preparing data for views as well. This allows for full separation of abstraction levels (a model now stores only semantic data while a presenter transforms it into low-level parameters that define UI look; the set of these parameters is called the “Application Model” or “Presentation Model” in Fowler's text).
All these problems of the MVC paradigm were highlighted by Martin Fowler in his “GUI Architectures” essay.[ref:fowler-gui-architectures GUI Architectures]() The proposed solution is the “Model-View-*Presenter*” framework, in which the controller entity is replaced with a *presenter*. The responsibility of the presenter is not only translating events but also preparing data for views. This allows for full separation of abstraction levels: a model now stores only semantic data while a presenter transforms it into low-level parameters that define the look of the UI (the set of these parameters is called the “Application Model” or “Presentation Model” in Fowler's text).
[![PLOT](/img/graphs/sdk-mvp.en.png "MVP entities interaction chart")]()

View File

@@ -13,6 +13,8 @@
"file": "API",
"isbn": "ISBN",
"references": "References",
"referenceSee": "See",
"referenceOr": "or refer to",
"aboutMe": {
"title": "About the Author",
"content": [

View File

@@ -34,6 +34,12 @@
"title": "High Performance Browser Networking",
"hrefs": ["isbn:9781449344764", "https://hpbn.co"]
},
"gourley-totty-http": {
"authors": "Gourley D., Totty, B.",
"publicationDate": "2002",
"title": ["HTTP: The Definitive Guide"],
"hrefs": ["isbn:9781565925090"]
},
"hoffman-web-application-security": {
"authors": "Hoffman, A.",
"publicationDate": "2024",
@@ -52,5 +58,14 @@
"publicationDate": "1981",
"title": "Remote Procedure Call",
"hrefs": ["https://dl.acm.org/doi/10.5555/910306"]
},
"steen-tanenbaum-distributed-systems": {
"authors": "Van Steen, M., Tanenbaum A.",
"publicationDate": "2024",
"title": "Distributed Systems 4th edition",
"hrefs": [
"isbn:9789081540629",
"https://www.distributed-systems.net/index.php/books/ds4/"
]
}
}

View File

@@ -1,4 +1,4 @@
### The API Definition
Before we talk about API development, we first need to agree on what an API is. The encyclopedia tells us that an API is an Application Programming Interface. This is an accurate definition, but a meaningless one. Much like Plato's definition of a human as a “featherless biped”: the definition is accurate, yet it gives us no idea of what actually makes a human remarkable. (And not even that accurate: Diogenes of Sinope once plucked a rooster and declared it Plato's human; the definition had to be amended with the clarification “with flat nails.”)
@@ -23,4 +23,6 @@
The aqueduct also illustrates another problem of API development: your users are engineers. You do not supply water directly to consumers: customers connect to your engineering creation by attaching their own engineering constructions to it. On the one hand, you can provide water to far more people than if you laid pipes to every tap yourself. On the other hand, you cannot control the quality of the customers' engineering solutions, and water problems caused by a contractor's incompetence will inevitably be blamed on you.
That is why designing an API places a somewhat greater responsibility on you. **An API is a multiplier of both your opportunities and your mistakes.**
API design is further complicated by the fact that modern APIs are, as a rule, interfaces to *distributed systems*. Instead of a single aqueduct, you deal with a multitude of connections between sources and consumers, often established on demand, and you must operate them so that the consumer never needs to know how complex the internal water distribution architecture is.
That is why designing an API places a greater responsibility on you. **An API is a multiplier of both your opportunities and your mistakes.**

View File

@@ -1,6 +1,6 @@
### Components of an HTTP Request and Their Semantics
An important preparatory exercise we must conduct is to describe the format of HTTP requests and responses and clarify the basic concepts. Much of what is written below may seem self-evident to the reader, but, alas, the protocol's specifics are such that even the basic knowledge of it, without which we cannot move further, is scattered across vast and fragmented documentation, and even experienced developers may be unaware of certain nuances. Below, we will try to give a structured overview of the protocol to the extent necessary for designing HTTP APIs.
An important preparatory exercise we must conduct is to describe the format of HTTP requests and responses and clarify the basic concepts. Much of what is written below may seem self-evident to the reader, but, alas, the protocol's specifics are such that even the basic knowledge of it, without which we cannot move further, is scattered across vast and fragmented documentation, and even experienced developers may be unaware of certain nuances. Below, we will try to give a structured overview of the protocol to the extent necessary for designing HTTP APIs. For a more detailed description of the technology, a curious reader may refer, for example, to the thorough work by David Gourley and Brian Totty.[ref:gourley-totty-http]()
In describing the protocol's semantics and format, we will rely on the recently published RFC 9110,[ref RFC 9110 HTTP Semantics](https://www.rfc-editor.org/rfc/rfc9110.html) which replaced no fewer than nine previous specifications describing different aspects of the technology (while a large amount of additional functionality is still covered by separate standards; in particular, the HTTP caching principles are described in the standalone RFC 9111,[ref RFC 9111 HTTP Caching](https://www.rfc-editor.org/rfc/rfc9111.html) while the `PATCH` method widely used in APIs never made it into the main RFC and is regulated by RFC 5789[ref PATCH Method for HTTP](https://www.rfc-editor.org/rfc/rfc5789.html)).