Added documentation for the /api/cookbook/extract route

2025-12-22 13:17:10 +00:00 · 2023-10-22 13:03:49 +02:00
parent 1de3eef31e
commit b956cd92c9
3 changed files with 161 additions and 4 deletions
--- a/docs/api/api.yaml
+++ b/docs/api/api.yaml
@@ -45,7 +45,7 @@ info:
    On this documentation, you can test every route.

    If you don't provide an API token in [Authentication](#auth), you can still test all the routes
-    but note that the given curl example only work with an API token.
+    but note that the given curl examples only work with an API token.


 servers:
@@ -69,9 +69,12 @@ tags:
  - name: bookmarks
    description: Bookmarks

-  - name: bookmarks-export
+  - name: bookmark export
    description: Bookmark Export

+  - name: dev tools
+    description: Routes used for testing or development
+
 paths:
  /bookmarks:
    get:
@@ -110,7 +113,7 @@ paths:
      - "bookmarks/routes.yaml#.withBookmark"

    get:
-      tags: [bookmarks-export]
+      tags: [bookmark export]
      $merge:
        - "traits.yaml#.authenticated"
        - "bookmarks/routes.yaml#.article"
@@ -120,7 +123,14 @@ paths:
      - "bookmarks/routes.yaml#.withBookmark"

    get:
-      tags: [bookmarks-export]
+      tags: [bookmark export]
      $merge:
        - "traits.yaml#.authenticated"
        - "bookmarks/routes.yaml#.export"
+
+  /cookbook/extract:
+    get:
+      tags: [dev tools]
+      $merge:
+        - "traits.yaml#.authenticated"
+        - "cookbook/routes.yaml#.extract"
--- a/docs/api/cookbook/routes.yaml
+++ b/docs/api/cookbook/routes.yaml
@@ -0,0 +1,143 @@
+---
+# SPDX-FileCopyrightText: © 2023 Olivier Meunier <olivier@neokraft.net>
+#
+# SPDX-License-Identifier: AGPL-3.0-only
+
+# GET /cookbook/extract
+extract:
+  summary: Extract Link
+  description: |
+    This route extracts a link and returns the extraction result.
+
+    You can pass an `Accept` header to the request, with one of the following values:
+
+    - `application/json` (default) returns a JSON response
+    - `text/html` returns an HTML response with all the media included as base64 encoded
+      URLs.
+
+  parameters:
+    - name: url
+      in: query
+      required: true
+      schema:
+        type: string
+        format: uri
+      description: URL to extract
+
+  responses:
+    "200":
+      description: |
+        Extraction result.
+      content:
+        application/json:
+          schema:
+            properties:
+              url:
+                type: string
+                format: uri
+                description: The extracted URL
+              logs:
+                type: array
+                items:
+                  type: string
+                description: Extraction log
+              errors:
+                type: array
+                items:
+                  type: string
+                description: Extraction errors, if any
+              meta:
+                type: object
+                additionalProperties:
+                  type: array
+                  items:
+                    type: string
+                description: |
+                  Contains the meta tags extracted from the page.
+              properties:
+                properties:
+                  json-ld:
+                    type: array
+                    items:
+                      type: object
+                    description: A list of JSON-LD documents retrieved during the extraction
+                  link:
+                    type: array
+                    items:
+                      type: object
+                      patternProperties:
+                        "^@.+":
+                          type: string
+                          description: Link attribute, always starting with `@`
+                    description: A list of all `link` tags retrieved during the extraction
+                  meta:
+                    type: array
+                    items:
+                      type: object
+                      patternProperties:
+                        "^@.+":
+                          type: string
+                          description: Meta attribute, always starting with `@`
+                    description: A list of all `meta` tags retrieved during the extraction
+              domain:
+                type: string
+                format: hostname
+                description: Page's domain name
+              title:
+                type: string
+                description: Page's title
+              authors:
+                type: "[string]"
+                description: Page's author list
+              site:
+                type: string
+                format: hostname
+                description: Page's site
+              site_name:
+                type: string
+                description: Page's site name
+              lang:
+                type: string
+                description: Language Code
+              text_direction:
+                type: string
+                enum: [rtl, ltr]
+                description: |
+                  Direction of the article's text. It can be empty when it's unknown.
+              date:
+                type: [string]
+                format: date-time
+                nullable: true
+                description: Publication date. Can be `null` when unknown.
+              document_type:
+                type: string
+                description: |
+                  The detected document type. The value is usually `article`, `photo` or `video`
+                  but can vary, based on the extraction process.
+              description:
+                type: string
+                description: |
+                  Page's short description, when it exists. It's always an unformatted text.
+              html:
+                type: string
+                description: |
+                  The HTML content after processing.
+              embed:
+                type: string
+                description: |
+                  The oembed HTML fragment, when it exists. It usually contains an iframe when
+                  extracting videos.
+              images:
+                properties:
+                  additionalProperties:
+                    properties:
+                      size:
+                        type: "[integer]"
+                        description: The image size in pixels
+                      encoded:
+                        type: string
+                        description: The base64 URI encoded image
+
+        text/html:
+          schema:
+            type: string
--- a/docs/api/types.yaml
+++ b/docs/api/types.yaml
@@ -1,4 +1,8 @@
 ---
+# SPDX-FileCopyrightText: © 2023 Olivier Meunier <olivier@neokraft.net>
+#
+# SPDX-License-Identifier: AGPL-3.0-only
+
 schemas:
  message:
    properties: