Type annotations & mypy

9 months later

9 months ago

Kyiv.py#19

Type Annotations

  • PEP-484
  • Designed on top of PEP-3107 (Function Annotations)
  • Created in September 2014
  • Became part of the standard library in Python 3.5

Changes in Python 3.6

  • Still in provisional status
  • PEP-526: Syntax for Variable Annotations
  • Bunch of updates: ContextManager, Collection, ClassVar
  • TYPE_CHECKING constant
  • NamedTuple improvements

PEP-526

def hello(name: str) -> str:
    content: str
    if name == 'world':
        content = 'WORLD'
    else:
        content = name
    return f'Hello, {content}'

PEP-526

class Article(object):

    name: str
    content: str

    def __init__(self, name: str, content: str) -> None:
        self.name = name
        self.content = content

NamedTuple

from typing import List, Optional, NamedTuple


class Author(NamedTuple):

    full_name: str


class Article(NamedTuple):

    author: Author

    name: str
    content: str

    tags: Optional[List[str]] = None

    @property
    def url(self) -> str:
        return f'/talks/{slugify(self.name)}'

NamedTuple

author = Author('Igor Davydenko')
kyiv_py_19 = Article(author, 'Kyiv.py#19', ...)
kyiv_py_21 = Article(
    author=author,
    content=...,
    name='Kyiv.py#21',
    tags=['mypy', 'type', 'hinting'])

NamedTuple

assert kyiv_py_21._fields == ('author', 'name', 'content', 'tags')
assert kyiv_py_21._asdict() == {...}

assert kyiv_py_21.author.full_name == 'Igor Davydenko'
assert kyiv_py_21.url == '/talks/kyivpy-21'

kyiv_py_22 = kyiv_py_21._replace(
    name='Kyiv.py#22',
    tags=['graphql', 'rules', 'maybe'])

IRL

    files: DefaultDict[str, List[DocumentFile]] = defaultdict(list)
    for row in (db_files or []):
        files[row.meta_id].append(
            DocumentFile(
                **rowproxy_to_dict(row, rename_fields=RP_ID_TYPE_MAPPING)))

    meta = tuple(
        DocumentMeta(  # type: ignore
            **rowproxy_to_dict(row, rename_fields=RP_ID_TYPE_MAPPING),
            files=tuple(files.get(row.id) or []))
        for row in db_meta or [])

Typing by Example

Elm

add : Int -> Int -> Int
add x y =
    x + y

Elm

type alias TweetId =
    Int


type alias Tweet =
    { id : TweetId
    , text : String
    , url : String
    , createdAt : String
    }

Elm

tweetDecoder : Decode.Decoder Tweet
tweetDecoder =
    decode Tweet
        |> required "id" Decode.int
        |> required "text" Decode.string
        |> required "url" Decode.string
        |> custom (Decode.at [ "created_at" ] Decode.string)


tweetsDecoder : Decode.Decoder (List Tweet)
tweetsDecoder =
    Decode.list tweetDecoder


fetchTweets : Cmd Msg
fetchTweets =
    Http.get "http://vusaifootball.com/api/tweets" tweetsDecoder
        |> RemoteData.sendRequest
        |> Cmd.map Msgs.OnFetchTweets

Rust

fn add(x: i32, y: i32) -> i32 {
    x + y
}

Rust

struct DebugWriter {
    sha: Writer<Blake2b512>,
    data: Opt<Vec<u8>>,
}

Code from tailhook/vagga

Rust

impl Digest {
    pub fn new(debug: bool, raw_debug: bool) -> Digest {
        Digest {
            sha: DebugWriter {
                sha: Writer::new(Blake2b512::new()),
                data: if raw_debug { Opt::Out(Vec::new()) } else { Opt::Sink },
            },
            debug: if debug { Opt::Out(String::new()) } else { Opt::Sink },
        }
    }
}

Code from tailhook/vagga

Python

def add(x: int, y: int) -> int:
    return x + y

It all looks
the same!

Elm

module Main exposing (..)

import Html exposing (Html, text)


add : Int -> Int -> Int
add x y =
    x + y


main : Html msg
main =
    let
        value =
            add "1" "2"
    in
        text (toString value)

Elm

Rust

fn add(x: i32, y: i32) -> i32 {
    x + y
}

fn main() {
    println!("{}", add("x", "y"));
}

Rust

Python

def add(x: int, y: int) -> int:
    return x + y


if __name__ == '__main__':
    print(add("x", "y"))

Python

pip install enforce

import enforce


@enforce.runtime_validation
def add(x: int, y: int) -> int:
    return x + y


if __name__ == '__main__':
    print(add("x", "y"))

pip install enforce

It works!

Kind of…

In [1]: def add(x: int, y: int) -> int:
   ...:     return x + y
   ...:

In [2]: import enforce

In [3]: @enforce.runtime_validation
   ...: def enforce_add(x: int, y: int) -> int:
   ...:     return x + y
   ...:

In [4]: %timeit add(1, 2)
132 ns ± 1.78 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)

In [5]: %timeit enforce_add(1, 2)
245 µs ± 14.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

Kind of…

In [6]: %prun add(1, 2)
         4 function calls in 0.000 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.000    0.000 {built-in method builtins.exec}
        1    0.000    0.000    0.000    0.000 <ipython-input-1-1e63accb4849>:1(add)
        1    0.000    0.000    0.000    0.000 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}

Kind of…

In [7]: %prun enforce_add(1, 2)
         641 function calls (599 primitive calls) in 0.003 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        3    0.000    0.000    0.001    0.000 types.py:139(is_type_of_type)
        1    0.000    0.000    0.002    0.002 enforcers.py:57(validate_inputs)
        1    0.000    0.000    0.000    0.000 inspect.py:2829(_bind)
       39    0.000    0.000    0.001    0.000 typing.py:1177(__instancecheck__)
        1    0.000    0.000    0.003    0.003 decorators.py:78(universal)
      9/3    0.000    0.000    0.002    0.001 utils.py:5(visit)
       39    0.000    0.000    0.000    0.000 typing.py:1166(__subclasscheck__)
        3    0.000    0.000    0.001    0.000 nodes.py:220(validate_data)
        3    0.000    0.000    0.000    0.000 settings.py:50(covariant)
        9    0.000    0.000    0.001    0.000 nodes.py:48(validate)
       39    0.000    0.000    0.000    0.000 abc.py:194(__subclasscheck__)
        1    0.000    0.000    0.000    0.000 validator.py:35(reset)
    99/75    0.000    0.000    0.001    0.000 {built-in method builtins.isinstance}
        6    0.000    0.000    0.000    0.000 types.py:189(<genexpr>)
       51    0.000    0.000    0.000    0.000 _weakrefset.py:70(__contains__)
        1    0.000    0.000    0.003    0.003 <string>:1(<module>)
        3    0.000    0.000    0.000    0.000 nodes.py:198(reset)
       39    0.000    0.000    0.000    0.000 {built-in method builtins.issubclass}
       12    0.000    0.000    0.000    0.000 types.py:260(sort_and_flat_type)

mypy

What's new?

  • Latest version: 0.521
  • Installed as pip install mypy
  • Supports Python 3.6 features by default
  • Extended Callable Types
  • Improved error messages
  • A lot of new features, really…

Configuration

[mypy]
check_untyped_defs = True
disallow_any = generics
disallow_untyped_calls = True
disallow_untyped_defs = True
follow_imports = silent
ignore_missing_imports = True
strict_optional = True
warn_redundant_casts = True
warn_unused_ignores = True

Typeshed

  • python/typeshed
  • Collection of library stubs for Python, with static types
  • Stubs for the standard library & third party libraries
  • Third party libraries:
    • dateutil
    • jwt
    • lxml
    • requests
    • click / itsdangerous / jinja2 / werkzeug
  • #1105

Follow Imports

  • Third party code is not annotated in most cases
  • You need to ignore missing annotations
  • Third party code == Any

# type: ignore

  • One does not simply
  • use type annotations without # type: ignore
  • Our codebase: cloc 28K, 33 type ignores
  • Will review occurrences later

Using mypy in prod

Team Acceptance

  • 3 developers without any static typing background
  • 28K lines of code as of now
  • Started adding type annotations when the project was already running in production
  • Extremely happy with the result!

Path to Developer Success

  • Enable mypy in your editor / IDE
  • Run mypy on pre-commit / pre-push hook
  • Upgrade to latest mypy

Path to Project Success

  1. Enable mypy check on CI (with or after flake8)
  2. Cover smallest package
  3. Include package config in mypy.ini
  4. Repeat until all packages are covered

What Changed?

  • Better codebase understanding
  • Faster refactoring
  • Easier code reviews
  • Fewer dicts
  • More NamedTuples & Enums

lib.types

from types import GeneratorType
from typing import ...

from aiohttp import web
from aiopg.sa.result import RowProxy


AnyDict = Dict[Any, Any]
AnyList = List[Any]
DataDict = Dict[str, Any]
IntDict = Dict[int, str]
IntList = List[int]
StrDict = Dict[str, str]
StrList = List[str]
StrMapping = Mapping[str, str]

UserHandler = Callable[[web.Request, RowProxy], Awaitable[web.Response]]

RequestOrView = Union[web.Request, web.View]
ResponseDecorator = Callable[..., web.Response]

Mypy Extensions

  • pip install mypy-extensions
  • Don't forget to add it to requirements.txt, not requirements-dev.txt
  • TypedDict
  • Arg, VarArg, NamedArg, DefaultArg, DefaultNamedArg
  • NoReturn

TypedDict

from mypy_extensions import TypedDict


class Article(TypedDict):

    name: str
    content: str


article = Article({'name': 'Kyiv.py#21', 'content': ...})

# type: ignore

  • Missing stubs for new features in stdlib
  • Incomplete stubs for third party libraries (see lxml)
data = {'name': 'Kyiv.py#21'}
Article(**data, content=...)

Found Issues

  • error: Incompatible types in assignment (expression has type "str", target has type "int")
  • error: Module 'lib.helpers' has no attribute 'ensure_list'
  • error: Missing return statement
  • error: Assertion is always true, perhaps remove parentheses?
  • error: Cannot assign to a method

Missed Issues

  • Any
  • Any
  • Any
  • Tests are not annotated
  • Bad type annotations lead to issues at runtime

What are we missing?

Better Data Validation

class Model(BaseModel):
    cos_function: PyObject = None
    path_to_something: Path = None

    short_str: constr(min_length=2, max_length=10) = None
    regex_str: constr(regex='apple (pie|tart|sandwich)') = None

    big_int: conint(gt=1000, lt=1024) = None
    pos_int: PositiveInt = None
    neg_int: NegativeInt = None

    email_address: EmailStr = None
    email_and_name: NameEmail = None

Easier Interaction with Database

  • Tables declared as sa.Table
  • Queries made via sa.sql.Select
  • Data fetched to aiopg.sa.result.RowProxy
  • RowProxy sometimes translated to NamedTuple
  • Code duplication
  • Bad maintainability
  • It will fail at runtime in case of typo anyway

Better stubs

  • SQLAlchemy removed from typeshed
  • aio-libs not covered enough
  • I want an easier way to provide extra stubs

Conclusion

mypy is ready
to be used in production

Perhaps

Questions?

Twitter: @playpausenstop
GitHub: @playpauseandstop