Skip to content

Triangle

reserving.triangle.Triangle

Core data structure for a loss development triangle.

A triangle represents cumulative losses (paid or incurred) organized by accident year (rows) and development lag (columns). It is the input to all reserving methods in this library.

Parameters

data : pd.DataFrame A DataFrame in triangle format — accident years as the index, development lags as columns, loss values as cells. Missing cells (future periods) should be NaN.

Examples

>>> import pandas as pd
>>> from reserving import Triangle
>>> data = pd.DataFrame(
...     {1: [1000, 1200, 900], 2: [1100, 1300, None], 3: [1150, None, None]},
...     index=[2021, 2022, 2023]
... )
>>> tri = Triangle(data)
>>> tri.shape
(3, 3)

Source code in reserving/triangle.py
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
class Triangle:
    """
    Core data structure for a loss development triangle.

    A triangle represents cumulative losses (paid or incurred) organized
    by accident year (rows) and development lag (columns). It is the
    input to all reserving methods in this library.

    The instance keeps a private float copy of the input; every public
    accessor hands back a copy, so callers cannot alter the stored values.

    Parameters
    ----------
    data : pd.DataFrame
        A DataFrame in triangle format — accident years as the index,
        development lags as columns, loss values as cells.
        Missing cells (future periods) should be NaN.

    Raises
    ------
    TypeError
        If ``data`` is not a DataFrame.
    ValueError
        If ``data`` has no rows or no columns.

    Examples
    --------
    >>> import pandas as pd
    >>> from reserving import Triangle
    >>> data = pd.DataFrame(
    ...     {1: [1000, 1200, 900], 2: [1100, 1300, None], 3: [1150, None, None]},
    ...     index=[2021, 2022, 2023]
    ... )
    >>> tri = Triangle(data)
    >>> tri.shape
    (3, 3)
    """

    def __init__(self, data: pd.DataFrame) -> None:
        if not isinstance(data, pd.DataFrame):
            raise TypeError(f"data must be a DataFrame, got {type(data).__name__}")
        # DataFrame.empty is True as soon as either axis has length zero, so
        # this one check covers both the "no rows" and "no columns" cases.
        if data.empty:
            raise ValueError("data must not be empty")

        # Copy before converting so the axis renames below are applied to
        # this instance's own frame rather than to the caller's.
        self._data = data.copy().astype(float)
        self._data.index.name = "accident_year"
        self._data.columns.name = "dev_lag"

    # ------------------------------------------------------------------ #
    # Class methods — alternative constructors                            #
    # ------------------------------------------------------------------ #

    @classmethod
    def from_dataframe(
        cls,
        df: pd.DataFrame,
        origin: str,
        dev: str,
        values: str,
    ) -> "Triangle":
        """
        Construct a Triangle from a long-format DataFrame.

        Parameters
        ----------
        df : pd.DataFrame
            Long-format data with one row per origin/development observation.
        origin : str
            Column name for accident year (rows of the triangle).
        dev : str
            Column name for development lag (columns of the triangle).
        values : str
            Column name for the loss values.

        Returns
        -------
        Triangle

        Raises
        ------
        ValueError
            If any of ``origin``, ``dev`` or ``values`` is not a column of
            ``df``.

        Notes
        -----
        The pivot uses ``aggfunc="first"``: if several rows share the same
        origin/development pair, no error is raised and a single value is
        kept for that cell.

        Examples
        --------
        >>> tri = Triangle.from_dataframe(df, origin="AccidentYear",
        ...                               dev="DevelopmentLag",
        ...                               values="CumPaidLoss")
        """
        missing = {origin, dev, values} - set(df.columns)
        if missing:
            # Sort so the message is identical from run to run (set iteration
            # order for strings is not stable across interpreter sessions).
            raise ValueError(
                f"Columns not found in DataFrame: {sorted(missing, key=str)}"
            )

        pivot = df.pivot_table(
            index=origin, columns=dev, values=values, aggfunc="first"
        )
        pivot.index.name = "accident_year"
        pivot.columns.name = "dev_lag"
        return cls(pivot)

    @classmethod
    def from_csv(
        cls,
        path: str,
        origin: str,
        dev: str,
        values: str,
        **kwargs,
    ) -> "Triangle":
        """
        Construct a Triangle from a CSV file.

        The file is read with ``pd.read_csv`` and the resulting long-format
        frame is handed to :meth:`from_dataframe`.

        Parameters
        ----------
        path : str
            Path to the CSV file.
        origin : str
            Column name for accident year.
        dev : str
            Column name for development lag.
        values : str
            Column name for loss values.
        **kwargs
            Additional keyword arguments passed to pd.read_csv.

        Returns
        -------
        Triangle
        """
        df = pd.read_csv(path, **kwargs)
        return cls.from_dataframe(df, origin=origin, dev=dev, values=values)

    # ------------------------------------------------------------------ #
    # Properties                                                          #
    # ------------------------------------------------------------------ #

    @property
    def data(self) -> pd.DataFrame:
        """The underlying triangle as a DataFrame (read-only copy)."""
        return self._data.copy()

    @property
    def shape(self) -> tuple[int, int]:
        """(n_origin_years, n_dev_lags)"""
        return self._data.shape

    @property
    def origin_years(self) -> pd.Index:
        """Accident years (row index)."""
        return self._data.index.copy()

    @property
    def dev_lags(self) -> pd.Index:
        """Development lags (column index)."""
        return self._data.columns.copy()

    @property
    def n_origins(self) -> int:
        """Number of accident years."""
        return self._data.shape[0]

    @property
    def n_devs(self) -> int:
        """Number of development lags."""
        return self._data.shape[1]

    @property
    def is_complete(self) -> bool:
        """True if the triangle has no missing (NaN) values."""
        return not self._data.isnull().any().any()

    @property
    def latest_diagonal(self) -> pd.Series:
        """
        The most recent known value for each accident year.

        For a standard upper-left triangle, this is the last non-NaN
        value in each row — the most recently observed development.
        Rows with no observed value at all yield NaN.
        """

        def last_value(row: pd.Series):
            # Drop the NaNs once and reuse the result for both the
            # emptiness test and the lookup.
            observed = row.dropna()
            return np.nan if observed.empty else observed.iloc[-1]

        return self._data.apply(last_value, axis=1)

    @property
    def latest_dev_lag(self) -> pd.Series:
        """
        The most recent development lag observed for each accident year.

        Rows with no observed value at all yield NaN.
        """

        def last_lag(row: pd.Series):
            observed = row.dropna()
            return np.nan if observed.empty else observed.index[-1]

        return self._data.apply(last_lag, axis=1)

    # ------------------------------------------------------------------ #
    # Methods                                                             #
    # ------------------------------------------------------------------ #

    def to_incremental(self) -> "Triangle":
        """
        Convert cumulative triangle to incremental (period-over-period) losses.

        The first development lag is carried over unchanged; every later
        lag becomes the difference from the lag before it.

        Returns
        -------
        Triangle
            A new Triangle with incremental values. The result is built with
            the type of ``self``, so subclasses get an instance of their own
            class back.
        """
        inc = self._data.diff(axis=1)
        # diff() leaves the first column all-NaN; restore the original values.
        inc.iloc[:, 0] = self._data.iloc[:, 0]
        return type(self)(inc)

    def link_ratios(self) -> pd.DataFrame:
        """
        Compute age-to-age link ratios for each cell.

        Returns a DataFrame of the same shape where each cell is
        value(lag+1) / value(lag). The last column is all NaN
        (no next period to develop to). A zero denominator yields NaN
        rather than an infinite ratio.

        Returns
        -------
        pd.DataFrame
        """
        # Shifting one column to the left lines value(lag+1) up with
        # value(lag) and leaves the final column empty (NaN).
        next_values = self._data.shift(-1, axis=1)
        return next_values / self._data.replace(0, np.nan)

    def volume_weighted_factors(self) -> pd.Series:
        """
        Compute volume-weighted average development factors by lag.

        The volume-weighted factor at lag k is:
            sum(loss[lag k+1]) / sum(loss[lag k])
        summed over all accident years with data at both lags.
        The factor is NaN when no accident year has data at both lags or
        when the denominator sum is not strictly positive.

        Returns
        -------
        pd.Series indexed by development lag (excludes final lag).
        """
        lags = list(self._data.columns)
        factors = {}
        for curr, nxt in zip(lags, lags[1:]):
            paired = self._data[[curr, nxt]].dropna()
            denominator = paired[curr].sum()
            # An empty selection sums to 0, so the positivity test also
            # covers the "no overlapping observations" case.
            if denominator > 0:
                factors[curr] = paired[nxt].sum() / denominator
            else:
                factors[curr] = np.nan
        # Explicit dtype keeps the result float even when there is only one
        # lag and therefore no factor at all.
        return pd.Series(factors, name="vw_factor", dtype=float)

    def summary(self) -> pd.DataFrame:
        """
        Summary statistics for each development lag.

        Returns a DataFrame with columns: n_obs, mean, std, min, max
        for each development lag (non-NaN values only).

        Returns
        -------
        pd.DataFrame
        """
        return self._data.agg(["count", "mean", "std", "min", "max"]).T.rename(
            columns={"count": "n_obs"}
        )

    # ------------------------------------------------------------------ #
    # Dunder methods                                                      #
    # ------------------------------------------------------------------ #

    def __repr__(self) -> str:
        return (
            f"Triangle(origins={self.n_origins}, "
            f"dev_lags={self.n_devs}, "
            f"complete={self.is_complete})"
        )

    def __str__(self) -> str:
        return self._data.to_string()

    def __getitem__(self, key):
        """
        Allow indexing directly into the underlying DataFrame.

        The selection is returned as a copy, like the other accessors of
        this class, so modifying it does not change the triangle.
        """
        return self._data[key].copy()

    def __len__(self) -> int:
        """Number of accident years (rows)."""
        return self.n_origins

data property

The underlying triangle as a DataFrame (read-only copy).

dev_lags property

Development lags (column index).

is_complete property

True if the triangle has no missing (NaN) values.

latest_dev_lag property

The most recent development lag observed for each accident year.

latest_diagonal property

The most recent known value for each accident year.

For a standard upper-left triangle, this is the last non-NaN value in each row — the most recently observed development.

n_devs property

Number of development lags.

n_origins property

Number of accident years.

origin_years property

Accident years (row index).

shape property

(n_origin_years, n_dev_lags)

__getitem__(key)

Allow indexing directly into the underlying DataFrame.

Source code in reserving/triangle.py
276
277
278
def __getitem__(self, key):
    """
    Allow indexing directly into the underlying DataFrame.

    The selection is returned as a copy, so modifying it does not change
    the data held by the triangle.
    """
    return self._data[key].copy()

from_csv(path, origin, dev, values, **kwargs) classmethod

Construct a Triangle from a CSV file.

Parameters

path : str Path to the CSV file. origin : str Column name for accident year. dev : str Column name for development lag. values : str Column name for loss values. **kwargs Additional keyword arguments passed to pd.read_csv.

Returns

Triangle

Source code in reserving/triangle.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
@classmethod
def from_csv(
    cls,
    path: str,
    origin: str,
    dev: str,
    values: str,
    **kwargs,
) -> "Triangle":
    """
    Build a Triangle from long-format data stored in a CSV file.

    The file is loaded with ``pd.read_csv`` and the resulting frame is
    passed on to ``from_dataframe``.

    Parameters
    ----------
    path : str
        Location of the CSV file.
    origin : str
        Name of the accident-year column.
    dev : str
        Name of the development-lag column.
    values : str
        Name of the loss-value column.
    **kwargs
        Forwarded unchanged to pd.read_csv.

    Returns
    -------
    Triangle
    """
    return cls.from_dataframe(
        pd.read_csv(path, **kwargs),
        origin=origin,
        dev=dev,
        values=values,
    )

from_dataframe(df, origin, dev, values) classmethod

Construct a Triangle from a long-format DataFrame.

Parameters

df : pd.DataFrame Long-format data with one row per origin/development observation. origin : str Column name for accident year (rows of the triangle). dev : str Column name for development lag (columns of the triangle). values : str Column name for the loss values.

Returns

Triangle

Examples

>>> tri = Triangle.from_dataframe(df, origin="AccidentYear",
...                               dev="DevelopmentLag",
...                               values="CumPaidLoss")

Source code in reserving/triangle.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
@classmethod
def from_dataframe(
    cls,
    df: pd.DataFrame,
    origin: str,
    dev: str,
    values: str,
) -> "Triangle":
    """
    Construct a Triangle from a long-format DataFrame.

    Parameters
    ----------
    df : pd.DataFrame
        Long-format data with one row per origin/development observation.
    origin : str
        Column name for accident year (rows of the triangle).
    dev : str
        Column name for development lag (columns of the triangle).
    values : str
        Column name for the loss values.

    Returns
    -------
    Triangle

    Raises
    ------
    ValueError
        If any of ``origin``, ``dev`` or ``values`` is not a column of
        ``df``.

    Notes
    -----
    The pivot uses ``aggfunc="first"``: if several rows share the same
    origin/development pair, no error is raised and a single value is
    kept for that cell.

    Examples
    --------
    >>> tri = Triangle.from_dataframe(df, origin="AccidentYear",
    ...                               dev="DevelopmentLag",
    ...                               values="CumPaidLoss")
    """
    missing = {origin, dev, values} - set(df.columns)
    if missing:
        # Sort so the message is identical from run to run (set iteration
        # order for strings is not stable across interpreter sessions).
        raise ValueError(
            f"Columns not found in DataFrame: {sorted(missing, key=str)}"
        )

    pivot = df.pivot_table(
        index=origin, columns=dev, values=values, aggfunc="first"
    )
    pivot.index.name = "accident_year"
    pivot.columns.name = "dev_lag"
    return cls(pivot)

link_ratios()

Compute age-to-age link ratios for each cell.

Returns a DataFrame of the same shape where each cell is value(lag+1) / value(lag). The last column is all NaN (no next period to develop to).

Returns

pd.DataFrame

Source code in reserving/triangle.py
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
def link_ratios(self) -> pd.DataFrame:
    """
    Compute age-to-age link ratios for each cell.

    Returns a DataFrame of the same shape where each cell is
    value(lag+1) / value(lag). The last column is all NaN
    (no next period to develop to). A zero denominator yields NaN
    rather than an infinite ratio.

    Returns
    -------
    pd.DataFrame
    """
    # Shifting one column to the left lines value(lag+1) up with
    # value(lag) and leaves the final column empty (NaN).
    next_values = self._data.shift(-1, axis=1)
    return next_values / self._data.replace(0, np.nan)

summary()

Summary statistics for each development lag.

Returns a DataFrame with columns: n_obs, mean, std, min, max for each development lag (non-NaN values only).

Returns

pd.DataFrame

Source code in reserving/triangle.py
247
248
249
250
251
252
253
254
255
256
257
258
259
260
def summary(self) -> pd.DataFrame:
    """
    Describe each development lag with a handful of statistics.

    The result has one row per development lag and the columns
    n_obs, mean, std, min and max, computed over the non-NaN
    values of that lag.

    Returns
    -------
    pd.DataFrame
    """
    stat_names = ["count", "mean", "std", "min", "max"]
    # agg() yields one row per statistic; transpose to get one row per lag.
    per_lag = self._data.agg(stat_names).T
    return per_lag.rename(columns={"count": "n_obs"})

to_incremental()

Convert cumulative triangle to incremental (period-over-period) losses.

Returns

Triangle A new Triangle with incremental values.

Source code in reserving/triangle.py
191
192
193
194
195
196
197
198
199
200
201
202
def to_incremental(self) -> "Triangle":
    """
    Convert cumulative triangle to incremental (period-over-period) losses.

    The first development lag is carried over unchanged; every later
    lag becomes the difference from the lag before it.

    Returns
    -------
    Triangle
        A new Triangle with incremental values. The result is built with
        the type of ``self``, so subclasses get an instance of their own
        class back.
    """
    inc = self._data.diff(axis=1)
    # diff() leaves the first column all-NaN; restore the original values.
    inc.iloc[:, 0] = self._data.iloc[:, 0]
    return type(self)(inc)

volume_weighted_factors()

Compute volume-weighted average development factors by lag.

The volume-weighted factor at lag k is

sum(loss[lag k+1]) / sum(loss[lag k])

summed over all accident years with data at both lags.

Returns

pd.Series indexed by development lag (excludes final lag).

Source code in reserving/triangle.py
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
def volume_weighted_factors(self) -> pd.Series:
    """
    Compute volume-weighted average development factors by lag.

    The volume-weighted factor at lag k is:
        sum(loss[lag k+1]) / sum(loss[lag k])
    summed over all accident years with data at both lags.
    The factor is NaN when no accident year has data at both lags or
    when the denominator sum is not strictly positive.

    Returns
    -------
    pd.Series indexed by development lag (excludes final lag).
    """
    lags = list(self._data.columns)
    factors = {}
    for curr, nxt in zip(lags, lags[1:]):
        paired = self._data[[curr, nxt]].dropna()
        denominator = paired[curr].sum()
        # An empty selection sums to 0, so the positivity test also
        # covers the "no overlapping observations" case.
        if denominator > 0:
            factors[curr] = paired[nxt].sum() / denominator
        else:
            factors[curr] = np.nan
    # Explicit dtype keeps the result float even when there is only one
    # lag and therefore no factor at all.
    return pd.Series(factors, name="vw_factor", dtype=float)