1 #ifndef Py_BUILD_CORE_BUILTIN
2 # define Py_BUILD_CORE_MODULE 1
3 #endif
4
5 #include "Python.h"
6 #include "pycore_long.h" // _PyLong_GetOne()
7 #include "structmember.h"
8
9 #include <ctype.h>
10 #include <stddef.h>
11 #include <stdint.h>
12
13 #include "datetime.h"
14
15 #include "clinic/_zoneinfo.c.h"
16 /*[clinic input]
17 module zoneinfo
18 class zoneinfo.ZoneInfo "PyObject *" "PyTypeObject *"
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=d12c73c0eef36df8]*/
21
22
// Forward typedefs so that these struct types can be referred to by pointer
// (e.g. in _tzrule and in StrongCacheNode itself) before the struct bodies,
// which appear further down in this file.
typedef struct TransitionRuleType TransitionRuleType;
typedef struct StrongCacheNode StrongCacheNode;
25
/* One set of offset data for a zone: the UTC offset, the DST offset and the
 * abbreviation.  Filled in by build_ttinfo() and released with
 * xdecref_ttinfo().
 */
typedef struct {
    PyObject *utcoff;     // timedelta built from utcoff_seconds (see load_timedelta)
    PyObject *dstoff;     // timedelta built from the DST offset in seconds
    PyObject *tzname;     // abbreviation object; owned (strong) reference
    long utcoff_seconds;  // the UTC offset as a plain C integer, in seconds
} _ttinfo;
32
/* Rule describing the offsets that apply outside the explicit transition
 * table; a ZoneInfo stores one as `tzrule_after`.  Constructed by
 * build_tzrule() / parse_tz_str() and released by free_tzrule().
 */
typedef struct {
    _ttinfo std;   // ttinfo for standard time
    _ttinfo dst;   // ttinfo for DST; NOTE(review): presumably unused when std_only is set
    int dst_diff;  // NOTE(review): presumably dst minus std UTC offset — confirm in build_tzrule
    TransitionRuleType *start;  // NOTE(review): presumably the rule for the switch into DST
    TransitionRuleType *end;    // NOTE(review): presumably the rule for the switch back to STD
    unsigned char std_only;     // nonzero when the rule has no DST part (checked by load_data)
} _tzrule;
41
/* Instance layout of a ZoneInfo object.  The transition tables and ttinfo
 * arrays are populated by load_data() and freed in zoneinfo_dealloc().
 */
typedef struct {
    PyDateTime_TZInfo base;
    PyObject *key;          // key the zone was created with (Py_None is the from_file default)
    PyObject *file_repr;    // repr of the source file object; set only by from_file()
    PyObject *weakreflist;  // weak reference list, cleared in zoneinfo_dealloc()
    size_t num_transitions; // number of entries in trans_list_utc / trans_ttinfos
    size_t num_ttinfos;     // number of entries in _ttinfos
    int64_t *trans_list_utc;      // transition timestamps, compared against UTC timestamps in fromutc
    int64_t *trans_list_wall[2];  // filled by ts_to_local(); NOTE(review): presumably local-time
                                  // transition lists, one per fold value — confirm
    _ttinfo **trans_ttinfos;  // References to the ttinfo for each transition
    _ttinfo *ttinfo_before;   // ttinfo applied before the first transition
    _tzrule tzrule_after;     // rule applied after the last transition
    _ttinfo *_ttinfos;  // Unique array of ttinfos for ease of deallocation
    unsigned char fixed_offset;  // set by load_data(): nonzero if the zone has one constant offset
    unsigned char source;        // one of SOURCE_NOCACHE, SOURCE_CACHE, SOURCE_FILE
} PyZoneInfo_ZoneInfo;
58
/* Common "base" member embedded first in CalendarRule and DayRule.  It holds
 * a single function pointer taking the rule and an int and returning an
 * int64_t.  NOTE(review): from the name, the int is presumably a year and the
 * result the timestamp of that year's transition — confirm at the callers.
 */
struct TransitionRuleType {
    int64_t (*year_to_timestamp)(TransitionRuleType *, int);
};
62
/* Transition rule variant that embeds TransitionRuleType as its first member.
 * NOTE(review): the field names suggest a "month / week / day-of-week at
 * hour:minute:second" rule (as in POSIX TZ "Mm.w.d" strings) — confirm
 * against parse_transition_rule().
 */
typedef struct {
    TransitionRuleType base;
    uint8_t month;
    uint8_t week;
    uint8_t day;
    int8_t hour;    // signed: NOTE(review): presumably to allow negative transition times
    int8_t minute;
    int8_t second;
} CalendarRule;
72
/* Transition rule variant that embeds TransitionRuleType as its first member
 * and identifies the transition by a day number plus a time of day.
 * NOTE(review): `julian` presumably selects between the two POSIX TZ day
 * numbering schemes ("Jn" vs "n") — confirm against parse_transition_rule().
 */
typedef struct {
    TransitionRuleType base;
    uint8_t julian;
    unsigned int day;
    int8_t hour;
    int8_t minute;
    int8_t second;
} DayRule;
81
/* Node of the strong cache: a doubly linked list (next/prev) whose entries
 * pair a cache key with the zone object stored for it.  The list head lives
 * in zoneinfo_state.ZONEINFO_STRONG_CACHE.
 */
struct StrongCacheNode {
    StrongCacheNode *next;
    StrongCacheNode *prev;
    PyObject *key;
    PyObject *zone;
};
88
/* Per-module state, retrieved through the zoneinfo_get_state*() helpers. */
typedef struct {
    PyTypeObject *ZoneInfoType;  // the base ZoneInfo type (compared against in get_weak_cache)

    // Imports
    PyObject *io_open;              // called as io_open(path, "rb") to open a TZif file
    PyObject *_tzpath_find_tzfile;  // called with a key; returns a path or None
    PyObject *_common_mod;          // object providing load_tzdata() and load_data()

    // Caches
    PyObject *TIMEDELTA_CACHE;      // dict: int seconds -> timedelta (see load_timedelta)
    PyObject *ZONEINFO_WEAK_CACHE;  // weak cache used for the base ZoneInfo type
    StrongCacheNode *ZONEINFO_STRONG_CACHE;  // head of the strong cache linked list

    _ttinfo NO_TTINFO;  // NOTE(review): presumably a placeholder ttinfo — usage is not visible here
} zoneinfo_state;
104
// Constants

// NOTE(review): 719163 is presumably the proleptic Gregorian ordinal of
// 1970-01-01 (the Unix epoch) — confirm where ymd_to_ord() results are used.
static const int EPOCHORDINAL = 719163;

// Days per month for a non-leap year, indexed by month number 1..12.
// Slot 0 is a -1 placeholder so the month number can be used as the index.
static int DAYS_IN_MONTH[] = {
    -1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
};

// Running totals of DAYS_IN_MONTH: number of days in a non-leap year that
// precede the first day of each month (index 1..12, slot 0 is a placeholder).
static int DAYS_BEFORE_MONTH[] = {
    -1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334,
};

// Values stored in PyZoneInfo_ZoneInfo.source, recording how an instance was
// created: via no_cache(), via the cached constructor, or via from_file().
static const int SOURCE_NOCACHE = 0;
static const int SOURCE_CACHE = 1;
static const int SOURCE_FILE = 2;

// NOTE(review): presumably the maximum number of nodes kept in the strong
// cache list — confirm in update_strong_cache().
static const size_t ZONEINFO_STRONG_CACHE_MAX_SIZE = 8;
120
// Forward declarations
//
// All of these functions are `static`; they are declared up front so that
// the definitions in this file can call each other regardless of order.

// Loading TZif data into a ZoneInfo and deriving its lookup tables.
static int
load_data(zoneinfo_state *state, PyZoneInfo_ZoneInfo *self,
          PyObject *file_obj);
static void
utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
                 unsigned char *isdsts, size_t num_transitions,
                 size_t num_ttinfos);
static int
ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
            int64_t *trans_local[2], size_t num_ttinfos,
            size_t num_transitions);

// Parsing of the TZ string that load_data() turns into `tzrule_after`.
static int
parse_tz_str(zoneinfo_state *state, PyObject *tz_str_obj, _tzrule *out);

// Component parsers; NOTE(review): the Py_ssize_t result is presumably the
// number of characters consumed (or negative on error) — confirm at the
// definitions.
static Py_ssize_t
parse_abbr(const char *const p, PyObject **abbr);
static Py_ssize_t
parse_tz_delta(const char *const p, long *total_seconds);
static Py_ssize_t
parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
                      int8_t *second);
static Py_ssize_t
parse_transition_rule(const char *const p, TransitionRuleType **out);

// Looking up the ttinfo that a _tzrule yields for a timestamp.
static _ttinfo *
find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year);
static _ttinfo *
find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
                           unsigned char *fold);

// _ttinfo construction, release and comparison.
static int
build_ttinfo(zoneinfo_state *state, long utcoffset, long dstoffset,
             PyObject *tzname, _ttinfo *out);
static void
xdecref_ttinfo(_ttinfo *ttinfo);
static int
ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1);

// _tzrule construction and release.
static int
build_tzrule(zoneinfo_state *state, PyObject *std_abbr, PyObject *dst_abbr,
             long std_offset, long dst_offset, TransitionRuleType *start,
             TransitionRuleType *end, _tzrule *out);
static void
free_tzrule(_tzrule *tzrule);

// Cached timedelta construction (returns a new reference).
static PyObject *
load_timedelta(zoneinfo_state *state, long seconds);

// Datetime -> timestamp conversion and ttinfo lookup for a datetime.
static int
get_local_timestamp(PyObject *dt, int64_t *local_ts);
static _ttinfo *
find_ttinfo(zoneinfo_state *state, PyZoneInfo_ZoneInfo *self, PyObject *dt);

// Calendar helpers.
static int
ymd_to_ord(int y, int m, int d);
static int
is_leap_year(int year);

// Search over a sorted int64_t array of `size` elements.
static size_t
_bisect(const int64_t value, const int64_t *arr, size_t size);

// Strong cache maintenance.
static int
eject_from_strong_cache(zoneinfo_state *state, const PyTypeObject *const type,
                        PyObject *key);
static void
clear_strong_cache(zoneinfo_state *state, const PyTypeObject *const type);
static void
update_strong_cache(zoneinfo_state *state, const PyTypeObject *const type,
                    PyObject *key, PyObject *zone);
static PyObject *
zone_from_strong_cache(zoneinfo_state *state, const PyTypeObject *const type,
                       PyObject *const key);
195
196 static inline zoneinfo_state *
197 zoneinfo_get_state(PyObject *mod)
198 {
199 zoneinfo_state *state = (zoneinfo_state *)PyModule_GetState(mod);
200 assert(state != NULL);
201 return state;
202 }
203
204 static inline zoneinfo_state *
205 zoneinfo_get_state_by_cls(PyTypeObject *cls)
206 {
207 zoneinfo_state *state = (zoneinfo_state *)_PyType_GetModuleState(cls);
208 assert(state != NULL);
209 return state;
210 }
211
212 static struct PyModuleDef zoneinfomodule;
213
214 static inline zoneinfo_state *
215 zoneinfo_get_state_by_self(PyTypeObject *self)
216 {
217 PyObject *mod = PyType_GetModuleByDef(self, &zoneinfomodule);
218 assert(mod != NULL);
219 return zoneinfo_get_state(mod);
220 }
221
222 static PyObject *
223 zoneinfo_new_instance(zoneinfo_state *state, PyTypeObject *type, PyObject *key)
224 {
225 PyObject *file_obj = NULL;
226 PyObject *file_path = NULL;
227
228 file_path = PyObject_CallFunctionObjArgs(state->_tzpath_find_tzfile,
229 key, NULL);
230 if (file_path == NULL) {
231 return NULL;
232 }
233 else if (file_path == Py_None) {
234 PyObject *meth = state->_common_mod;
235 file_obj = PyObject_CallMethod(meth, "load_tzdata", "O", key);
236 if (file_obj == NULL) {
237 Py_DECREF(file_path);
238 return NULL;
239 }
240 }
241
242 PyObject *self = (PyObject *)(type->tp_alloc(type, 0));
243 if (self == NULL) {
244 goto error;
245 }
246
247 if (file_obj == NULL) {
248 PyObject *func = state->io_open;
249 file_obj = PyObject_CallFunction(func, "Os", file_path, "rb");
250 if (file_obj == NULL) {
251 goto error;
252 }
253 }
254
255 if (load_data(state, (PyZoneInfo_ZoneInfo *)self, file_obj)) {
256 goto error;
257 }
258
259 PyObject *rv = PyObject_CallMethod(file_obj, "close", NULL);
260 Py_SETREF(file_obj, NULL);
261 if (rv == NULL) {
262 goto error;
263 }
264 Py_DECREF(rv);
265
266 ((PyZoneInfo_ZoneInfo *)self)->key = Py_NewRef(key);
267
268 goto cleanup;
269 error:
270 Py_CLEAR(self);
271 cleanup:
272 if (file_obj != NULL) {
273 PyObject *exc = PyErr_GetRaisedException();
274 PyObject *tmp = PyObject_CallMethod(file_obj, "close", NULL);
275 _PyErr_ChainExceptions1(exc);
276 if (tmp == NULL) {
277 Py_CLEAR(self);
278 }
279 Py_XDECREF(tmp);
280 Py_DECREF(file_obj);
281 }
282 Py_DECREF(file_path);
283 return self;
284 }
285
286 static PyObject *
287 get_weak_cache(zoneinfo_state *state, PyTypeObject *type)
288 {
289 if (type == state->ZoneInfoType) {
290 return state->ZONEINFO_WEAK_CACHE;
291 }
292 else {
293 PyObject *cache =
294 PyObject_GetAttrString((PyObject *)type, "_weak_cache");
295 // We are assuming that the type lives at least as long as the function
296 // that calls get_weak_cache, and that it holds a reference to the
297 // cache, so we'll return a "borrowed reference".
298 Py_XDECREF(cache);
299 return cache;
300 }
301 }
302
303 static PyObject *
304 zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw)
305 {
306 PyObject *key = NULL;
307 static char *kwlist[] = {"key", NULL};
308 if (PyArg_ParseTupleAndKeywords(args, kw, "O", kwlist, &key) == 0) {
309 return NULL;
310 }
311
312 zoneinfo_state *state = zoneinfo_get_state_by_self(type);
313 PyObject *instance = zone_from_strong_cache(state, type, key);
314 if (instance != NULL || PyErr_Occurred()) {
315 return instance;
316 }
317
318 PyObject *weak_cache = get_weak_cache(state, type);
319 instance = PyObject_CallMethod(weak_cache, "get", "O", key, Py_None);
320 if (instance == NULL) {
321 return NULL;
322 }
323
324 if (instance == Py_None) {
325 Py_DECREF(instance);
326 PyObject *tmp = zoneinfo_new_instance(state, type, key);
327 if (tmp == NULL) {
328 return NULL;
329 }
330
331 instance =
332 PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp);
333 Py_DECREF(tmp);
334 if (instance == NULL) {
335 return NULL;
336 }
337 ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE;
338 }
339
340 update_strong_cache(state, type, key, instance);
341 return instance;
342 }
343
344 static int
345 zoneinfo_traverse(PyZoneInfo_ZoneInfo *self, visitproc visit, void *arg)
346 {
347 Py_VISIT(Py_TYPE(self));
348 Py_VISIT(self->key);
349 return 0;
350 }
351
/* Drop the instance's references to `key` and `file_repr`, leaving both
 * fields NULL.  Also called from zoneinfo_dealloc().  Always returns 0.
 */
static int
zoneinfo_clear(PyZoneInfo_ZoneInfo *self)
{
    Py_CLEAR(self->key);
    Py_CLEAR(self->file_repr);
    return 0;
}
359
360 static void
361 zoneinfo_dealloc(PyObject *obj_self)
362 {
363 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
364 PyTypeObject *tp = Py_TYPE(self);
365 PyObject_GC_UnTrack(self);
366
367 if (self->weakreflist != NULL) {
368 PyObject_ClearWeakRefs(obj_self);
369 }
370
371 if (self->trans_list_utc != NULL) {
372 PyMem_Free(self->trans_list_utc);
373 }
374
375 for (size_t i = 0; i < 2; i++) {
376 if (self->trans_list_wall[i] != NULL) {
377 PyMem_Free(self->trans_list_wall[i]);
378 }
379 }
380
381 if (self->_ttinfos != NULL) {
382 for (size_t i = 0; i < self->num_ttinfos; ++i) {
383 xdecref_ttinfo(&(self->_ttinfos[i]));
384 }
385 PyMem_Free(self->_ttinfos);
386 }
387
388 if (self->trans_ttinfos != NULL) {
389 PyMem_Free(self->trans_ttinfos);
390 }
391
392 free_tzrule(&(self->tzrule_after));
393
394 zoneinfo_clear(self);
395 tp->tp_free(obj_self);
396 Py_DECREF(tp);
397 }
398
399 /*[clinic input]
400 @classmethod
401 zoneinfo.ZoneInfo.from_file
402
403 cls: defining_class
404 file_obj: object
405 /
406 key: object = None
407
408 Create a ZoneInfo file from a file object.
409 [clinic start generated code]*/
410
411 static PyObject *
412 zoneinfo_ZoneInfo_from_file_impl(PyTypeObject *type, PyTypeObject *cls,
413 PyObject *file_obj, PyObject *key)
414 /*[clinic end generated code: output=77887d1d56a48324 input=d26111f29eed6863]*/
415 {
416 PyObject *file_repr = NULL;
417 PyZoneInfo_ZoneInfo *self = NULL;
418
419 PyObject *obj_self = (PyObject *)(type->tp_alloc(type, 0));
420 self = (PyZoneInfo_ZoneInfo *)obj_self;
421 if (self == NULL) {
422 return NULL;
423 }
424
425 file_repr = PyUnicode_FromFormat("%R", file_obj);
426 if (file_repr == NULL) {
427 goto error;
428 }
429
430 zoneinfo_state *state = zoneinfo_get_state_by_cls(cls);
431 if (load_data(state, self, file_obj)) {
432 goto error;
433 }
434
435 self->source = SOURCE_FILE;
436 self->file_repr = file_repr;
437 self->key = Py_NewRef(key);
438 return obj_self;
439
440 error:
441 Py_XDECREF(file_repr);
442 Py_XDECREF(self);
443 return NULL;
444 }
445
446 /*[clinic input]
447 @classmethod
448 zoneinfo.ZoneInfo.no_cache
449
450 cls: defining_class
451 /
452 key: object
453
454 Get a new instance of ZoneInfo, bypassing the cache.
455 [clinic start generated code]*/
456
457 static PyObject *
458 zoneinfo_ZoneInfo_no_cache_impl(PyTypeObject *type, PyTypeObject *cls,
459 PyObject *key)
460 /*[clinic end generated code: output=b0b09b3344c171b7 input=0238f3d56b1ea3f1]*/
461 {
462 zoneinfo_state *state = zoneinfo_get_state_by_cls(cls);
463 PyObject *out = zoneinfo_new_instance(state, type, key);
464 if (out != NULL) {
465 ((PyZoneInfo_ZoneInfo *)out)->source = SOURCE_NOCACHE;
466 }
467
468 return out;
469 }
470
471 /*[clinic input]
472 @classmethod
473 zoneinfo.ZoneInfo.clear_cache
474
475 cls: defining_class
476 /
477 *
478 only_keys: object = None
479
480 Clear the ZoneInfo cache.
481 [clinic start generated code]*/
482
483 static PyObject *
484 zoneinfo_ZoneInfo_clear_cache_impl(PyTypeObject *type, PyTypeObject *cls,
485 PyObject *only_keys)
486 /*[clinic end generated code: output=114d9b7c8a22e660 input=e32ca3bb396788ba]*/
487 {
488 zoneinfo_state *state = zoneinfo_get_state_by_cls(cls);
489 PyObject *weak_cache = get_weak_cache(state, type);
490
491 if (only_keys == NULL || only_keys == Py_None) {
492 PyObject *rv = PyObject_CallMethod(weak_cache, "clear", NULL);
493 if (rv != NULL) {
494 Py_DECREF(rv);
495 }
496
497 clear_strong_cache(state, type);
498 }
499 else {
500 PyObject *item = NULL;
501 PyObject *pop = PyUnicode_FromString("pop");
502 if (pop == NULL) {
503 return NULL;
504 }
505
506 PyObject *iter = PyObject_GetIter(only_keys);
507 if (iter == NULL) {
508 Py_DECREF(pop);
509 return NULL;
510 }
511
512 while ((item = PyIter_Next(iter))) {
513 // Remove from strong cache
514 if (eject_from_strong_cache(state, type, item) < 0) {
515 Py_DECREF(item);
516 break;
517 }
518
519 // Remove from weak cache
520 PyObject *tmp = PyObject_CallMethodObjArgs(weak_cache, pop, item,
521 Py_None, NULL);
522
523 Py_DECREF(item);
524 if (tmp == NULL) {
525 break;
526 }
527 Py_DECREF(tmp);
528 }
529 Py_DECREF(iter);
530 Py_DECREF(pop);
531 }
532
533 if (PyErr_Occurred()) {
534 return NULL;
535 }
536
537 Py_RETURN_NONE;
538 }
539
540 /*[clinic input]
541 zoneinfo.ZoneInfo.utcoffset
542
543 cls: defining_class
544 dt: object
545 /
546
547 Retrieve a timedelta representing the UTC offset in a zone at the given datetime.
548 [clinic start generated code]*/
549
550 static PyObject *
551 zoneinfo_ZoneInfo_utcoffset_impl(PyObject *self, PyTypeObject *cls,
552 PyObject *dt)
553 /*[clinic end generated code: output=b71016c319ba1f91 input=2bb6c5364938f19c]*/
554 {
555 zoneinfo_state *state = zoneinfo_get_state_by_cls(cls);
556 _ttinfo *tti = find_ttinfo(state, (PyZoneInfo_ZoneInfo *)self, dt);
557 if (tti == NULL) {
558 return NULL;
559 }
560 return Py_NewRef(tti->utcoff);
561 }
562
563 /*[clinic input]
564 zoneinfo.ZoneInfo.dst
565
566 cls: defining_class
567 dt: object
568 /
569
570 Retrieve a timedelta representing the amount of DST applied in a zone at the given datetime.
571 [clinic start generated code]*/
572
573 static PyObject *
574 zoneinfo_ZoneInfo_dst_impl(PyObject *self, PyTypeObject *cls, PyObject *dt)
575 /*[clinic end generated code: output=cb6168d7723a6ae6 input=2167fb80cf8645c6]*/
576 {
577 zoneinfo_state *state = zoneinfo_get_state_by_cls(cls);
578 _ttinfo *tti = find_ttinfo(state, (PyZoneInfo_ZoneInfo *)self, dt);
579 if (tti == NULL) {
580 return NULL;
581 }
582 return Py_NewRef(tti->dstoff);
583 }
584
585 /*[clinic input]
586 zoneinfo.ZoneInfo.tzname
587
588 cls: defining_class
589 dt: object
590 /
591
592 Retrieve a string containing the abbreviation for the time zone that applies in a zone at a given datetime.
593 [clinic start generated code]*/
594
595 static PyObject *
596 zoneinfo_ZoneInfo_tzname_impl(PyObject *self, PyTypeObject *cls,
597 PyObject *dt)
598 /*[clinic end generated code: output=3b6ae6c3053ea75a input=15a59a4f92ed1f1f]*/
599 {
600 zoneinfo_state *state = zoneinfo_get_state_by_cls(cls);
601 _ttinfo *tti = find_ttinfo(state, (PyZoneInfo_ZoneInfo *)self, dt);
602 if (tti == NULL) {
603 return NULL;
604 }
605 return Py_NewRef(tti->tzname);
606 }
607
608 #define GET_DT_TZINFO PyDateTime_DATE_GET_TZINFO
609
610 static PyObject *
611 zoneinfo_fromutc(PyObject *obj_self, PyObject *dt)
612 {
613 if (!PyDateTime_Check(dt)) {
614 PyErr_SetString(PyExc_TypeError,
615 "fromutc: argument must be a datetime");
616 return NULL;
617 }
618 if (GET_DT_TZINFO(dt) != obj_self) {
619 PyErr_SetString(PyExc_ValueError,
620 "fromutc: dt.tzinfo "
621 "is not self");
622 return NULL;
623 }
624
625 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
626
627 int64_t timestamp;
628 if (get_local_timestamp(dt, ×tamp)) {
629 return NULL;
630 }
631 size_t num_trans = self->num_transitions;
632
633 _ttinfo *tti = NULL;
634 unsigned char fold = 0;
635
636 if (num_trans >= 1 && timestamp < self->trans_list_utc[0]) {
637 tti = self->ttinfo_before;
638 }
639 else if (num_trans == 0 ||
640 timestamp > self->trans_list_utc[num_trans - 1]) {
641 tti = find_tzrule_ttinfo_fromutc(&(self->tzrule_after), timestamp,
642 PyDateTime_GET_YEAR(dt), &fold);
643
644 // Immediately after the last manual transition, the fold/gap is
645 // between self->trans_ttinfos[num_transitions - 1] and whatever
646 // ttinfo applies immediately after the last transition, not between
647 // the STD and DST rules in the tzrule_after, so we may need to
648 // adjust the fold value.
649 if (num_trans) {
650 _ttinfo *tti_prev = NULL;
651 if (num_trans == 1) {
652 tti_prev = self->ttinfo_before;
653 }
654 else {
655 tti_prev = self->trans_ttinfos[num_trans - 2];
656 }
657 int64_t diff = tti_prev->utcoff_seconds - tti->utcoff_seconds;
658 if (diff > 0 &&
659 timestamp < (self->trans_list_utc[num_trans - 1] + diff)) {
660 fold = 1;
661 }
662 }
663 }
664 else {
665 size_t idx = _bisect(timestamp, self->trans_list_utc, num_trans);
666 _ttinfo *tti_prev = NULL;
667
668 if (idx >= 2) {
669 tti_prev = self->trans_ttinfos[idx - 2];
670 tti = self->trans_ttinfos[idx - 1];
671 }
672 else {
673 tti_prev = self->ttinfo_before;
674 tti = self->trans_ttinfos[0];
675 }
676
677 // Detect fold
678 int64_t shift =
679 (int64_t)(tti_prev->utcoff_seconds - tti->utcoff_seconds);
680 if (shift > (timestamp - self->trans_list_utc[idx - 1])) {
681 fold = 1;
682 }
683 }
684
685 PyObject *tmp = PyNumber_Add(dt, tti->utcoff);
686 if (tmp == NULL) {
687 return NULL;
688 }
689
690 if (fold) {
691 if (PyDateTime_CheckExact(tmp)) {
692 ((PyDateTime_DateTime *)tmp)->fold = 1;
693 dt = tmp;
694 }
695 else {
696 PyObject *replace = PyObject_GetAttrString(tmp, "replace");
697 Py_DECREF(tmp);
698 if (replace == NULL) {
699 return NULL;
700 }
701 PyObject *args = PyTuple_New(0);
702 if (args == NULL) {
703 Py_DECREF(replace);
704 return NULL;
705 }
706 PyObject *kwargs = PyDict_New();
707 if (kwargs == NULL) {
708 Py_DECREF(replace);
709 Py_DECREF(args);
710 return NULL;
711 }
712
713 dt = NULL;
714 if (!PyDict_SetItemString(kwargs, "fold", _PyLong_GetOne())) {
715 dt = PyObject_Call(replace, args, kwargs);
716 }
717
718 Py_DECREF(args);
719 Py_DECREF(kwargs);
720 Py_DECREF(replace);
721
722 if (dt == NULL) {
723 return NULL;
724 }
725 }
726 }
727 else {
728 dt = tmp;
729 }
730 return dt;
731 }
732
733 static PyObject *
734 zoneinfo_repr(PyZoneInfo_ZoneInfo *self)
735 {
736 PyObject *rv = NULL;
737 const char *type_name = Py_TYPE((PyObject *)self)->tp_name;
738 if (!(self->key == Py_None)) {
739 rv = PyUnicode_FromFormat("%s(key=%R)", type_name, self->key);
740 }
741 else {
742 assert(PyUnicode_Check(self->file_repr));
743 rv = PyUnicode_FromFormat("%s.from_file(%U)", type_name,
744 self->file_repr);
745 }
746
747 return rv;
748 }
749
750 static PyObject *
751 zoneinfo_str(PyZoneInfo_ZoneInfo *self)
752 {
753 if (!(self->key == Py_None)) {
754 return Py_NewRef(self->key);
755 }
756 else {
757 return zoneinfo_repr(self);
758 }
759 }
760
761 /* Pickles the ZoneInfo object by key and source.
762 *
763 * ZoneInfo objects are pickled by reference to the TZif file that they came
764 * from, which means that the exact transitions may be different or the file
765 * may not un-pickle if the data has changed on disk in the interim.
766 *
767 * It is necessary to include a bit indicating whether or not the object
768 * was constructed from the cache, because from-cache objects will hit the
769 * unpickling process's cache, whereas no-cache objects will bypass it.
770 *
771 * Objects constructed from ZoneInfo.from_file cannot be pickled.
772 */
773 static PyObject *
774 zoneinfo_reduce(PyObject *obj_self, PyObject *unused)
775 {
776 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
777 if (self->source == SOURCE_FILE) {
778 // Objects constructed from files cannot be pickled.
779 PyObject *pickle_error =
780 _PyImport_GetModuleAttrString("pickle", "PicklingError");
781 if (pickle_error == NULL) {
782 return NULL;
783 }
784
785 PyErr_Format(pickle_error,
786 "Cannot pickle a ZoneInfo file from a file stream.");
787 Py_DECREF(pickle_error);
788 return NULL;
789 }
790
791 unsigned char from_cache = self->source == SOURCE_CACHE ? 1 : 0;
792 PyObject *constructor = PyObject_GetAttrString(obj_self, "_unpickle");
793
794 if (constructor == NULL) {
795 return NULL;
796 }
797
798 PyObject *rv = Py_BuildValue("O(OB)", constructor, self->key, from_cache);
799 Py_DECREF(constructor);
800 return rv;
801 }
802
803 /*[clinic input]
804 @classmethod
805 zoneinfo.ZoneInfo._unpickle
806
807 cls: defining_class
808 key: object
809 from_cache: unsigned_char(bitwise=True)
810 /
811
812 Private method used in unpickling.
813 [clinic start generated code]*/
814
815 static PyObject *
816 zoneinfo_ZoneInfo__unpickle_impl(PyTypeObject *type, PyTypeObject *cls,
817 PyObject *key, unsigned char from_cache)
818 /*[clinic end generated code: output=556712fc709deecb input=6ac8c73eed3de316]*/
819 {
820 if (from_cache) {
821 PyObject *val_args = Py_BuildValue("(O)", key);
822 if (val_args == NULL) {
823 return NULL;
824 }
825
826 PyObject *rv = zoneinfo_new(type, val_args, NULL);
827
828 Py_DECREF(val_args);
829 return rv;
830 }
831 else {
832 zoneinfo_state *state = zoneinfo_get_state_by_cls(cls);
833 return zoneinfo_new_instance(state, type, key);
834 }
835 }
836
837 /* It is relatively expensive to construct new timedelta objects, and in most
838 * cases we're looking at a relatively small number of timedeltas, such as
839 * integer number of hours, etc. We will keep a cache so that we construct
840 * a minimal number of these.
841 *
842 * Possibly this should be replaced with an LRU cache so that it's not possible
843 * for the memory usage to explode from this, but in order for this to be a
844 * serious problem, one would need to deliberately craft a malicious time zone
845 * file with many distinct offsets. As of tzdb 2019c, loading every single zone
846 * fills the cache with ~450 timedeltas for a total size of ~12kB.
847 *
848 * This returns a new reference to the timedelta.
849 */
850 static PyObject *
851 load_timedelta(zoneinfo_state *state, long seconds)
852 {
853 PyObject *rv;
854 PyObject *pyoffset = PyLong_FromLong(seconds);
855 if (pyoffset == NULL) {
856 return NULL;
857 }
858 rv = PyDict_GetItemWithError(state->TIMEDELTA_CACHE, pyoffset);
859 if (rv == NULL) {
860 if (PyErr_Occurred()) {
861 goto error;
862 }
863 PyObject *tmp = PyDateTimeAPI->Delta_FromDelta(
864 0, seconds, 0, 1, PyDateTimeAPI->DeltaType);
865
866 if (tmp == NULL) {
867 goto error;
868 }
869
870 rv = PyDict_SetDefault(state->TIMEDELTA_CACHE, pyoffset, tmp);
871 Py_DECREF(tmp);
872 }
873
874 Py_XINCREF(rv);
875 Py_DECREF(pyoffset);
876 return rv;
877 error:
878 Py_DECREF(pyoffset);
879 return NULL;
880 }
881
882 /* Constructor for _ttinfo object - this starts by initializing the _ttinfo
883 * to { NULL, NULL, NULL }, so that Py_XDECREF will work on partially
884 * initialized _ttinfo objects.
885 */
886 static int
887 build_ttinfo(zoneinfo_state *state, long utcoffset, long dstoffset,
888 PyObject *tzname, _ttinfo *out)
889 {
890 out->utcoff = NULL;
891 out->dstoff = NULL;
892 out->tzname = NULL;
893
894 out->utcoff_seconds = utcoffset;
895 out->utcoff = load_timedelta(state, utcoffset);
896 if (out->utcoff == NULL) {
897 return -1;
898 }
899
900 out->dstoff = load_timedelta(state, dstoffset);
901 if (out->dstoff == NULL) {
902 return -1;
903 }
904
905 out->tzname = Py_NewRef(tzname);
906
907 return 0;
908 }
909
910 /* Decrease reference count on any non-NULL members of a _ttinfo */
911 static void
912 xdecref_ttinfo(_ttinfo *ttinfo)
913 {
914 if (ttinfo != NULL) {
915 Py_XDECREF(ttinfo->utcoff);
916 Py_XDECREF(ttinfo->dstoff);
917 Py_XDECREF(ttinfo->tzname);
918 }
919 }
920
921 /* Equality function for _ttinfo. */
922 static int
923 ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1)
924 {
925 int rv;
926 if ((rv = PyObject_RichCompareBool(tti0->utcoff, tti1->utcoff, Py_EQ)) <
927 1) {
928 goto end;
929 }
930
931 if ((rv = PyObject_RichCompareBool(tti0->dstoff, tti1->dstoff, Py_EQ)) <
932 1) {
933 goto end;
934 }
935
936 if ((rv = PyObject_RichCompareBool(tti0->tzname, tti1->tzname, Py_EQ)) <
937 1) {
938 goto end;
939 }
940 end:
941 return rv;
942 }
943
944 /* Given a file-like object, this populates a ZoneInfo object
945 *
946 * The current version calls into a Python function to read the data from
947 * file into Python objects, and this translates those Python objects into
948 * C values and calculates derived values (e.g. dstoff) in C.
949 *
950 * This returns 0 on success and -1 on failure.
951 *
952 * The function will never return while `self` is partially initialized —
953 * the object only needs to be freed / deallocated if this succeeds.
954 */
955 static int
956 load_data(zoneinfo_state *state, PyZoneInfo_ZoneInfo *self, PyObject *file_obj)
957 {
958 PyObject *data_tuple = NULL;
959
960 long *utcoff = NULL;
961 long *dstoff = NULL;
962 size_t *trans_idx = NULL;
963 unsigned char *isdst = NULL;
964
965 self->trans_list_utc = NULL;
966 self->trans_list_wall[0] = NULL;
967 self->trans_list_wall[1] = NULL;
968 self->trans_ttinfos = NULL;
969 self->_ttinfos = NULL;
970 self->file_repr = NULL;
971
972 size_t ttinfos_allocated = 0;
973
974 data_tuple = PyObject_CallMethod(state->_common_mod, "load_data", "O",
975 file_obj);
976
977 if (data_tuple == NULL) {
978 goto error;
979 }
980
981 if (!PyTuple_CheckExact(data_tuple)) {
982 PyErr_Format(PyExc_TypeError, "Invalid data result type: %r",
983 data_tuple);
984 goto error;
985 }
986
987 // Unpack the data tuple
988 PyObject *trans_idx_list = PyTuple_GetItem(data_tuple, 0);
989 if (trans_idx_list == NULL) {
990 goto error;
991 }
992
993 PyObject *trans_utc = PyTuple_GetItem(data_tuple, 1);
994 if (trans_utc == NULL) {
995 goto error;
996 }
997
998 PyObject *utcoff_list = PyTuple_GetItem(data_tuple, 2);
999 if (utcoff_list == NULL) {
1000 goto error;
1001 }
1002
1003 PyObject *isdst_list = PyTuple_GetItem(data_tuple, 3);
1004 if (isdst_list == NULL) {
1005 goto error;
1006 }
1007
1008 PyObject *abbr = PyTuple_GetItem(data_tuple, 4);
1009 if (abbr == NULL) {
1010 goto error;
1011 }
1012
1013 PyObject *tz_str = PyTuple_GetItem(data_tuple, 5);
1014 if (tz_str == NULL) {
1015 goto error;
1016 }
1017
1018 // Load the relevant sizes
1019 Py_ssize_t num_transitions = PyTuple_Size(trans_utc);
1020 if (num_transitions < 0) {
1021 goto error;
1022 }
1023
1024 Py_ssize_t num_ttinfos = PyTuple_Size(utcoff_list);
1025 if (num_ttinfos < 0) {
1026 goto error;
1027 }
1028
1029 self->num_transitions = (size_t)num_transitions;
1030 self->num_ttinfos = (size_t)num_ttinfos;
1031
1032 // Load the transition indices and list
1033 self->trans_list_utc =
1034 PyMem_Malloc(self->num_transitions * sizeof(int64_t));
1035 if (self->trans_list_utc == NULL) {
1036 goto error;
1037 }
1038 trans_idx = PyMem_Malloc(self->num_transitions * sizeof(Py_ssize_t));
1039 if (trans_idx == NULL) {
1040 goto error;
1041 }
1042
1043 for (size_t i = 0; i < self->num_transitions; ++i) {
1044 PyObject *num = PyTuple_GetItem(trans_utc, i);
1045 if (num == NULL) {
1046 goto error;
1047 }
1048 self->trans_list_utc[i] = PyLong_AsLongLong(num);
1049 if (self->trans_list_utc[i] == -1 && PyErr_Occurred()) {
1050 goto error;
1051 }
1052
1053 num = PyTuple_GetItem(trans_idx_list, i);
1054 if (num == NULL) {
1055 goto error;
1056 }
1057
1058 Py_ssize_t cur_trans_idx = PyLong_AsSsize_t(num);
1059 if (cur_trans_idx == -1) {
1060 goto error;
1061 }
1062
1063 trans_idx[i] = (size_t)cur_trans_idx;
1064 if (trans_idx[i] > self->num_ttinfos) {
1065 PyErr_Format(
1066 PyExc_ValueError,
1067 "Invalid transition index found while reading TZif: %zd",
1068 cur_trans_idx);
1069
1070 goto error;
1071 }
1072 }
1073
1074 // Load UTC offsets and isdst (size num_ttinfos)
1075 utcoff = PyMem_Malloc(self->num_ttinfos * sizeof(long));
1076 isdst = PyMem_Malloc(self->num_ttinfos * sizeof(unsigned char));
1077
1078 if (utcoff == NULL || isdst == NULL) {
1079 goto error;
1080 }
1081 for (size_t i = 0; i < self->num_ttinfos; ++i) {
1082 PyObject *num = PyTuple_GetItem(utcoff_list, i);
1083 if (num == NULL) {
1084 goto error;
1085 }
1086
1087 utcoff[i] = PyLong_AsLong(num);
1088 if (utcoff[i] == -1 && PyErr_Occurred()) {
1089 goto error;
1090 }
1091
1092 num = PyTuple_GetItem(isdst_list, i);
1093 if (num == NULL) {
1094 goto error;
1095 }
1096
1097 int isdst_with_error = PyObject_IsTrue(num);
1098 if (isdst_with_error == -1) {
1099 goto error;
1100 }
1101 else {
1102 isdst[i] = (unsigned char)isdst_with_error;
1103 }
1104 }
1105
1106 dstoff = PyMem_Calloc(self->num_ttinfos, sizeof(long));
1107 if (dstoff == NULL) {
1108 goto error;
1109 }
1110
1111 // Derive dstoff and trans_list_wall from the information we've loaded
1112 utcoff_to_dstoff(trans_idx, utcoff, dstoff, isdst, self->num_transitions,
1113 self->num_ttinfos);
1114
1115 if (ts_to_local(trans_idx, self->trans_list_utc, utcoff,
1116 self->trans_list_wall, self->num_ttinfos,
1117 self->num_transitions)) {
1118 goto error;
1119 }
1120
1121 // Build _ttinfo objects from utcoff, dstoff and abbr
1122 self->_ttinfos = PyMem_Malloc(self->num_ttinfos * sizeof(_ttinfo));
1123 if (self->_ttinfos == NULL) {
1124 goto error;
1125 }
1126 for (size_t i = 0; i < self->num_ttinfos; ++i) {
1127 PyObject *tzname = PyTuple_GetItem(abbr, i);
1128 if (tzname == NULL) {
1129 goto error;
1130 }
1131
1132 ttinfos_allocated++;
1133 int rc = build_ttinfo(state, utcoff[i], dstoff[i], tzname,
1134 &(self->_ttinfos[i]));
1135 if (rc) {
1136 goto error;
1137 }
1138 }
1139
1140 // Build our mapping from transition to the ttinfo that applies
1141 self->trans_ttinfos =
1142 PyMem_Calloc(self->num_transitions, sizeof(_ttinfo *));
1143 if (self->trans_ttinfos == NULL) {
1144 goto error;
1145 }
1146 for (size_t i = 0; i < self->num_transitions; ++i) {
1147 size_t ttinfo_idx = trans_idx[i];
1148 assert(ttinfo_idx < self->num_ttinfos);
1149 self->trans_ttinfos[i] = &(self->_ttinfos[ttinfo_idx]);
1150 }
1151
1152 // Set ttinfo_before to the first non-DST transition
1153 for (size_t i = 0; i < self->num_ttinfos; ++i) {
1154 if (!isdst[i]) {
1155 self->ttinfo_before = &(self->_ttinfos[i]);
1156 break;
1157 }
1158 }
1159
1160 // If there are only DST ttinfos, pick the first one, if there are no
1161 // ttinfos at all, set ttinfo_before to NULL
1162 if (self->ttinfo_before == NULL && self->num_ttinfos > 0) {
1163 self->ttinfo_before = &(self->_ttinfos[0]);
1164 }
1165
1166 if (tz_str != Py_None && PyObject_IsTrue(tz_str)) {
1167 if (parse_tz_str(state, tz_str, &(self->tzrule_after))) {
1168 goto error;
1169 }
1170 }
1171 else {
1172 if (!self->num_ttinfos) {
1173 PyErr_Format(PyExc_ValueError, "No time zone information found.");
1174 goto error;
1175 }
1176
1177 size_t idx;
1178 if (!self->num_transitions) {
1179 idx = self->num_ttinfos - 1;
1180 }
1181 else {
1182 idx = trans_idx[self->num_transitions - 1];
1183 }
1184
1185 _ttinfo *tti = &(self->_ttinfos[idx]);
1186 build_tzrule(state, tti->tzname, NULL, tti->utcoff_seconds, 0, NULL,
1187 NULL, &(self->tzrule_after));
1188
1189 // We've abused the build_tzrule constructor to construct an STD-only
1190 // rule mimicking whatever ttinfo we've picked up, but it's possible
1191 // that the one we've picked up is a DST zone, so we need to make sure
1192 // that the dstoff is set correctly in that case.
1193 if (PyObject_IsTrue(tti->dstoff)) {
1194 _ttinfo *tti_after = &(self->tzrule_after.std);
1195 Py_SETREF(tti_after->dstoff, Py_NewRef(tti->dstoff));
1196 }
1197 }
1198
1199 // Determine if this is a "fixed offset" zone, meaning that the output of
1200 // the utcoffset, dst and tzname functions does not depend on the specific
1201 // datetime passed.
1202 //
1203 // We make three simplifying assumptions here:
1204 //
1205 // 1. If tzrule_after is not std_only, it has transitions that might occur
1206 // (it is possible to construct TZ strings that specify STD and DST but
1207 // no transitions ever occur, such as AAA0BBB,0/0,J365/25).
1208 // 2. If self->_ttinfos contains more than one _ttinfo object, the objects
1209 // represent different offsets.
1210 // 3. self->ttinfos contains no unused _ttinfos (in which case an otherwise
1211 // fixed-offset zone with extra _ttinfos defined may appear to *not* be
1212 // a fixed offset zone).
1213 //
1214 // Violations to these assumptions would be fairly exotic, and exotic
1215 // zones should almost certainly not be used with datetime.time (the
1216 // only thing that would be affected by this).
1217 if (self->num_ttinfos > 1 || !self->tzrule_after.std_only) {
1218 self->fixed_offset = 0;
1219 }
1220 else if (self->num_ttinfos == 0) {
1221 self->fixed_offset = 1;
1222 }
1223 else {
1224 int constant_offset =
1225 ttinfo_eq(&(self->_ttinfos[0]), &self->tzrule_after.std);
1226 if (constant_offset < 0) {
1227 goto error;
1228 }
1229 else {
1230 self->fixed_offset = constant_offset;
1231 }
1232 }
1233
1234 int rv = 0;
1235 goto cleanup;
1236 error:
1237 // These resources only need to be freed if we have failed, if we succeed
1238 // in initializing a PyZoneInfo_ZoneInfo object, we can rely on its dealloc
1239 // method to free the relevant resources.
1240 if (self->trans_list_utc != NULL) {
1241 PyMem_Free(self->trans_list_utc);
1242 self->trans_list_utc = NULL;
1243 }
1244
1245 for (size_t i = 0; i < 2; ++i) {
1246 if (self->trans_list_wall[i] != NULL) {
1247 PyMem_Free(self->trans_list_wall[i]);
1248 self->trans_list_wall[i] = NULL;
1249 }
1250 }
1251
1252 if (self->_ttinfos != NULL) {
1253 for (size_t i = 0; i < ttinfos_allocated; ++i) {
1254 xdecref_ttinfo(&(self->_ttinfos[i]));
1255 }
1256 PyMem_Free(self->_ttinfos);
1257 self->_ttinfos = NULL;
1258 }
1259
1260 if (self->trans_ttinfos != NULL) {
1261 PyMem_Free(self->trans_ttinfos);
1262 self->trans_ttinfos = NULL;
1263 }
1264
1265 rv = -1;
1266 cleanup:
1267 Py_XDECREF(data_tuple);
1268
1269 if (utcoff != NULL) {
1270 PyMem_Free(utcoff);
1271 }
1272
1273 if (dstoff != NULL) {
1274 PyMem_Free(dstoff);
1275 }
1276
1277 if (isdst != NULL) {
1278 PyMem_Free(isdst);
1279 }
1280
1281 if (trans_idx != NULL) {
1282 PyMem_Free(trans_idx);
1283 }
1284
1285 return rv;
1286 }
1287
1288 /* Function to calculate the local timestamp of a transition from the year. */
1289 int64_t
1290 calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1291 {
1292 CalendarRule *self = (CalendarRule *)base_self;
1293
1294 // We want (year, month, day of month); we have year and month, but we
1295 // need to turn (week, day-of-week) into day-of-month
1296 //
1297 // Week 1 is the first week in which day `day` (where 0 = Sunday) appears.
1298 // Week 5 represents the last occurrence of day `day`, so we need to know
1299 // the first weekday of the month and the number of days in the month.
1300 int8_t first_day = (ymd_to_ord(year, self->month, 1) + 6) % 7;
1301 uint8_t days_in_month = DAYS_IN_MONTH[self->month];
1302 if (self->month == 2 && is_leap_year(year)) {
1303 days_in_month += 1;
1304 }
1305
1306 // This equation seems magical, so I'll break it down:
1307 // 1. calendar says 0 = Monday, POSIX says 0 = Sunday so we need first_day
1308 // + 1 to get 1 = Monday -> 7 = Sunday, which is still equivalent
1309 // because this math is mod 7
1310 // 2. Get first day - desired day mod 7 (adjusting by 7 for negative
1311 // numbers so that -1 % 7 = 6).
1312 // 3. Add 1 because month days are a 1-based index.
1313 int8_t month_day = ((int8_t)(self->day) - (first_day + 1)) % 7;
1314 if (month_day < 0) {
1315 month_day += 7;
1316 }
1317 month_day += 1;
1318
1319 // Now use a 0-based index version of `week` to calculate the w-th
1320 // occurrence of `day`
1321 month_day += ((int8_t)(self->week) - 1) * 7;
1322
1323 // month_day will only be > days_in_month if w was 5, and `w` means "last
1324 // occurrence of `d`", so now we just check if we over-shot the end of the
1325 // month and if so knock off 1 week.
1326 if (month_day > days_in_month) {
1327 month_day -= 7;
1328 }
1329
1330 int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL;
1331 return ((ordinal * 86400) + (int64_t)(self->hour * 3600) +
1332 (int64_t)(self->minute * 60) + (int64_t)(self->second));
1333 }
1334
1335 /* Constructor for CalendarRule. */
1336 int
1337 calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour,
1338 int8_t minute, int8_t second, CalendarRule *out)
1339 {
1340 // These bounds come from the POSIX standard, which describes an Mm.n.d
1341 // rule as:
1342 //
1343 // The d'th day (0 <= d <= 6) of week n of month m of the year (1 <= n <=
1344 // 5, 1 <= m <= 12, where week 5 means "the last d day in month m" which
1345 // may occur in either the fourth or the fifth week). Week 1 is the first
1346 // week in which the d'th day occurs. Day zero is Sunday.
1347 if (month <= 0 || month > 12) {
1348 PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]");
1349 return -1;
1350 }
1351
1352 if (week <= 0 || week > 5) {
1353 PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]");
1354 return -1;
1355 }
1356
1357 // If the 'day' parameter type is changed to a signed type,
1358 // "day < 0" check must be added.
1359 if (/* day < 0 || */ day > 6) {
1360 PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]");
1361 return -1;
1362 }
1363
1364 TransitionRuleType base = {&calendarrule_year_to_timestamp};
1365
1366 CalendarRule new_offset = {
1367 .base = base,
1368 .month = month,
1369 .week = week,
1370 .day = day,
1371 .hour = hour,
1372 .minute = minute,
1373 .second = second,
1374 };
1375
1376 *out = new_offset;
1377 return 0;
1378 }
1379
1380 /* Function to calculate the local timestamp of a transition from the year.
1381 *
1382 * This translates the day of the year into a local timestamp — either a
1383 * 1-based Julian day, not including leap days, or the 0-based year-day,
1384 * including leap days.
1385 * */
1386 int64_t
1387 dayrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1388 {
1389 // The function signature requires a TransitionRuleType pointer, but this
1390 // function is only applicable to DayRule* objects.
1391 DayRule *self = (DayRule *)base_self;
1392
1393 // ymd_to_ord calculates the number of days since 0001-01-01, but we want
1394 // to know the number of days since 1970-01-01, so we must subtract off
1395 // the equivalent of ymd_to_ord(1970, 1, 1).
1396 //
1397 // We subtract off an additional 1 day to account for January 1st (we want
1398 // the number of full days *before* the date of the transition - partial
1399 // days are accounted for in the hour, minute and second portions.
1400 int64_t days_before_year = ymd_to_ord(year, 1, 1) - EPOCHORDINAL - 1;
1401
1402 // The Julian day specification skips over February 29th in leap years,
1403 // from the POSIX standard:
1404 //
1405 // Leap days shall not be counted. That is, in all years-including leap
1406 // years-February 28 is day 59 and March 1 is day 60. It is impossible to
1407 // refer explicitly to the occasional February 29.
1408 //
1409 // This is actually more useful than you'd think — if you want a rule that
1410 // always transitions on a given calendar day (other than February 29th),
1411 // you would use a Julian day, e.g. J91 always refers to April 1st and J365
1412 // always refers to December 31st.
1413 unsigned int day = self->day;
1414 if (self->julian && day >= 59 && is_leap_year(year)) {
1415 day += 1;
1416 }
1417
1418 return ((days_before_year + day) * 86400) + (self->hour * 3600) +
1419 (self->minute * 60) + self->second;
1420 }
1421
1422 /* Constructor for DayRule. */
1423 static int
1424 dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute,
1425 int8_t second, DayRule *out)
1426 {
1427 // The POSIX standard specifies that Julian days must be in the range (1 <=
1428 // n <= 365) and that non-Julian (they call it "0-based Julian") days must
1429 // be in the range (0 <= n <= 365).
1430 if (day < julian || day > 365) {
1431 PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u",
1432 julian, day);
1433 return -1;
1434 }
1435
1436 TransitionRuleType base = {
1437 &dayrule_year_to_timestamp,
1438 };
1439
1440 DayRule tmp = {
1441 .base = base,
1442 .julian = julian,
1443 .day = day,
1444 .hour = hour,
1445 .minute = minute,
1446 .second = second,
1447 };
1448
1449 *out = tmp;
1450
1451 return 0;
1452 }
1453
1454 /* Calculate the start and end rules for a _tzrule in the given year. */
1455 static void
1456 tzrule_transitions(_tzrule *rule, int year, int64_t *start, int64_t *end)
1457 {
1458 assert(rule->start != NULL);
1459 assert(rule->end != NULL);
1460 *start = rule->start->year_to_timestamp(rule->start, year);
1461 *end = rule->end->year_to_timestamp(rule->end, year);
1462 }
1463
1464 /* Calculate the _ttinfo that applies at a given local time from a _tzrule.
1465 *
1466 * This takes a local timestamp and fold for disambiguation purposes; the year
1467 * could technically be calculated from the timestamp, but given that the
1468 * callers of this function already have the year information accessible from
1469 * the datetime struct, it is taken as an additional parameter to reduce
1470 * unnecessary calculation.
1471 * */
1472 static _ttinfo *
1473 find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year)
1474 {
1475 if (rule->std_only) {
1476 return &(rule->std);
1477 }
1478
1479 int64_t start, end;
1480 uint8_t isdst;
1481
1482 tzrule_transitions(rule, year, &start, &end);
1483
1484 // With fold = 0, the period (denominated in local time) with the smaller
1485 // offset starts at the end of the gap and ends at the end of the fold;
1486 // with fold = 1, it runs from the start of the gap to the beginning of the
1487 // fold.
1488 //
1489 // So in order to determine the DST boundaries we need to know both the
1490 // fold and whether DST is positive or negative (rare), and it turns out
1491 // that this boils down to fold XOR is_positive.
1492 if (fold == (rule->dst_diff >= 0)) {
1493 end -= rule->dst_diff;
1494 }
1495 else {
1496 start += rule->dst_diff;
1497 }
1498
1499 if (start < end) {
1500 isdst = (ts >= start) && (ts < end);
1501 }
1502 else {
1503 isdst = (ts < end) || (ts >= start);
1504 }
1505
1506 if (isdst) {
1507 return &(rule->dst);
1508 }
1509 else {
1510 return &(rule->std);
1511 }
1512 }
1513
1514 /* Calculate the ttinfo and fold that applies for a _tzrule at an epoch time.
1515 *
1516 * This function can determine the _ttinfo that applies at a given epoch time,
1517 * (analogous to trans_list_utc), and whether or not the datetime is in a fold.
1518 * This is to be used in the .fromutc() function.
1519 *
1520 * The year is technically a redundant parameter, because it can be calculated
1521 * from the timestamp, but all callers of this function should have the year
1522 * in the datetime struct anyway, so taking it as a parameter saves unnecessary
1523 * calculation.
1524 **/
1525 static _ttinfo *
1526 find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
1527 unsigned char *fold)
1528 {
1529 if (rule->std_only) {
1530 *fold = 0;
1531 return &(rule->std);
1532 }
1533
1534 int64_t start, end;
1535 uint8_t isdst;
1536 tzrule_transitions(rule, year, &start, &end);
1537 start -= rule->std.utcoff_seconds;
1538 end -= rule->dst.utcoff_seconds;
1539
1540 if (start < end) {
1541 isdst = (ts >= start) && (ts < end);
1542 }
1543 else {
1544 isdst = (ts < end) || (ts >= start);
1545 }
1546
1547 // For positive DST, the ambiguous period is one dst_diff after the end of
1548 // DST; for negative DST, the ambiguous period is one dst_diff before the
1549 // start of DST.
1550 int64_t ambig_start, ambig_end;
1551 if (rule->dst_diff > 0) {
1552 ambig_start = end;
1553 ambig_end = end + rule->dst_diff;
1554 }
1555 else {
1556 ambig_start = start;
1557 ambig_end = start - rule->dst_diff;
1558 }
1559
1560 *fold = (ts >= ambig_start) && (ts < ambig_end);
1561
1562 if (isdst) {
1563 return &(rule->dst);
1564 }
1565 else {
1566 return &(rule->std);
1567 }
1568 }
1569
1570 /* Parse a TZ string in the format specified by the POSIX standard:
1571 *
1572 * std offset[dst[offset],start[/time],end[/time]]
1573 *
1574 * std and dst must be 3 or more characters long and must not contain a
1575 * leading colon, embedded digits, commas, nor a plus or minus signs; The
1576 * spaces between "std" and "offset" are only for display and are not actually
1577 * present in the string.
1578 *
1579 * The format of the offset is ``[+|-]hh[:mm[:ss]]``
1580 *
1581 * See the POSIX.1 spec: IEE Std 1003.1-2018 §8.3:
1582 *
1583 * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
1584 */
1585 static int
1586 parse_tz_str(zoneinfo_state *state, PyObject *tz_str_obj, _tzrule *out)
1587 {
1588 PyObject *std_abbr = NULL;
1589 PyObject *dst_abbr = NULL;
1590 TransitionRuleType *start = NULL;
1591 TransitionRuleType *end = NULL;
1592 // Initialize offsets to invalid value (> 24 hours)
1593 long std_offset = 1 << 20;
1594 long dst_offset = 1 << 20;
1595
1596 const char *tz_str = PyBytes_AsString(tz_str_obj);
1597 if (tz_str == NULL) {
1598 return -1;
1599 }
1600 const char *p = tz_str;
1601
1602 // Read the `std` abbreviation, which must be at least 3 characters long.
1603 Py_ssize_t num_chars = parse_abbr(p, &std_abbr);
1604 if (num_chars < 1) {
1605 PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj);
1606 goto error;
1607 }
1608
1609 p += num_chars;
1610
1611 // Now read the STD offset, which is required
1612 num_chars = parse_tz_delta(p, &std_offset);
1613 if (num_chars < 0) {
1614 PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj);
1615 goto error;
1616 }
1617 p += num_chars;
1618
1619 // If the string ends here, there is no DST, otherwise we must parse the
1620 // DST abbreviation and start and end dates and times.
1621 if (*p == '\0') {
1622 goto complete;
1623 }
1624
1625 num_chars = parse_abbr(p, &dst_abbr);
1626 if (num_chars < 1) {
1627 PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj);
1628 goto error;
1629 }
1630 p += num_chars;
1631
1632 if (*p == ',') {
1633 // From the POSIX standard:
1634 //
1635 // If no offset follows dst, the alternative time is assumed to be one
1636 // hour ahead of standard time.
1637 dst_offset = std_offset + 3600;
1638 }
1639 else {
1640 num_chars = parse_tz_delta(p, &dst_offset);
1641 if (num_chars < 0) {
1642 PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R",
1643 tz_str_obj);
1644 goto error;
1645 }
1646
1647 p += num_chars;
1648 }
1649
1650 TransitionRuleType **transitions[2] = {&start, &end};
1651 for (size_t i = 0; i < 2; ++i) {
1652 if (*p != ',') {
1653 PyErr_Format(PyExc_ValueError,
1654 "Missing transition rules in TZ string: %R",
1655 tz_str_obj);
1656 goto error;
1657 }
1658 p++;
1659
1660 num_chars = parse_transition_rule(p, transitions[i]);
1661 if (num_chars < 0) {
1662 PyErr_Format(PyExc_ValueError,
1663 "Malformed transition rule in TZ string: %R",
1664 tz_str_obj);
1665 goto error;
1666 }
1667 p += num_chars;
1668 }
1669
1670 if (*p != '\0') {
1671 PyErr_Format(PyExc_ValueError,
1672 "Extraneous characters at end of TZ string: %R",
1673 tz_str_obj);
1674 goto error;
1675 }
1676
1677 complete:
1678 build_tzrule(state, std_abbr, dst_abbr, std_offset, dst_offset,
1679 start, end, out);
1680 Py_DECREF(std_abbr);
1681 Py_XDECREF(dst_abbr);
1682
1683 return 0;
1684 error:
1685 Py_XDECREF(std_abbr);
1686 if (dst_abbr != NULL && dst_abbr != Py_None) {
1687 Py_DECREF(dst_abbr);
1688 }
1689
1690 if (start != NULL) {
1691 PyMem_Free(start);
1692 }
1693
1694 if (end != NULL) {
1695 PyMem_Free(end);
1696 }
1697
1698 return -1;
1699 }
1700
/* Parse a single decimal digit.
 *
 * If the character at `p` is a digit, stores its numeric value (0-9) in
 * *value and returns 0. Otherwise returns -1 and leaves *value untouched.
 * Only one character is examined; the caller advances its own pointer.
 */
static int
parse_uint(const char *const p, uint8_t *value)
{
    // <ctype.h> functions require a value representable as unsigned char (or
    // EOF); passing a plain char that happens to be negative (any byte >=
    // 0x80 where char is signed) is undefined behavior.
    if (!isdigit((unsigned char)*p)) {
        return -1;
    }

    *value = (*p) - '0';
    return 0;
}
1711
1712 /* Parse the STD and DST abbreviations from a TZ string. */
1713 static Py_ssize_t
1714 parse_abbr(const char *const p, PyObject **abbr)
1715 {
1716 const char *ptr = p;
1717 const char *str_start;
1718 const char *str_end;
1719
1720 if (*ptr == '<') {
1721 char buff;
1722 ptr++;
1723 str_start = ptr;
1724 while ((buff = *ptr) != '>') {
1725 // From the POSIX standard:
1726 //
1727 // In the quoted form, the first character shall be the less-than
1728 // ( '<' ) character and the last character shall be the
1729 // greater-than ( '>' ) character. All characters between these
1730 // quoting characters shall be alphanumeric characters from the
1731 // portable character set in the current locale, the plus-sign (
1732 // '+' ) character, or the minus-sign ( '-' ) character. The std
1733 // and dst fields in this case shall not include the quoting
1734 // characters.
1735 if (!isalpha(buff) && !isdigit(buff) && buff != '+' &&
1736 buff != '-') {
1737 return -1;
1738 }
1739 ptr++;
1740 }
1741 str_end = ptr;
1742 ptr++;
1743 }
1744 else {
1745 str_start = p;
1746 // From the POSIX standard:
1747 //
1748 // In the unquoted form, all characters in these fields shall be
1749 // alphabetic characters from the portable character set in the
1750 // current locale.
1751 while (isalpha(*ptr)) {
1752 ptr++;
1753 }
1754 str_end = ptr;
1755 }
1756
1757 *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start);
1758 if (*abbr == NULL) {
1759 return -1;
1760 }
1761
1762 return ptr - p;
1763 }
1764
1765 /* Parse a UTC offset from a TZ str. */
1766 static Py_ssize_t
1767 parse_tz_delta(const char *const p, long *total_seconds)
1768 {
1769 // From the POSIX spec:
1770 //
1771 // Indicates the value added to the local time to arrive at Coordinated
1772 // Universal Time. The offset has the form:
1773 //
1774 // hh[:mm[:ss]]
1775 //
1776 // One or more digits may be used; the value is always interpreted as a
1777 // decimal number.
1778 //
1779 // The POSIX spec says that the values for `hour` must be between 0 and 24
1780 // hours, but RFC 8536 §3.3.1 specifies that the hours part of the
1781 // transition times may be signed and range from -167 to 167.
1782 long sign = -1;
1783 long hours = 0;
1784 long minutes = 0;
1785 long seconds = 0;
1786
1787 const char *ptr = p;
1788 char buff = *ptr;
1789 if (buff == '-' || buff == '+') {
1790 // Negative numbers correspond to *positive* offsets, from the spec:
1791 //
1792 // If preceded by a '-', the timezone shall be east of the Prime
1793 // Meridian; otherwise, it shall be west (which may be indicated by
1794 // an optional preceding '+' ).
1795 if (buff == '-') {
1796 sign = 1;
1797 }
1798
1799 ptr++;
1800 }
1801
1802 // The hour can be 1 or 2 numeric characters
1803 for (size_t i = 0; i < 2; ++i) {
1804 buff = *ptr;
1805 if (!isdigit(buff)) {
1806 if (i == 0) {
1807 return -1;
1808 }
1809 else {
1810 break;
1811 }
1812 }
1813
1814 hours *= 10;
1815 hours += buff - '0';
1816 ptr++;
1817 }
1818
1819 if (hours > 24 || hours < 0) {
1820 return -1;
1821 }
1822
1823 // Minutes and seconds always of the format ":dd"
1824 long *outputs[2] = {&minutes, &seconds};
1825 for (size_t i = 0; i < 2; ++i) {
1826 if (*ptr != ':') {
1827 goto complete;
1828 }
1829 ptr++;
1830
1831 for (size_t j = 0; j < 2; ++j) {
1832 buff = *ptr;
1833 if (!isdigit(buff)) {
1834 return -1;
1835 }
1836 *(outputs[i]) *= 10;
1837 *(outputs[i]) += buff - '0';
1838 ptr++;
1839 }
1840 }
1841
1842 complete:
1843 *total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds);
1844
1845 return ptr - p;
1846 }
1847
1848 /* Parse the date portion of a transition rule. */
1849 static Py_ssize_t
1850 parse_transition_rule(const char *const p, TransitionRuleType **out)
1851 {
1852 // The full transition rule indicates when to change back and forth between
1853 // STD and DST, and has the form:
1854 //
1855 // date[/time],date[/time]
1856 //
1857 // This function parses an individual date[/time] section, and returns
1858 // the number of characters that contributed to the transition rule. This
1859 // does not include the ',' at the end of the first rule.
1860 //
1861 // The POSIX spec states that if *time* is not given, the default is 02:00.
1862 const char *ptr = p;
1863 int8_t hour = 2;
1864 int8_t minute = 0;
1865 int8_t second = 0;
1866
1867 // Rules come in one of three flavors:
1868 //
1869 // 1. Jn: Julian day n, with no leap days.
1870 // 2. n: Day of year (0-based, with leap days)
1871 // 3. Mm.n.d: Specifying by month, week and day-of-week.
1872
1873 if (*ptr == 'M') {
1874 uint8_t month, week, day;
1875 ptr++;
1876 if (parse_uint(ptr, &month)) {
1877 return -1;
1878 }
1879 ptr++;
1880 if (*ptr != '.') {
1881 uint8_t tmp;
1882 if (parse_uint(ptr, &tmp)) {
1883 return -1;
1884 }
1885
1886 month *= 10;
1887 month += tmp;
1888 ptr++;
1889 }
1890
1891 uint8_t *values[2] = {&week, &day};
1892 for (size_t i = 0; i < 2; ++i) {
1893 if (*ptr != '.') {
1894 return -1;
1895 }
1896 ptr++;
1897
1898 if (parse_uint(ptr, values[i])) {
1899 return -1;
1900 }
1901 ptr++;
1902 }
1903
1904 if (*ptr == '/') {
1905 ptr++;
1906 Py_ssize_t num_chars =
1907 parse_transition_time(ptr, &hour, &minute, &second);
1908 if (num_chars < 0) {
1909 return -1;
1910 }
1911 ptr += num_chars;
1912 }
1913
1914 CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule));
1915 if (rv == NULL) {
1916 return -1;
1917 }
1918
1919 if (calendarrule_new(month, week, day, hour, minute, second, rv)) {
1920 PyMem_Free(rv);
1921 return -1;
1922 }
1923
1924 *out = (TransitionRuleType *)rv;
1925 }
1926 else {
1927 uint8_t julian = 0;
1928 unsigned int day = 0;
1929 if (*ptr == 'J') {
1930 julian = 1;
1931 ptr++;
1932 }
1933
1934 for (size_t i = 0; i < 3; ++i) {
1935 if (!isdigit(*ptr)) {
1936 if (i == 0) {
1937 return -1;
1938 }
1939 break;
1940 }
1941 day *= 10;
1942 day += (*ptr) - '0';
1943 ptr++;
1944 }
1945
1946 if (*ptr == '/') {
1947 ptr++;
1948 Py_ssize_t num_chars =
1949 parse_transition_time(ptr, &hour, &minute, &second);
1950 if (num_chars < 0) {
1951 return -1;
1952 }
1953 ptr += num_chars;
1954 }
1955
1956 DayRule *rv = PyMem_Calloc(1, sizeof(DayRule));
1957 if (rv == NULL) {
1958 return -1;
1959 }
1960
1961 if (dayrule_new(julian, day, hour, minute, second, rv)) {
1962 PyMem_Free(rv);
1963 return -1;
1964 }
1965 *out = (TransitionRuleType *)rv;
1966 }
1967
1968 return ptr - p;
1969 }
1970
1971 /* Parse the time portion of a transition rule (e.g. following an /) */
1972 static Py_ssize_t
1973 parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
1974 int8_t *second)
1975 {
1976 // From the spec:
1977 //
1978 // The time has the same format as offset except that no leading sign
1979 // ( '-' or '+' ) is allowed.
1980 //
1981 // The format for the offset is:
1982 //
1983 // h[h][:mm[:ss]]
1984 //
1985 // RFC 8536 also allows transition times to be signed and to range from
1986 // -167 to +167, but the current version only supports [0, 99].
1987 //
1988 // TODO: Support the full range of transition hours.
1989 int8_t *components[3] = {hour, minute, second};
1990 const char *ptr = p;
1991 int8_t sign = 1;
1992
1993 if (*ptr == '-' || *ptr == '+') {
1994 if (*ptr == '-') {
1995 sign = -1;
1996 }
1997 ptr++;
1998 }
1999
2000 for (size_t i = 0; i < 3; ++i) {
2001 if (i > 0) {
2002 if (*ptr != ':') {
2003 break;
2004 }
2005 ptr++;
2006 }
2007
2008 uint8_t buff = 0;
2009 for (size_t j = 0; j < 2; j++) {
2010 if (!isdigit(*ptr)) {
2011 if (i == 0 && j > 0) {
2012 break;
2013 }
2014 return -1;
2015 }
2016
2017 buff *= 10;
2018 buff += (*ptr) - '0';
2019 ptr++;
2020 }
2021
2022 *(components[i]) = sign * buff;
2023 }
2024
2025 return ptr - p;
2026 }
2027
2028 /* Constructor for a _tzrule.
2029 *
2030 * If `dst_abbr` is NULL, this will construct an "STD-only" _tzrule, in which
2031 * case `dst_offset` will be ignored and `start` and `end` are expected to be
2032 * NULL as well.
2033 *
2034 * Returns 0 on success.
2035 */
2036 static int
2037 build_tzrule(zoneinfo_state *state, PyObject *std_abbr, PyObject *dst_abbr,
2038 long std_offset, long dst_offset, TransitionRuleType *start,
2039 TransitionRuleType *end, _tzrule *out)
2040 {
2041 _tzrule rv = {{0}};
2042
2043 rv.start = start;
2044 rv.end = end;
2045
2046 if (build_ttinfo(state, std_offset, 0, std_abbr, &rv.std)) {
2047 goto error;
2048 }
2049
2050 if (dst_abbr != NULL) {
2051 rv.dst_diff = dst_offset - std_offset;
2052 if (build_ttinfo(state, dst_offset, rv.dst_diff, dst_abbr, &rv.dst)) {
2053 goto error;
2054 }
2055 }
2056 else {
2057 rv.std_only = 1;
2058 }
2059
2060 *out = rv;
2061
2062 return 0;
2063 error:
2064 xdecref_ttinfo(&rv.std);
2065 xdecref_ttinfo(&rv.dst);
2066 return -1;
2067 }
2068
2069 /* Destructor for _tzrule. */
2070 static void
2071 free_tzrule(_tzrule *tzrule)
2072 {
2073 xdecref_ttinfo(&(tzrule->std));
2074 if (!tzrule->std_only) {
2075 xdecref_ttinfo(&(tzrule->dst));
2076 }
2077
2078 if (tzrule->start != NULL) {
2079 PyMem_Free(tzrule->start);
2080 }
2081
2082 if (tzrule->end != NULL) {
2083 PyMem_Free(tzrule->end);
2084 }
2085 }
2086
/* Calculate DST offsets from transitions and UTC offsets
 *
 * This is necessary because each C `ttinfo` only contains the UTC offset,
 * time zone abbreviation and an isdst boolean - it does not include the
 * amount of the DST offset, but we need the amount for the dst() function.
 *
 * This function uses heuristics to infer what the offset should be, so it
 * is not guaranteed that this will work for all zones. If we cannot assign
 * a value for a given DST offset, we'll assume it's 1H rather than 0H, so
 * bool(dt.dst()) will always match ttinfo.isdst.
 *
 * `trans_idx` holds `num_transitions` indices into the per-ttinfo arrays
 * `utcoffs`, `dstoffs` and `isdsts`, each of length `num_ttinfos`. Results
 * are written into `dstoffs`; only entries that are DST and still 0 are
 * modified.
 */
static void
utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
                 unsigned char *isdsts, size_t num_transitions,
                 size_t num_ttinfos)
{
    // Count the DST ttinfos so the scan below can stop as soon as every one
    // of them has been assigned an offset.
    size_t dst_count = 0;
    size_t dst_found = 0;
    for (size_t i = 0; i < num_ttinfos; ++i) {
        if (isdsts[i]) {
            dst_count++;
        }
    }

    for (size_t i = 1; i < num_transitions; ++i) {
        if (dst_count == dst_found) {
            break;
        }

        size_t idx = trans_idx[i];
        size_t comp_idx = trans_idx[i - 1];

        // Only look at DST offsets that have not been assigned already
        if (!isdsts[idx] || dstoffs[idx] != 0) {
            continue;
        }

        long dstoff = 0;
        long utcoff = utcoffs[idx];

        // First guess: the shift relative to the preceding transition, if
        // that one was standard time.
        if (!isdsts[comp_idx]) {
            dstoff = utcoff - utcoffs[comp_idx];
        }

        if (!dstoff && idx < (num_ttinfos - 1)) {
            // Second guess: compare against the following transition. The
            // last transition has no successor, so leave it unassigned
            // instead of reading past the end of trans_idx.
            if (i + 1 >= num_transitions) {
                continue;
            }
            comp_idx = trans_idx[i + 1];

            // If the following transition is also DST and we couldn't find
            // the DST offset by this point, we're going to have to skip it
            // and hope this transition gets assigned later
            if (isdsts[comp_idx]) {
                continue;
            }

            dstoff = utcoff - utcoffs[comp_idx];
        }

        if (dstoff) {
            dst_found++;
            dstoffs[idx] = dstoff;
        }
    }

    if (dst_found < dst_count) {
        // If there are time zones we didn't find a value for, we'll end up
        // with dstoff = 0 for something where isdst=1. This is obviously
        // wrong - one hour will be a much better guess than 0.
        for (size_t idx = 0; idx < num_ttinfos; ++idx) {
            if (isdsts[idx] && !dstoffs[idx]) {
                dstoffs[idx] = 3600;
            }
        }
    }
}
2159
/* Exchange the values of the lvalues `x` and `y`, using the lvalue `buffer`
 * as scratch storage.
 *
 * Wrapped in do { ... } while (0) so that `_swap(a, b, tmp);` is a single
 * statement and stays correct as the body of an unbraced if/else. Each
 * argument is evaluated more than once, so arguments must not have side
 * effects.
 */
#define _swap(x, y, buffer) \
    do {                    \
        (buffer) = (x);     \
        (x) = (y);          \
        (y) = (buffer);     \
    } while (0)
2164
/* Calculate transitions in local time from UTC time and offsets.
 *
 * For each transition we want the number of nominal wall-time seconds between
 * 1970-01-01T00:00:00 and the transition in *local time* (note: this is *not*
 * the same as the output of datetime.timestamp, which counts the seconds
 * actually elapsed since 1970-01-01T00:00:00Z).
 *
 * Local time can be ambiguous around a transition, and the `fold` attribute
 * is what disambiguates it, so two arrays are produced:
 *
 *  trans_local[0]: The wall-time transitions for fold=0
 *  trans_local[1]: The wall-time transitions for fold=1
 *
 * Returns 0 on success and a negative number on failure. Nothing is
 * allocated when there are no transitions. Any trans_local array that is not
 * NULL afterwards - including after a failure - must be freed by the caller.
 */
static int
ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
            int64_t *trans_local[2], size_t num_ttinfos,
            size_t num_transitions)
{
    if (num_transitions == 0) {
        return 0;
    }

    // Start both arrays off as copies of the UTC transitions; the offsets
    // are added in place below.
    const size_t nbytes = num_transitions * sizeof(int64_t);
    for (size_t fold = 0; fold < 2; ++fold) {
        int64_t *copy = PyMem_Malloc(nbytes);
        trans_local[fold] = copy;
        if (copy == NULL) {
            return -1;
        }

        memcpy(copy, trans_utc, nbytes);
    }

    for (size_t i = 0; i < num_transitions; ++i) {
        // Offsets in effect on either side of transition i. The first
        // transition has no predecessor, so the first ttinfo stands in for
        // the offset before it.
        int64_t before, after;
        if (i == 0) {
            before = utcoff[0];
            after = (num_ttinfos > 1) ? utcoff[trans_idx[0]] : utcoff[0];
        }
        else {
            before = utcoff[trans_idx[i - 1]];
            after = utcoff[trans_idx[i]];
        }

        // The fold=0 array receives the larger of the two offsets and the
        // fold=1 array the smaller one.
        const int64_t larger = (after > before) ? after : before;
        const int64_t smaller = (after > before) ? before : after;

        trans_local[0][i] += larger;
        trans_local[1][i] += smaller;
    }

    return 0;
}
2232
/* Binary search with bisect_right semantics.
 *
 * `arr` holds `size` values in ascending order. Returns the index of the
 * first element strictly greater than `value`, or `size` if there is none.
 */
static size_t
_bisect(const int64_t value, const int64_t *arr, size_t size)
{
    size_t left = 0;
    size_t right = size;

    // Invariant: every element before `left` is <= value and every element
    // from `right` onwards is > value.
    while (left < right) {
        const size_t mid = (left + right) / 2;
        if (value < arr[mid]) {
            right = mid;
        }
        else {
            left = mid + 1;
        }
    }

    return right;
}
2253
2254 /* Find the ttinfo rules that apply at a given local datetime. */
2255 static _ttinfo *
2256 find_ttinfo(zoneinfo_state *state, PyZoneInfo_ZoneInfo *self, PyObject *dt)
2257 {
2258 // datetime.time has a .tzinfo attribute that passes None as the dt
2259 // argument; it only really has meaning for fixed-offset zones.
2260 if (dt == Py_None) {
2261 if (self->fixed_offset) {
2262 return &(self->tzrule_after.std);
2263 }
2264 else {
2265 return &(state->NO_TTINFO);
2266 }
2267 }
2268
2269 int64_t ts;
2270 if (get_local_timestamp(dt, &ts)) {
2271 return NULL;
2272 }
2273
2274 unsigned char fold = PyDateTime_DATE_GET_FOLD(dt);
2275 assert(fold < 2);
2276 int64_t *local_transitions = self->trans_list_wall[fold];
2277 size_t num_trans = self->num_transitions;
2278
2279 if (num_trans && ts < local_transitions[0]) {
2280 return self->ttinfo_before;
2281 }
2282 else if (!num_trans || ts > local_transitions[self->num_transitions - 1]) {
2283 return find_tzrule_ttinfo(&(self->tzrule_after), ts, fold,
2284 PyDateTime_GET_YEAR(dt));
2285 }
2286 else {
2287 size_t idx = _bisect(ts, local_transitions, self->num_transitions) - 1;
2288 assert(idx < self->num_transitions);
2289 return self->trans_ttinfos[idx];
2290 }
2291 }
2292
/* Return 1 if `year` is a leap year under the Gregorian rules (divisible by
 * 4, except for century years not divisible by 400), and 0 otherwise.
 */
static int
is_leap_year(int year)
{
    // The arithmetic is done on the value converted to unsigned.
    const unsigned int uyear = (unsigned int)year;

    if (uyear % 4 != 0) {
        return 0;
    }
    if (uyear % 100 != 0) {
        return 1;
    }
    return uyear % 400 == 0;
}
2299
2300 /* Calculates ordinal datetime from year, month and day. */
2301 static int
2302 ymd_to_ord(int y, int m, int d)
2303 {
2304 y -= 1;
2305 int days_before_year = (y * 365) + (y / 4) - (y / 100) + (y / 400);
2306 int yearday = DAYS_BEFORE_MONTH[m];
2307 if (m > 2 && is_leap_year(y + 1)) {
2308 yearday += 1;
2309 }
2310
2311 return days_before_year + yearday + d;
2312 }
2313
2314 /* Calculate the number of seconds since 1970-01-01 in local time.
2315 *
2316 * This gets a datetime in the same "units" as self->trans_list_wall so that we
2317 * can easily determine which transitions a datetime falls between. See the
2318 * comment above ts_to_local for more information.
2319 * */
2320 static int
2321 get_local_timestamp(PyObject *dt, int64_t *local_ts)
2322 {
2323 assert(local_ts != NULL);
2324
2325 int hour, minute, second;
2326 int ord;
2327 if (PyDateTime_CheckExact(dt)) {
2328 int y = PyDateTime_GET_YEAR(dt);
2329 int m = PyDateTime_GET_MONTH(dt);
2330 int d = PyDateTime_GET_DAY(dt);
2331 hour = PyDateTime_DATE_GET_HOUR(dt);
2332 minute = PyDateTime_DATE_GET_MINUTE(dt);
2333 second = PyDateTime_DATE_GET_SECOND(dt);
2334
2335 ord = ymd_to_ord(y, m, d);
2336 }
2337 else {
2338 PyObject *num = PyObject_CallMethod(dt, "toordinal", NULL);
2339 if (num == NULL) {
2340 return -1;
2341 }
2342
2343 ord = PyLong_AsLong(num);
2344 Py_DECREF(num);
2345 if (ord == -1 && PyErr_Occurred()) {
2346 return -1;
2347 }
2348
2349 num = PyObject_GetAttrString(dt, "hour");
2350 if (num == NULL) {
2351 return -1;
2352 }
2353 hour = PyLong_AsLong(num);
2354 Py_DECREF(num);
2355 if (hour == -1) {
2356 return -1;
2357 }
2358
2359 num = PyObject_GetAttrString(dt, "minute");
2360 if (num == NULL) {
2361 return -1;
2362 }
2363 minute = PyLong_AsLong(num);
2364 Py_DECREF(num);
2365 if (minute == -1) {
2366 return -1;
2367 }
2368
2369 num = PyObject_GetAttrString(dt, "second");
2370 if (num == NULL) {
2371 return -1;
2372 }
2373 second = PyLong_AsLong(num);
2374 Py_DECREF(num);
2375 if (second == -1) {
2376 return -1;
2377 }
2378 }
2379
2380 *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 +
2381 (int64_t)(hour * 3600 + minute * 60 + second);
2382
2383 return 0;
2384 }
2385
2386 /////
2387 // Functions for cache handling
2388
2389 /* Constructor for StrongCacheNode */
2390 static StrongCacheNode *
2391 strong_cache_node_new(PyObject *key, PyObject *zone)
2392 {
2393 StrongCacheNode *node = PyMem_Malloc(sizeof(StrongCacheNode));
2394 if (node == NULL) {
2395 return NULL;
2396 }
2397
2398 node->next = NULL;
2399 node->prev = NULL;
2400 node->key = Py_NewRef(key);
2401 node->zone = Py_NewRef(zone);
2402
2403 return node;
2404 }
2405
2406 /* Destructor for StrongCacheNode */
2407 void
2408 strong_cache_node_free(StrongCacheNode *node)
2409 {
2410 Py_XDECREF(node->key);
2411 Py_XDECREF(node->zone);
2412
2413 PyMem_Free(node);
2414 }
2415
2416 /* Frees all nodes at or after a specified root in the strong cache.
2417 *
2418 * This can be used on the root node to free the entire cache or it can be used
2419 * to clear all nodes that have been expired (which, if everything is going
2420 * right, will actually only be 1 node at a time).
2421 */
2422 void
2423 strong_cache_free(StrongCacheNode *root)
2424 {
2425 StrongCacheNode *node = root;
2426 StrongCacheNode *next_node;
2427 while (node != NULL) {
2428 next_node = node->next;
2429 strong_cache_node_free(node);
2430
2431 node = next_node;
2432 }
2433 }
2434
2435 /* Removes a node from the cache and update its neighbors.
2436 *
2437 * This is used both when ejecting a node from the cache and when moving it to
2438 * the front of the cache.
2439 */
2440 static void
2441 remove_from_strong_cache(zoneinfo_state *state, StrongCacheNode *node)
2442 {
2443 if (state->ZONEINFO_STRONG_CACHE == node) {
2444 state->ZONEINFO_STRONG_CACHE = node->next;
2445 }
2446
2447 if (node->prev != NULL) {
2448 node->prev->next = node->next;
2449 }
2450
2451 if (node->next != NULL) {
2452 node->next->prev = node->prev;
2453 }
2454
2455 node->next = NULL;
2456 node->prev = NULL;
2457 }
2458
2459 /* Retrieves the node associated with a key, if it exists.
2460 *
2461 * This traverses the strong cache until it finds a matching key and returns a
2462 * pointer to the relevant node if found. Returns NULL if no node is found.
2463 *
2464 * root may be NULL, indicating an empty cache.
2465 */
2466 static StrongCacheNode *
2467 find_in_strong_cache(const StrongCacheNode *const root, PyObject *const key)
2468 {
2469 const StrongCacheNode *node = root;
2470 while (node != NULL) {
2471 int rv = PyObject_RichCompareBool(key, node->key, Py_EQ);
2472 if (rv < 0) {
2473 return NULL;
2474 }
2475 if (rv) {
2476 return (StrongCacheNode *)node;
2477 }
2478
2479 node = node->next;
2480 }
2481
2482 return NULL;
2483 }
2484
2485 /* Ejects a given key from the class's strong cache, if applicable.
2486 *
2487 * This function is used to enable the per-key functionality in clear_cache.
2488 */
2489 static int
2490 eject_from_strong_cache(zoneinfo_state *state, const PyTypeObject *const type,
2491 PyObject *key)
2492 {
2493 if (type != state->ZoneInfoType) {
2494 return 0;
2495 }
2496
2497 StrongCacheNode *cache = state->ZONEINFO_STRONG_CACHE;
2498 StrongCacheNode *node = find_in_strong_cache(cache, key);
2499 if (node != NULL) {
2500 remove_from_strong_cache(state, node);
2501
2502 strong_cache_node_free(node);
2503 }
2504 else if (PyErr_Occurred()) {
2505 return -1;
2506 }
2507 return 0;
2508 }
2509
2510 /* Moves a node to the front of the LRU cache.
2511 *
2512 * The strong cache is an LRU cache, so whenever a given node is accessed, if
2513 * it is not at the front of the cache, it needs to be moved there.
2514 */
2515 static void
2516 move_strong_cache_node_to_front(zoneinfo_state *state, StrongCacheNode **root,
2517 StrongCacheNode *node)
2518 {
2519 StrongCacheNode *root_p = *root;
2520 if (root_p == node) {
2521 return;
2522 }
2523
2524 remove_from_strong_cache(state, node);
2525
2526 node->prev = NULL;
2527 node->next = root_p;
2528
2529 if (root_p != NULL) {
2530 root_p->prev = node;
2531 }
2532
2533 *root = node;
2534 }
2535
2536 /* Retrieves a ZoneInfo from the strong cache if it's present.
2537 *
2538 * This function finds the ZoneInfo by key and if found will move the node to
2539 * the front of the LRU cache and return a new reference to it. It returns NULL
2540 * if the key is not in the cache.
2541 *
2542 * The strong cache is currently only implemented for the base class, so this
2543 * always returns a cache miss for subclasses.
2544 */
2545 static PyObject *
2546 zone_from_strong_cache(zoneinfo_state *state, const PyTypeObject *const type,
2547 PyObject *const key)
2548 {
2549 if (type != state->ZoneInfoType) {
2550 return NULL; // Strong cache currently only implemented for base class
2551 }
2552
2553 StrongCacheNode *cache = state->ZONEINFO_STRONG_CACHE;
2554 StrongCacheNode *node = find_in_strong_cache(cache, key);
2555
2556 if (node != NULL) {
2557 StrongCacheNode **root = &(state->ZONEINFO_STRONG_CACHE);
2558 move_strong_cache_node_to_front(state, root, node);
2559 return Py_NewRef(node->zone);
2560 }
2561
2562 return NULL; // Cache miss
2563 }
2564
2565 /* Inserts a new key into the strong LRU cache.
2566 *
2567 * This function is only to be used after a cache miss — it creates a new node
2568 * at the front of the cache and ejects any stale entries (keeping the size of
2569 * the cache to at most ZONEINFO_STRONG_CACHE_MAX_SIZE).
2570 */
2571 static void
2572 update_strong_cache(zoneinfo_state *state, const PyTypeObject *const type,
2573 PyObject *key, PyObject *zone)
2574 {
2575 if (type != state->ZoneInfoType) {
2576 return;
2577 }
2578
2579 StrongCacheNode *new_node = strong_cache_node_new(key, zone);
2580 StrongCacheNode **root = &(state->ZONEINFO_STRONG_CACHE);
2581 move_strong_cache_node_to_front(state, root, new_node);
2582
2583 StrongCacheNode *node = new_node->next;
2584 for (size_t i = 1; i < ZONEINFO_STRONG_CACHE_MAX_SIZE; ++i) {
2585 if (node == NULL) {
2586 return;
2587 }
2588 node = node->next;
2589 }
2590
2591 // Everything beyond this point needs to be freed
2592 if (node != NULL) {
2593 if (node->prev != NULL) {
2594 node->prev->next = NULL;
2595 }
2596 strong_cache_free(node);
2597 }
2598 }
2599
2600 /* Clears all entries into a type's strong cache.
2601 *
2602 * Because the strong cache is not implemented for subclasses, this is a no-op
2603 * for everything except the base class.
2604 */
2605 void
2606 clear_strong_cache(zoneinfo_state *state, const PyTypeObject *const type)
2607 {
2608 if (type != state->ZoneInfoType) {
2609 return;
2610 }
2611
2612 strong_cache_free(state->ZONEINFO_STRONG_CACHE);
2613 state->ZONEINFO_STRONG_CACHE = NULL;
2614 }
2615
2616 static PyObject *
2617 new_weak_cache(void)
2618 {
2619 PyObject *WeakValueDictionary =
2620 _PyImport_GetModuleAttrString("weakref", "WeakValueDictionary");
2621 if (WeakValueDictionary == NULL) {
2622 return NULL;
2623 }
2624 PyObject *weak_cache = PyObject_CallNoArgs(WeakValueDictionary);
2625 Py_DECREF(WeakValueDictionary);
2626 return weak_cache;
2627 }
2628
2629 // This function is not idempotent and must be called on a new module object.
2630 static int
2631 initialize_caches(zoneinfo_state *state)
2632 {
2633 state->TIMEDELTA_CACHE = PyDict_New();
2634 if (state->TIMEDELTA_CACHE == NULL) {
2635 return -1;
2636 }
2637
2638 state->ZONEINFO_WEAK_CACHE = new_weak_cache();
2639 if (state->ZONEINFO_WEAK_CACHE == NULL) {
2640 return -1;
2641 }
2642
2643 return 0;
2644 }
2645
2646 static PyObject *
2647 zoneinfo_init_subclass(PyTypeObject *cls, PyObject *args, PyObject **kwargs)
2648 {
2649 PyObject *weak_cache = new_weak_cache();
2650 if (weak_cache == NULL) {
2651 return NULL;
2652 }
2653
2654 if (PyObject_SetAttrString((PyObject *)cls, "_weak_cache",
2655 weak_cache) < 0) {
2656 Py_DECREF(weak_cache);
2657 return NULL;
2658 }
2659 Py_DECREF(weak_cache);
2660 Py_RETURN_NONE;
2661 }
2662
2663 /////
2664 // Specify the ZoneInfo type
/* Method table for the ZoneInfo type.
 *
 * The ZONEINFO_ZONEINFO_*_METHODDEF entries are macros that expand to complete
 * table rows (presumably generated by Argument Clinic into
 * clinic/_zoneinfo.c.h -- confirm); the remaining rows are written out by
 * hand.
 */
static PyMethodDef zoneinfo_methods[] = {
    ZONEINFO_ZONEINFO_CLEAR_CACHE_METHODDEF
    ZONEINFO_ZONEINFO_NO_CACHE_METHODDEF
    ZONEINFO_ZONEINFO_FROM_FILE_METHODDEF
    ZONEINFO_ZONEINFO_UTCOFFSET_METHODDEF
    ZONEINFO_ZONEINFO_DST_METHODDEF
    ZONEINFO_ZONEINFO_TZNAME_METHODDEF
    {"fromutc", (PyCFunction)zoneinfo_fromutc, METH_O,
     PyDoc_STR("Given a datetime with local time in UTC, retrieve an adjusted "
               "datetime in local time.")},
    {"__reduce__", (PyCFunction)zoneinfo_reduce, METH_NOARGS,
     PyDoc_STR("Function for serialization with the pickle protocol.")},
    ZONEINFO_ZONEINFO__UNPICKLE_METHODDEF
    // Class method taking positional and keyword arguments; the double cast
    // goes through void (*)(void) because its signature differs from
    // PyCFunction.
    {"__init_subclass__", (PyCFunction)(void (*)(void))zoneinfo_init_subclass,
     METH_VARARGS | METH_KEYWORDS | METH_CLASS,
     PyDoc_STR("Function to initialize subclasses.")},
    {NULL} /* Sentinel */
};
2683
/* Member table for the ZoneInfo type. */
static PyMemberDef zoneinfo_members[] = {
    // Read-only "key" attribute backed directly by the struct field.
    {.name = "key",
     .offset = offsetof(PyZoneInfo_ZoneInfo, key),
     .type = T_OBJECT_EX,
     .flags = READONLY,
     .doc = NULL},
    // Special member name used with PyType_Spec-created types to report the
    // offset of the weak reference list field within the instance struct.
    {.name = "__weaklistoffset__",
     .offset = offsetof(PyZoneInfo_ZoneInfo, weakreflist),
     .type = T_PYSSIZET,
     .flags = READONLY},
    {NULL}, /* Sentinel */
};
2696
/* Slot table for the ZoneInfo heap type: maps each type slot to its
 * implementation (or table) defined in this file. The traverse and clear
 * slots go together with Py_TPFLAGS_HAVE_GC in zoneinfo_spec. */
static PyType_Slot zoneinfo_slots[] = {
    {Py_tp_repr, zoneinfo_repr},
    {Py_tp_str, zoneinfo_str},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_methods, zoneinfo_methods},
    {Py_tp_members, zoneinfo_members},
    {Py_tp_new, zoneinfo_new},
    {Py_tp_dealloc, zoneinfo_dealloc},
    {Py_tp_traverse, zoneinfo_traverse},
    {Py_tp_clear, zoneinfo_clear},
    {0, NULL},  // Sentinel
};
2709
/* Type specification for zoneinfo.ZoneInfo, used to create the type object
 * in zoneinfomodule_exec. The flags make the type subclassable
 * (Py_TPFLAGS_BASETYPE), tracked by the garbage collector
 * (Py_TPFLAGS_HAVE_GC), and immutable (Py_TPFLAGS_IMMUTABLETYPE). */
static PyType_Spec zoneinfo_spec = {
    .name = "zoneinfo.ZoneInfo",
    .basicsize = sizeof(PyZoneInfo_ZoneInfo),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
              Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = zoneinfo_slots,
};
2717
2718 /////
2719 // Specify the _zoneinfo module
// The module defines no module-level functions; the table holds only the
// terminating sentinel entry.
static PyMethodDef module_methods[] = {{NULL, NULL}};
2721
2722 static int
2723 module_traverse(PyObject *mod, visitproc visit, void *arg)
2724 {
2725 zoneinfo_state *state = zoneinfo_get_state(mod);
2726
2727 Py_VISIT(state->ZoneInfoType);
2728 Py_VISIT(state->io_open);
2729 Py_VISIT(state->_tzpath_find_tzfile);
2730 Py_VISIT(state->_common_mod);
2731 Py_VISIT(state->TIMEDELTA_CACHE);
2732 Py_VISIT(state->ZONEINFO_WEAK_CACHE);
2733
2734 StrongCacheNode *node = state->ZONEINFO_STRONG_CACHE;
2735 while (node != NULL) {
2736 StrongCacheNode *next = node->next;
2737 Py_VISIT(node->key);
2738 Py_VISIT(node->zone);
2739 node = next;
2740 }
2741
2742 Py_VISIT(state->NO_TTINFO.utcoff);
2743 Py_VISIT(state->NO_TTINFO.dstoff);
2744 Py_VISIT(state->NO_TTINFO.tzname);
2745
2746 return 0;
2747 }
2748
/* Release every object reference held in the module state.
 *
 * Always returns 0. Each field is cleared with Py_CLEAR, so calling this
 * again on already-cleared state is harmless.
 */
static int
module_clear(PyObject *mod)
{
    zoneinfo_state *state = zoneinfo_get_state(mod);

    Py_CLEAR(state->ZoneInfoType);
    Py_CLEAR(state->io_open);
    Py_CLEAR(state->_tzpath_find_tzfile);
    Py_CLEAR(state->_common_mod);
    Py_CLEAR(state->TIMEDELTA_CACHE);
    Py_CLEAR(state->ZONEINFO_WEAK_CACHE);
    // ZoneInfoType has already been cleared above, so the type passed here is
    // NULL; clear_strong_cache compares it against that same (now NULL)
    // field, which still matches, and the cache is freed.
    clear_strong_cache(state, state->ZoneInfoType);
    Py_CLEAR(state->NO_TTINFO.utcoff);
    Py_CLEAR(state->NO_TTINFO.dstoff);
    Py_CLEAR(state->NO_TTINFO.tzname);

    return 0;
}
2767
2768 static void
2769 module_free(void *mod)
2770 {
2771 (void)module_clear((PyObject *)mod);
2772 }
2773
2774 static int
2775 zoneinfomodule_exec(PyObject *m)
2776 {
2777 PyDateTime_IMPORT;
2778 if (PyDateTimeAPI == NULL) {
2779 goto error;
2780 }
2781
2782 zoneinfo_state *state = zoneinfo_get_state(m);
2783 PyObject *base = (PyObject *)PyDateTimeAPI->TZInfoType;
2784 state->ZoneInfoType = (PyTypeObject *)PyType_FromModuleAndSpec(m,
2785 &zoneinfo_spec, base);
2786 if (state->ZoneInfoType == NULL) {
2787 goto error;
2788 }
2789
2790 int rc = PyModule_AddObjectRef(m, "ZoneInfo",
2791 (PyObject *)state->ZoneInfoType);
2792 if (rc < 0) {
2793 goto error;
2794 }
2795
2796 /* Populate imports */
2797 state->_tzpath_find_tzfile =
2798 _PyImport_GetModuleAttrString("zoneinfo._tzpath", "find_tzfile");
2799 if (state->_tzpath_find_tzfile == NULL) {
2800 goto error;
2801 }
2802
2803 state->io_open = _PyImport_GetModuleAttrString("io", "open");
2804 if (state->io_open == NULL) {
2805 goto error;
2806 }
2807
2808 state->_common_mod = PyImport_ImportModule("zoneinfo._common");
2809 if (state->_common_mod == NULL) {
2810 goto error;
2811 }
2812
2813 if (state->NO_TTINFO.utcoff == NULL) {
2814 state->NO_TTINFO.utcoff = Py_NewRef(Py_None);
2815 state->NO_TTINFO.dstoff = Py_NewRef(Py_None);
2816 state->NO_TTINFO.tzname = Py_NewRef(Py_None);
2817 }
2818
2819 if (initialize_caches(state)) {
2820 goto error;
2821 }
2822
2823 return 0;
2824
2825 error:
2826 return -1;
2827 }
2828
/* Module slots: run zoneinfomodule_exec when the module is executed, and
 * declare support for interpreters that have their own GIL. */
static PyModuleDef_Slot zoneinfomodule_slots[] = {
    {Py_mod_exec, zoneinfomodule_exec},
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
    {0, NULL},  // Sentinel
};
2834
/* Definition of the _zoneinfo module.
 *
 * m_size reserves a zoneinfo_state per module object; the traverse, clear
 * and free hooks manage the references stored in that state.
 */
static struct PyModuleDef zoneinfomodule = {
    .m_base = PyModuleDef_HEAD_INIT,
    .m_name = "_zoneinfo",
    .m_doc = "C implementation of the zoneinfo module",
    .m_size = sizeof(zoneinfo_state),
    .m_methods = module_methods,
    .m_slots = zoneinfomodule_slots,
    .m_traverse = module_traverse,
    .m_clear = module_clear,
    .m_free = module_free,
};
2846
/* Module entry point. Hands the module definition to PyModuleDef_Init; the
 * module contents are populated later through the Py_mod_exec slot
 * (zoneinfomodule_exec). */
PyMODINIT_FUNC
PyInit__zoneinfo(void)
{
    return PyModuleDef_Init(&zoneinfomodule);
}