(root)/
glibc-2.38/
hurd/
hurdselect.c
       1  /* Guts of both `select' and `poll' for Hurd.
       2     Copyright (C) 1991-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #include <sys/time.h>
      20  #include <sys/types.h>
      21  #include <sys/poll.h>
      22  #include <hurd.h>
      23  #include <hurd/fd.h>
      24  #include <hurd/io_request.h>
      25  #include <mach_rpc.h>
      26  #include <stdlib.h>
      27  #include <string.h>
      28  #include <assert.h>
      29  #include <stdint.h>
      30  #include <limits.h>
      31  #include <time.h>
      32  #include <sysdep-cancel.h>
      33  
      34  /* All user select types.  */
      35  #define SELECT_ALL (SELECT_READ | SELECT_WRITE | SELECT_URG)
      36  
      37  /* Used to record that a particular select rpc returned.  Must be distinct
      38     from SELECT_ALL (which better not have the high bit set).  */
      39  #define SELECT_RETURNED ((SELECT_ALL << 1) & ~SELECT_ALL)
      40  #define SELECT_ERROR (SELECT_RETURNED << 1)
      41  
      42  /* Check the first NFDS descriptors either in POLLFDS (if nonnnull) or in
      43     each of READFDS, WRITEFDS, EXCEPTFDS that is nonnull.  If TIMEOUT is not
      44     NULL, time out after waiting the interval specified therein.  Returns
      45     the number of ready descriptors, or -1 for errors.  */
      46  int
      47  _hurd_select (int nfds,
      48  	      struct pollfd *pollfds,
      49  	      fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
      50  	      const struct timespec *timeout, const sigset_t *sigmask)
      51  {
      52    int i;
      53    mach_port_t portset, sigport;
      54    int got, ready;
      55    error_t err;
      56    fd_set rfds, wfds, xfds;
      57    int firstfd, lastfd;
      58    mach_msg_id_t reply_msgid;
      59    mach_msg_timeout_t to;
      60    struct timespec ts;
      61    struct
      62      {
      63        struct hurd_userlink ulink;
      64        struct hurd_fd *cell;
      65        mach_port_t io_port;
      66        int type;
      67        mach_port_t reply_port;
      68        int error;
      69      } d[nfds];
      70    sigset_t oset;
      71    struct hurd_sigstate *ss = NULL;
      72  
      73    if (nfds < 0 || (pollfds == NULL && nfds > FD_SETSIZE))
      74      return __hurd_fail (EINVAL);
      75  
      76  #define IO_SELECT_REPLY_MSGID (21012 + 100) /* XXX */
      77  #define IO_SELECT_TIMEOUT_REPLY_MSGID (21031 + 100) /* XXX */
      78  
      79    if (timeout == NULL)
      80      reply_msgid = IO_SELECT_REPLY_MSGID;
      81    else
      82      {
      83        struct timespec now;
      84  
      85        if (timeout->tv_sec < 0 || ! valid_nanoseconds (timeout->tv_nsec))
      86  	return __hurd_fail (EINVAL);
      87  
      88        err = __clock_gettime (CLOCK_REALTIME, &now);
      89        if (err)
      90  	return -1;
      91  
      92        ts.tv_sec = now.tv_sec + timeout->tv_sec;
      93        ts.tv_nsec = now.tv_nsec + timeout->tv_nsec;
      94  
      95        if (ts.tv_nsec >= 1000000000)
      96  	{
      97  	  ts.tv_sec++;
      98  	  ts.tv_nsec -= 1000000000;
      99  	}
     100  
     101        if (ts.tv_sec < 0)
     102  	ts.tv_sec = LONG_MAX; /* XXX */
     103  
     104        reply_msgid = IO_SELECT_TIMEOUT_REPLY_MSGID;
     105      }
     106  
     107    if (sigmask)
     108      {
     109        /* Add a port to the portset for the case when we get the signal even
     110           before calling __mach_msg.  */
     111  
     112        sigport = __mach_reply_port ();
     113  
     114        ss = _hurd_self_sigstate ();
     115        _hurd_sigstate_lock (ss);
     116        /* And tell the signal thread to message us when a signal arrives.  */
     117        ss->suspended = sigport;
     118        _hurd_sigstate_unlock (ss);
     119  
     120        if (__sigprocmask (SIG_SETMASK, sigmask, &oset))
     121  	{
     122  	  _hurd_sigstate_lock (ss);
     123  	  ss->suspended = MACH_PORT_NULL;
     124  	  _hurd_sigstate_unlock (ss);
     125  	  __mach_port_destroy (__mach_task_self (), sigport);
     126  	  return -1;
     127  	}
     128      }
     129    else
     130      sigport = MACH_PORT_NULL;
     131  
     132    if (pollfds)
     133      {
     134        int error = 0;
     135        /* Collect interesting descriptors from the user's `pollfd' array.
     136  	 We do a first pass that reads the user's array before taking
     137  	 any locks.  The second pass then only touches our own stack,
     138  	 and gets the port references.  */
     139  
     140        for (i = 0; i < nfds; ++i)
     141  	if (pollfds[i].fd >= 0)
     142  	  {
     143  	    int type = 0;
     144  	    if (pollfds[i].events & POLLIN)
     145  	      type |= SELECT_READ;
     146  	    if (pollfds[i].events & POLLOUT)
     147  	      type |= SELECT_WRITE;
     148  	    if (pollfds[i].events & POLLPRI)
     149  	      type |= SELECT_URG;
     150  
     151  	    d[i].io_port = pollfds[i].fd;
     152  	    d[i].type = type;
     153  	  }
     154  	else
     155  	  d[i].type = 0;
     156  
     157        HURD_CRITICAL_BEGIN;
     158        __mutex_lock (&_hurd_dtable_lock);
     159  
     160        for (i = 0; i < nfds; ++i)
     161  	if (d[i].type != 0)
     162  	  {
     163  	    const int fd = (int) d[i].io_port;
     164  
     165  	    if (fd < _hurd_dtablesize)
     166  	      {
     167  		d[i].cell = _hurd_dtable[fd];
     168  		if (d[i].cell != NULL)
     169  		  {
     170  		    d[i].io_port = _hurd_port_get (&d[i].cell->port,
     171  						   &d[i].ulink);
     172  		    if (d[i].io_port != MACH_PORT_NULL)
     173  		      continue;
     174  		  }
     175  	      }
     176  
     177  	    /* Bogus descriptor, make it EBADF already.  */
     178  	    d[i].error = EBADF;
     179  	    d[i].type = SELECT_ERROR;
     180  	    error = 1;
     181  	  }
     182  
     183        __mutex_unlock (&_hurd_dtable_lock);
     184        HURD_CRITICAL_END;
     185  
     186        if (error)
     187  	{
     188  	  /* Set timeout to 0.  */
     189  	  err = __clock_gettime (CLOCK_REALTIME, &ts);
     190  	  if (err)
     191  	    {
     192  	      /* Really bad luck.  */
     193  	      err = errno;
     194  	      HURD_CRITICAL_BEGIN;
     195  	      __mutex_lock (&_hurd_dtable_lock);
     196  	      while (i-- > 0)
     197  		if (d[i].type & ~SELECT_ERROR != 0)
     198  		  _hurd_port_free (&d[i].cell->port, &d[i].ulink,
     199  				   d[i].io_port);
     200  	      __mutex_unlock (&_hurd_dtable_lock);
     201  	      HURD_CRITICAL_END;
     202  	      if (sigmask)
     203  		__sigprocmask (SIG_SETMASK, &oset, NULL);
     204  	      errno = err;
     205  	      return -1;
     206  	    }
     207  	  reply_msgid = IO_SELECT_TIMEOUT_REPLY_MSGID;
     208  	}
     209  
     210        lastfd = i - 1;
     211        firstfd = i == 0 ? lastfd : 0;
     212      }
     213    else
     214      {
     215        /* Collect interested descriptors from the user's fd_set arguments.
     216  	 Use local copies so we can't crash from user bogosity.  */
     217  
     218        if (readfds == NULL)
     219  	FD_ZERO (&rfds);
     220        else
     221  	rfds = *readfds;
     222        if (writefds == NULL)
     223  	FD_ZERO (&wfds);
     224        else
     225  	wfds = *writefds;
     226        if (exceptfds == NULL)
     227  	FD_ZERO (&xfds);
     228        else
     229  	xfds = *exceptfds;
     230  
     231        HURD_CRITICAL_BEGIN;
     232        __mutex_lock (&_hurd_dtable_lock);
     233  
     234        /* Collect the ports for interesting FDs.  */
     235        firstfd = lastfd = -1;
     236        for (i = 0; i < nfds; ++i)
     237  	{
     238  	  int type = 0;
     239  	  if (readfds != NULL && FD_ISSET (i, &rfds))
     240  	    type |= SELECT_READ;
     241  	  if (writefds != NULL && FD_ISSET (i, &wfds))
     242  	    type |= SELECT_WRITE;
     243  	  if (exceptfds != NULL && FD_ISSET (i, &xfds))
     244  	    type |= SELECT_URG;
     245  	  d[i].type = type;
     246  	  if (type)
     247  	    {
     248  	      if (i < _hurd_dtablesize)
     249  		{
     250  		  d[i].cell = _hurd_dtable[i];
     251  		  if (d[i].cell != NULL)
     252  		    d[i].io_port = _hurd_port_get (&d[i].cell->port,
     253  						   &d[i].ulink);
     254  		}
     255  	      if (i >= _hurd_dtablesize || d[i].cell == NULL ||
     256  		  d[i].io_port == MACH_PORT_NULL)
     257  		{
     258  		  /* If one descriptor is bogus, we fail completely.  */
     259  		  while (i-- > 0)
     260  		    if (d[i].type != 0)
     261  		      _hurd_port_free (&d[i].cell->port, &d[i].ulink,
     262  				       d[i].io_port);
     263  		  break;
     264  		}
     265  	      lastfd = i;
     266  	      if (firstfd == -1)
     267  		firstfd = i;
     268  	    }
     269  	}
     270  
     271        __mutex_unlock (&_hurd_dtable_lock);
     272        HURD_CRITICAL_END;
     273  
     274        if (i < nfds)
     275  	{
     276  	  if (sigmask)
     277  	    __sigprocmask (SIG_SETMASK, &oset, NULL);
     278  	  return __hurd_fail (EBADF);
     279  	}
     280  
     281        if (nfds > _hurd_dtablesize)
     282  	nfds = _hurd_dtablesize;
     283      }
     284  
     285  
     286    err = 0;
     287    got = 0;
     288  
     289    /* Send them all io_select request messages.  */
     290  
     291    if (firstfd == -1)
     292      {
     293        if (sigport == MACH_PORT_NULL)
     294  	/* But not if there were no ports to deal with at all.
     295  	   We are just a pure timeout.  */
     296  	portset = __mach_reply_port ();
     297        else
     298  	portset = sigport;
     299      }
     300    else
     301      {
     302        portset = MACH_PORT_NULL;
     303  
     304        for (i = firstfd; i <= lastfd; ++i)
     305  	if (!(d[i].type & ~SELECT_ERROR))
     306  	  d[i].reply_port = MACH_PORT_NULL;
     307  	else
     308  	  {
     309  	    int type = d[i].type;
     310  	    d[i].reply_port = __mach_reply_port ();
     311  	    if (timeout == NULL)
     312  	      err = __io_select_request (d[i].io_port, d[i].reply_port, type);
     313  	    else
     314  	      err = __io_select_timeout_request (d[i].io_port, d[i].reply_port,
     315  						 ts, type);
     316  	    if (!err)
     317  	      {
     318  		if (firstfd == lastfd && sigport == MACH_PORT_NULL)
     319  		  /* When there's a single descriptor, we don't need a
     320  		     portset, so just pretend we have one, but really
     321  		     use the single reply port.  */
     322  		  portset = d[i].reply_port;
     323  		else if (got == 0)
     324  		  /* We've got multiple reply ports, so we need a port set to
     325  		     multiplex them.  */
     326  		  {
     327  		    /* We will wait again for a reply later.  */
     328  		    if (portset == MACH_PORT_NULL)
     329  		      /* Create the portset to receive all the replies on.  */
     330  		      err = __mach_port_allocate (__mach_task_self (),
     331  						  MACH_PORT_RIGHT_PORT_SET,
     332  						  &portset);
     333  		    if (! err)
     334  		      /* Put this reply port in the port set.  */
     335  		      __mach_port_move_member (__mach_task_self (),
     336  					       d[i].reply_port, portset);
     337  		  }
     338  	      }
     339  	    else
     340  	      {
     341  		/* No error should happen, but record it for later
     342  		   processing.  */
     343  		d[i].error = err;
     344  		d[i].type |= SELECT_ERROR;
     345  		++got;
     346  	      }
     347  	    _hurd_port_free (&d[i].cell->port, &d[i].ulink, d[i].io_port);
     348  	  }
     349  
     350        if (got == 0 && sigport != MACH_PORT_NULL)
     351  	{
     352  	  if (portset == MACH_PORT_NULL)
     353  	    /* Create the portset to receive the signal message on.  */
     354  	    __mach_port_allocate (__mach_task_self (), MACH_PORT_RIGHT_PORT_SET,
     355  				  &portset);
     356  	  /* Put the signal reply port in the port set.  */
     357  	  __mach_port_move_member (__mach_task_self (), sigport, portset);
     358  	}
     359      }
     360  
     361    /* GOT is the number of replies (or errors), while READY is the number of
     362       replies with at least one type bit set.  */
     363    ready = 0;
     364  
     365    /* Now wait for reply messages.  */
     366    if (!err && got == 0)
     367      {
     368        /* Now wait for io_select_reply messages on PORT,
     369  	 timing out as appropriate.  */
     370  
     371        union
     372  	{
     373  	  mach_msg_header_t head;
     374  #ifdef MACH_MSG_TRAILER_MINIMUM_SIZE
     375  	  struct
     376  	    {
     377  	      mach_msg_header_t head;
     378  	      NDR_record_t ndr;
     379  	      error_t err;
     380  	    } error;
     381  	  struct
     382  	    {
     383  	      mach_msg_header_t head;
     384  	      NDR_record_t ndr;
     385  	      error_t err;
     386  	      int result;
     387  	      mach_msg_trailer_t trailer;
     388  	    } success;
     389  #else
     390  	  struct
     391  	    {
     392  	      mach_msg_header_t head;
     393  	      mach_msg_type_t err_type;
     394  	      error_t err;
     395  	    } error;
     396  	  struct
     397  	    {
     398  	      mach_msg_header_t head;
     399  	      mach_msg_type_t err_type;
     400  	      error_t err;
     401  	      mach_msg_type_t result_type;
     402  	      int result;
     403  	    } success;
     404  #endif
     405  	} msg;
     406        mach_msg_option_t options;
     407        error_t msgerr;
     408  
     409        /* We rely on servers to implement the timeout, but when there are none,
     410  	 do it on the client side.  */
     411        if (timeout != NULL && firstfd == -1)
     412  	{
     413  	  options = MACH_RCV_TIMEOUT;
     414  	  to = timeout->tv_sec * 1000 + (timeout->tv_nsec + 999999) / 1000000;
     415  	}
     416        else
     417  	{
     418  	  options = 0;
     419  	  to = MACH_MSG_TIMEOUT_NONE;
     420  	}
     421  
     422        int cancel_oldtype = LIBC_CANCEL_ASYNC();
     423        while ((msgerr = __mach_msg (&msg.head,
     424  				   MACH_RCV_MSG | MACH_RCV_INTERRUPT | options,
     425  				   0, sizeof msg, portset, to,
     426  				   MACH_PORT_NULL)) == MACH_MSG_SUCCESS)
     427  	{
     428  	  LIBC_CANCEL_RESET (cancel_oldtype);
     429  
     430  	  /* We got a message.  Decode it.  */
     431  #ifdef MACH_MSG_TYPE_BIT
     432  	  static const mach_msg_type_t inttype = {
     433  	    .msgt_name = MACH_MSG_TYPE_INTEGER_T,
     434  	    .msgt_size = sizeof (integer_t) * 8,
     435  	    .msgt_number = 1,
     436  	    .msgt_inline = TRUE,
     437  	    .msgt_longform = FALSE,
     438  	    .msgt_deallocate = FALSE,
     439  	    .msgt_unused = 0
     440  	  };
     441  #endif
     442  
     443  	  if (sigport != MACH_PORT_NULL && sigport == msg.head.msgh_local_port)
     444  	    {
     445  	      /* We actually got interrupted by a signal before
     446  		 __mach_msg; poll for further responses and then
     447  		 return quickly. */
     448  	      err = EINTR;
     449  	      goto poll;
     450  	    }
     451  
     452  	  if (msg.head.msgh_id == reply_msgid
     453  	      && msg.head.msgh_size >= sizeof msg.error
     454  	      && !(msg.head.msgh_bits & MACH_MSGH_BITS_COMPLEX)
     455  #ifdef MACH_MSG_TYPE_BIT
     456  	      && !BAD_TYPECHECK (&msg.error.err_type, &inttype)
     457  #endif
     458  	      )
     459  	    {
     460  	      /* This is a properly formatted message so far.
     461  		 See if it is a success or a failure.  */
     462  	      if (msg.error.err == EINTR
     463  		  && msg.head.msgh_size == sizeof msg.error)
     464  		{
     465  		  /* EINTR response; poll for further responses
     466  		     and then return quickly.  */
     467  		  err = EINTR;
     468  		  goto poll;
     469  		}
     470  	      /* Keep in mind msg.success.result can be 0 if a timeout
     471  		 occurred.  */
     472  	      if (msg.error.err
     473  #ifdef MACH_MSG_TYPE_BIT
     474  		  || BAD_TYPECHECK (&msg.success.result_type, &inttype)
     475  #endif
     476  		  || msg.head.msgh_size != sizeof msg.success)
     477  		{
     478  		  /* Error or bogus reply.  */
     479  		  if (!msg.error.err)
     480  		    msg.error.err = EIO;
     481  		  __mach_msg_destroy (&msg.head);
     482  		}
     483  
     484  	      /* Look up the respondent's reply port and record its
     485  		 readiness.  */
     486  	      {
     487  		int had = got;
     488  		if (firstfd != -1)
     489  		  for (i = firstfd; i <= lastfd; ++i)
     490  		    if (d[i].type
     491  			&& d[i].reply_port == msg.head.msgh_local_port)
     492  		      {
     493  			if (msg.error.err)
     494  			  {
     495  			    d[i].error = msg.error.err;
     496  			    d[i].type = SELECT_ERROR;
     497  			    ++ready;
     498  			  }
     499  			else
     500  			  {
     501  			    d[i].type &= msg.success.result;
     502  			    if (d[i].type)
     503  			      ++ready;
     504  			  }
     505  
     506  			d[i].type |= SELECT_RETURNED;
     507  			++got;
     508  		      }
     509  		assert (got > had);
     510  	      }
     511  	    }
     512  
     513  	  if (msg.head.msgh_remote_port != MACH_PORT_NULL)
     514  	    __mach_port_deallocate (__mach_task_self (),
     515  				    msg.head.msgh_remote_port);
     516  
     517  	  if (got)
     518  	  poll:
     519  	    {
     520  	      /* Poll for another message.  */
     521  	      to = 0;
     522  	      options |= MACH_RCV_TIMEOUT;
     523  	    }
     524  	}
     525        LIBC_CANCEL_RESET (cancel_oldtype);
     526  
     527        if (msgerr == MACH_RCV_INTERRUPTED)
     528  	/* Interruption on our side (e.g. signal reception).  */
     529  	err = EINTR;
     530  
     531        if (ready)
     532  	/* At least one descriptor is known to be ready now, so we will
     533  	   return success.  */
     534  	err = 0;
     535      }
     536  
     537    if (firstfd != -1)
     538      for (i = firstfd; i <= lastfd; ++i)
     539        if (d[i].reply_port != MACH_PORT_NULL)
     540  	__mach_port_destroy (__mach_task_self (), d[i].reply_port);
     541  
     542    if (sigport != MACH_PORT_NULL)
     543      {
     544        _hurd_sigstate_lock (ss);
     545        ss->suspended = MACH_PORT_NULL;
     546        _hurd_sigstate_unlock (ss);
     547        __mach_port_destroy (__mach_task_self (), sigport);
     548      }
     549  
     550    if ((firstfd == -1 && sigport == MACH_PORT_NULL)
     551        || ((firstfd != lastfd || sigport != MACH_PORT_NULL) && portset != MACH_PORT_NULL))
     552      /* Destroy PORTSET, but only if it's not actually the reply port for a
     553         single descriptor (in which case it's destroyed in the previous loop;
     554         not doing it here is just a bit more efficient).  */
     555      __mach_port_destroy (__mach_task_self (), portset);
     556  
     557    if (err)
     558      {
     559        if (sigmask)
     560  	__sigprocmask (SIG_SETMASK, &oset, NULL);
     561        return __hurd_fail (err);
     562      }
     563  
     564    if (pollfds)
     565      /* Fill in the `revents' members of the user's array.  */
     566      for (i = 0; i < nfds; ++i)
     567        {
     568  	int type = d[i].type;
     569  	int revents = 0;
     570  
     571  	if (type & SELECT_ERROR)
     572  	  switch (d[i].error)
     573  	    {
     574  	      case EPIPE:
     575  		revents = POLLHUP;
     576  		break;
     577  	      case EBADF:
     578  		revents = POLLNVAL;
     579  		break;
     580  	      default:
     581  		revents = POLLERR;
     582  		break;
     583  	    }
     584  	else
     585  	  if (type & SELECT_RETURNED)
     586  	    {
     587  	      if (type & SELECT_READ)
     588  		revents |= POLLIN;
     589  	      if (type & SELECT_WRITE)
     590  		revents |= POLLOUT;
     591  	      if (type & SELECT_URG)
     592  		revents |= POLLPRI;
     593  	    }
     594  
     595  	pollfds[i].revents = revents;
     596        }
     597    else
     598      {
     599        /* Below we recalculate READY to include an increment for each operation
     600  	 allowed on each fd.  */
     601        ready = 0;
     602  
     603        /* Set the user bitarrays.  We only ever have to clear bits, as all
     604  	 desired ones are initially set.  */
     605        if (firstfd != -1)
     606  	for (i = firstfd; i <= lastfd; ++i)
     607  	  {
     608  	    int type = d[i].type;
     609  
     610  	    if ((type & SELECT_RETURNED) == 0)
     611  	      type = 0;
     612  
     613  	    /* Callers of select don't expect to see errors, so we simulate
     614  	       readiness of the erring object and the next call hopefully
     615  	       will get the error again.  */
     616  	    if (type & SELECT_ERROR)
     617  	      {
     618  		type = 0;
     619  		if (readfds != NULL && FD_ISSET (i, readfds))
     620  		  type |= SELECT_READ;
     621  		if (writefds != NULL && FD_ISSET (i, writefds))
     622  		  type |= SELECT_WRITE;
     623  		if (exceptfds != NULL && FD_ISSET (i, exceptfds))
     624  		  type |= SELECT_URG;
     625  	      }
     626  
     627  	    if (type & SELECT_READ)
     628  	      ready++;
     629  	    else if (readfds)
     630  	      FD_CLR (i, readfds);
     631  	    if (type & SELECT_WRITE)
     632  	      ready++;
     633  	    else if (writefds)
     634  	      FD_CLR (i, writefds);
     635  	    if (type & SELECT_URG)
     636  	      ready++;
     637  	    else if (exceptfds)
     638  	      FD_CLR (i, exceptfds);
     639  	  }
     640      }
     641  
     642    if (sigmask && __sigprocmask (SIG_SETMASK, &oset, NULL))
     643      return -1;
     644  
     645    return ready;
     646  }